From 12df7b1b7e3dec1b6691b678702bb0bdae8fa578 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Tue, 4 Mar 2014 19:10:17 +0100 Subject: [PATCH 001/277] drm/cma: Fix printk formats in drm_gem_cma_describe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following warnings when building for arm64... drivers/gpu/drm/drm_gem_cma_helper.c: In function ‘drm_gem_cma_descr ibe’: drivers/gpu/drm/drm_gem_cma_helper.c:273:4: warning: format ‘%Zx’ expects argument of type ‘size_t’, but argument 6 has type ‘dma_addr_t’ [-Wformat=] off, cma_obj->paddr, cma_obj->vaddr, obj->size); ^ drivers/gpu/drm/drm_gem_cma_helper.c:273:4: warning: format ‘%d’ expects argument of type ‘int’, but argument 8 has type ‘size_t’ [-Wformat=] Signed-off-by: Jon Medhurst --- drivers/gpu/drm/drm_gem_cma_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 0a7e011509bd..c25b7a0eb0e3 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -262,9 +262,9 @@ void drm_gem_cma_describe(struct drm_gem_cma_object *cma_obj, struct seq_file *m if (obj->map_list.map) off = (uint64_t)obj->map_list.hash.key; - seq_printf(m, "%2d (%2d) %08llx %08Zx %p %d", + seq_printf(m, "%2d (%2d) %08llx %pad %p %zd", obj->name, obj->refcount.refcount.counter, - off, cma_obj->paddr, cma_obj->vaddr, obj->size); + off, &cma_obj->paddr, cma_obj->vaddr, obj->size); seq_printf(m, "\n"); } From 1248206dd0384a9141501b39bfcc67053fa97ff5 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 9 Apr 2015 10:40:50 +0100 Subject: [PATCH 002/277] =?UTF-8?q?netfilter:=20nfnetlink=5Fqueue:=20Fix?= =?UTF-8?q?=20"discards=20=E2=80=98const=E2=80=99=20qualifier"=20warning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nfqnl_zcopy can now modify the 'from' sk_buff, so drop the const qualifier and fix build warnings like: net/netfilter/nfnetlink_queue_core.c: In function ‘nfqnl_zcopy’: net/netfilter/nfnetlink_queue_core.c:264:15: warning: passing argument 1 of ‘skb_orphan_frags’ discards ‘const’ qualifier from pointer target type if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { Fixes: c5f0c0e75254 ("core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors") Signed-off-by: Jon Medhurst --- net/netfilter/nfnetlink_queue_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 2b8199f68785..5497f50af2f0 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -228,7 +228,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) } static int -nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +nfqnl_zcopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ From 99984c61919c62621c62befb490d820cef213b0e Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Mon, 13 Oct 2014 15:54:20 -0700 Subject: [PATCH 003/277] scripts/sortextable: suppress warning: `relocs_size' may be used uninitialized In file included from scripts/sortextable.c:194:0: scripts/sortextable.c: In function `main': scripts/sortextable.h:176:3: warning: `relocs_size' may be used uninitialized in this function [-Wmaybe-uninitialized] memset(relocs, 0, relocs_size); ^ scripts/sortextable.h:106:6: note: `relocs_size' was declared here int relocs_size; ^ In file included from scripts/sortextable.c:192:0: scripts/sortextable.h:176:3: warning: `relocs_size' may be used uninitialized in this function [-Wmaybe-uninitialized] memset(relocs, 0, relocs_size); ^ scripts/sortextable.h:106:6: note: `relocs_size' was declared here int relocs_size; ^ gcc 4.9.1 Signed-off-by: Tim Gardner Reviewed-by: Jamie Iles Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 7cbc0ea79da2cbe70d8da9319895f07f872a3190) Signed-off-by: Jon Medhurst --- scripts/sortextable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sortextable.h b/scripts/sortextable.h index f5eb43d42926..3f064799a8c3 100644 --- a/scripts/sortextable.h +++ b/scripts/sortextable.h @@ -101,7 +101,7 @@ do_func(Elf_Ehdr *ehdr, char const *const fname, table_sort_t custom_sort) Elf_Sym *sort_needed_sym; Elf_Shdr *sort_needed_sec; Elf_Rel *relocs = NULL; - int relocs_size; + int relocs_size = 0; uint32_t *sort_done_location; const char *secstrtab; const char *strtab; From 6566ace5507e27c8b8a51c5e89ddfd19506c5f6e Mon Sep 17 00:00:00 2001 From: Martin Walch Date: Thu, 3 Oct 2013 18:35:16 +0200 Subject: [PATCH 004/277] kconfig: fix bug in search results string: use strlen(gstr->s), not gstr->len The struct gstr has a capacity that may differ from the actual string length. However, a string manipulation in the function search_conf made the assumption that it is the same, which led to messing up some search results, especially when the content of the gstr in use had not yet reached at least 63 chars. Signed-off-by: Martin Walch Acked-by: Wang YanQing Acked-by: Benjamin Poirier Reviewed-by: "Yann E. MORIN" Signed-off-by: "Yann E. MORIN" (cherry picked from commit 503c823048e81cc85c0e9d8c297cc70203e335e5) Signed-off-by: Jon Medhurst --- scripts/kconfig/menu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index fd3f0180e08f..24ad40942586 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -564,7 +564,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, for (j = 4; --i >= 0; j += 2) { menu = submenu[i]; if (head && location && menu == location) - jump->offset = r->len - 1; + jump->offset = strlen(r->s); str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu))); if (menu->sym) { From 1ae008b6737e92e745b56c2b2fe32c89f7bcc314 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20K=C3=BCmmel?= Date: Tue, 4 Nov 2014 12:01:59 +0100 Subject: [PATCH 005/277] =?UTF-8?q?kconfig:=20Fix=20warning=20"=E2=80=98ju?= =?UTF-8?q?mp=E2=80=99=20may=20be=20used=20uninitialized"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Warning: In file included from scripts/kconfig/zconf.tab.c:2537:0: scripts/kconfig/menu.c: In function ‘get_symbol_str’: scripts/kconfig/menu.c:590:18: warning: ‘jump’ may be used uninitialized in this function [-Wmaybe-uninitialized] jump->offset = strlen(r->s); Simplifies the test logic because (head && local) means (jump != 0) and makes GCC happy when checking if the jump pointer was initialized. Signed-off-by: Peter Kümmel Signed-off-by: Michal Marek (cherry picked from commit 2d560306096739e2251329ab5c16059311a151b0) Signed-off-by: Jon Medhurst --- scripts/kconfig/menu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 24ad40942586..d908e5496532 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -525,7 +525,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, { int i, j; struct menu *submenu[8], *menu, *location = NULL; - struct jump_key *jump; + struct jump_key *jump = NULL; str_printf(r, _("Prompt: %s\n"), _(prop->text)); menu = prop->menu->parent; @@ -563,7 +563,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, str_printf(r, _(" Location:\n")); for (j = 4; --i >= 0; j += 2) { menu = submenu[i]; - if (head && location && menu == location) + if (jump && menu == location) jump->offset = strlen(r->s); str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu))); From b4ef43d5de3b44a2d98198d5d9b66d9fc97b2924 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 9 Apr 2015 11:30:36 +0100 Subject: [PATCH 006/277] configs: Remove duplicate CONFIG_FUNCTION_TRACER To avoid the following warning when running merge_config.sh with this config fragment... ./.tmp.config.37TtCkZvtp:92:warning: override: reassigning to symbol FUNCTION_TRACER Signed-off-by: Jon Medhurst --- linaro/configs/linaro-base.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/linaro/configs/linaro-base.conf b/linaro/configs/linaro-base.conf index 15f6ea784f63..c2a5143518fb 100644 --- a/linaro/configs/linaro-base.conf +++ b/linaro/configs/linaro-base.conf @@ -98,7 +98,6 @@ CONFIG_KGDB=y CONFIG_KGDB_TESTS=y CONFIG_OF_IDLE_STATES=y CONFIG_FTRACE=y -CONFIG_FUNCTION_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_FUNCTION_PROFILER=y From 6902d1f6c261ed7fe256f75611bc7125a160b621 Mon Sep 17 00:00:00 2001 From: "rahul.khandelwal" Date: Fri, 17 Apr 2015 11:45:23 +0530 Subject: [PATCH 007/277] subsystem: CPU FREQUENCY DRIVERS- Set cpu_load calculation on current frequency In timer, cpu_load is calcuated on target_freq. cpu_load = loadadjfreq / pcpu->target_freq; But cpu is actually running on current freq i.e. pcpu->policy->cur. So cpu_load should be calculated on current frequency. cpu_load = loadadjfreq / pcpu->policy->cur; Change-Id: I89db6b68e9f82aa52077f6bf7d819dab74265790 Signed-off-by: rahul.khandelwal --- drivers/cpufreq/cpufreq_interactive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 0d4a4f30efab..0654e4043558 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -403,7 +403,7 @@ static void cpufreq_interactive_timer(unsigned long data) spin_lock_irqsave(&pcpu->target_freq_lock, flags); do_div(cputime_speedadj, delta_time); loadadjfreq = (unsigned int)cputime_speedadj * 100; - cpu_load = loadadjfreq / pcpu->target_freq; + cpu_load = loadadjfreq / pcpu->policy->cur; tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime; if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) { From 8f21510a905be77dca0c4e7f068e5338d97f6162 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 26 Mar 2015 17:14:55 +0800 Subject: [PATCH 008/277] ALSA: hda - Add one more node in the EAPD supporting candidate list commit af95b41426e0b58279f8ff0ebe420df49a4e96b8 upstream. We have a HP machine which use the codec node 0x17 connecting the internal speaker, and from the node capability, we saw the EAPD, if we don't set the EAPD on for this node, the internal speaker can't output any sound. BugLink: https://bugs.launchpad.net/bugs/1436745 Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4008034b6ebe..4ae22e36156c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -266,7 +266,7 @@ static void alc_auto_setup_eapd(struct hda_codec *codec, bool on) { /* We currently only handle front, HP */ static hda_nid_t pins[] = { - 0x0f, 0x10, 0x14, 0x15, 0 + 0x0f, 0x10, 0x14, 0x15, 0x17, 0 }; hda_nid_t *p; for (p = pins; *p; p++) From 082bf2f8175c59b967ddd4cbffa09b0529e1d742 Mon Sep 17 00:00:00 2001 From: "Dmitry M. Fedin" Date: Thu, 9 Apr 2015 17:37:03 +0300 Subject: [PATCH 009/277] ALSA: usb - Creative USB X-Fi Pro SB1095 volume knob support commit 3dc8523fa7412e731441c01fb33f003eb3cfece1 upstream. Adds an entry for Creative USB X-Fi to the rc_config array in mixer_quirks.c to allow use of volume knob on the device. Adds support for newer X-Fi Pro card, known as "Model No. SB1095" with USB ID "041e:3237" Signed-off-by: Dmitry M. Fedin Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index c89a5bf5c00e..c311681bd390 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -175,6 +175,7 @@ static const struct rc_config { { USB_ID(0x041e, 0x3040), 2, 2, 6, 6, 2, 0x6e91 }, /* Live! 24-bit */ { USB_ID(0x041e, 0x3042), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 */ { USB_ID(0x041e, 0x30df), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ + { USB_ID(0x041e, 0x3237), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ { USB_ID(0x041e, 0x3048), 2, 2, 6, 6, 2, 0x6e91 }, /* Toshiba SB0500 */ }; From a13a3624298bbcb80c6ba46fbf7e2213a3d41013 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Apr 2015 20:47:55 +0200 Subject: [PATCH 010/277] ALSA: hda - Fix headphone pin config for Lifebook T731 commit cc7016ab1a22fb26f388c2fb2b692b89897cbc3e upstream. Some BIOS version of Fujitsu Lifebook T731 seems to set up the headphone pin (0x21) without the assoc number 0x0f while it's set only to the output on the docking port (0x1a). With the recent commit [03ad6a8c93b6: ALSA: hda - Fix "PCM" name being used on one DAC when there are two DACs], this resulted in the weird mixer element mapping where the headphone on the laptop is assigned as a shared volume with the speaker and the docking port is assigned as an individual headphone. This patch improves the situation by correcting the headphone pin config to the more appropriate value. Reported-and-tested-by: Taylor Smock Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4ae22e36156c..a8eb7fe2766e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3363,6 +3363,7 @@ enum { ALC269_FIXUP_QUANTA_MUTE, ALC269_FIXUP_LIFEBOOK, ALC269_FIXUP_LIFEBOOK_EXTMIC, + ALC269_FIXUP_LIFEBOOK_HP_PIN, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -3477,6 +3478,13 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC269_FIXUP_LIFEBOOK_HP_PIN] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x21, 0x0221102f }, /* HP out */ + { } + }, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -3727,6 +3735,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK_VENDOR(0x1025, "Acer Aspire", ALC271_FIXUP_DMIC), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), From 9dece36273f91a8e7467b7eadbe319fa924eec83 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 23 Mar 2015 18:01:35 -0700 Subject: [PATCH 011/277] selinux: fix sel_write_enforce broken return value commit 6436a123a147db51a0b06024a8350f4c230e73ff upstream. Return a negative error value like the rest of the entries in this function. Signed-off-by: Joe Perches Acked-by: Stephen Smalley [PM: tweaked subject line] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/selinuxfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index ff427733c290..464be51025f6 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -150,7 +150,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, goto out; /* No partial writes. */ - length = EINVAL; + length = -EINVAL; if (*ppos != 0) goto out; From 75a518b09f0939ad5d5bf12e91b9faec12c66d10 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 15 Apr 2015 19:00:32 +0100 Subject: [PATCH 012/277] tcp: Fix crash in TCP Fast Open Commit 355a901e6cf1 ("tcp: make connect() mem charging friendly") changed tcp_send_syn_data() to perform an open-coded copy of the 'syn' skb rather than using skb_copy_expand(). The open-coded copy does not cover the skb_shared_info::gso_segs field, so in the new skb it is left set to 0. When this commit was backported into stable branches between 3.10.y and 3.16.7-ckty inclusive, it triggered the BUG() in tcp_transmit_skb(). Since Linux 3.18 the GSO segment count is kept in the tcp_skb_cb::tcp_gso_segs field and tcp_send_syn_data() does copy the tcp_skb_cb structure to the new skb, so mainline and newer stable branches are not affected. Set skb_shared_info::gso_segs to the correct value of 1. Signed-off-by: Ben Hutchings Acked-by: Eric Dumazet Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 913dc4f49b10..92b5e1f7d3b0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2909,6 +2909,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); + skb_shinfo(syn_data)->gso_segs = 1; if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), fo->data->msg_iov, 0, space))) { kfree_skb(syn_data); From 3af9e9334102d7436bb509c8d3d99c695190c58f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Sep 2014 16:47:52 +0300 Subject: [PATCH 013/277] IB/core: Avoid leakage from kernel to user space commit 377b513485fd885dea1083a9a5430df65b35e048 upstream. Clear the reserved field of struct ib_uverbs_async_event_desc which is copied to user space. Signed-off-by: Eli Cohen Reviewed-by: Yann Droneaud Signed-off-by: Roland Dreier Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2c6f0f2ecd9d..949b38633496 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -460,6 +460,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, entry->desc.async.element = element; entry->desc.async.event_type = event; + entry->desc.async.reserved = 0; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); From 0cfcc3250e9e571ef79627850430e1fda55f4cad Mon Sep 17 00:00:00 2001 From: Shachar Raindel Date: Wed, 18 Mar 2015 17:39:08 +0000 Subject: [PATCH 014/277] IB/uverbs: Prevent integer overflow in ib_umem_get address arithmetic commit 8494057ab5e40df590ef6ef7d66324d3ae33356b upstream. Properly verify that the resulting page aligned end address is larger than both the start address and the length of the memory area requested. Both the start and length arguments for ib_umem_get are controlled by the user. A misbehaving user can provide values which will cause an integer overflow when calculating the page aligned end address. This overflow can cause also miscalculation of the number of pages mapped, and additional logic issues. Addresses: CVE-2014-8159 Signed-off-by: Shachar Raindel Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/umem.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index a84112322071..055ebebc07dd 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -94,6 +94,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. + */ + if ((PAGE_ALIGN(addr + size) <= size) || + (PAGE_ALIGN(addr + size) <= addr)) + return ERR_PTR(-EINVAL); + if (!can_do_mlock()) return ERR_PTR(-EPERM); From a0688f524c6dcd43e5dff5f57f46e962e4c44548 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 16 Mar 2015 09:08:07 +0200 Subject: [PATCH 015/277] iwlwifi: dvm: run INIT firmware again upon .start() commit 9c8928f5176766bec79f272bd47b7124e11cccbd upstream. The assumption before this patch was that we don't need to run again the INIT firmware after the system booted. The INIT firmware runs calibrations which impact the physical layer's behavior. Users reported that it may be helpful to run these calibrations again every time the interface is brought up. The penatly is minimal, since the calibrations run fast. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=94341 Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/dvm/dev.h | 1 - drivers/net/wireless/iwlwifi/dvm/ucode.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/dev.h b/drivers/net/wireless/iwlwifi/dvm/dev.h index 71ea77576d22..e783ea0e3837 100644 --- a/drivers/net/wireless/iwlwifi/dvm/dev.h +++ b/drivers/net/wireless/iwlwifi/dvm/dev.h @@ -670,7 +670,6 @@ struct iwl_priv { unsigned long reload_jiffies; int reload_count; bool ucode_loaded; - bool init_ucode_run; /* Don't run init uCode again */ u8 plcp_delta_threshold; diff --git a/drivers/net/wireless/iwlwifi/dvm/ucode.c b/drivers/net/wireless/iwlwifi/dvm/ucode.c index 0a1cdc5e856b..5ad94a8080b8 100644 --- a/drivers/net/wireless/iwlwifi/dvm/ucode.c +++ b/drivers/net/wireless/iwlwifi/dvm/ucode.c @@ -425,9 +425,6 @@ int iwl_run_init_ucode(struct iwl_priv *priv) if (!priv->fw->img[IWL_UCODE_INIT].sec[0].len) return 0; - if (priv->init_ucode_run) - return 0; - iwl_init_notification_wait(&priv->notif_wait, &calib_wait, calib_complete, ARRAY_SIZE(calib_complete), iwlagn_wait_calib, priv); @@ -447,8 +444,6 @@ int iwl_run_init_ucode(struct iwl_priv *priv) */ ret = iwl_wait_notification(&priv->notif_wait, &calib_wait, UCODE_CALIB_TIMEOUT); - if (!ret) - priv->init_ucode_run = true; goto out; From ef6b5eaddd79fd6515c4eaf7506839bd9c60921c Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 27 Jan 2015 18:08:22 +0530 Subject: [PATCH 016/277] nbd: fix possible memory leak commit ff6b8090e26ef7649ef0cc6b42389141ef48b0cf upstream. we have already allocated memory for nbd_dev, but we were not releasing that memory and just returning the error value. Signed-off-by: Sudip Mukherjee Acked-by: Paul Clements Signed-off-by: Markus Pargmann Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index cf1576d54363..a5c987ae665d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -815,10 +815,6 @@ static int __init nbd_init(void) return -EINVAL; } - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); - if (!nbd_dev) - return -ENOMEM; - part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); @@ -840,6 +836,10 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; + nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); + if (!nbd_dev) + return -ENOMEM; + for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) From dfb06c85571e687aee393dfb72222e3eeec24813 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Wed, 25 Mar 2015 15:55:20 -0700 Subject: [PATCH 017/277] mm/memory hotplug: postpone the reset of obsolete pgdat commit b0dc3a342af36f95a68fe229b8f0f73552c5ca08 upstream. Qiu Xishi reported the following BUG when testing hot-add/hot-remove node under stress condition: BUG: unable to handle kernel paging request at 0000000000025f60 IP: next_online_pgdat+0x1/0x50 PGD 0 Oops: 0000 [#1] SMP ACPI: Device does not support D3cold Modules linked in: fuse nls_iso8859_1 nls_cp437 vfat fat loop dm_mod coretemp mperf crc32c_intel ghash_clmulni_intel aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 pcspkr microcode igb dca i2c_algo_bit ipv6 megaraid_sas iTCO_wdt i2c_i801 i2c_core iTCO_vendor_support tg3 sg hwmon ptp lpc_ich pps_core mfd_core acpi_pad rtc_cmos button ext3 jbd mbcache sd_mod crc_t10dif scsi_dh_alua scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh ahci libahci libata scsi_mod [last unloaded: rasf] CPU: 23 PID: 238 Comm: kworker/23:1 Tainted: G O 3.10.15-5885-euler0302 #1 Hardware name: HUAWEI TECHNOLOGIES CO.,LTD. Huawei N1/Huawei N1, BIOS V100R001 03/02/2015 Workqueue: events vmstat_update task: ffffa800d32c0000 ti: ffffa800d32ae000 task.ti: ffffa800d32ae000 RIP: 0010: next_online_pgdat+0x1/0x50 RSP: 0018:ffffa800d32afce8 EFLAGS: 00010286 RAX: 0000000000001440 RBX: ffffffff81da53b8 RCX: 0000000000000082 RDX: 0000000000000000 RSI: 0000000000000082 RDI: 0000000000000000 RBP: ffffa800d32afd28 R08: ffffffff81c93bfc R09: ffffffff81cbdc96 R10: 00000000000040ec R11: 00000000000000a0 R12: ffffa800fffb3440 R13: ffffa800d32afd38 R14: 0000000000000017 R15: ffffa800e6616800 FS: 0000000000000000(0000) GS:ffffa800e6600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000025f60 CR3: 0000000001a0b000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: refresh_cpu_vm_stats+0xd0/0x140 vmstat_update+0x11/0x50 process_one_work+0x194/0x3d0 worker_thread+0x12b/0x410 kthread+0xc6/0xd0 ret_from_fork+0x7c/0xb0 The cause is the "memset(pgdat, 0, sizeof(*pgdat))" at the end of try_offline_node, which will reset all the content of pgdat to 0, as the pgdat is accessed lock-free, so that the users still using the pgdat will panic, such as the vmstat_update routine. process A: offline node XX: vmstat_updat() refresh_cpu_vm_stats() for_each_populated_zone() find online node XX cond_resched() offline cpu and memory, then try_offline_node() node_set_offline(nid), and memset(pgdat, 0, sizeof(*pgdat)) zone = next_zone(zone) pg_data_t *pgdat = zone->zone_pgdat; // here pgdat is NULL now next_online_pgdat(pgdat) next_online_node(pgdat->node_id); // NULL pointer access So the solution here is postponing the reset of obsolete pgdat from try_offline_node() to hotadd_new_pgdat(), and just resetting pgdat->nr_zones and pgdat->classzone_idx to be 0 rather than the memset 0 to avoid breaking pointer information in pgdat. Signed-off-by: Gu Zheng Reported-by: Xishi Qiu Suggested-by: KAMEZAWA Hiroyuki Cc: David Rientjes Cc: Yasuaki Ishimatsu Cc: Taku Izumi Cc: Tang Chen Cc: Xie XiuQi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory_hotplug.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1ad92b46753e..2298237db142 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1039,6 +1039,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) return NULL; arch_refresh_nodedata(nid, pgdat); + } else { + /* Reset the nr_zones and classzone_idx to 0 before reuse */ + pgdat->nr_zones = 0; + pgdat->classzone_idx = 0; } /* we can use NODE_DATA(nid) from here */ @@ -1802,15 +1806,6 @@ void try_offline_node(int nid) if (is_vmalloc_addr(zone->wait_table)) vfree(zone->wait_table); } - - /* - * Since there is no way to guarentee the address of pgdat/zone is not - * on stack of any kernel threads or used by other kernel objects - * without reference counting or other symchronizing method, do not - * reset node_data and free pgdat here. Just reset it to 0 and reuse - * the memory when the node is online again. - */ - memset(pgdat, 0, sizeof(*pgdat)); } EXPORT_SYMBOL(try_offline_node); From f16678367dae3cd3da3fd0c93b13bad7ad8d301b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 4 Mar 2015 10:37:43 -0500 Subject: [PATCH 018/277] writeback: add missing INITIAL_JIFFIES init in global_update_bandwidth() commit 7d70e15480c0450d2bfafaad338a32e884fc215e upstream. global_update_bandwidth() uses static variable update_time as the timestamp for the last update but forgets to initialize it to INITIALIZE_JIFFIES. This means that global_dirty_limit will be 5 mins into the future on 32bit and some large amount jiffies into the past on 64bit. This isn't critical as the only effect is that global_dirty_limit won't be updated for the first 5 mins after booting on 32bit machines, especially given the auxiliary nature of global_dirty_limit's role - protecting against global dirty threshold's sudden dips; however, it does lead to unintended suboptimal behavior. Fix it. Fixes: c42843f2f0bb ("writeback: introduce smoothed global dirty limit") Signed-off-by: Tejun Heo Acked-by: Jan Kara Cc: Wu Fengguang Cc: Jens Axboe Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 73cbc5dc150b..d42a8a83aaf1 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -858,7 +858,7 @@ static void global_update_bandwidth(unsigned long thresh, unsigned long now) { static DEFINE_SPINLOCK(dirty_lock); - static unsigned long update_time; + static unsigned long update_time = INITIAL_JIFFIES; /* * check locklessly first to optimize away locking for the most time From e58126f57083a3160c2883d7cf22c38ed1a75f58 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 23 Mar 2015 00:18:48 -0400 Subject: [PATCH 019/277] writeback: fix possible underflow in write bandwidth calculation commit c72efb658f7c8b27ca3d0efb5cfd5ded9fcac89e upstream. From 1ebf33901ecc75d9496862dceb1ef0377980587c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 23 Mar 2015 00:08:19 -0400 2f800fbd777b ("writeback: fix dirtied pages accounting on redirty") introduced account_page_redirty() which reverts stat updates for a redirtied page, making BDI_DIRTIED no longer monotonically increasing. bdi_update_write_bandwidth() uses the delta in BDI_DIRTIED as the basis for bandwidth calculation. While unlikely, since the above patch, the newer value may be lower than the recorded past value and underflow the bandwidth calculation leading to a wild result. Fix it by subtracing min of the old and new values when calculating delta. AFAIK, there hasn't been any report of it happening but the resulting erratic behavior would be non-critical and temporary, so it's possible that the issue is happening without being reported. The risk of the fix is very low, so tagged for -stable. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Fixes: 2f800fbd777b ("writeback: fix dirtied pages accounting on redirty") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- mm/page-writeback.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d42a8a83aaf1..b034f79deb0e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -793,8 +793,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi, * bw * elapsed + write_bandwidth * (period - elapsed) * write_bandwidth = --------------------------------------------------- * period + * + * @written may have decreased due to account_page_redirty(). + * Avoid underflowing @bw calculation. */ - bw = written - bdi->written_stamp; + bw = written - min(written, bdi->written_stamp); bw *= HZ; if (unlikely(elapsed > period)) { do_div(bw, elapsed); From 9e79894da599f54425592a1ea2d5dfb4397b7367 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 18 Mar 2015 23:18:40 -0400 Subject: [PATCH 020/277] radeon: Do not directly dereference pointers to BIOS area. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f2c9e560b406f2f6b14b345c7da33467dee9cdf2 upstream. Use readb() and memcpy_fromio() accessors instead. Reviewed-by: Christian König Signed-off-by: David S. Miller Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_bios.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index b131520521e4..72b02483ff03 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -76,7 +76,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev) static bool radeon_read_bios(struct radeon_device *rdev) { - uint8_t __iomem *bios; + uint8_t __iomem *bios, val1, val2; size_t size; rdev->bios = NULL; @@ -86,15 +86,19 @@ static bool radeon_read_bios(struct radeon_device *rdev) return false; } - if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) { + val1 = readb(&bios[0]); + val2 = readb(&bios[1]); + + if (size == 0 || val1 != 0x55 || val2 != 0xaa) { pci_unmap_rom(rdev->pdev, bios); return false; } - rdev->bios = kmemdup(bios, size, GFP_KERNEL); + rdev->bios = kzalloc(size, GFP_KERNEL); if (rdev->bios == NULL) { pci_unmap_rom(rdev->pdev, bios); return false; } + memcpy_fromio(rdev->bios, bios, size); pci_unmap_rom(rdev->pdev, bios); return true; } From 4ae961697ec5c240d3c1e0ba29b75e05800550d0 Mon Sep 17 00:00:00 2001 From: Doug Goldstein Date: Sun, 15 Mar 2015 21:56:04 -0500 Subject: [PATCH 021/277] USB: ftdi_sio: Added custom PID for Synapse Wireless product commit 4899c054a90439477b24da8977db8d738376fe90 upstream. Synapse Wireless uses the FTDI VID with a custom PID of 0x9090 for their SNAP Stick 200 product. Signed-off-by: Doug Goldstein Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index cf127a080644..2f2b23cc5cd5 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -620,6 +620,7 @@ static struct usb_device_id id_table_combined [] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, /* * ELV devices: */ diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index e8d352615297..e906b6aa2424 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -561,6 +561,12 @@ */ #define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +/* + * Synapse Wireless product ids (FTDI_VID) + * http://www.synapse-wireless.com + */ +#define FTDI_SYNAPSE_SS200_PID 0x9090 /* SS200 - SNAP Stick 200 */ + /********************************/ /** third-party VID/PID combos **/ From 18c9e01df5c59c6a9dc70d3427c4e34080610e2d Mon Sep 17 00:00:00 2001 From: Doug Goldstein Date: Mon, 23 Mar 2015 20:34:48 -0500 Subject: [PATCH 022/277] USB: ftdi_sio: Use jtag quirk for SNAP Connect E10 commit b229a0f840f774d29d8fedbf5deb344ca36b7f1a upstream. This patch uses the existing CALAO Systems ftdi_8u2232c_probe in order to avoid attaching a TTY to the JTAG port as this board is based on the CALAO Systems reference design and needs the same fix up. Signed-off-by: Doug Goldstein [johan: clean up probe logic ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 2f2b23cc5cd5..4d918d5f945a 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1900,8 +1900,12 @@ static int ftdi_8u2232c_probe(struct usb_serial *serial) { struct usb_device *udev = serial->dev; - if ((udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) || - (udev->product && !strcmp(udev->product, "BeagleBone/XDS100V2"))) + if (udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) + return ftdi_jtag_probe(serial); + + if (udev->product && + (!strcmp(udev->product, "BeagleBone/XDS100V2") || + !strcmp(udev->product, "SNAP Connect E10"))) return ftdi_jtag_probe(serial); return 0; From 3e01cca39c3eef60046d0dc922bfe1a275a18f51 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 4 Mar 2015 10:31:47 +0100 Subject: [PATCH 023/277] Defer processing of REQ_PREEMPT requests for blocked devices commit bba0bdd7ad4713d82338bcd9b72d57e9335a664b upstream. SCSI transport drivers and SCSI LLDs block a SCSI device if the transport layer is not operational. This means that in this state no requests should be processed, even if the REQ_PREEMPT flag has been set. This patch avoids that a rescan shortly after a cable pull sporadically triggers the following kernel oops: BUG: unable to handle kernel paging request at ffffc9001a6bc084 IP: [] mlx4_ib_post_send+0xd2/0xb30 [mlx4_ib] Process rescan-scsi-bus (pid: 9241, threadinfo ffff88053484a000, task ffff880534aae100) Call Trace: [] srp_post_send+0x65/0x70 [ib_srp] [] srp_queuecommand+0x1cf/0x3e0 [ib_srp] [] scsi_dispatch_cmd+0x101/0x280 [scsi_mod] [] scsi_request_fn+0x411/0x4d0 [scsi_mod] [] __blk_run_queue+0x27/0x30 [] blk_execute_rq_nowait+0x82/0x110 [] blk_execute_rq+0x62/0xf0 [] scsi_execute+0xe8/0x190 [scsi_mod] [] scsi_execute_req+0xa3/0x130 [scsi_mod] [] scsi_probe_lun+0x17a/0x450 [scsi_mod] [] scsi_probe_and_add_lun+0x156/0x480 [scsi_mod] [] __scsi_scan_target+0xdf/0x1f0 [scsi_mod] [] scsi_scan_host_selected+0x183/0x1c0 [scsi_mod] [] scsi_scan+0xdb/0xe0 [scsi_mod] [] store_scan+0x13/0x20 [scsi_mod] [] sysfs_write_file+0xcb/0x160 [] vfs_write+0xce/0x140 [] sys_write+0x53/0xa0 [] system_call_fastpath+0x16/0x1b [<00007f611c9d9300>] 0x7f611c9d92ff Reported-by: Max Gurtuvoy Signed-off-by: Bart Van Assche Reviewed-by: Mike Christie Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 4 +++- include/linux/blk_types.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e5953c8018c5..9f3168e8e5a8 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1242,9 +1242,11 @@ int scsi_prep_state_check(struct scsi_device *sdev, struct request *req) "rejecting I/O to dead device\n"); ret = BLKPREP_KILL; break; - case SDEV_QUIESCE: case SDEV_BLOCK: case SDEV_CREATED_BLOCK: + ret = BLKPREP_DEFER; + break; + case SDEV_QUIESCE: /* * If the devices is blocked we defer normal commands. */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fa1abeb45b76..49c48dda162d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -170,7 +170,9 @@ enum rq_flag_bits { __REQ_ELVPRIV, /* elevator private data attached */ __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_PREEMPT, /* set for "ide_preempt" requests and also + for requests for which the SCSI "quiesce" + state must be ignored. */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_FLUSH_SEQ, /* request for flush sequence */ From 9c28f1ed67be192f73a679b53d2306db1459b444 Mon Sep 17 00:00:00 2001 From: Viorel Suman Date: Wed, 18 Feb 2015 20:05:21 +0200 Subject: [PATCH 024/277] iio: inv_mpu6050: Clear timestamps fifo while resetting hardware fifo commit 4dac0a8eefd55bb1f157d1a5a084531334a2d74c upstream. A hardware fifo reset always imply an invalidation of the existing timestamps, so we'll clear timestamps fifo on successfull hardware fifo reset. Signed-off-by: Viorel Suman Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c | 25 ++++++++++++---------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 7da0832f187b..01d661e0fa6c 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -25,6 +25,16 @@ #include #include "inv_mpu_iio.h" +static void inv_clear_kfifo(struct inv_mpu6050_state *st) +{ + unsigned long flags; + + /* take the spin lock sem to avoid interrupt kick in */ + spin_lock_irqsave(&st->time_stamp_lock, flags); + kfifo_reset(&st->timestamps); + spin_unlock_irqrestore(&st->time_stamp_lock, flags); +} + int inv_reset_fifo(struct iio_dev *indio_dev) { int result; @@ -51,6 +61,10 @@ int inv_reset_fifo(struct iio_dev *indio_dev) INV_MPU6050_BIT_FIFO_RST); if (result) goto reset_fifo_fail; + + /* clear timestamps fifo */ + inv_clear_kfifo(st); + /* enable interrupt */ if (st->chip_config.accl_fifo_enable || st->chip_config.gyro_fifo_enable) { @@ -84,16 +98,6 @@ int inv_reset_fifo(struct iio_dev *indio_dev) return result; } -static void inv_clear_kfifo(struct inv_mpu6050_state *st) -{ - unsigned long flags; - - /* take the spin lock sem to avoid interrupt kick in */ - spin_lock_irqsave(&st->time_stamp_lock, flags); - kfifo_reset(&st->timestamps); - spin_unlock_irqrestore(&st->time_stamp_lock, flags); -} - /** * inv_mpu6050_irq_handler() - Cache a timestamp at each data ready interrupt. */ @@ -187,7 +191,6 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) flush_fifo: /* Flush HW and SW FIFOs. */ inv_reset_fifo(indio_dev); - inv_clear_kfifo(st); mutex_unlock(&indio_dev->mlock); iio_trigger_notify_done(indio_dev->trig); From 0ef5fdbb2cd17f88372c1bb7f1f45266ee557432 Mon Sep 17 00:00:00 2001 From: Darshana Padmadas Date: Sat, 28 Mar 2015 12:07:14 +0530 Subject: [PATCH 025/277] iio: imu: Use iio_trigger_get for indio_dev->trig assignment commit 4ce7ca89d6e8eae9e201cd0e972ba323f33e2fb4 upstream. This patch uses iio_trigger_get to increment the reference count of trigger device, to avoid incorrect assignment. Can result in a null pointer dereference during removal if the trigger has been changed before removal. This patch refers to a similar situation encountered through the following discussion: http://www.spinics.net/lists/linux-iio/msg13669.html Signed-off-by: Darshana Padmadas Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c index e0017c22bb9c..f53e9a803a0e 100644 --- a/drivers/iio/imu/adis_trigger.c +++ b/drivers/iio/imu/adis_trigger.c @@ -60,7 +60,7 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) iio_trigger_set_drvdata(adis->trig, adis); ret = iio_trigger_register(adis->trig); - indio_dev->trig = adis->trig; + indio_dev->trig = iio_trigger_get(adis->trig); if (ret) goto error_free_irq; From 8990b6ab3d341753244cba9992026b07038f1eea Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 27 Mar 2015 13:35:52 +0200 Subject: [PATCH 026/277] dmaengine: omap-dma: Fix memory leak when terminating running transfer commit 02d88b735f5a60f04dbf6d051b76e1877a0d0844 upstream. In omap_dma_start_desc the vdesc->node is removed from the virt-dma framework managed lists (to be precise from the desc_issued list). If a terminate_all comes before the transfer finishes the omap_desc will not be freed up because it is not in any of the lists and we stopped the DMA channel so the transfer will not going to complete. There is no special sequence for leaking memory when using cyclic (audio) transfer: with every start and stop of a cyclic transfer the driver leaks struct omap_desc worth of memory. Free up the allocated memory directly in omap_dma_terminate_all() since the framework will not going to do that for us. Signed-off-by: Peter Ujfalusi CC: Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/omap-dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index ec3fc4fd9160..b94a37630e36 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -487,6 +487,7 @@ static int omap_dma_terminate_all(struct omap_chan *c) * c->desc is NULL and exit.) */ if (c->desc) { + omap_dma_desc_free(&c->desc->vd); c->desc = NULL; /* Avoid stopping the dma twice */ if (!c->paused) From f4a1af9ffb87829c1501820deadeeb350cd34d4a Mon Sep 17 00:00:00 2001 From: Thomas Schlichter Date: Tue, 31 Mar 2015 20:24:39 +0200 Subject: [PATCH 027/277] cpuidle: ACPI: do not overwrite name and description of C0 commit c7e8bdf5872c5a8f5a6494e16fe839c38a0d3d3d upstream. Fix a bug that leads to showing the name and description of C-state C0 as "" in sysfs after the ACPI C-states changed (e.g. after AC->DC or DC->AC transition). The function poll_idle_init() in drivers/cpuidle/driver.c initializes the state 0 during cpuidle_register_driver(), so we better do not overwrite it again with '\0' during acpi_processor_cst_has_changed(). Signed-off-by: Thomas Schlichter Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/processor_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index a88894190e41..c991fe680e58 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -978,7 +978,7 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) return -EINVAL; drv->safe_state_index = -1; - for (i = 0; i < CPUIDLE_STATE_MAX; i++) { + for (i = CPUIDLE_DRIVER_STATE_START; i < CPUIDLE_STATE_MAX; i++) { drv->states[i].name[0] = '\0'; drv->states[i].desc[0] = '\0'; } From 2cb264a36f293b3a50809324abea2aff4889af60 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 23 Mar 2015 18:27:42 +0200 Subject: [PATCH 028/277] usb: xhci: apply XHCI_AVOID_BEI quirk to all Intel xHCI controllers commit 227a4fd801c8a9fa2c4700ab98ec1aec06e3b44d upstream. When a device with an isochronous endpoint is plugged into the Intel xHCI host controller, and the driver submits multiple frames per URB, the xHCI driver will set the Block Event Interrupt (BEI) flag on all but the last TD for the URB. This causes the host controller to place an event on the event ring, but not send an interrupt. When the last TD for the URB completes, BEI is cleared, and we get an interrupt for the whole URB. However, under Intel xHCI host controllers, if the event ring is full of events from transfers with BEI set, an "Event Ring is Full" event will be posted to the last entry of the event ring, but no interrupt is generated. Host will cease all transfer and command executions and wait until software completes handling the pending events in the event ring. That means xHC stops, but event of "event ring is full" is not notified. As the result, the xHC looks like dead to user. This patch is to apply XHCI_AVOID_BEI quirk to Intel xHC devices. And it should be backported to kernels as old as 3.0, that contains the commit 69e848c2090a ("Intel xhci: Support EHCI/xHCI port switching."). Signed-off-by: Lu Baolu Tested-by: Alistair Grant Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0e57bcb8e3f7..2320e20d5be7 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -94,6 +94,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_INTEL) { xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_INTEL_HOST; + xhci->quirks |= XHCI_AVOID_BEI; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { @@ -109,7 +110,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) * PPT chipsets. */ xhci->quirks |= XHCI_SPURIOUS_REBOOT; - xhci->quirks |= XHCI_AVOID_BEI; } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_ASROCK_P67) { From 57cf01ac9ff841958cbed2b727c271ab420b0ffa Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 13 Mar 2015 14:20:29 +0100 Subject: [PATCH 029/277] cifs: fix use-after-free bug in find_writable_file commit e1e9bda22d7ddf88515e8fe401887e313922823e upstream. Under intermittent network outages, find_writable_file() is susceptible to the following race condition, which results in a user-after-free in the cifs_writepages code-path: Thread 1 Thread 2 ======== ======== inv_file = NULL refind = 0 spin_lock(&cifs_file_list_lock) // invalidHandle found on openFileList inv_file = open_file // inv_file->count currently 1 cifsFileInfo_get(inv_file) // inv_file->count = 2 spin_unlock(&cifs_file_list_lock); cifs_reopen_file() cifs_close() // fails (rc != 0) ->cifsFileInfo_put() spin_lock(&cifs_file_list_lock) // inv_file->count = 1 spin_unlock(&cifs_file_list_lock) spin_lock(&cifs_file_list_lock); list_move_tail(&inv_file->flist, &cifs_inode->openFileList); spin_unlock(&cifs_file_list_lock); cifsFileInfo_put(inv_file); ->spin_lock(&cifs_file_list_lock) // inv_file->count = 0 list_del(&cifs_file->flist); // cleanup!! kfree(cifs_file); spin_unlock(&cifs_file_list_lock); spin_lock(&cifs_file_list_lock); ++refind; // refind = 1 goto refind_writable; At this point we loop back through with an invalid inv_file pointer and a refind value of 1. On second pass, inv_file is not overwritten on openFileList traversal, and is subsequently dereferenced. Signed-off-by: David Disseldorp Reviewed-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5fcc10fa62bd..f4a8577c3e91 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1789,6 +1789,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, cifsFileInfo_put(inv_file); spin_lock(&cifs_file_list_lock); ++refind; + inv_file = NULL; goto refind_writable; } } From 054fa2f11b8458100b7c839a728f0439be863740 Mon Sep 17 00:00:00 2001 From: John Soni Jose Date: Thu, 12 Feb 2015 06:45:47 +0530 Subject: [PATCH 030/277] be2iscsi: Fix kernel panic when device initialization fails commit 2e7cee027b26cbe7e6685a7a14bd2850bfe55d33 upstream. Kernel panic was happening as iscsi_host_remove() was called on a host which was not yet added. Signed-off-by: John Soni Jose Reviewed-by: Mike Christie Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/be2iscsi/be_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 1ad39c799c74..bfe812fcce34 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5080,9 +5080,9 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, hba_free: if (phba->msix_enabled) pci_disable_msix(phba->pcidev); - iscsi_host_remove(phba->shost); pci_dev_put(phba->pcidev); iscsi_host_free(phba->shost); + pci_set_drvdata(pcidev, NULL); disable_pci: pci_disable_device(pcidev); return ret; From f3326a5594dbb8faf47fb105740baa936d3445d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Apr 2015 17:00:32 -0400 Subject: [PATCH 031/277] ocfs2: _really_ sync the right range commit 64b4e2526d1cf6e6a4db6213d6e2b6e6ab59479a upstream. "ocfs2 syncs the wrong range" had been broken; prior to it the code was doing the wrong thing in case of O_APPEND, all right, but _after_ it we were syncing the wrong range in 100% cases. *ppos, aka iocb->ki_pos is incremented prior to that point, so we are always doing sync on the area _after_ the one we'd written to. Spotted by Joseph Qi back in January; unfortunately, I'd missed his mail back then ;-/ Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/file.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 46387e49aa46..8cd6474e248f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2372,10 +2372,14 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + if (unlikely(written <= 0)) + goto no_sync; + if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); if (ret < 0) written = ret; @@ -2388,10 +2392,12 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); } +no_sync: /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that From 81b444c779298d121606e956122bbcbe395ffc60 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 10 Apr 2015 02:47:27 -0500 Subject: [PATCH 032/277] iscsi target: fix oops when adding reject pdu commit b815fc12d4dd2b5586184fb4f867caff05a810d4 upstream. This fixes a oops due to a double list add when adding a reject PDU for iscsit_allocate_iovecs allocation failures. The cmd has already been added to the conn_cmd_list in iscsit_setup_scsi_cmd, so this has us call iscsit_reject_cmd. Note that for ERL0 the reject PDU is not actually sent, so this patch is not completely tested. Just verified we do not oops. The problem is the add reject functions return -1 which is returned all the way up to iscsi_target_rx_thread which for ERL0 will drop the connection. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 9559ea749d83..5a3ea20e9cb5 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1179,7 +1179,7 @@ iscsit_handle_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * traditional iSCSI block I/O. */ if (iscsit_allocate_iovecs(cmd) < 0) { - return iscsit_add_reject_cmd(cmd, + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } immed_data = cmd->immediate_data; From 1190df7d8f1cd3aca590c4153441f40b10cc047f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 4 Mar 2015 05:55:21 -0800 Subject: [PATCH 033/277] media: s5p-mfc: fix mmap support for 64bit arch commit 05b676ab42f624425d5f6519276e506b812fa058 upstream. TASK_SIZE is depends on the systems architecture (32 or 64 bits) and it should not be used for defining offset boundary for mmaping buffers for CAPTURE and OUTPUT queues. This patch fixes support for MMAP calls on the CAPTURE queue on 64bit architectures (like ARM64). Signed-off-by: Marek Szyprowski Signed-off-by: Kamil Debski Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s5p-mfc/s5p_mfc_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h index f804c1faa7ff..d3b54f7b849f 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h @@ -29,7 +29,7 @@ /* Offset base used to differentiate between CAPTURE and OUTPUT * while mmaping */ -#define DST_QUEUE_OFF_BASE (TASK_SIZE / 2) +#define DST_QUEUE_OFF_BASE (1 << 30) #define MFC_BANK1_ALLOC_CTX 0 #define MFC_BANK2_ALLOC_CTX 1 From d212dc60a1c2a41e9e1d2e69f1c137ffd3af909b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 13 Apr 2015 16:41:28 +0200 Subject: [PATCH 034/277] core, nfqueue, openvswitch: fix compilation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stable commit "core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors", upstream commit 36d5fe6a000790f56039afe26834265db0a3ad4c, was not correctly backported and missed to change a const 'from' parameter to non-const. This results in a new batch of warnings: net/netfilter/nfnetlink_queue_core.c: In function ‘nfqnl_zcopy’: net/netfilter/nfnetlink_queue_core.c:272:2: warning: passing argument 1 of ‘skb_orphan_frags’ discards ‘const’ qualifier from pointer target type [enabled by default] if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { ^ In file included from net/netfilter/nfnetlink_queue_core.c:18:0: include/linux/skbuff.h:1822:19: note: expected ‘struct sk_buff *’ but argument is of type ‘const struct sk_buff *’ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) ^ net/netfilter/nfnetlink_queue_core.c:273:3: warning: passing argument 1 of ‘skb_tx_error’ discards ‘const’ qualifier from pointer target type [enabled by default] skb_tx_error(from); ^ In file included from net/netfilter/nfnetlink_queue_core.c:18:0: include/linux/skbuff.h:630:13: note: expected ‘struct sk_buff *’ but argument is of type ‘const struct sk_buff *’ extern void skb_tx_error(struct sk_buff *skb); Remove const from the 'from' parameter, the same as in the upstream commit. As far as I can see, this leaked into 3.10, 3.12, and 3.13 already. Cc: Zoltan Kiss Cc: David S. Miller Cc: Ben Hutchings Cc: Greg Kroah-Hartman Cc: Kamal Mostafa Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_queue_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 2b8199f68785..5497f50af2f0 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -228,7 +228,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) } static int -nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +nfqnl_zcopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ From 1f55176763a6556916d4c41c80eba7c69d5d3e5a Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 27 Jan 2014 17:07:11 -0800 Subject: [PATCH 035/277] ipc: fix compat msgrcv with negative msgtyp commit e7ca2552369c1dfe0216c626baf82c3d83ec36bb upstream. Compat function takes msgtyp argument as u32 and passes it down to do_msgrcv which results in casting to long, thus the sign is lost and we get a big positive number instead. Cast the argument to signed type before passing it down. Signed-off-by: Mateusz Guzik Reported-by: Gabriellla Schmidt Cc: Al Viro Cc: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Masanari Iida Signed-off-by: Greg Kroah-Hartman --- ipc/compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/compat.c b/ipc/compat.c index 892f6585dd60..d3b376025e9b 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -381,7 +381,7 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, uptr = compat_ptr(ipck.msgp); fifth = ipck.msgtyp; } - return do_msgrcv(first, uptr, second, fifth, third, + return do_msgrcv(first, uptr, second, (s32)fifth, third, compat_do_msg_fill); } case MSGGET: From 8e519c3eb9823f0f6cbffb1dfaede0252df3c350 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 3 Feb 2015 08:55:58 -0500 Subject: [PATCH 036/277] net: rds: use correct size for max unacked packets and bytes commit db27ebb111e9f69efece08e4cb6a34ff980f8896 upstream. Max unacked packets/bytes is an int while sizeof(long) was used in the sysctl table. This means that when they were getting read we'd also leak kernel memory to userspace along with the timeout values. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index 907214b4c4d0..fc6cbe827856 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -71,14 +71,14 @@ static ctl_table rds_sysctl_rds_table[] = { { .procname = "max_unacked_packets", .data = &rds_sysctl_max_unacked_packets, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_unacked_bytes", .data = &rds_sysctl_max_unacked_bytes, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, From 85ec36aada19a7873bb2cb1677f910e8ce30f998 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 23 Jan 2015 20:47:00 -0500 Subject: [PATCH 037/277] net: llc: use correct size for sysctl timeout entries commit 6b8d9117ccb4f81b1244aafa7bc70ef8fa45fc49 upstream. The timeout entries are sizeof(int) rather than sizeof(long), which means that when they were getting read we'd also leak kernel memory to userspace along with the timeout values. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/sysctl_net_llc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 612a5ddaf93b..799bafc2af39 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -18,28 +18,28 @@ static struct ctl_table llc2_timeout_table[] = { { .procname = "ack", .data = &sysctl_llc2_ack_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_ack_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "busy", .data = &sysctl_llc2_busy_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_busy_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "p", .data = &sysctl_llc2_p_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_p_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "rej", .data = &sysctl_llc2_rej_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_rej_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, From 0121b8bf67ce4d613b58855f6e8558356bffe789 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 23 Jan 2014 15:54:00 -0800 Subject: [PATCH 038/277] kernel.h: define u8, s8, u32, etc. limits commit 89a0714106aac7309c7dfa0f004b39e1e89d2942 upstream. Create constants that define the maximum and minimum values representable by the kernel types u8, s8, u16, s16, and so on. Signed-off-by: Alex Elder Cc: Sage Weil Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e9ef6d6b51d5..341551c7b4c8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -29,6 +29,19 @@ #define ULLONG_MAX (~0ULL) #define SIZE_MAX (~(size_t)0) +#define U8_MAX ((u8)~0U) +#define S8_MAX ((s8)(U8_MAX>>1)) +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define U16_MAX ((u16)~0U) +#define S16_MAX ((s16)(U16_MAX>>1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define U32_MAX ((u32)~0U) +#define S32_MAX ((s32)(U32_MAX>>1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define U64_MAX ((u64)~0ULL) +#define S64_MAX ((s64)(U64_MAX>>1)) +#define S64_MIN ((s64)(-S64_MAX - 1)) + #define STACK_MAGIC 0xdeadbeef #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) From 94efa6abf172d13d70c167388f327f4b5cee7e02 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Wed, 18 Mar 2015 16:51:37 +0200 Subject: [PATCH 039/277] IB/mlx4: Saturate RoCE port PMA counters in case of overflow commit 61a3855bb726cbb062ef02a31a832dea455456e0 upstream. For RoCE ports, we set the u32 PMA values based on u64 HCA counters. In case of overflow, according to the IB spec, we have to saturate a counter to its max value, do that. Fixes: c37791349cc7 ('IB/mlx4: Support PMA counters for IBoE') Signed-off-by: Majd Dibbiny Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/mad.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 4d599cedbb0b..6ee534874535 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -64,6 +64,14 @@ enum { #define GUID_TBL_BLK_NUM_ENTRIES 8 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) +/* Counters should be saturate once they reach their maximum value */ +#define ASSIGN_32BIT_COUNTER(counter, value) do {\ + if ((value) > U32_MAX) \ + counter = cpu_to_be32(U32_MAX); \ + else \ + counter = cpu_to_be32(value); \ +} while (0) + struct mlx4_mad_rcv_buf { struct ib_grh grh; u8 payload[256]; @@ -730,10 +738,14 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, static void edit_counter(struct mlx4_counter *cnt, struct ib_pma_portcounters *pma_cnt) { - pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->tx_bytes)>>2)); - pma_cnt->port_rcv_data = cpu_to_be32((be64_to_cpu(cnt->rx_bytes)>>2)); - pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->tx_frames)); - pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->rx_frames)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data, + (be64_to_cpu(cnt->tx_bytes) >> 2)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data, + (be64_to_cpu(cnt->rx_bytes) >> 2)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets, + be64_to_cpu(cnt->tx_frames)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets, + be64_to_cpu(cnt->rx_frames)); } static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, From 391f1c610abe2db94c3e5c7ae20528ebf9ed682f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 1 Mar 2015 10:11:05 -0500 Subject: [PATCH 040/277] console: Fix console name size mismatch commit 30a22c215a0007603ffc08021f2e8b64018517dd upstream. commit 6ae9200f2cab7 ("enlarge console.name") increased the storage for the console name to 16 bytes, but not the corresponding struct console_cmdline::name storage. Console names longer than 8 bytes cause read beyond end-of-string and failure to match console; I'm not sure if there are other unexpected consequences. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/printk.c b/kernel/printk.c index f7aff4bd5454..fd0154a57d6e 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -107,7 +107,7 @@ static struct console *exclusive_console; */ struct console_cmdline { - char name[8]; /* Name of the driver */ + char name[16]; /* Name of the driver */ int index; /* Minor dev. to use */ char *options; /* Options for the driver */ #ifdef CONFIG_A11Y_BRAILLE_CONSOLE @@ -2290,6 +2290,8 @@ void register_console(struct console *newcon) */ for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) { + BUILD_BUG_ON(sizeof(console_cmdline[i].name) != + sizeof(newcon->name)); if (strcmp(console_cmdline[i].name, newcon->name) != 0) continue; if (newcon->index >= 0 && From e11b708502b0e249772e485585bec44be5fe8c70 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 9 Mar 2015 23:11:12 +0200 Subject: [PATCH 041/277] pagemap: do not leak physical addresses to non-privileged userspace commit ab676b7d6fbf4b294bf198fb27ade5b0e865c7ce upstream. As pointed by recent post[1] on exploiting DRAM physical imperfection, /proc/PID/pagemap exposes sensitive information which can be used to do attacks. This disallows anybody without CAP_SYS_ADMIN to read the pagemap. [1] http://googleprojectzero.blogspot.com/2015/03/exploiting-dram-rowhammer-bug-to-gain.html [ Eventually we might want to do anything more finegrained, but for now this is the simple model. - Linus ] Signed-off-by: Kirill A. Shutemov Acked-by: Konstantin Khlebnikov Acked-by: Andy Lutomirski Cc: Pavel Emelyanov Cc: Andrew Morton Cc: Mark Seaborn Signed-off-by: Linus Torvalds Signed-off-by: mancha security Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 65fc60a07c47..9f285fb9bab3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1110,9 +1110,19 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, return ret; } +static int pagemap_open(struct inode *inode, struct file *file) +{ + /* do not disclose physical addresses to unprivileged + userspace (closes a rowhammer attack vector) */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + const struct file_operations proc_pagemap_operations = { .llseek = mem_lseek, /* borrow this */ .read = pagemap_read, + .open = pagemap_open, }; #endif /* CONFIG_PROC_PAGE_MONITOR */ From 9ccc5af34b4f4eb27014f2e7e40db5e7c9522e5e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 19 Apr 2015 10:12:19 +0200 Subject: [PATCH 042/277] Linux 3.10.75 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d2a3930159ea..87909d8302ad 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 74 +SUBLEVEL = 75 EXTRAVERSION = NAME = TOSSUG Baby Fish From 98a4d82a78914c2496989d669ecae04dedcf361f Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Mon, 20 Apr 2015 17:45:42 -0700 Subject: [PATCH 043/277] SELinux: use deletion-safe iterator to free list This code is not exercised by policy version 26, but will be upon upgrade to policy version 30. Bug: 18087110 Change-Id: I07c6f34607713294a6a12c43a64d9936f0602200 Signed-off-by: Jeff Vander Stoep --- security/selinux/avc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 5c8e7cfa9de3..f3dbbc0f15dd 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -298,13 +298,15 @@ static void avc_operation_decision_free( static void avc_operation_free(struct avc_operation_node *ops_node) { - struct avc_operation_decision_node *od_node; + struct avc_operation_decision_node *od_node, *tmp; if (!ops_node) return; - list_for_each_entry(od_node, &ops_node->od_head, od_list) + list_for_each_entry_safe(od_node, tmp, &ops_node->od_head, od_list) { + list_del(&od_node->od_list); avc_operation_decision_free(od_node); + } kmem_cache_free(avc_operation_node_cachep, ops_node); } From 1f9ac0ffb8e5c3385c76b0538cc754a839329912 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 2 Apr 2014 09:10:44 -0400 Subject: [PATCH 044/277] android: configs: Enable SELinux and its dependencies. Change-Id: I979813b95c0a9a79913df0913e6888f566da5ff1 Signed-off-by: Stephen Smalley --- android/configs/android-base.cfg | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 3ddcecd716be..765f36038eda 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -9,6 +9,7 @@ CONFIG_ANDROID_BINDER_IPC=y CONFIG_ANDROID_LOW_MEMORY_KILLER=y CONFIG_ARMV7_COMPAT=y CONFIG_ASHMEM=y +CONFIG_AUDIT=y CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_INITRD=y CONFIG_CGROUPS=y @@ -51,6 +52,7 @@ CONFIG_IP_NF_MATCH_AH=y CONFIG_IP_NF_MATCH_ECN=y CONFIG_IP_NF_MATCH_TTL=y CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y @@ -85,10 +87,12 @@ CONFIG_NETFILTER_XT_MATCH_TIME=y CONFIG_NETFILTER_XT_MATCH_U32=y CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y CONFIG_NETFILTER_XT_TARGET_TCPMSS=y CONFIG_NETFILTER_XT_TARGET_TPROXY=y CONFIG_NETFILTER_XT_TARGET_TRACE=y @@ -110,6 +114,7 @@ CONFIG_NF_CONNTRACK_IRC=y CONFIG_NF_CONNTRACK_NETBIOS_NS=y CONFIG_NF_CONNTRACK_PPTP=y CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_TFTP=y CONFIG_NF_CT_NETLINK=y CONFIG_NF_CT_PROTO_DCCP=y @@ -130,6 +135,9 @@ CONFIG_PREEMPT=y CONFIG_RESOURCE_COUNTERS=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y CONFIG_STAGING=y CONFIG_SWITCH=y CONFIG_SYNC=y From b4a13f60e0fbd2be0cb4f2001810fc32412d7d70 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 25 Feb 2015 12:10:35 +0000 Subject: [PATCH 045/277] arm64: psci: move psci firmware calls out of line An arm64 allmodconfig fails to build with GCC 5 due to __asmeq assertions in the PSCI firmware calling code firing due to mcount preambles breaking our assumptions about register allocation of function arguments: /tmp/ccDqJsJ6.s: Assembler messages: /tmp/ccDqJsJ6.s:60: Error: .err encountered /tmp/ccDqJsJ6.s:61: Error: .err encountered /tmp/ccDqJsJ6.s:62: Error: .err encountered /tmp/ccDqJsJ6.s:99: Error: .err encountered /tmp/ccDqJsJ6.s:100: Error: .err encountered /tmp/ccDqJsJ6.s:101: Error: .err encountered This patch fixes the issue by moving the PSCI calls out-of-line into their own assembly files, which are safe from the compiler's meddling fingers. Reported-by: Andy Whitcroft Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit f5e0a12ca2d939e47995f73428d9bf1ad372b289) Signed-off-by: Jon Medhurst --- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/psci-call.S | 28 ++++++++++++++++++++++++++ arch/arm64/kernel/psci.c | 37 +++-------------------------------- 3 files changed, 32 insertions(+), 35 deletions(-) create mode 100644 arch/arm64/kernel/psci-call.S diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a8ad571c4758..12618cbd1bab 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -15,7 +15,7 @@ CFLAGS_REMOVE_return_address.o = -pg arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o cpu_ops.o insn.o return_address.o + hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o return_address.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o diff --git a/arch/arm64/kernel/psci-call.S b/arch/arm64/kernel/psci-call.S new file mode 100644 index 000000000000..cf83e61cd3b5 --- /dev/null +++ b/arch/arm64/kernel/psci-call.S @@ -0,0 +1,28 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2015 ARM Limited + * + * Author: Will Deacon + */ + +#include + +/* int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ +ENTRY(__invoke_psci_fn_hvc) + hvc #0 + ret +ENDPROC(__invoke_psci_fn_hvc) + +/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ +ENTRY(__invoke_psci_fn_smc) + smc #0 + ret +ENDPROC(__invoke_psci_fn_smc) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 4d827dd62199..fc457d0fee9c 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -58,6 +58,9 @@ static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); typedef int (*psci_initcall_t)(const struct device_node *); +asmlinkage int __invoke_psci_fn_hvc(u64, u64, u64, u64); +asmlinkage int __invoke_psci_fn_smc(u64, u64, u64, u64); + enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, @@ -109,40 +112,6 @@ static void psci_power_state_unpack(u32 power_state, & PSCI_0_2_POWER_STATE_AFFL_MASK; } -/* - * The following two functions are invoked via the invoke_psci_fn pointer - * and will not be inlined, allowing us to piggyback on the AAPCS. - */ -static noinline int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, - u64 arg2) -{ - asm volatile( - __asmeq("%0", "x0") - __asmeq("%1", "x1") - __asmeq("%2", "x2") - __asmeq("%3", "x3") - "hvc #0\n" - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - -static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, - u64 arg2) -{ - asm volatile( - __asmeq("%0", "x0") - __asmeq("%1", "x1") - __asmeq("%2", "x2") - __asmeq("%3", "x3") - "smc #0\n" - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - static int psci_get_version(void) { int err; From 1bc0c2460f5e63c8e14e3aa216f7e4763555dd10 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 23 Apr 2015 12:09:09 -0700 Subject: [PATCH 046/277] nf: IDLETIMER: Adds the uid field in the msg Message notifications contains an additional uid field. This field represents the uid that was responsible for waking the radio. And hence it is present only in notifications stating that the radio is now active. Change-Id: I18fc73eada512e370d7ab24fc9f890845037b729 Signed-off-by: Ruchi Kandoi Bug: 20264396 --- net/netfilter/xt_IDLETIMER.c | 37 +++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f6562ba97a97..ddf77f7fbe24 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -48,6 +48,7 @@ #include #include #include +#include struct idletimer_tg_attr { struct attribute attr; @@ -73,6 +74,7 @@ struct idletimer_tg { bool work_pending; bool send_nl_msg; bool active; + uid_t uid; }; static LIST_HEAD(idletimer_tg_list); @@ -117,7 +119,8 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) char iface_msg[NLMSG_MAX_SIZE]; char state_msg[NLMSG_MAX_SIZE]; char timestamp_msg[NLMSG_MAX_SIZE]; - char *envp[] = { iface_msg, state_msg, timestamp_msg, NULL }; + char uid_msg[NLMSG_MAX_SIZE]; + char *envp[] = { iface_msg, state_msg, timestamp_msg, uid_msg, NULL }; int res; struct timespec ts; uint64_t time_ns; @@ -140,6 +143,16 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) return; } + if (state) { + res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID=%u", timer->uid); + if (NLMSG_MAX_SIZE <= res) + pr_err("message too long (%d)", res); + } else { + res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID="); + if (NLMSG_MAX_SIZE <= res) + pr_err("message too long (%d)", res); + } + time_ns = timespec_to_ns(&ts); res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns); if (NLMSG_MAX_SIZE <= res) { @@ -147,7 +160,8 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) pr_err("message too long (%d)", res); } - pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); + pr_debug("putting nlmsg: <%s> <%s> <%s> <%s>\n", iface_msg, state_msg, + timestamp_msg, uid_msg); kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); return; @@ -298,6 +312,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) info->timer->delayed_timer_trigger.tv_sec = 0; info->timer->delayed_timer_trigger.tv_nsec = 0; info->timer->work_pending = false; + info->timer->uid = 0; get_monotonic_boottime(&info->timer->last_modified_timer); info->timer->pm_nb.notifier_call = idletimer_resume; @@ -321,7 +336,8 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) return ret; } -static void reset_timer(const struct idletimer_tg_info *info) +static void reset_timer(const struct idletimer_tg_info *info, + struct sk_buff *skb) { unsigned long now = jiffies; struct idletimer_tg *timer = info->timer; @@ -334,6 +350,17 @@ static void reset_timer(const struct idletimer_tg_info *info) if (!timer_prev || time_before(timer->timer.expires, now)) { pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", timer->timer.expires, now); + + /* Stores the uid resposible for waking up the radio */ + if (skb && (skb->sk)) { + struct sock *sk = skb->sk; + read_lock_bh(&sk->sk_callback_lock); + if ((sk->sk_socket) && (sk->sk_socket->file) && + (sk->sk_socket->file->f_cred)) + timer->uid = sk->sk_socket->file->f_cred->uid; + read_unlock_bh(&sk->sk_callback_lock); + } + /* checks if there is a pending inactive notification*/ if (timer->work_pending) timer->delayed_timer_trigger = timer->last_modified_timer; @@ -372,7 +399,7 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, } /* TODO: Avoid modifying timers on each packet */ - reset_timer(info); + reset_timer(info, skb); return XT_CONTINUE; } @@ -400,7 +427,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; - reset_timer(info); + reset_timer(info, NULL); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { From b81036aa3558b934e2ea17e93e637f99796d88a0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 23 Jan 2014 15:53:59 -0800 Subject: [PATCH 047/277] conditionally define U32_MAX commit 77719536dc00f8fd8f5abe6dadbde5331c37f996 upstream. The symbol U32_MAX is defined in several spots. Change these definitions to be conditional. This is in preparation for the next patch, which centralizes the definition in . Signed-off-by: Alex Elder Cc: Sage Weil Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/reiserfs.h | 2 ++ include/linux/ceph/decode.h | 2 ++ net/ipv4/tcp_illinois.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 157e474ab303..55dc1b079ed6 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1954,7 +1954,9 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} #define MAX_US_INT 0xffff // reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset +#ifndef U32_MAX #define U32_MAX (~(__u32)0) +#endif /* !U32_MAX */ static inline loff_t max_reiserfs_offset(struct inode *inode) { diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 0442c3d800f0..27fe66a279b1 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -10,6 +10,7 @@ /* This seemed to be the easiest place to define these */ +#ifndef U32_MAX #define U8_MAX ((u8)(~0U)) #define U16_MAX ((u16)(~0U)) #define U32_MAX ((u32)(~0U)) @@ -24,6 +25,7 @@ #define S16_MIN ((s16)(-S16_MAX - 1)) #define S32_MIN ((s32)(-S32_MAX - 1)) #define S64_MIN ((s64)(-S64_MAX - 1LL)) +#endif /* !U32_MAX */ /* * in all cases, diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 834857f3c871..ffb2615d286f 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -23,7 +23,9 @@ #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ +#ifndef U32_MAX #define U32_MAX ((u32)~0U) +#endif /* !U32_MAX */ #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ #define BETA_SHIFT 6 From 1554b19c4080476e4e5b678febd5fc7f2d102322 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 23 Jan 2014 15:54:01 -0800 Subject: [PATCH 048/277] remove extra definitions of U32_MAX commit 04f9b74e4d96d349de12fdd4e6626af4a9f75e09 upstream. Now that the definition is centralized in , the definitions of U32_MAX (and related) elsewhere in the kernel can be removed. Signed-off-by: Alex Elder Acked-by: Sage Weil Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/reiserfs.h | 4 ---- include/linux/ceph/decode.h | 19 ------------------- net/ipv4/tcp_illinois.c | 3 --- 3 files changed, 26 deletions(-) diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 55dc1b079ed6..635a1425d370 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1954,10 +1954,6 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} #define MAX_US_INT 0xffff // reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset -#ifndef U32_MAX -#define U32_MAX (~(__u32)0) -#endif /* !U32_MAX */ - static inline loff_t max_reiserfs_offset(struct inode *inode) { if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 27fe66a279b1..a6ef9cc267ec 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -8,25 +8,6 @@ #include -/* This seemed to be the easiest place to define these */ - -#ifndef U32_MAX -#define U8_MAX ((u8)(~0U)) -#define U16_MAX ((u16)(~0U)) -#define U32_MAX ((u32)(~0U)) -#define U64_MAX ((u64)(~0ULL)) - -#define S8_MAX ((s8)(U8_MAX >> 1)) -#define S16_MAX ((s16)(U16_MAX >> 1)) -#define S32_MAX ((s32)(U32_MAX >> 1)) -#define S64_MAX ((s64)(U64_MAX >> 1LL)) - -#define S8_MIN ((s8)(-S8_MAX - 1)) -#define S16_MIN ((s16)(-S16_MAX - 1)) -#define S32_MIN ((s32)(-S32_MAX - 1)) -#define S64_MIN ((s64)(-S64_MAX - 1LL)) -#endif /* !U32_MAX */ - /* * in all cases, * void **p pointer to position pointer diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index ffb2615d286f..86183c4e4fd5 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -23,9 +23,6 @@ #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ -#ifndef U32_MAX -#define U32_MAX ((u32)~0U) -#endif /* !U32_MAX */ #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ #define BETA_SHIFT 6 From 1b946e381d185db8700e96634074052a62a11509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Mon, 23 Mar 2015 15:14:00 +0100 Subject: [PATCH 049/277] tcp: prevent fetching dst twice in early demux code [ Upstream commit d0c294c53a771ae7e84506dfbd8c18c30f078735 ] On s390x, gcc 4.8 compiles this part of tcp_v6_early_demux() struct dst_entry *dst = sk->sk_rx_dst; if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); to code reading sk->sk_rx_dst twice, once for the test and once for the argument of ip6_dst_check() (dst_check() is inline). This allows ip6_dst_check() to be called with null first argument, causing a crash. Protect sk->sk_rx_dst access by ACCESS_ONCE() both in IPv4 and IPv6 TCP early demux code. Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Fixes: c7109986db3c ("ipv6: Early TCP socket demux") Signed-off-by: Michal Kubecek Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cce35e5a7ee6..7c3eec386a4b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1901,7 +1901,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = ACCESS_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1a87659a6139..4659b8ab55d9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1616,7 +1616,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = ACCESS_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); From 5a2267373e3d66b6df7d37b7366ed7a11bc29f4f Mon Sep 17 00:00:00 2001 From: "D.S. Ljungmark" Date: Wed, 25 Mar 2015 09:28:15 +0100 Subject: [PATCH 050/277] ipv6: Don't reduce hop limit for an interface [ Upstream commit 6fd99094de2b83d1d4c8457f2c83483b2828e75a ] A local route may have a lower hop_limit set than global routes do. RFC 3756, Section 4.2.7, "Parameter Spoofing" > 1. The attacker includes a Current Hop Limit of one or another small > number which the attacker knows will cause legitimate packets to > be dropped before they reach their destination. > As an example, one possible approach to mitigate this threat is to > ignore very small hop limits. The nodes could implement a > configurable minimum hop limit, and ignore attempts to set it below > said limit. Signed-off-by: D.S. Ljungmark Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 060a0449acaa..05f361338c2e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1193,7 +1193,14 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt) rt6_set_expires(rt, jiffies + (HZ * lifetime)); if (ra_msg->icmph.icmp6_hop_limit) { - in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + /* Only set hop_limit on the interface if it is higher than + * the current hop_limit. + */ + if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { + in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + } else { + ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n"); + } if (rt) dst_metric_set(&rt->dst, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); From c31d60c29774e1c5650f89d5edccfd8314152af6 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 1 Apr 2015 20:26:46 -0400 Subject: [PATCH 051/277] tcp: fix FRTO undo on cumulative ACK of SACKed range [ Upstream commit 666b805150efd62f05810ff0db08f44a2370c937 ] On processing cumulative ACKs, the FRTO code was not checking the SACKed bit, meaning that there could be a spurious FRTO undo on a cumulative ACK of a previously SACKed skb. The FRTO code should only consider a cumulative ACK to indicate that an original/unretransmitted skb is newly ACKed if the skb was not yet SACKed. The effect of the spurious FRTO undo would typically be to make the connection think that all previously-sent packets were in flight when they really weren't, leading to a stall and an RTO. Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Fixes: e33099f96d99c ("tcp: implement RFC5682 F-RTO") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ea7f52f3062d..a8be45e4d34f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3076,10 +3076,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (seq_rtt < 0) { seq_rtt = ca_seq_rtt; } - if (!(sacked & TCPCB_SACKED_ACKED)) + if (!(sacked & TCPCB_SACKED_ACKED)) { reord = min(pkts_acked, reord); - if (!after(scb->end_seq, tp->high_seq)) - flag |= FLAG_ORIG_SACK_ACKED; + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; + } } if (sacked & TCPCB_SACKED_ACKED) From ef15025a4eb2cecc162a349bef1392b620966e40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2015 13:31:56 -0700 Subject: [PATCH 052/277] tcp: tcp_make_synack() should clear skb->tstamp [ Upstream commit b50edd7812852d989f2ef09dcfc729690f54a42d ] I noticed tcpdump was giving funky timestamps for locally generated SYNACK messages on loopback interface. 11:42:46.938990 IP 127.0.0.1.48245 > 127.0.0.2.23850: S 945476042:945476042(0) win 43690 20:28:58.502209 IP 127.0.0.2.23850 > 127.0.0.1.48245: S 3160535375:3160535375(0) ack 945476043 win 43690 This is because we need to clear skb->tstamp before entering lower stack, otherwise net_timestamp_check() does not set skb->tstamp. Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 92b5e1f7d3b0..7681a1bbd97f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2772,6 +2772,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } #endif + /* Do not fool tcpdump (if any), clean our debris */ + skb->tstamp.tv64 = 0; return skb; } EXPORT_SYMBOL(tcp_make_synack); From 16767ec632cffde8a1176d2038a3d2f34c6fefa9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Mar 2014 14:14:58 -0700 Subject: [PATCH 053/277] 8139cp: Call dev_kfree_skby_any instead of kfree_skb. Replace kfree_skb with dev_kfree_skb_any in cp_start_xmit as it can be called in both hard irq and other contexts. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/8139cp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 064425d3178d..437d4cfd42cc 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -899,7 +899,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, return NETDEV_TX_OK; out_dma_error: - kfree_skb(skb); + dev_kfree_skb_any(skb); cp->dev->stats.tx_dropped++; goto out_unlock; } From 2886482ca51647ab0625231052caf626cf018729 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Mar 2014 14:15:36 -0700 Subject: [PATCH 054/277] 8139too: Call dev_kfree_skby_any instead of dev_kfree_skb. Replace dev_kfree_skb with dev_kfree_skb_any in functions that can be called in hard irq and other contexts. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/8139too.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 3ccedeb8aba0..942673fcb391 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -1715,9 +1715,9 @@ static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, if (len < ETH_ZLEN) memset(tp->tx_buf[entry], 0, ETH_ZLEN); skb_copy_and_csum_dev(skb, tp->tx_buf[entry]); - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); } else { - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } From 4324a943dcc772a304f6d2f82294c42c51afdd4d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Mar 2014 14:16:14 -0700 Subject: [PATCH 055/277] r8169: Call dev_kfree_skby_any instead of dev_kfree_skb. Replace dev_kfree_skb with dev_kfree_skb_any in functions that can be called in hard irq and other contexts. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e9b5d77a90db..2183c6189148 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5768,7 +5768,7 @@ static void rtl8169_tx_clear_range(struct rtl8169_private *tp, u32 start, tp->TxDescArray + entry); if (skb) { tp->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); tx_skb->skb = NULL; } } @@ -5993,7 +5993,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, err_dma_1: rtl8169_unmap_tx_skb(d, tp->tx_skb + entry, txd); err_dma_0: - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); err_update_stats: dev->stats.tx_dropped++; return NETDEV_TX_OK; @@ -6076,7 +6076,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) tp->tx_stats.packets++; tp->tx_stats.bytes += tx_skb->skb->len; u64_stats_update_end(&tp->tx_stats.syncp); - dev_kfree_skb(tx_skb->skb); + dev_kfree_skb_any(tx_skb->skb); tx_skb->skb = NULL; } dirty_tx++; From 248b28006ace38a3b91c347fcdff7d695d11776a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Mar 2014 14:17:41 -0700 Subject: [PATCH 056/277] bnx2: Call dev_kfree_skby_any instead of dev_kfree_skb. Replace dev_kfree_skb with dev_kfree_skb_any in functions that can be called in hard irq and other contexts. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 5d204492c603..161dcba13c47 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2869,7 +2869,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) sw_cons = BNX2_NEXT_TX_BD(sw_cons); tx_bytes += skb->len; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); tx_pkt++; if (tx_pkt == budget) break; @@ -6610,7 +6610,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) mapping = dma_map_single(&bp->pdev->dev, skb->data, len, PCI_DMA_TODEVICE); if (dma_mapping_error(&bp->pdev->dev, mapping)) { - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -6703,7 +6703,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) PCI_DMA_TODEVICE); } - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } From 543c297eca0a06cfddabc54672805864c8777ce3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Mar 2014 14:18:14 -0700 Subject: [PATCH 057/277] tg3: Call dev_kfree_skby_any instead of dev_kfree_skb. Replace dev_kfree_skb with dev_kfree_skb_any in functions that can be called in hard irq and other contexts. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 8c1eab1151b8..680d26d6d2c3 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6437,7 +6437,7 @@ static void tg3_tx(struct tg3_napi *tnapi) pkts_compl++; bytes_compl += skb->len; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); if (unlikely(tx_bug)) { tg3_tx_recover(tp); @@ -6769,7 +6769,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) if (len > (tp->dev->mtu + ETH_HLEN) && skb->protocol != htons(ETH_P_8021Q) && skb->protocol != htons(ETH_P_8021AD)) { - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); goto drop_it_no_recycle; } @@ -7652,7 +7652,7 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, PCI_DMA_TODEVICE); /* Make sure the mapping succeeded */ if (pci_dma_mapping_error(tp->pdev, new_addr)) { - dev_kfree_skb(new_skb); + dev_kfree_skb_any(new_skb); ret = -1; } else { u32 save_entry = *entry; @@ -7667,13 +7667,13 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, new_skb->len, base_flags, mss, vlan)) { tg3_tx_skb_unmap(tnapi, save_entry, -1); - dev_kfree_skb(new_skb); + dev_kfree_skb_any(new_skb); ret = -1; } } } - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); *pskb = new_skb; return ret; } @@ -7716,7 +7716,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) } while (segs); tg3_tso_bug_end: - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -7954,7 +7954,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, --i); tnapi->tx_buffers[tnapi->tx_prod].skb = NULL; drop: - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); drop_nofree: tp->tx_dropped++; return NETDEV_TX_OK; From 97aa254018122e86bf2b3498d610c6c662bbc8c0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Mar 2014 14:18:42 -0700 Subject: [PATCH 058/277] ixgb: Call dev_kfree_skby_any instead of dev_kfree_skb. Replace dev_kfree_skb with dev_kfree_skb_any in functions that can be called in hard irq and other contexts. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index fce3e92f9d11..c5a9dcc01ca8 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -1527,12 +1527,12 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) int tso; if (test_bit(__IXGB_DOWN, &adapter->flags)) { - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } if (skb->len <= 0) { - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -1549,7 +1549,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) tso = ixgb_tso(adapter, skb); if (tso < 0) { - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } From 57a99bf7d2f467714ba9db090a2f700cb080e0d9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Mar 2014 14:19:50 -0700 Subject: [PATCH 059/277] benet: Call dev_kfree_skby_any instead of kfree_skb. Replace free_skb with dev_kfree_skb_any in be_tx_compl_process as which can be called in hard irq by netpoll, softirq context by normal napi polling, and in normal sleepable context by the network device close method. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index d81a7dbfeef6..88e85cb88342 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1767,7 +1767,7 @@ static u16 be_tx_compl_process(struct be_adapter *adapter, queue_tail_inc(txq); } while (cur_index != last_index); - kfree_skb(sent_skb); + dev_kfree_skb_any(sent_skb); return num_wrbs; } From e3f5ff371c1e9ad273f403c6a487f972bc5bd058 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Wed, 11 Mar 2015 09:19:16 -0400 Subject: [PATCH 060/277] serial: 8250_dw: Fix deadlock in LCR workaround commit 7fd6f640f2dd17dac6ddd6702c378cb0bb9cfa11 upstream. Trying to write console output from within the serial console driver while the port->lock is held causes recursive deadlock: CPU 0 spin_lock_irqsave(&port->lock) printk() console_unlock() call_console_drivers() serial8250_console_write() spin_lock_irqsave(&port->lock) ** DEADLOCK ** The 8250_dw i/o accessors try to write a console error message if the LCR workaround was unsuccessful. When the port->lock is already held (eg., when called from serial8250_set_termios()), this deadlocks. Make the error message a FIXME until a general solution is devised. Cc: Tim Kryger Reported-by: Zhang Zhen Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 345b5ddcb1a0..86281fa5dcc3 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -98,7 +98,10 @@ static void dw8250_serial_out(struct uart_port *p, int offset, int value) dw8250_force_idle(p); writeb(value, p->membase + (UART_LCR << p->regshift)); } - dev_err(p->dev, "Couldn't set LCR to %d\n", value); + /* + * FIXME: this deadlocks if port->lock is already held + * dev_err(p->dev, "Couldn't set LCR to %d\n", value); + */ } } @@ -128,7 +131,10 @@ static void dw8250_serial_out32(struct uart_port *p, int offset, int value) dw8250_force_idle(p); writel(value, p->membase + (UART_LCR << p->regshift)); } - dev_err(p->dev, "Couldn't set LCR to %d\n", value); + /* + * FIXME: this deadlocks if port->lock is already held + * dev_err(p->dev, "Couldn't set LCR to %d\n", value); + */ } } From 1ca630d975c5abb422ef7969494cacd91a1c125a Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 23 Mar 2015 16:06:26 -0500 Subject: [PATCH 061/277] jfs: fix readdir regression Upstream commit 44512449, "jfs: fix readdir cookie incompatibility with NFSv4", was backported incorrectly into the stable trees which used the filldir callback (rather than dir_emit). The position is being incorrectly passed to filldir for the . and .. entries. The still-maintained stable trees that need to be fixed are 3.2.y, 3.4.y and 3.10.y. https://bugzilla.kernel.org/show_bug.cgi?id=94741 Signed-off-by: Dave Kleikamp Cc: jfs-discussion@lists.sourceforge.net Signed-off-by: Greg Kroah-Hartman --- fs/jfs/jfs_dtree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index c450fdb3d78d..5d876b1c9ea4 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3103,7 +3103,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) * self "." */ filp->f_pos = 1; - if (filldir(dirent, ".", 1, 0, ip->i_ino, + if (filldir(dirent, ".", 1, 1, ip->i_ino, DT_DIR)) return 0; } @@ -3111,7 +3111,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) * parent ".." */ filp->f_pos = 2; - if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) + if (filldir(dirent, "..", 2, 2, PARENT(ip), DT_DIR)) return 0; /* From 13d32f27d15c5c53254ed88e3d2042c34de1bfaa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 29 Jan 2015 02:50:33 +0000 Subject: [PATCH 062/277] splice: Apply generic position and size checks to each write commit 894c6350eaad7e613ae267504014a456e00a3e2a from the 3.2-stable branch. We need to check the position and size of file writes against various limits, using generic_write_check(). This was not being done for the splice write path. It was fixed upstream by commit 8d0207652cbe ("->splice_write() via ->write_iter()") but we can't apply that. CVE-2014-7822 Signed-off-by: Ben Hutchings [Ben fixed it in 3.2 stable, i ported it to 3.10 stable] Signed-off-by: Zhang Zhen Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/file.c | 8 +++++--- fs/splice.c | 8 ++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8cd6474e248f..d0e8c0b1767f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2459,12 +2459,14 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; struct splice_desc sd = { - .total_len = len, .flags = flags, - .pos = *ppos, .u.file = out, }; - + ret = generic_write_checks(out, ppos, &len, 0); + if(ret) + return ret; + sd.total_len = len; + sd.pos = *ppos; trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, diff --git a/fs/splice.c b/fs/splice.c index 4b5a5fac3383..f183f1342c01 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1012,13 +1012,17 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; struct splice_desc sd = { - .total_len = len, .flags = flags, - .pos = *ppos, .u.file = out, }; ssize_t ret; + ret = generic_write_checks(out, ppos, &len, S_ISBLK(inode->i_mode)); + if (ret) + return ret; + sd.total_len = len; + sd.pos = *ppos; + pipe_lock(pipe); splice_from_pipe_begin(&sd); From 23f1538b9cffe4229e703d72101e73e168abae57 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 20 Dec 2013 15:10:03 +0200 Subject: [PATCH 063/277] mm: Fix NULL pointer dereference in madvise(MADV_WILLNEED) support commit ee53664bda169f519ce3c6a22d378f0b946c8178 upstream. Sasha Levin found a NULL pointer dereference that is due to a missing page table lock, which in turn is due to the pmd entry in question being a transparent huge-table entry. The code - introduced in commit 1998cc048901 ("mm: make madvise(MADV_WILLNEED) support swap file prefetch") - correctly checks for this situation using pmd_none_or_trans_huge_or_clear_bad(), but it turns out that that function doesn't work correctly. pmd_none_or_trans_huge_or_clear_bad() expected that pmd_bad() would trigger if the transparent hugepage bit was set, but it doesn't do that if pmd_numa() is also set. Note that the NUMA bit only gets set on real NUMA machines, so people trying to reproduce this on most normal development systems would never actually trigger this. Fix it by removing the very subtle (and subtly incorrect) expectation, and instead just checking pmd_trans_huge() explicitly. Reported-by: Sasha Levin Acked-by: Andrea Arcangeli [ Additionally remove the now stale test for pmd_trans_huge() inside the pmd_bad() case - Linus ] Signed-off-by: Linus Torvalds Cc: Wang Long Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/pgtable.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 17bccd3a4b03..dd6d9b89d338 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -550,11 +550,10 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE barrier(); #endif - if (pmd_none(pmdval)) + if (pmd_none(pmdval) || pmd_trans_huge(pmdval)) return 1; if (unlikely(pmd_bad(pmdval))) { - if (!pmd_trans_huge(pmdval)) - pmd_clear_bad(pmd); + pmd_clear_bad(pmd); return 1; } return 0; From a713ad60cb761fcc48c2b71f798b8389852051f1 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 16 Jan 2014 15:37:11 +0100 Subject: [PATCH 064/277] Bluetooth: Enable Atheros 0cf3:311e for firmware upload commit b131237ca3995edad9efc162d0bc959c3b1dddc2 upstream. The device will bind to btusb without firmware, but with the original buggy firmware device discovery does not work. No devices are detected. Device descriptor without firmware: T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=311e Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms with firmware: T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=311e Rev= 0.02 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Oliver Neukum Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index dad8891ecbfa..72b64567b193 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -77,6 +77,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0x3004) }, { USB_DEVICE(0x0CF3, 0x3008) }, { USB_DEVICE(0x0CF3, 0x311D) }, + { USB_DEVICE(0x0CF3, 0x311E) }, { USB_DEVICE(0x0CF3, 0x817a) }, { USB_DEVICE(0x13d3, 0x3375) }, { USB_DEVICE(0x04CA, 0x3004) }, @@ -120,6 +121,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x311E), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 61a8ec4e5f4d..0f3010122701 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -141,6 +141,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, From 3672f3f2159e90132bf827791f6960e396c81c02 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 16 Jan 2014 16:02:58 +0100 Subject: [PATCH 065/277] Bluetooth: Add firmware update for Atheros 0cf3:311f commit 1e56f1eb2bbeab0ddc3a1e536d2a0065cfe4c131 upstream. The device is not functional without firmware. The device without firmware: T: Bus=02 Lev=02 Prnt=02 Port=05 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=311f Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb The device with firmware: T: Bus=02 Lev=02 Prnt=02 Port=05 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=3007 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Oliver Neukum Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 72b64567b193..9c2c4eca52e3 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -78,6 +78,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0x3008) }, { USB_DEVICE(0x0CF3, 0x311D) }, { USB_DEVICE(0x0CF3, 0x311E) }, + { USB_DEVICE(0x0CF3, 0x311F) }, { USB_DEVICE(0x0CF3, 0x817a) }, { USB_DEVICE(0x13d3, 0x3375) }, { USB_DEVICE(0x04CA, 0x3004) }, @@ -122,6 +123,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311E), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x311F), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 0f3010122701..c67c3a1a3b83 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -142,6 +142,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x311f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, From 7ac28a3240ed0035ac1d203269169da20e2fee14 Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Sat, 15 Feb 2014 12:01:09 +0100 Subject: [PATCH 066/277] Bluetooth: btusb: Add IMC Networks (Broadcom based) commit 9113bfd82dc8ece9cbb898df8794f58a78a36e97 upstream. Add support for IMC Networks (Broadcom based) to btusb driver. Below the output of /sys/kernel/debug/usb/devices for this device: T: Bus=01 Lev=02 Prnt=02 Port=04 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=ff(vend.) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3404 Rev= 1.12 S: Manufacturer=Broadcom Corp S: Product=BCM20702A0 S: SerialNumber=240A649F8246 C:* #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr= 0mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=84(I) Atr=02(Bulk) MxPS= 32 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 32 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 0 Cls=fe(app. ) Sub=01 Prot=01 Driver=(none) Signed-off-by: Jurgen Kramer Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c67c3a1a3b83..8d1c2f7c8ba5 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -113,6 +113,9 @@ static struct usb_device_id btusb_table[] = { /*Broadcom devices with vendor specific id */ { USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01) }, + /* IMC Networks - Broadcom based */ + { USB_VENDOR_AND_INTERFACE_INFO(0x13d3, 0xff, 0x01, 0x01) }, + { } /* Terminating entry */ }; From 541086495d38c4d9811e93df545663c515b231e1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 6 Jul 2014 13:29:58 +0200 Subject: [PATCH 067/277] Bluetooth: Add support for Intel bootloader devices commit 40df783d1ef1989ac454e3dfcda017270b8950e6 upstream. Intel Bluetooth devices that boot up in bootloader mode can not be used as generic HCI devices, but their HCI transport is still valuable and so bring that up as raw-only devices. T: Bus=02 Lev=02 Prnt=03 Port=00 Cnt=01 Dev#= 14 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=ff(vend.) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=8087 ProdID=0a5a Rev= 0.00 S: Manufacturer=Intel(R) Corporation S: Product=Intel(R) Wilkins Peak 2x2 S: SerialNumber=001122334455 WP_A0 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg [bwh: Backported to 3.14: adjust context] Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 8d1c2f7c8ba5..5fd1674fd15d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -49,6 +49,7 @@ static struct usb_driver btusb_driver; #define BTUSB_WRONG_SCO_MTU 0x40 #define BTUSB_ATH3012 0x80 #define BTUSB_INTEL 0x100 +#define BTUSB_INTEL_BOOT 0x200 static struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -116,6 +117,9 @@ static struct usb_device_id btusb_table[] = { /* IMC Networks - Broadcom based */ { USB_VENDOR_AND_INTERFACE_INFO(0x13d3, 0xff, 0x01, 0x01) }, + /* Intel Bluetooth USB Bootloader (RAM module) */ + { USB_DEVICE(0x8087, 0x0a5a), .driver_info = BTUSB_INTEL_BOOT }, + { } /* Terminating entry */ }; @@ -1449,6 +1453,9 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_INTEL) hdev->setup = btusb_setup_intel; + if (id->driver_info & BTUSB_INTEL_BOOT) + set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + /* Interface numbers are hardcoded in the specification */ data->isoc = usb_ifnum_to_if(data->udev, 1); From 532caffeb3b1b52e07d97fc3530cf7670895880e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 6 Jul 2014 14:53:55 +0200 Subject: [PATCH 068/277] Bluetooth: Ignore isochronous endpoints for Intel USB bootloader commit d92f2df0565ea04101d6ac04bdc10feeb1d93c94 upstream. The isochronous endpoints are not valid when the Intel Bluetooth controller boots up in bootloader mode. So just mark these endpoints as broken and then they will not be configured. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5fd1674fd15d..92b985317770 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -118,7 +118,8 @@ static struct usb_device_id btusb_table[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x13d3, 0xff, 0x01, 0x01) }, /* Intel Bluetooth USB Bootloader (RAM module) */ - { USB_DEVICE(0x8087, 0x0a5a), .driver_info = BTUSB_INTEL_BOOT }, + { USB_DEVICE(0x8087, 0x0a5a), + .driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC }, { } /* Terminating entry */ }; From 752b388c92ed22e527ddb22fe137fa21095fb554 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 26 Sep 2014 11:35:42 +0200 Subject: [PATCH 069/277] netfilter: conntrack: disable generic tracking for known protocols commit db29a9508a9246e77087c5531e45b2c88ec6988b upstream. Given following iptables ruleset: -P FORWARD DROP -A FORWARD -m sctp --dport 9 -j ACCEPT -A FORWARD -p tcp --dport 80 -j ACCEPT -A FORWARD -p tcp -m conntrack -m state ESTABLISHED,RELATED -j ACCEPT One would assume that this allows SCTP on port 9 and TCP on port 80. Unfortunately, if the SCTP conntrack module is not loaded, this allows *all* SCTP communication, to pass though, i.e. -p sctp -j ACCEPT, which we think is a security issue. This is because on the first SCTP packet on port 9, we create a dummy "generic l4" conntrack entry without any port information (since conntrack doesn't know how to extract this information). All subsequent packets that are unknown will then be in established state since they will fallback to proto_generic and will match the 'generic' entry. Our originally proposed version [1] completely disabled generic protocol tracking, but Jozsef suggests to not track protocols for which a more suitable helper is available, hence we now mitigate the issue for in tree known ct protocol helpers only, so that at least NAT and direction information will still be preserved for others. [1] http://www.spinics.net/lists/netfilter-devel/msg33430.html Joint work with Daniel Borkmann. Fixes CVE-2014-8160. Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Zhiqiang Zhang Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_proto_generic.c | 26 +++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d25f29377648..957c1db66652 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -14,6 +14,30 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; +static bool nf_generic_should_process(u8 proto) +{ + switch (proto) { +#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE + case IPPROTO_SCTP: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE + case IPPROTO_DCCP: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE + case IPPROTO_GRE: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE + case IPPROTO_UDPLITE: + return false; +#endif + default: + return true; + } +} + static inline struct nf_generic_net *generic_pernet(struct net *net) { return &net->ct.nf_ct_proto.generic; @@ -67,7 +91,7 @@ static int generic_packet(struct nf_conn *ct, static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { - return true; + return nf_generic_should_process(nf_ct_protonum(ct)); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) From 3478a33e0c6204b4368c508a1175b2c027644968 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 1 Jan 2015 23:11:11 +0200 Subject: [PATCH 070/277] KVM: x86: SYSENTER emulation is broken commit f3747379accba8e95d70cec0eae0582c8c182050 upstream. SYSENTER emulation is broken in several ways: 1. It misses the case of 16-bit code segments completely (CVE-2015-0239). 2. MSR_IA32_SYSENTER_CS is checked in 64-bit mode incorrectly (bits 0 and 1 can still be set without causing #GP). 3. MSR_IA32_SYSENTER_EIP and MSR_IA32_SYSENTER_ESP are not masked in legacy-mode. 4. There is some unneeded code. Fix it. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini [zhangzhiqiang: backport to 3.10: - adjust context - in 3.10 context "ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF)" is replaced by "ctxt->eflags &= ~(EFLG_VM | EFLG_IF)" in upstream, which was changed by another commit. - After the above adjustments, becomes same to the original patch: https://github.com/torvalds/linux/commit/f3747379accba8e95d70cec0eae0582c8c182050 ] Signed-off-by: Zhiqiang Zhang Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index af88fa20dbe8..ddad189e596e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2450,7 +2450,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) * Not recognized on AMD in compat mode (but is recognized in legacy * mode). */ - if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA) + if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) && !vendor_intel(ctxt)) return emulate_ud(ctxt); @@ -2463,25 +2463,13 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) setup_syscalls_segments(ctxt, &cs, &ss); ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); - switch (ctxt->mode) { - case X86EMUL_MODE_PROT32: - if ((msr_data & 0xfffc) == 0x0) - return emulate_gp(ctxt, 0); - break; - case X86EMUL_MODE_PROT64: - if (msr_data == 0x0) - return emulate_gp(ctxt, 0); - break; - default: - break; - } + if ((msr_data & 0xfffc) == 0x0) + return emulate_gp(ctxt, 0); ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); - cs_sel = (u16)msr_data; - cs_sel &= ~SELECTOR_RPL_MASK; + cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; - ss_sel &= ~SELECTOR_RPL_MASK; - if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { + if (efer & EFER_LMA) { cs.d = 0; cs.l = 1; } @@ -2490,10 +2478,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); - ctxt->_eip = msr_data; + ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data; ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); - *reg_write(ctxt, VCPU_REGS_RSP) = msr_data; + *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data : + (u32)msr_data; return X86EMUL_CONTINUE; } From f2b1b66341df87cd33cf7db7cb43c409c0fc23f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20K=C3=BCmmel?= Date: Tue, 4 Nov 2014 12:01:59 +0100 Subject: [PATCH 071/277] =?UTF-8?q?kconfig:=20Fix=20warning=20"=E2=80=98ju?= =?UTF-8?q?mp=E2=80=99=20may=20be=20used=20uninitialized"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2d560306096739e2251329ab5c16059311a151b0 upstream. Warning: In file included from scripts/kconfig/zconf.tab.c:2537:0: scripts/kconfig/menu.c: In function ‘get_symbol_str’: scripts/kconfig/menu.c:590:18: warning: ‘jump’ may be used uninitialized in this function [-Wmaybe-uninitialized] jump->offset = strlen(r->s); Simplifies the test logic because (head && local) means (jump != 0) and makes GCC happy when checking if the jump pointer was initialized. Signed-off-by: Peter Kümmel Signed-off-by: Michal Marek [ dileks: v2: Backported to fit v3.10 ] Cc: Sedat Dilek Signed-off-by: Greg Kroah-Hartman --- scripts/kconfig/menu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index fd3f0180e08f..6af1c42a9cf3 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -525,7 +525,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, { int i, j; struct menu *submenu[8], *menu, *location = NULL; - struct jump_key *jump; + struct jump_key *jump = NULL; str_printf(r, _("Prompt: %s\n"), _(prop->text)); menu = prop->menu->parent; @@ -563,7 +563,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, str_printf(r, _(" Location:\n")); for (j = 4; --i >= 0; j += 2) { menu = submenu[i]; - if (head && location && menu == location) + if (jump && menu == location) jump->offset = r->len - 1; str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu))); From 6637ecd306a94a03dd5b8e4e8d3f260d9877c5b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Oct 2014 19:19:16 -0400 Subject: [PATCH 072/277] move d_rcu from overlapping d_child to overlapping d_alias commit 946e51f2bf37f1656916eb75bd0742ba33983c28 upstream. Signed-off-by: Al Viro Cc: Ben Hutchings [hujianyang: Backported to 3.10 refer to the work of Ben Hutchings in 3.2: - Apply name changes in all the different places we use d_alias and d_child - Move the WARN_ON() in __d_free() to d_free() as we don't have dentry_free()] Signed-off-by: hujianyang Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- fs/affs/amigaffs.c | 2 +- fs/autofs4/expire.c | 12 ++-- fs/autofs4/root.c | 2 +- fs/ceph/dir.c | 8 +-- fs/ceph/inode.c | 6 +- fs/cifs/inode.c | 2 +- fs/coda/cache.c | 2 +- fs/dcache.c | 72 +++++++++++------------ fs/debugfs/inode.c | 6 +- fs/exportfs/expfs.c | 2 +- fs/libfs.c | 12 ++-- fs/ncpfs/dir.c | 2 +- fs/ncpfs/ncplib_kernel.h | 4 +- fs/nfs/getroot.c | 2 +- fs/notify/fsnotify.c | 4 +- fs/ocfs2/dcache.c | 2 +- include/linux/dcache.h | 8 +-- kernel/cgroup.c | 2 +- kernel/trace/trace.c | 4 +- kernel/trace/trace_events.c | 2 +- security/selinux/selinuxfs.c | 6 +- 22 files changed, 82 insertions(+), 82 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 35f77a42bedf..c5c5788e8a13 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -164,7 +164,7 @@ static void spufs_prune_dir(struct dentry *dir) struct dentry *dentry, *tmp; mutex_lock(&dir->d_inode->i_mutex); - list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { dget_dlock(dentry); diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index d9a43674cb94..9cca0ea4e479 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -126,7 +126,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 13ddec92341c..8ad277990eac 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -91,7 +91,7 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev, spin_lock(&root->d_lock); if (prev) - next = prev->d_u.d_child.next; + next = prev->d_child.next; else { prev = dget_dlock(root); next = prev->d_subdirs.next; @@ -105,13 +105,13 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev, return NULL; } - q = list_entry(next, struct dentry, d_u.d_child); + q = list_entry(next, struct dentry, d_child); spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); /* Already gone or negative dentry (under construction) - try next */ if (q->d_count == 0 || !simple_positive(q)) { spin_unlock(&q->d_lock); - next = q->d_u.d_child.next; + next = q->d_child.next; goto cont; } dget_dlock(q); @@ -161,13 +161,13 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, goto relock; } spin_unlock(&p->d_lock); - next = p->d_u.d_child.next; + next = p->d_child.next; p = parent; if (next != &parent->d_subdirs) break; } } - ret = list_entry(next, struct dentry, d_u.d_child); + ret = list_entry(next, struct dentry, d_child); spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); /* Negative dentry - try next */ @@ -447,7 +447,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, spin_lock(&sbi->lookup_lock); spin_lock(&expired->d_parent->d_lock); spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + list_move(&expired->d_parent->d_subdirs, &expired->d_child); spin_unlock(&expired->d_lock); spin_unlock(&expired->d_parent->d_lock); spin_unlock(&sbi->lookup_lock); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 085da86e07c2..79ab4cb3590a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -655,7 +655,7 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry) /* only consider parents below dentrys in the root */ if (IS_ROOT(parent->d_parent)) return; - d_child = &dentry->d_u.d_child; + d_child = &dentry->d_child; /* Set parent managed if it's becoming empty */ if (d_child->next == &parent->d_subdirs && d_child->prev == &parent->d_subdirs) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f02d82b7933e..ccb43298e272 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -103,7 +103,7 @@ static unsigned fpos_off(loff_t p) /* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on - * d_u.d_child when we initially get results back from the MDS, and + * d_child when we initially get results back from the MDS, and * falling back to a "normal" sync readdir if any dentries in the dir * are dropped. * @@ -139,11 +139,11 @@ static int __dcache_readdir(struct file *filp, p = parent->d_subdirs.prev; dout(" initial p %p/%p\n", p->prev, p->next); } else { - p = last->d_u.d_child.prev; + p = last->d_child.prev; } more: - dentry = list_entry(p, struct dentry, d_u.d_child); + dentry = list_entry(p, struct dentry, d_child); di = ceph_dentry(dentry); while (1) { dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, @@ -165,7 +165,7 @@ static int __dcache_readdir(struct file *filp, !dentry->d_inode ? " null" : ""); spin_unlock(&dentry->d_lock); p = p->prev; - dentry = list_entry(p, struct dentry, d_u.d_child); + dentry = list_entry(p, struct dentry, d_child); di = ceph_dentry(dentry); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index be0f7e20d62e..0cf23a7b88c2 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -867,9 +867,9 @@ static void ceph_set_dentry_offset(struct dentry *dn) spin_lock(&dir->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &dir->d_subdirs); + list_move(&dn->d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, - dn->d_u.d_child.prev, dn->d_u.d_child.next); + dn->d_child.prev, dn->d_child.next); spin_unlock(&dn->d_lock); spin_unlock(&dir->d_lock); } @@ -1296,7 +1296,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, /* reorder parent's d_subdirs */ spin_lock(&parent->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &parent->d_subdirs); + list_move(&dn->d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); spin_unlock(&parent->d_lock); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 0dee93706c98..54304ccae7e7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -832,7 +832,7 @@ inode_has_hashed_dentries(struct inode *inode) struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 1da168c61d35..9bc1147a6c5d 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -92,7 +92,7 @@ static void coda_flag_children(struct dentry *parent, int flag) struct dentry *de; spin_lock(&parent->d_lock); - list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(de, &parent->d_subdirs, d_child) { /* don't know what to do with negative dentries */ if (de->d_inode ) coda_flag_inode(de->d_inode, flag); diff --git a/fs/dcache.c b/fs/dcache.c index 25c0a1b5f6c0..b3cb521809e9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -43,7 +43,7 @@ /* * Usage: * dcache->d_inode->i_lock protects: - * - i_dentry, d_alias, d_inode of aliases + * - i_dentry, d_u.d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_anon bl list spinlock protects: @@ -58,7 +58,7 @@ * - d_unhashed() * - d_parent and d_subdirs * - childrens' d_child and d_parent - * - d_alias, d_inode + * - d_u.d_alias, d_inode * * Ordering: * dentry->d_inode->i_lock @@ -215,7 +215,6 @@ static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - WARN_ON(!hlist_unhashed(&dentry->d_alias)); if (dname_external(dentry)) kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); @@ -226,6 +225,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { + WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); BUG_ON(dentry->d_count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) @@ -264,7 +264,7 @@ static void dentry_iput(struct dentry * dentry) struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_u.d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -288,7 +288,7 @@ static void dentry_unlink_inode(struct dentry * dentry) { struct inode *inode = dentry->d_inode; dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_u.d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -364,7 +364,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(parent->d_lock) __releases(dentry->d_inode->i_lock) { - list_del(&dentry->d_u.d_child); + list_del(&dentry->d_child); /* * Inform try_to_ascend() that we are no longer attached to the * dentry tree @@ -660,7 +660,7 @@ static struct dentry *__d_find_alias(struct inode *inode, int want_discon) again: discon_alias = NULL; - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -713,7 +713,7 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; restart: spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); @@ -893,7 +893,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* descend to the first leaf in the current subtree */ while (!list_empty(&dentry->d_subdirs)) dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); + struct dentry, d_child); /* consume the dentries from this leaf up through its parents * until we find one with children or run out altogether */ @@ -927,17 +927,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) if (IS_ROOT(dentry)) { parent = NULL; - list_del(&dentry->d_u.d_child); + list_del(&dentry->d_child); } else { parent = dentry->d_parent; parent->d_count--; - list_del(&dentry->d_u.d_child); + list_del(&dentry->d_child); } inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_u.d_alias); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else @@ -955,7 +955,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) } while (list_empty(&dentry->d_subdirs)); dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); + struct dentry, d_child); } } @@ -1048,7 +1048,7 @@ int have_submounts(struct dentry *parent) resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1075,7 +1075,7 @@ int have_submounts(struct dentry *parent) this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) goto rename_retry; - next = child->d_u.d_child.next; + next = child->d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); @@ -1131,7 +1131,7 @@ static int select_parent(struct dentry *parent, struct list_head *dispose) resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1182,7 +1182,7 @@ static int select_parent(struct dentry *parent, struct list_head *dispose) this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) goto rename_retry; - next = child->d_u.d_child.next; + next = child->d_child.next; goto resume; } out: @@ -1278,8 +1278,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); - INIT_HLIST_NODE(&dentry->d_alias); - INIT_LIST_HEAD(&dentry->d_u.d_child); + INIT_HLIST_NODE(&dentry->d_u.d_alias); + INIT_LIST_HEAD(&dentry->d_child); d_set_d_op(dentry, dentry->d_sb->s_d_op); this_cpu_inc(nr_dentry); @@ -1309,7 +1309,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) */ __dget_dlock(parent); dentry->d_parent = parent; - list_add(&dentry->d_u.d_child, &parent->d_subdirs); + list_add(&dentry->d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); return dentry; @@ -1369,7 +1369,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) { if (unlikely(IS_AUTOMOUNT(inode))) dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; - hlist_add_head(&dentry->d_alias, &inode->i_dentry); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); @@ -1394,7 +1394,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); @@ -1433,7 +1433,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, return NULL; } - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or @@ -1459,7 +1459,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) { struct dentry *result; - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) spin_lock(&inode->i_lock); @@ -1502,7 +1502,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) if (hlist_empty(&inode->i_dentry)) return NULL; - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); __dget(alias); return alias; } @@ -1576,7 +1576,7 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_lock(&tmp->d_lock); tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; - hlist_add_head(&tmp->d_alias, &inode->i_dentry); + hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); hlist_bl_unlock(&tmp->d_sb->s_anon); @@ -2019,7 +2019,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) struct dentry *child; spin_lock(&dparent->d_lock); - list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dparent->d_subdirs, d_child) { if (dentry == child) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); __dget_dlock(dentry); @@ -2266,8 +2266,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) /* Unhash the target: dput() will then get rid of it */ __d_drop(target); - list_del(&dentry->d_u.d_child); - list_del(&target->d_u.d_child); + list_del(&dentry->d_child); + list_del(&target->d_child); /* Switch the names.. */ switch_names(dentry, target); @@ -2277,15 +2277,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) if (IS_ROOT(dentry)) { dentry->d_parent = target->d_parent; target->d_parent = target; - INIT_LIST_HEAD(&target->d_u.d_child); + INIT_LIST_HEAD(&target->d_child); } else { swap(dentry->d_parent, target->d_parent); /* And add them back to the (new) parent lists */ - list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); + list_add(&target->d_child, &target->d_parent->d_subdirs); } - list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); @@ -2392,9 +2392,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) swap(dentry->d_name.hash, anon->d_name.hash); dentry->d_parent = dentry; - list_del_init(&dentry->d_u.d_child); + list_del_init(&dentry->d_child); anon->d_parent = dparent; - list_move(&anon->d_u.d_child, &dparent->d_subdirs); + list_move(&anon->d_child, &dparent->d_subdirs); write_seqcount_end(&dentry->d_seq); write_seqcount_end(&anon->d_seq); @@ -2933,7 +2933,7 @@ void d_genocide(struct dentry *root) resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -2963,7 +2963,7 @@ void d_genocide(struct dentry *root) this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) goto rename_retry; - next = child->d_u.d_child.next; + next = child->d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 7269ec329c01..26d7fff8d78e 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -545,7 +545,7 @@ void debugfs_remove_recursive(struct dentry *dentry) parent = dentry; down: mutex_lock(&parent->d_inode->i_mutex); - list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry_safe(child, next, &parent->d_subdirs, d_child) { if (!debugfs_positive(child)) continue; @@ -566,8 +566,8 @@ void debugfs_remove_recursive(struct dentry *dentry) mutex_lock(&parent->d_inode->i_mutex); if (child != dentry) { - next = list_entry(child->d_u.d_child.next, struct dentry, - d_u.d_child); + next = list_entry(child->d_child.next, struct dentry, + d_child); goto up; } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 262fc9940982..b4eec4c9a790 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -50,7 +50,7 @@ find_acceptable_alias(struct dentry *result, inode = result->d_inode; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/libfs.c b/fs/libfs.c index 916da8c4158b..1299bd5e07b7 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -104,18 +104,18 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) spin_lock(&dentry->d_lock); /* d_lock not required for cursor */ - list_del(&cursor->d_u.d_child); + list_del(&cursor->d_child); p = dentry->d_subdirs.next; while (n && p != &dentry->d_subdirs) { struct dentry *next; - next = list_entry(p, struct dentry, d_u.d_child); + next = list_entry(p, struct dentry, d_child); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(next)) n--; spin_unlock(&next->d_lock); p = p->next; } - list_add_tail(&cursor->d_u.d_child, p); + list_add_tail(&cursor->d_child, p); spin_unlock(&dentry->d_lock); } } @@ -139,7 +139,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) { struct dentry *dentry = filp->f_path.dentry; struct dentry *cursor = filp->private_data; - struct list_head *p, *q = &cursor->d_u.d_child; + struct list_head *p, *q = &cursor->d_child; ino_t ino; int i = filp->f_pos; @@ -165,7 +165,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; - next = list_entry(p, struct dentry, d_u.d_child); + next = list_entry(p, struct dentry, d_child); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (!simple_positive(next)) { spin_unlock(&next->d_lock); @@ -289,7 +289,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dentry->d_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dentry->d_subdirs, d_child) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(child)) { spin_unlock(&child->d_lock); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 6792ce11f2bf..c578ba9949e6 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -391,7 +391,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_u.d_child); + dent = list_entry(next, struct dentry, d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget(dent); diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 32c06587351a..6d5e7c56c79d 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -194,7 +194,7 @@ ncp_renew_dentries(struct dentry *parent) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); + dentry = list_entry(next, struct dentry, d_child); if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); @@ -216,7 +216,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); + dentry = list_entry(next, struct dentry, d_child); dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); next = next->next; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 44efaa8c5f78..0fe3ced6438c 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -58,7 +58,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i */ spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); - hlist_del_init(&sb->s_root->d_alias); + hlist_del_init(&sb->s_root->d_u.d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&sb->s_root->d_inode->i_lock); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4bb21d67d9b1..a3153e2d0f1f 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -63,14 +63,14 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ spin_lock(&alias->d_lock); - list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &alias->d_subdirs, d_child) { if (!child->d_inode) continue; diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index ef999729e274..ce37013b4a59 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -172,7 +172,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9be5ac960fd8..c1999d1fe6f8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -120,15 +120,15 @@ struct dentry { void *d_fsdata; /* fs-specific data */ struct list_head d_lru; /* LRU list */ + struct list_head d_child; /* child of parent list */ + struct list_head d_subdirs; /* our children */ /* - * d_child and d_rcu can share memory + * d_alias and d_rcu can share memory */ union { - struct list_head d_child; /* child of parent list */ + struct hlist_node d_alias; /* inode alias list */ struct rcu_head d_rcu; } d_u; - struct list_head d_subdirs; /* our children */ - struct hlist_node d_alias; /* inode alias list */ }; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d0def7fc2848..ef130605ac43 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -984,7 +984,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) parent = dentry->d_parent; spin_lock(&parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - list_del_init(&dentry->d_u.d_child); + list_del_init(&dentry->d_child); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); remove_dir(dentry); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d7e8098e768..640e4c44b170 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6063,7 +6063,7 @@ static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t m int ret; /* Paranoid: Make sure the parent is the "instances" directory */ - parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); + parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); if (WARN_ON_ONCE(parent != trace_instance_dir)) return -ENOENT; @@ -6090,7 +6090,7 @@ static int instance_rmdir(struct inode *inode, struct dentry *dentry) int ret; /* Paranoid: Make sure the parent is the "instances" directory */ - parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); + parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); if (WARN_ON_ONCE(parent != trace_instance_dir)) return -ENOENT; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 001b349af939..5a898f15bfc6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -425,7 +425,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) if (dir) { spin_lock(&dir->d_lock); /* probably unneeded */ - list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dir->d_subdirs, d_child) { if (child->d_inode) /* probably unneeded */ child->d_inode->i_private = NULL; } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 464be51025f6..a96bed4db3e8 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1190,7 +1190,7 @@ static void sel_remove_entries(struct dentry *de) spin_lock(&de->d_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + struct dentry *d = list_entry(node, struct dentry, d_child); spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); @@ -1664,12 +1664,12 @@ static void sel_remove_classes(void) list_for_each(class_node, &class_dir->d_subdirs) { struct dentry *class_subdir = list_entry(class_node, - struct dentry, d_u.d_child); + struct dentry, d_child); struct list_head *class_subdir_node; list_for_each(class_subdir_node, &class_subdir->d_subdirs) { struct dentry *d = list_entry(class_subdir_node, - struct dentry, d_u.d_child); + struct dentry, d_child); if (d->d_inode) if (d->d_inode->i_mode & S_IFDIR) From 5f03ac13d87590b0ee879c77e68df63a3d9b3e07 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Oct 2014 19:31:10 -0400 Subject: [PATCH 073/277] deal with deadlock in d_walk() commit ca5358ef75fc69fee5322a38a340f5739d997c10 upstream. ... by not hitting rename_retry for reasons other than rename having happened. In other words, do _not_ restart when finding that between unlocking the child and locking the parent the former got into __dentry_kill(). Skip the killed siblings instead... Signed-off-by: Al Viro Cc: Ben Hutchings [hujianyang: Backported to 3.10 refer to the work of Ben Hutchings in 3.2: - As we only have try_to_ascend() and not d_walk(), apply this change to all callers of try_to_ascend() - Adjust context to make __dentry_kill() apply to d_kill()] Signed-off-by: hujianyang Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 102 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 40 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index b3cb521809e9..b1b8a7e9db0c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -364,9 +364,9 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(parent->d_lock) __releases(dentry->d_inode->i_lock) { - list_del(&dentry->d_child); + __list_del_entry(&dentry->d_child); /* - * Inform try_to_ascend() that we are no longer attached to the + * Inform ascending readers that we are no longer attached to the * dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; @@ -987,35 +987,6 @@ void shrink_dcache_for_umount(struct super_block *sb) } } -/* - * This tries to ascend one level of parenthood, but - * we can race with renaming, so we need to re-check - * the parenthood after dropping the lock and check - * that the sequence number still matches. - */ -static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) -{ - struct dentry *new = old->d_parent; - - rcu_read_lock(); - spin_unlock(&old->d_lock); - spin_lock(&new->d_lock); - - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (new != old->d_parent || - (old->d_flags & DCACHE_DENTRY_KILLED) || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&new->d_lock); - new = NULL; - } - rcu_read_unlock(); - return new; -} - - /* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs @@ -1070,17 +1041,32 @@ int have_submounts(struct dentry *parent) /* * All done at this level ... ascend and resume the search. */ + rcu_read_lock(); +ascend: if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, locked, seq); - if (!this_parent) + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* might go back up the wrong parent if we have had a rename. */ + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; next = child->d_child.next; + while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + next = next->next; + } + rcu_read_unlock(); goto resume; } - spin_unlock(&this_parent->d_lock); if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) write_sequnlock(&rename_lock); return 0; /* No mount points found in tree */ @@ -1092,6 +1078,8 @@ int have_submounts(struct dentry *parent) return 1; rename_retry: + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) goto again; locked = 1; @@ -1177,23 +1165,40 @@ static int select_parent(struct dentry *parent, struct list_head *dispose) /* * All done at this level ... ascend and resume the search. */ + rcu_read_lock(); +ascend: if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, locked, seq); - if (!this_parent) + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* might go back up the wrong parent if we have had a rename. */ + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; next = child->d_child.next; + while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + next = next->next; + } + rcu_read_unlock(); goto resume; } out: - spin_unlock(&this_parent->d_lock); if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) write_sequnlock(&rename_lock); return found; rename_retry: + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (found) return found; if (locked) @@ -2954,26 +2959,43 @@ void d_genocide(struct dentry *root) } spin_unlock(&dentry->d_lock); } + rcu_read_lock(); +ascend: if (this_parent != root) { struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; this_parent->d_count--; } - this_parent = try_to_ascend(this_parent, locked, seq); - if (!this_parent) + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* might go back up the wrong parent if we have had a rename. */ + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; next = child->d_child.next; + while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + next = next->next; + } + rcu_read_unlock(); goto resume; } - spin_unlock(&this_parent->d_lock); if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) write_sequnlock(&rename_lock); return; rename_retry: + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) goto again; locked = 1; From 0c42d1fbb33f7e3fc97a4854e1f9804951ebdd0d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Jan 2015 10:51:32 -0800 Subject: [PATCH 074/277] vm: add VM_FAULT_SIGSEGV handling support commit 33692f27597fcab536d7cbbcc8f52905133e4aa7 upstream. The core VM already knows about VM_FAULT_SIGBUS, but cannot return a "you should SIGSEGV" error, because the SIGSEGV case was generally handled by the caller - usually the architecture fault handler. That results in lots of duplication - all the architecture fault handlers end up doing very similar "look up vma, check permissions, do retries etc" - but it generally works. However, there are cases where the VM actually wants to SIGSEGV, and applications _expect_ SIGSEGV. In particular, when accessing the stack guard page, libsigsegv expects a SIGSEGV. And it usually got one, because the stack growth is handled by that duplicated architecture fault handler. However, when the generic VM layer started propagating the error return from the stack expansion in commit fee7e49d4514 ("mm: propagate error from stack expansion even for guard page"), that now exposed the existing VM_FAULT_SIGBUS result to user space. And user space really expected SIGSEGV, not SIGBUS. To fix that case, we need to add a VM_FAULT_SIGSEGV, and teach all those duplicate architecture fault handlers about it. They all already have the code to handle SIGSEGV, so it's about just tying that new return value to the existing code, but it's all a bit annoying. This is the mindless minimal patch to do this. A more extensive patch would be to try to gather up the mostly shared fault handling logic into one generic helper routine, and long-term we really should do that cleanup. Just from this patch, you can generally see that most architectures just copied (directly or indirectly) the old x86 way of doing things, but in the meantime that original x86 model has been improved to hold the VM semaphore for shorter times etc and to handle VM_FAULT_RETRY and other "newer" things, so it would be a good idea to bring all those improvements to the generic case and teach other architectures about them too. Reported-and-tested-by: Takashi Iwai Tested-by: Jan Engelhardt Acked-by: Heiko Carstens # "s390 still compiles and boots" Cc: linux-arch@vger.kernel.org Signed-off-by: Linus Torvalds [shengyong: Backport to 3.10 - adjust context - ignore modification for arch nios2, because 3.10 does not support it - ignore modification for driver lustre, because 3.10 does not support it - ignore VM_FAULT_FALLBACK in VM_FAULT_ERROR, becase 3.10 does not support this flag - add SIGSEGV handling to powerpc/cell spu_fault.c, because 3.10 does not separate it to copro_fault.c - add SIGSEGV handling in mm/memory.c, because 3.10 does not separate it to gup.c ] Signed-off-by: Sheng Yong Signed-off-by: Greg Kroah-Hartman --- arch/alpha/mm/fault.c | 2 ++ arch/arc/mm/fault.c | 2 ++ arch/avr32/mm/fault.c | 2 ++ arch/cris/mm/fault.c | 2 ++ arch/frv/mm/fault.c | 2 ++ arch/ia64/mm/fault.c | 2 ++ arch/m32r/mm/fault.c | 2 ++ arch/m68k/mm/fault.c | 2 ++ arch/metag/mm/fault.c | 2 ++ arch/microblaze/mm/fault.c | 2 ++ arch/mips/mm/fault.c | 2 ++ arch/mn10300/mm/fault.c | 2 ++ arch/openrisc/mm/fault.c | 2 ++ arch/parisc/mm/fault.c | 2 ++ arch/powerpc/mm/fault.c | 2 ++ arch/powerpc/platforms/cell/spu_fault.c | 2 +- arch/s390/mm/fault.c | 6 ++++++ arch/score/mm/fault.c | 2 ++ arch/sh/mm/fault.c | 2 ++ arch/sparc/mm/fault_32.c | 2 ++ arch/sparc/mm/fault_64.c | 2 ++ arch/tile/mm/fault.c | 2 ++ arch/um/kernel/trap.c | 2 ++ arch/x86/mm/fault.c | 2 ++ arch/xtensa/mm/fault.c | 2 ++ include/linux/mm.h | 5 +++-- mm/ksm.c | 2 +- mm/memory.c | 5 +++-- 28 files changed, 60 insertions(+), 6 deletions(-) diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 98838a05ba6d..9d0ac091a52a 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -156,6 +156,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 50533b750a99..4b70fc309c17 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -160,6 +160,8 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address, /* TBD: switch to pagefault_out_of_memory() */ if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index 0eca93327195..d223a8b57c1e 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -142,6 +142,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 1790f22e71a2..2686a7aa8ec8 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -176,6 +176,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 9a66372fc7c7..ec4917ddf678 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -168,6 +168,8 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 7225dad87094..ba5ba7accd0d 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -172,6 +172,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re */ if (fault & VM_FAULT_OOM) { goto out_of_memory; + } else if (fault & VM_FAULT_SIGSEGV) { + goto bad_area; } else if (fault & VM_FAULT_SIGBUS) { signal = SIGBUS; goto bad_area; diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index e9c6a8014bd6..e3d4d4890104 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -200,6 +200,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index eb1d61f68725..f0eef0491f77 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -153,6 +153,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto map_err; else if (fault & VM_FAULT_SIGBUS) goto bus_err; BUG(); diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 332680e5ebf2..2de5dc695a87 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -141,6 +141,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index fa4cf52aa7a6..d46a5ebb7570 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -224,6 +224,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 0214a43b9911..c40a8d1c43ba 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -157,6 +157,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 3516cbdf1ee9..0c2cc5d39c8e 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -262,6 +262,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 0703acf7d327..230ac20ae794 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index d10d27a720c0..c45130f56a93 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -220,6 +220,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, */ if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto bad_area; BUG(); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d9196c9f93d9..d51a0c110eb4 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -425,6 +425,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { + if (fault & VM_FAULT_SIGSEGV) + goto bad_area; rc = mm_fault_error(regs, address, fault); if (rc >= MM_FAULT_RETURN) goto bail; diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/platforms/cell/spu_fault.c index 641e7273d75a..62f3e4e48a0b 100644 --- a/arch/powerpc/platforms/cell/spu_fault.c +++ b/arch/powerpc/platforms/cell/spu_fault.c @@ -75,7 +75,7 @@ int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; goto out_unlock; - } else if (*flt & VM_FAULT_SIGBUS) { + } else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { ret = -EFAULT; goto out_unlock; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 416facec4a33..d214321db727 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -244,6 +244,12 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) do_no_context(regs); else pagefault_out_of_memory(); + } else if (fault & VM_FAULT_SIGSEGV) { + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + do_no_context(regs); + else + do_sigsegv(regs, SEGV_MAPERR); } else if (fault & VM_FAULT_SIGBUS) { /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 52238983527d..6860beb2a280 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -114,6 +114,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 541dc6101508..a58fec9b55e0 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -353,6 +353,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, } else { if (fault & VM_FAULT_SIGBUS) do_sigbus(regs, error_code, address); + else if (fault & VM_FAULT_SIGSEGV) + bad_area(regs, error_code, address); else BUG(); } diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 59dbd4645725..163c78712110 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -252,6 +252,8 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 3841a081beb3..ac2db923e51a 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -443,6 +443,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 3ff289f422e6..12b732f593bb 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -446,6 +446,8 @@ static int handle_page_fault(struct pt_regs *regs, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 5c3aef74237f..06ab0ebe0a0f 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -80,6 +80,8 @@ int handle_page_fault(unsigned long address, unsigned long ip, if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { goto out_of_memory; + } else if (fault & VM_FAULT_SIGSEGV) { + goto out; } else if (fault & VM_FAULT_SIGBUS) { err = -EACCES; goto out; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d8b1ff68dbb9..18bce18b2a7e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -873,6 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) do_sigbus(regs, error_code, address, fault); + else if (fault & VM_FAULT_SIGSEGV) + bad_area_nosemaphore(regs, error_code, address); else BUG(); } diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 70fa7bc42b4a..38278337d85e 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -117,6 +117,8 @@ void do_page_fault(struct pt_regs *regs) if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/include/linux/mm.h b/include/linux/mm.h index c4085192c2b6..53b0d70120a1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -891,6 +891,7 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ #define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ +#define VM_FAULT_SIGSEGV 0x0040 #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ @@ -898,8 +899,8 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ - VM_FAULT_HWPOISON_LARGE) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ + VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE) /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) diff --git a/mm/ksm.c b/mm/ksm.c index 784d1e4bc385..7bf748f30aab 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -376,7 +376,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) else ret = VM_FAULT_WRITE; put_page(page); - } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM))); + } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); /* * We must loop because handle_mm_fault() may back out if there's * any difficulty e.g. if pte accessed bit gets updated concurrently. diff --git a/mm/memory.c b/mm/memory.c index 04232bb173f0..cca323085876 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1844,7 +1844,8 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, else return -EFAULT; } - if (ret & VM_FAULT_SIGBUS) + if (ret & (VM_FAULT_SIGBUS | + VM_FAULT_SIGSEGV)) return i ? i : -EFAULT; BUG(); } @@ -1954,7 +1955,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, return -ENOMEM; if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) return -EHWPOISON; - if (ret & VM_FAULT_SIGBUS) + if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) return -EFAULT; BUG(); } From 1f74b26b0f118db0e658cbef2816d11d5ae0242c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Jan 2015 11:15:17 -0800 Subject: [PATCH 075/277] vm: make stack guard page errors return VM_FAULT_SIGSEGV rather than SIGBUS commit 9c145c56d0c8a0b62e48c8d71e055ad0fb2012ba upstream. The stack guard page error case has long incorrectly caused a SIGBUS rather than a SIGSEGV, but nobody actually noticed until commit fee7e49d4514 ("mm: propagate error from stack expansion even for guard page") because that error case was never actually triggered in any normal situations. Now that we actually report the error, people noticed the wrong signal that resulted. So far, only the test suite of libsigsegv seems to have actually cared, but there are real applications that use libsigsegv, so let's not wait for any of those to break. Reported-and-tested-by: Takashi Iwai Tested-by: Jan Engelhardt Acked-by: Heiko Carstens # "s390 still compiles and boots" Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index cca323085876..e6b1da3a8924 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3232,7 +3232,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Check if we need to add a guard page to the stack */ if (check_stack_guard_page(vma, address) < 0) - return VM_FAULT_SIGBUS; + return VM_FAULT_SIGSEGV; /* Use the zero-page for reads */ if (!(flags & FAULT_FLAG_WRITE)) { From 2d892c7a1b4aa0dd42004a217c13cc90a37ae4dd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 15 Dec 2014 14:46:06 -0800 Subject: [PATCH 076/277] x86: mm: move mmap_sem unlock from mm_fault_error() to caller commit 7fb08eca45270d0ae86e1ad9d39c40b7a55d0190 upstream. This replaces four copies in various stages of mm_fault_error() handling with just a single one. It will also allow for more natural placement of the unlocking after some further cleanup. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/fault.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 18bce18b2a7e..e4780b052531 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -812,11 +812,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; int code = BUS_ADRERR; - up_read(&mm->mmap_sem); - /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) { no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); @@ -847,7 +844,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { if (fatal_signal_pending(current) && !(error_code & PF_USER)) { - up_read(¤t->mm->mmap_sem); no_context(regs, error_code, address, 0, 0); return; } @@ -855,14 +851,11 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & VM_FAULT_OOM) { /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) { - up_read(¤t->mm->mmap_sem); no_context(regs, error_code, address, SIGSEGV, SEGV_MAPERR); return; } - up_read(¤t->mm->mmap_sem); - /* * We ran out of memory, call the OOM killer, and return the * userspace (which will retry the fault, or kill us if we got @@ -1195,6 +1188,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) return; if (unlikely(fault & VM_FAULT_ERROR)) { + up_read(&mm->mmap_sem); mm_fault_error(regs, error_code, address, fault); return; } From ebe7e30a9986c36388a5f304b10715d403a57097 Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Fri, 5 Sep 2014 14:28:47 -0500 Subject: [PATCH 077/277] sb_edac: avoid INTERNAL ERROR message in EDAC with unspecified channel commit 351fc4a99d49fde63fe5ab7412beb35c40d27269 upstream. Intel IA32 SDM Table 15-14 defines channel 0xf as 'not specified', but EDAC doesn't know about this and returns and INTERNAL ERROR when the channel is greater than NUM_CHANNELS: kernel: [ 1538.886456] CPU 0: Machine Check Exception: 0 Bank 1: 940000000000009f kernel: [ 1538.886669] TSC 2bc68b22e7e812 ADDR 46dae7000 MISC 0 PROCESSOR 0:306e4 TIME 1390414572 SOCKET 0 APIC 0 kernel: [ 1538.971948] EDAC MC1: INTERNAL ERROR: channel value is out of range (15 >= 4) kernel: [ 1538.972203] EDAC MC1: 0 CE memory read error on unknown memory (slot:0 page:0x46dae7 offset:0x0 grain:0 syndrome:0x0 - area:DRAM err_code:0000:009f socket:1 channel_mask:1 rank:0) This commit changes sb_edac to forward a channel of -1 to EDAC if the channel is not specified. edac_mc_handle_error() sets the channel to -1 internally after the error message anyway, so this commit should have no effect other than avoiding the INTERNAL ERROR message when the channel is not specified. Signed-off-by: Seth Jennings Signed-off-by: Mauro Carvalho Chehab Cc: Vinson Lee Signed-off-by: Greg Kroah-Hartman --- drivers/edac/sb_edac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index e04462b60756..f505e4ca6d58 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -270,8 +270,9 @@ static const u32 correrrthrsld[] = { * sbridge structs */ -#define NUM_CHANNELS 4 -#define MAX_DIMMS 3 /* Max DIMMS per channel */ +#define NUM_CHANNELS 4 +#define MAX_DIMMS 3 /* Max DIMMS per channel */ +#define CHANNEL_UNSPECIFIED 0xf /* Intel IA32 SDM 15-14 */ struct sbridge_info { u32 mcmtr; @@ -1451,6 +1452,9 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, /* FIXME: need support for channel mask */ + if (channel == CHANNEL_UNSPECIFIED) + channel = -1; + /* Call the helper to output message */ edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, From 9e505ae71ac8c8f643498a9e8c01ace136140606 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 29 Jan 2015 19:15:33 -0800 Subject: [PATCH 078/277] arc: mm: Fix build failure commit e262eb9381ad51b5de7a9e762ee773bbd25ce650 upstream. Fix misspelled define. Fixes: 33692f27597f ("vm: add VM_FAULT_SIGSEGV handling support") Signed-off-by: Guenter Roeck Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arc/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 4b70fc309c17..08f65bcf9130 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -160,7 +160,7 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address, /* TBD: switch to pagefault_out_of_memory() */ if (fault & VM_FAULT_OOM) goto out_of_memory; - else if (fault & VM_FAULT_SIGSEV) + else if (fault & VM_FAULT_SIGSEGV) goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; From c190d250d8db5620218d5d56999580ed8488ec24 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 11 Feb 2015 03:16:35 +0000 Subject: [PATCH 079/277] dcache: Fix locking bugs in backported "deal with deadlock in d_walk()" commit 20defcec264ceab2630356fb9d397f3d237b5e6d upstream in 3.2-stable Steven Rostedt reported: > Porting -rt to the latest 3.2 stable tree I triggered this bug: > > ===================================== > [ BUG: bad unlock balance detected! ] > ------------------------------------- > rm/1638 is trying to release lock (rcu_read_lock) at: > [] rcu_read_unlock+0x0/0x23 > but there are no more locks to release! > > other info that might help us debug this: > 2 locks held by rm/1638: > #0: (&sb->s_type->i_mutex_key#9/1){+.+.+.}, at: [] do_rmdir+0x5f/0xd2 > #1: (&sb->s_type->i_mutex_key#9){+.+.+.}, at: [] vfs_rmdir+0x49/0xac > > stack backtrace: > Pid: 1638, comm: rm Not tainted 3.2.66-test-rt96+ #2 > Call Trace: > [] ? printk+0x1d/0x1f > [] print_unlock_inbalance_bug+0xc3/0xcd > [] lock_release_non_nested+0x98/0x1ec > [] ? trace_hardirqs_off_caller+0x18/0x90 > [] ? local_clock+0x2d/0x50 > [] ? d_hash+0x2f/0x2f > [] ? d_hash+0x2f/0x2f > [] lock_release+0x192/0x1ad > [] rcu_read_unlock+0x17/0x23 > [] shrink_dcache_parent+0x227/0x270 > [] vfs_rmdir+0x68/0xac > [] do_rmdir+0x98/0xd2 > [] ? fput+0x1a3/0x1ab > [] ? sysenter_exit+0xf/0x1a > [] ? trace_hardirqs_on_caller+0x118/0x149 > [] sys_unlinkat+0x2b/0x35 > [] sysenter_do_call+0x12/0x12 > > > > > There's a path to calling rcu_read_unlock() without calling > rcu_read_lock() in have_submounts(). > > goto positive; > > positive: > if (!locked && read_seqretry(&rename_lock, seq)) > goto rename_retry; > > rename_retry: > rcu_read_unlock(); > > in the above path, rcu_read_lock() is never done before calling > rcu_read_unlock(); I reviewed locking contexts in all three functions that I changed when backporting "deal with deadlock in d_walk()". It's actually worse than this: - We don't hold this_parent->d_lock at the 'positive' label in have_submounts(), but it is unlocked after 'rename_retry'. - There is an rcu_read_unlock() after the 'out' label in select_parent(), but it's not held at the 'goto out'. Fix all three lock imbalances. Reported-by: Steven Rostedt Signed-off-by: Ben Hutchings Tested-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index b1b8a7e9db0c..efa4602e064f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1072,7 +1072,7 @@ int have_submounts(struct dentry *parent) return 0; /* No mount points found in tree */ positive: if (!locked && read_seqretry(&rename_lock, seq)) - goto rename_retry; + goto rename_retry_unlocked; if (locked) write_sequnlock(&rename_lock); return 1; @@ -1082,6 +1082,7 @@ int have_submounts(struct dentry *parent) rcu_read_unlock(); if (locked) goto again; +rename_retry_unlocked: locked = 1; write_seqlock(&rename_lock); goto again; @@ -1146,6 +1147,7 @@ static int select_parent(struct dentry *parent, struct list_head *dispose) */ if (found && need_resched()) { spin_unlock(&dentry->d_lock); + rcu_read_lock(); goto out; } From 89bf6007dd14768791598eca6fd387b9637226eb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 29 Apr 2015 10:34:22 +0200 Subject: [PATCH 080/277] Linux 3.10.76 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 87909d8302ad..019a6a4b386d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 75 +SUBLEVEL = 76 EXTRAVERSION = NAME = TOSSUG Baby Fish From 8cdfb356b51e29494ca0b9e4e86727d6f841a52d Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Wed, 29 Apr 2015 11:14:23 -0700 Subject: [PATCH 081/277] SELinux: ss: Fix policy write for ioctl operations Security server omits the type field when writing out the contents of the avtab from /sys/fs/selinux/policy. This leads to a corrupt output. No impact on the running kernel or its loaded policy. Impacts CTS neverallow tests. Bug: 20665861 Change-Id: I657e18013dd5a1f40052bc2b02dd8e0afee9bcfb Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/avtab.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index 2e4ff003abcd..dd7466cb2021 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -565,6 +565,9 @@ int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp) return rc; if (cur->key.specified & AVTAB_OP) { + rc = put_entry(&cur->datum.u.ops->type, sizeof(u8), 1, fp); + if (rc) + return rc; for (i = 0; i < ARRAY_SIZE(cur->datum.u.ops->op.perms); i++) buf32[i] = cpu_to_le32(cur->datum.u.ops->op.perms[i]); rc = put_entry(buf32, sizeof(u32), From 5e87fbde59e896a85bfdfd3970b407961b48d9d1 Mon Sep 17 00:00:00 2001 From: Yuvaraj Kumar C D Date: Wed, 18 Sep 2013 15:41:53 +0530 Subject: [PATCH 082/277] ARM: exynos: dts: Update 5250 arch timer node with clock frequency Without the "clock-frequency" property in arch timer node, could able to see the below crash dump. [] (unwind_backtrace+0x0/0xf4) from [] (show_stack+0x10/0x14) [] (show_stack+0x10/0x14) from [] (dump_stack+0x7c/0xb0) [] (dump_stack+0x7c/0xb0) from [] (Ldiv0_64+0x8/0x18) [] (Ldiv0_64+0x8/0x18) from [] (clockevents_config.part.2+0x1c/0x74) [] (clockevents_config.part.2+0x1c/0x74) from [] (clockevents_config_and_register+0x20/0x2c) [] (clockevents_config_and_register+0x20/0x2c) from [] (arch_timer_setup+0xa8/0x134) [] (arch_timer_setup+0xa8/0x134) from [] (arch_timer_init+0x1f4/0x24c) [] (arch_timer_init+0x1f4/0x24c) from [] (clocksource_of_init+0x34/0x58) [] (clocksource_of_init+0x34/0x58) from [] (time_init+0x20/0x2c) [] (time_init+0x20/0x2c) from [] (start_kernel+0x1e0/0x39c) THis is because the Exynos u-boot, for example on the Chromebooks, doesn't set up the CNTFRQ register as expected by arch_timer. Instead, we have to specify the frequency in the device tree like this. Signed-off-by: Yuvaraj Kumar C D [olof: Changed subject, added comment, elaborated on commit message] Signed-off-by: Olof Johansson (cherry picked from commit 4d594dd3028ba8cdfcbd854bde3811a1ee4e36d7) Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/exynos5250.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index fc9fb3d526e2..335f1e8bd9f6 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -89,6 +89,11 @@ timer { <1 14 0xf08>, <1 11 0xf08>, <1 10 0xf08>; + /* Unfortunately we need this since some versions of U-Boot + * on Exynos don't set the CNTFRQ register, so we need the + * value from DT. + */ + clock-frequency = <24000000>; }; combiner:interrupt-controller@10440000 { From 8aa01b2818461745950731539897ab057b54da2a Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 30 Apr 2015 11:02:44 -0700 Subject: [PATCH 083/277] usb: gadget: add audio dependencies to USB_G_ANDROID Since the Android gadget is a superset of the audio and MIDI gadgets, it needs to take on their dependencies too Change-Id: Ib7444962dcdb197e8b7ad66f7a41f7bc40879d2c Signed-off-by: Greg Hackmann --- drivers/usb/gadget/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 67409fda70db..9de2eb28085e 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -820,6 +820,9 @@ config USB_G_PRINTER config USB_G_ANDROID boolean "Android Composite Gadget" + depends on SND + select SND_PCM + select SND_RAWMIDI select USB_F_ACM select USB_LIBCOMPOSITE select USB_U_SERIAL From d71efcedaad9d859a33ca6fa99f3fb548a7c27e4 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 30 Apr 2015 11:05:21 -0700 Subject: [PATCH 084/277] android: base-cfg: add ALSA base-cfg enables USB gadget support, which includes USB audio, which depends on ALSA Change-Id: Icc9a80bebb7e838715d679a096faba761a937333 Signed-off-by: Greg Hackmann --- android/configs/android-base.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 765f36038eda..1bee5d614d1b 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -138,6 +138,8 @@ CONFIG_RT_GROUP_SCHED=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y +CONFIG_SND=y +CONFIG_SOUND=y CONFIG_STAGING=y CONFIG_SWITCH=y CONFIG_SYNC=y From eaa478ab44cd84a788dee5f77ef3171ce83d04cf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 1 May 2015 22:02:47 -0400 Subject: [PATCH 085/277] ipv4: Missing sk_nulls_node_init() in ping_unhash(). If we don't do that, then the poison value is left in the ->pprev backlink. This can cause crashes if we do a disconnect, followed by a connect(). Tested-by: Linus Torvalds Reported-by: Wen Xu Signed-off-by: David S. Miller Bug: 20770158 Change-Id: I944eb20fddea190892c2da681d934801d268096b --- net/ipv4/ping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8642f0044dfa..32d894fa1071 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -154,6 +154,7 @@ void ping_unhash(struct sock *sk) if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); + sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; From 98fc0d41be3806ec5ffa160692a39e90b76898f0 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 22 Apr 2015 14:38:42 -0700 Subject: [PATCH 086/277] android: fiq_debugger: fix cut-off help message fiq_debugger_printf has a 256 byte limit, which was causing the help lines for "kmsg" and "version" to be dropped. Split the long string into two calls. Change-Id: I55f9f030247cc16d13ae6236736311a5ef0c7aa0 Signed-off-by: Colin Cross --- drivers/staging/android/fiq_debugger/fiq_debugger.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index 7d6b4ae8a2cd..1d733624d70a 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -429,7 +429,8 @@ static void fiq_debugger_help(struct fiq_debugger_state *state) " pc PC status\n" " regs Register dump\n" " allregs Extended Register dump\n" - " bt Stack trace\n" + " bt Stack trace\n"); + fiq_debugger_printf(&state->output, " reboot [] Reboot with command \n" " reset [] Hard reset with command \n" " irqs Interupt status\n" From fb138c4699c94b16bf9400394142c6d86fe70226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6hn?= Date: Mon, 20 Apr 2015 09:19:20 +0200 Subject: [PATCH 087/277] ip_forward: Drop frames with attached skb->sk [ Upstream commit 2ab957492d13bb819400ac29ae55911d50a82a13 ] Initial discussion was: [FYI] xfrm: Don't lookup sk_policy for timewait sockets Forwarded frames should not have a socket attached. Especially tw sockets will lead to panics later-on in the stack. This was observed with TPROXY assigning a tw socket and broken policy routing (misconfigured). As a result frame enters forwarding path instead of input. We cannot solve this in TPROXY as it cannot know that policy routing is broken. v2: Remove useless comment Signed-off-by: Sebastian Poehn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_forward.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 31ee5c6033df..479e8a63125a 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -126,6 +126,9 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); + if (unlikely(skb->sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; From aac9fda375bbe75e0b8c033874af25e0f7f5a3a4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Apr 2015 18:32:24 -0700 Subject: [PATCH 088/277] tcp: fix possible deadlock in tcp_send_fin() [ Upstream commit d83769a580f1132ac26439f50068a29b02be535e ] Using sk_stream_alloc_skb() in tcp_send_fin() is dangerous in case a huge process is killed by OOM, and tcp_mem[2] is hit. To be able to free memory we need to make progress, so this patch allows FIN packets to not care about tcp_mem[2], if skb allocation succeeded. In a follow-up patch, we might abort tcp_send_fin() infinite loop in case TIF_MEMDIE is set on this thread, as memory allocator did its best getting extra memory already. This patch reverts d22e15371811 ("tcp: fix tcp fin memory accounting") Fixes: d22e15371811 ("tcp: fix tcp fin memory accounting") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7681a1bbd97f..546371973b9a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2571,6 +2571,21 @@ void tcp_xmit_retransmit_queue(struct sock *sk) } } +/* We allow to exceed memory limits for FIN packets to expedite + * connection tear down and (memory) recovery. + * Otherwise tcp_send_fin() could loop forever. + */ +static void sk_forced_wmem_schedule(struct sock *sk, int size) +{ + int amt, status; + + if (size <= sk->sk_forward_alloc) + return; + amt = sk_mem_pages(size); + sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + sk_memory_allocated_add(sk, amt, &status); +} + /* Send a fin. The caller locks the socket for us. This cannot be * allowed to fail queueing a FIN frame under any circumstances. */ @@ -2593,11 +2608,14 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); + skb = alloc_skb_fclone(MAX_TCP_HEADER, + sk->sk_allocation); if (skb) break; yield(); } + skb_reserve(skb, MAX_TCP_HEADER); + sk_forced_wmem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); From bc9f0ea1c736358f2db263ee2afe5af5a9dfcf2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Apr 2015 10:42:39 -0700 Subject: [PATCH 089/277] tcp: avoid looping in tcp_send_fin() [ Upstream commit 845704a535e9b3c76448f52af1b70e4422ea03fd ] Presence of an unbound loop in tcp_send_fin() had always been hard to explain when analyzing crash dumps involving gigantic dying processes with millions of sockets. Lets try a different strategy : In case of memory pressure, try to add the FIN flag to last packet in write queue, even if packet was already sent. TCP stack will be able to deliver this FIN after a timeout event. Note that this FIN being delivered by a retransmit, it also carries a Push flag given our current implementation. By checking sk_under_memory_pressure(), we anticipate that cooking many FIN packets might deplete tcp memory. In the case we could not allocate a packet, even with __GFP_WAIT allocation, then not sending a FIN seems quite reasonable if it allows to get rid of this socket, free memory, and not block the process from eventually doing other useful work. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 50 +++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 546371973b9a..76c80b59e80f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2573,7 +2573,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) /* We allow to exceed memory limits for FIN packets to expedite * connection tear down and (memory) recovery. - * Otherwise tcp_send_fin() could loop forever. + * Otherwise tcp_send_fin() could be tempted to either delay FIN + * or even be forced to close flow without any FIN. */ static void sk_forced_wmem_schedule(struct sock *sk, int size) { @@ -2586,33 +2587,40 @@ static void sk_forced_wmem_schedule(struct sock *sk, int size) sk_memory_allocated_add(sk, amt, &status); } -/* Send a fin. The caller locks the socket for us. This cannot be - * allowed to fail queueing a FIN frame under any circumstances. +/* Send a FIN. The caller locks the socket for us. + * We should try to send a FIN packet really hard, but eventually give up. */ void tcp_send_fin(struct sock *sk) { + struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = tcp_write_queue_tail(sk); - int mss_now; - /* Optimization, tack on the FIN if we have a queue of - * unsent frames. But be careful about outgoing SACKS - * and IP options. + /* Optimization, tack on the FIN if we have one skb in write queue and + * this skb was not yet sent, or we are under memory pressure. + * Note: in the latter case, FIN packet will be sent after a timeout, + * as TCP stack thinks it has already been transmitted. */ - mss_now = tcp_current_mss(sk); - - if (tcp_send_head(sk) != NULL) { - TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; - TCP_SKB_CB(skb)->end_seq++; + if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { +coalesce: + TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; + if (!tcp_send_head(sk)) { + /* This means tskb was already sent. + * Pretend we included the FIN on previous transmit. + * We need to set tp->snd_nxt to the value it would have + * if FIN had been sent. This is because retransmit path + * does not change tp->snd_nxt. + */ + tp->snd_nxt++; + return; + } } else { - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); - if (skb) - break; - yield(); + skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + if (unlikely(!skb)) { + if (tskb) + goto coalesce; + return; } skb_reserve(skb, MAX_TCP_HEADER); sk_forced_wmem_schedule(sk, skb->truesize); @@ -2621,7 +2629,7 @@ void tcp_send_fin(struct sock *sk) TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); + __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); } /* We get here when a process closes a file descriptor (either due to From bf339141f604388ace1c3f97b9297683f45170dd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 23 Mar 2015 14:07:40 +0000 Subject: [PATCH 090/277] Btrfs: fix log tree corruption when fs mounted with -o discard commit dcc82f4783ad91d4ab654f89f37ae9291cdc846a upstream. While committing a transaction we free the log roots before we write the new super block. Freeing the log roots implies marking the disk location of every node/leaf (metadata extent) as pinned before the new super block is written. This is to prevent the disk location of log metadata extents from being reused before the new super block is written, otherwise we would have a corrupted log tree if before the new super block is written a crash/reboot happens and the location of any log tree metadata extent ended up being reused and rewritten. Even though we pinned the log tree's metadata extents, we were issuing a discard against them if the fs was mounted with the -o discard option, resulting in corruption of the log tree if a crash/reboot happened before writing the new super block - the next time the fs was mounted, during the log replay process we would find nodes/leafs of the log btree with a content full of zeroes, causing the process to fail and require the use of the tool btrfs-zero-log to wipeout the log tree (and all data previously fsynced becoming lost forever). Fix this by not doing a discard when pinning an extent. The discard will be done later when it's safe (after the new super block is committed) at extent-tree.c:btrfs_finish_extent_commit(). Fixes: e688b7252f78 (Btrfs: fix extent pinning bugs in the tree log) Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f99c71e40f8b..07f167a1d271 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6363,12 +6363,11 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, return -ENOSPC; } - if (btrfs_test_opt(root, DISCARD)) - ret = btrfs_discard_extent(root, start, len, NULL); - if (pin) pin_down_extent(root, cache, start, len, 1); else { + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_discard_extent(root, start, len, NULL); btrfs_add_free_space(cache, start, len); btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); } From 6073c4162289fcae16b359f8e105d60343a209ca Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 30 Mar 2015 18:23:59 +0100 Subject: [PATCH 091/277] Btrfs: fix inode eviction infinite loop after cloning into it commit ccccf3d67294714af2d72a6fd6fd7d73b01c9329 upstream. If we attempt to clone a 0 length region into a file we can end up inserting a range in the inode's extent_io tree with a start offset that is greater then the end offset, which triggers immediately the following warning: [ 3914.619057] WARNING: CPU: 17 PID: 4199 at fs/btrfs/extent_io.c:435 insert_state+0x4b/0x10b [btrfs]() [ 3914.620886] BTRFS: end < start 4095 4096 (...) [ 3914.638093] Call Trace: [ 3914.638636] [] dump_stack+0x4c/0x65 [ 3914.639620] [] warn_slowpath_common+0xa1/0xbb [ 3914.640789] [] ? insert_state+0x4b/0x10b [btrfs] [ 3914.642041] [] warn_slowpath_fmt+0x46/0x48 [ 3914.643236] [] insert_state+0x4b/0x10b [btrfs] [ 3914.644441] [] __set_extent_bit+0x107/0x3f4 [btrfs] [ 3914.645711] [] lock_extent_bits+0x65/0x1bf [btrfs] [ 3914.646914] [] ? _raw_spin_unlock+0x28/0x33 [ 3914.648058] [] ? test_range_bit+0xcc/0xde [btrfs] [ 3914.650105] [] lock_extent+0x13/0x15 [btrfs] [ 3914.651361] [] lock_extent_range+0x3d/0xcd [btrfs] [ 3914.652761] [] btrfs_ioctl_clone+0x278/0x388 [btrfs] [ 3914.654128] [] ? might_fault+0x58/0xb5 [ 3914.655320] [] btrfs_ioctl+0xb51/0x2195 [btrfs] (...) [ 3914.669271] ---[ end trace 14843d3e2e622fc1 ]--- This later makes the inode eviction handler enter an infinite loop that keeps dumping the following warning over and over: [ 3915.117629] WARNING: CPU: 22 PID: 4228 at fs/btrfs/extent_io.c:435 insert_state+0x4b/0x10b [btrfs]() [ 3915.119913] BTRFS: end < start 4095 4096 (...) [ 3915.137394] Call Trace: [ 3915.137913] [] dump_stack+0x4c/0x65 [ 3915.139154] [] warn_slowpath_common+0xa1/0xbb [ 3915.140316] [] ? insert_state+0x4b/0x10b [btrfs] [ 3915.141505] [] warn_slowpath_fmt+0x46/0x48 [ 3915.142709] [] insert_state+0x4b/0x10b [btrfs] [ 3915.143849] [] __set_extent_bit+0x107/0x3f4 [btrfs] [ 3915.145120] [] ? btrfs_kill_super+0x17/0x23 [btrfs] [ 3915.146352] [] ? deactivate_locked_super+0x3b/0x50 [ 3915.147565] [] lock_extent_bits+0x65/0x1bf [btrfs] [ 3915.148785] [] ? _raw_write_unlock+0x28/0x33 [ 3915.149931] [] btrfs_evict_inode+0x196/0x482 [btrfs] [ 3915.151154] [] evict+0xa0/0x148 [ 3915.152094] [] dispose_list+0x39/0x43 [ 3915.153081] [] evict_inodes+0xdc/0xeb [ 3915.154062] [] generic_shutdown_super+0x49/0xef [ 3915.155193] [] kill_anon_super+0x13/0x1e [ 3915.156274] [] btrfs_kill_super+0x17/0x23 [btrfs] (...) [ 3915.167404] ---[ end trace 14843d3e2e622fc2 ]--- So just bail out of the clone ioctl if the length of the region to clone is zero, without locking any extent range, in order to prevent this issue (same behaviour as a pwrite with a 0 length for example). This is trivial to reproduce. For example, the steps for the test I just made for fstests: mkfs.btrfs -f SCRATCH_DEV mount SCRATCH_DEV $SCRATCH_MNT touch $SCRATCH_MNT/foo touch $SCRATCH_MNT/bar $CLONER_PROG -s 0 -d 4096 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar umount $SCRATCH_MNT A test case for fstests follows soon. Signed-off-by: Filipe Manana Reviewed-by: Omar Sandoval Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 783906c687b5..dbefa6c609f4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2572,6 +2572,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (off + len == src->i_size) len = ALIGN(src->i_size, bs) - off; + if (len == 0) { + ret = 0; + goto out_unlock; + } + /* verify the end result is block aligned */ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || !IS_ALIGNED(destoff, bs)) From 3518a2e4bbe8c336d3f59138d1f7bdd628a841b3 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 30 Sep 2014 16:08:03 -0500 Subject: [PATCH 092/277] usb: gadget: composite: enable BESL support commit a6615937bcd9234e6d6bb817c3701fce44d0a84d upstream. According to USB 2.0 ECN Errata for Link Power Management (USB2-LPM-Errata-final.pdf), BESL must be enabled if LPM is enabled. This helps with USB30CV TD 9.21 LPM L1 Suspend Resume Test. Signed-off-by: Felipe Balbi Signed-off-by: Du, Changbin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 44a292b75012..a660716f9331 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -528,7 +528,7 @@ static int bos_desc(struct usb_composite_dev *cdev) usb_ext->bLength = USB_DT_USB_EXT_CAP_SIZE; usb_ext->bDescriptorType = USB_DT_DEVICE_CAPABILITY; usb_ext->bDevCapabilityType = USB_CAP_TYPE_EXT; - usb_ext->bmAttributes = cpu_to_le32(USB_LPM_SUPPORT); + usb_ext->bmAttributes = cpu_to_le32(USB_LPM_SUPPORT | USB_BESL_SUPPORT); /* * The Superspeed USB Capability descriptor shall be implemented by all From 38db3faa86115d76cc9df29b3706cb90d6583a77 Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Tue, 3 Mar 2015 09:54:41 +0100 Subject: [PATCH 093/277] KVM: s390: Zero out current VMDB of STSI before including level3 data. commit b75f4c9afac2604feb971441116c07a24ecca1ec upstream. s390 documentation requires words 0 and 10-15 to be reserved and stored as zeros. As we fill out all other fields, we can memset the full structure. Signed-off-by: Ekaterina Tumanova Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/priv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6bbd7b5a0bbe..0220c2ba7590 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -328,6 +328,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) for (n = mem->count - 1; n > 0 ; n--) memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); + memset(&mem->vm[0], 0, sizeof(mem->vm[0])); mem->vm[0].cpus_total = cpus; mem->vm[0].cpus_configured = cpus; mem->vm[0].cpus_standby = 0; From 793d5cc7015f5ccea3114523471188ece3958904 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 25 Mar 2015 10:13:33 +0100 Subject: [PATCH 094/277] s390/hibernate: fix save and restore of kernel text section commit d74419495633493c9cd3f2bbeb7f3529d0edded6 upstream. Sebastian reported a crash caused by a jump label mismatch after resume. This happens because we do not save the kernel text section during suspend and therefore also do not restore it during resume, but use the kernel image that restores the old system. This means that after a suspend/resume cycle we lost all modifications done to the kernel text section. The reason for this is the pfn_is_nosave() function, which incorrectly returns that read-only pages don't need to be saved. This is incorrect since we mark the kernel text section read-only. We still need to make sure to not save and restore pages contained within NSS and DCSS segment. To fix this add an extra case for the kernel text section and only save those pages if they are not contained within an NSS segment. Fixes the following crash (and the above bugs as well): Jump label code mismatch at netif_receive_skb_internal+0x28/0xd0 Found: c0 04 00 00 00 00 Expected: c0 f4 00 00 00 11 New: c0 04 00 00 00 00 Kernel panic - not syncing: Corrupted kernel text CPU: 0 PID: 9 Comm: migration/0 Not tainted 3.19.0-01975-gb1b096e70f23 #4 Call Trace: [<0000000000113972>] show_stack+0x72/0xf0 [<000000000081f15e>] dump_stack+0x6e/0x90 [<000000000081c4e8>] panic+0x108/0x2b0 [<000000000081be64>] jump_label_bug.isra.2+0x104/0x108 [<0000000000112176>] __jump_label_transform+0x9e/0xd0 [<00000000001121e6>] __sm_arch_jump_label_transform+0x3e/0x50 [<00000000001d1136>] multi_cpu_stop+0x12e/0x170 [<00000000001d1472>] cpu_stopper_thread+0xb2/0x168 [<000000000015d2ac>] smpboot_thread_fn+0x134/0x1b0 [<0000000000158baa>] kthread+0x10a/0x110 [<0000000000824a86>] kernel_thread_starter+0x6/0xc Reported-and-tested-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/suspend.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index c479d2f9605b..f176bc83cc8d 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -138,6 +138,8 @@ int pfn_is_nosave(unsigned long pfn) { unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end)); + unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1; + unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); /* Always save lowcore pages (LC protection might be enabled). */ if (pfn <= LC_PAGES) @@ -145,6 +147,8 @@ int pfn_is_nosave(unsigned long pfn) if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) return 1; /* Skip memory holes and read-only pages (NSS, DCSS, ...). */ + if (pfn >= stext_pfn && pfn <= eshared_pfn) + return ipl_info.type == IPL_TYPE_NSS ? 1 : 0; if (tprot(PFN_PHYS(pfn))) return 1; return 0; From cad1af8cfbafe09d84ebd87977540fbd896f7225 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 8 Apr 2015 14:16:48 +0200 Subject: [PATCH 095/277] KVM: use slowpath for cross page cached accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ca3f0874723fad81d0c701b63ae3a17a408d5f25 upstream. kvm_write_guest_cached() does not mark all written pages as dirty and code comments in kvm_gfn_to_hva_cache_init() talk about NULL memslot with cross page accesses. Fix all the easy way. The check is '<= 1' to have the same result for 'len = 0' cache anywhere in the page. (nr_pages_needed is 0 on page boundary.) Fixes: 8f964525a121 ("KVM: Allow cross page reads and writes from cached translations.") Signed-off-by: Radim Krčmář Message-Id: <20150408121648.GA3519@potion.brq.redhat.com> Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a17f190be58e..1d4b8bed4e48 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1549,8 +1549,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, ghc->generation = slots->generation; ghc->len = len; ghc->memslot = gfn_to_memslot(kvm, start_gfn); - ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); - if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); + if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { ghc->hva += offset; } else { /* From dacbdb36cc75ef32aa6b3dccfe5aef77f29bab90 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 29 Mar 2015 10:54:05 +0800 Subject: [PATCH 096/277] MIPS: Hibernate: flush TLB entries earlier commit a843d00d038b11267279e3b5388222320f9ddc1d upstream. We found that TLB mismatch not only happens after kernel resume, but also happens during snapshot restore. So move it to the beginning of swsusp_arch_suspend(). Signed-off-by: Huacai Chen Cc: Steven J. Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Patchwork: https://patchwork.linux-mips.org/patch/9621/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/power/hibernate.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S index 32a7c828f073..e7567c8a9e79 100644 --- a/arch/mips/power/hibernate.S +++ b/arch/mips/power/hibernate.S @@ -30,6 +30,8 @@ LEAF(swsusp_arch_suspend) END(swsusp_arch_suspend) LEAF(swsusp_arch_resume) + /* Avoid TLB mismatch during and after kernel resume */ + jal local_flush_tlb_all PTR_L t0, restore_pblist 0: PTR_L t1, PBE_ADDRESS(t0) /* source */ @@ -43,7 +45,6 @@ LEAF(swsusp_arch_resume) bne t1, t3, 1b PTR_L t0, PBE_NEXT(t0) bnez t0, 0b - jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */ PTR_LA t0, saved_regs PTR_L ra, PT_R31(t0) PTR_L sp, PT_R29(t0) From 317ff32c67fed6260972f3f706abc05c1752b16e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 20 Mar 2015 14:29:34 +0100 Subject: [PATCH 097/277] cdc-wdm: fix endianness bug in debug statements commit 323ece54e0761198946ecd0c2091f1d2bfdfcb64 upstream. Values directly from descriptors given in debug statements must be converted to native endianness. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 6463ca3bcfba..07133d0c971b 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -244,7 +244,7 @@ static void wdm_int_callback(struct urb *urb) case USB_CDC_NOTIFY_RESPONSE_AVAILABLE: dev_dbg(&desc->intf->dev, "NOTIFY_RESPONSE_AVAILABLE received: index %d len %d", - dr->wIndex, dr->wLength); + le16_to_cpu(dr->wIndex), le16_to_cpu(dr->wLength)); break; case USB_CDC_NOTIFY_NETWORK_CONNECTION: @@ -257,7 +257,9 @@ static void wdm_int_callback(struct urb *urb) clear_bit(WDM_POLL_RUNNING, &desc->flags); dev_err(&desc->intf->dev, "unknown notification %d received: index %d len %d\n", - dr->bNotificationType, dr->wIndex, dr->wLength); + dr->bNotificationType, + le16_to_cpu(dr->wIndex), + le16_to_cpu(dr->wLength)); goto exit; } @@ -403,7 +405,7 @@ static ssize_t wdm_write USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; - req->wIndex = desc->inum; + req->wIndex = desc->inum; /* already converted */ req->wLength = cpu_to_le16(count); set_bit(WDM_IN_USE, &desc->flags); desc->outbuf = buf; @@ -417,7 +419,7 @@ static ssize_t wdm_write rv = usb_translate_errors(rv); } else { dev_dbg(&desc->intf->dev, "Tx URB has been submitted index=%d", - req->wIndex); + le16_to_cpu(req->wIndex)); } out: usb_autopm_put_interface(desc->intf); @@ -780,7 +782,7 @@ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor desc->irq->bRequestType = (USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE); desc->irq->bRequest = USB_CDC_GET_ENCAPSULATED_RESPONSE; desc->irq->wValue = 0; - desc->irq->wIndex = desc->inum; + desc->irq->wIndex = desc->inum; /* already converted */ desc->irq->wLength = cpu_to_le16(desc->wMaxCommand); usb_fill_control_urb( From 14a1fe5de9d7c391301b517e8effb5f098ada9c4 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 23 Mar 2015 17:50:27 +0000 Subject: [PATCH 098/277] spi: spidev: fix possible arithmetic overflow for multi-transfer message commit f20fbaad7620af2df36a1f9d1c9ecf48ead5b747 upstream. `spidev_message()` sums the lengths of the individual SPI transfers to determine the overall SPI message length. It restricts the total length, returning an error if too long, but it does not check for arithmetic overflow. For example, if the SPI message consisted of two transfers and the first has a length of 10 and the second has a length of (__u32)(-1), the total length would be seen as 9, even though the second transfer is actually very long. If the second transfer specifies a null `rx_buf` and a non-null `tx_buf`, the `copy_from_user()` could overrun the spidev's pre-allocated tx buffer before it reaches an invalid user memory address. Fix it by checking that neither the total nor the individual transfer lengths exceed the maximum allowed value. Thanks to Dan Carpenter for reporting the potential integer overflow. Signed-off-by: Ian Abbott Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spidev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 911e9e0711d2..a08f923b9925 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -243,7 +243,10 @@ static int spidev_message(struct spidev_data *spidev, k_tmp->len = u_tmp->len; total += k_tmp->len; - if (total > bufsiz) { + /* Check total length of transfers. Also check each + * transfer length to avoid arithmetic overflow. + */ + if (total > bufsiz || k_tmp->len > bufsiz) { status = -EMSGSIZE; goto done; } From faf8db2e2247ac49104653eddfca2e1eeb7efeea Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Mar 2015 10:40:38 -0400 Subject: [PATCH 099/277] ring-buffer: Replace this_cpu_*() with __this_cpu_*() commit 80a9b64e2c156b6523e7a01f2ba6e5d86e722814 upstream. It has come to my attention that this_cpu_read/write are horrible on architectures other than x86. Worse yet, they actually disable preemption or interrupts! This caused some unexpected tracing results on ARM. 101.356868: preempt_count_add <-ring_buffer_lock_reserve 101.356870: preempt_count_sub <-ring_buffer_lock_reserve The ring_buffer_lock_reserve has recursion protection that requires accessing a per cpu variable. But since preempt_disable() is traced, it too got traced while accessing the variable that is suppose to prevent recursion like this. The generic version of this_cpu_read() and write() are: #define this_cpu_generic_read(pcp) \ ({ typeof(pcp) ret__; \ preempt_disable(); \ ret__ = *this_cpu_ptr(&(pcp)); \ preempt_enable(); \ ret__; \ }) #define this_cpu_generic_to_op(pcp, val, op) \ do { \ unsigned long flags; \ raw_local_irq_save(flags); \ *__this_cpu_ptr(&(pcp)) op val; \ raw_local_irq_restore(flags); \ } while (0) Which is unacceptable for locations that know they are within preempt disabled or interrupt disabled locations. Paul McKenney stated that __this_cpu_() versions produce much better code on other architectures than this_cpu_() does, if we know that the call is done in a preempt disabled location. I also changed the recursive_unlock() to use two local variables instead of accessing the per_cpu variable twice. Link: http://lkml.kernel.org/r/20150317114411.GE3589@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20150317104038.312e73d1@gandalf.local.home Acked-by: Christoph Lameter Reported-by: Uwe Kleine-Koenig Tested-by: Uwe Kleine-Koenig Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3d9fee3a80b3..ab21b8c66535 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2650,7 +2650,7 @@ static DEFINE_PER_CPU(unsigned int, current_context); static __always_inline int trace_recursive_lock(void) { - unsigned int val = this_cpu_read(current_context); + unsigned int val = __this_cpu_read(current_context); int bit; if (in_interrupt()) { @@ -2667,18 +2667,17 @@ static __always_inline int trace_recursive_lock(void) return 1; val |= (1 << bit); - this_cpu_write(current_context, val); + __this_cpu_write(current_context, val); return 0; } static __always_inline void trace_recursive_unlock(void) { - unsigned int val = this_cpu_read(current_context); + unsigned int val = __this_cpu_read(current_context); - val--; - val &= this_cpu_read(current_context); - this_cpu_write(current_context, val); + val &= val & (val - 1); + __this_cpu_write(current_context, val); } #else From 7aea358e5c37d8f17e3911f21bd555d5533be7ec Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 20 Feb 2015 14:32:25 +0100 Subject: [PATCH 100/277] power_supply: lp8788-charger: Fix leaked power supply on probe fail commit a7117f81e8391e035c49b3440792f7e6cea28173 upstream. Driver forgot to unregister charger power supply if registering of battery supply failed in probe(). In such case the memory associated with power supply leaked. Signed-off-by: Krzysztof Kozlowski Fixes: 98a276649358 ("power_supply: Add new lp8788 charger driver") Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/lp8788-charger.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/power/lp8788-charger.c b/drivers/power/lp8788-charger.c index ed49b50b220b..72da2a6c22db 100644 --- a/drivers/power/lp8788-charger.c +++ b/drivers/power/lp8788-charger.c @@ -417,8 +417,10 @@ static int lp8788_psy_register(struct platform_device *pdev, pchg->battery.num_properties = ARRAY_SIZE(lp8788_battery_prop); pchg->battery.get_property = lp8788_battery_get_property; - if (power_supply_register(&pdev->dev, &pchg->battery)) + if (power_supply_register(&pdev->dev, &pchg->battery)) { + power_supply_unregister(&pchg->charger); return -EPERM; + } return 0; } From e26c761f5a57dc335748b2e730e89925744ce6c5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 Mar 2015 15:42:27 +0100 Subject: [PATCH 101/277] ARM: 8320/1: fix integer overflow in ELF_ET_DYN_BASE commit 8defb3367fcd19d1af64c07792aade0747b54e0f upstream. Usually ELF_ET_DYN_BASE is 2/3 of TASK_SIZE. With 3G/1G user/kernel split this is not so, because 2*TASK_SIZE overflows 32 bits, so the actual value of ELF_ET_DYN_BASE is: (2 * TASK_SIZE / 3) = 0x2a000000 When ASLR is disabled PIE binaries will load at ELF_ET_DYN_BASE address. On 32bit platforms AddressSanitzer uses addresses [0x20000000 - 0x40000000] for shadow memory [1]. So ASan doesn't work for PIE binaries when ASLR disabled as it fails to map shadow memory. Also after Kees's 'split ET_DYN ASLR from mmap ASLR' patchset PIE binaries has a high chance of loading somewhere in between [0x2a000000 - 0x40000000] even if ASLR enabled. This makes ASan with PIE absolutely incompatible. Fix overflow by dividing TASK_SIZE prior to multiplying. After this patch ELF_ET_DYN_BASE equals to (for CONFIG_VMSPLIT_3G=y): (TASK_SIZE / 3 * 2) = 0x7f555554 [1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm#Mapping Signed-off-by: Andrey Ryabinin Reported-by: Maria Guseva Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 56211f2084ef..ce6e30628cc1 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -116,7 +116,7 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. A value of 0 means we From c82d2edfe720101550a377afceb903a52ac44e19 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 27 Mar 2015 01:58:08 +0900 Subject: [PATCH 102/277] ARM: S3C64XX: Use fixed IRQ bases to avoid conflicts on Cragganmore commit 4e330ae4ab2915444f1e6dca1358a910aa259362 upstream. There are two PMICs on Cragganmore, currently one dynamically assign its IRQ base and the other uses a fixed base. It is possible for the statically assigned PMIC to fail if its IRQ is taken by the dynamically assigned one. Fix this by statically assigning both the IRQ bases. Signed-off-by: Charles Keepax Signed-off-by: Kukjin Kim Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-s3c64xx/crag6410.h | 1 + arch/arm/mach-s3c64xx/mach-crag6410.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/mach-s3c64xx/crag6410.h b/arch/arm/mach-s3c64xx/crag6410.h index 4c3c9994fc2c..81dc722ced57 100644 --- a/arch/arm/mach-s3c64xx/crag6410.h +++ b/arch/arm/mach-s3c64xx/crag6410.h @@ -14,6 +14,7 @@ #include #define GLENFARCLAS_PMIC_IRQ_BASE IRQ_BOARD_START +#define BANFF_PMIC_IRQ_BASE (IRQ_BOARD_START + 64) #define PCA935X_GPIO_BASE GPIO_BOARD_START #define CODEC_GPIO_BASE (GPIO_BOARD_START + 8) diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index 8ad88ace795a..5fa9ac9104e1 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -558,6 +558,7 @@ static struct wm831x_touch_pdata touch_pdata = { static struct wm831x_pdata crag_pmic_pdata = { .wm831x_num = 1, + .irq_base = BANFF_PMIC_IRQ_BASE, .gpio_base = BANFF_PMIC_GPIO_BASE, .soft_shutdown = true, From 99cecd301837acb25276c629483eb11205176414 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 12 Mar 2015 09:15:28 +0800 Subject: [PATCH 103/277] usb: phy: Find the right match in devm_usb_phy_match commit 869aee0f31429fa9d94d5aef539602b73ae0cf4b upstream. The res parameter passed to devm_usb_phy_match() is the location where the pointer to the usb_phy is stored, hence it needs to be dereferenced before comparing to the match data in order to find the correct match. Fixes: 410219dcd2ba ("usb: otg: utils: devres: Add API's to associate a device with the phy") Signed-off-by: Axel Lin Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy.c b/drivers/usb/phy/phy.c index a9984c700d2c..5f79d8e2caab 100644 --- a/drivers/usb/phy/phy.c +++ b/drivers/usb/phy/phy.c @@ -78,7 +78,9 @@ static void devm_usb_phy_release(struct device *dev, void *res) static int devm_usb_phy_match(struct device *dev, void *res, void *match_data) { - return res == match_data; + struct usb_phy **phy = res; + + return *phy == match_data; } /** From 7a2d2855fc7ba8eb962ff596f188b894f2b57eb1 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 13 Feb 2015 14:34:25 -0600 Subject: [PATCH 104/277] usb: define a generic USB_RESUME_TIMEOUT macro commit 62f0342de1f012f3e90607d39e20fce811391169 upstream. Every USB Host controller should use this new macro to define for how long resume signalling should be driven on the bus. Currently, almost every single USB controller is using a 20ms timeout for resume signalling. That's problematic for two reasons: a) sometimes that 20ms timer expires a little before 20ms, which makes us fail certification b) some (many) devices actually need more than 20ms resume signalling. Sure, in case of (b) we can state that the device is against the USB spec, but the fact is that we have no control over which device the certification lab will use. We also have no control over which host they will use. Most likely they'll be using a Windows PC which, again, we have no control over how that USB stack is written and how long resume signalling they are using. At the end of the day, we must make sure Linux passes electrical compliance when working as Host or as Device and currently we don't pass compliance as host because we're driving resume signallig for exactly 20ms and that confuses certification test setup resulting in Certification failure. Acked-by: Greg Kroah-Hartman Acked-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/include/linux/usb.h b/include/linux/usb.h index a0bee5a28d1a..28bd3a898cba 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -206,6 +206,32 @@ void usb_put_intf(struct usb_interface *intf); #define USB_MAXINTERFACES 32 #define USB_MAXIADS (USB_MAXINTERFACES/2) +/* + * USB Resume Timer: Every Host controller driver should drive the resume + * signalling on the bus for the amount of time defined by this macro. + * + * That way we will have a 'stable' behavior among all HCDs supported by Linux. + * + * Note that the USB Specification states we should drive resume for *at least* + * 20 ms, but it doesn't give an upper bound. This creates two possible + * situations which we want to avoid: + * + * (a) sometimes an msleep(20) might expire slightly before 20 ms, which causes + * us to fail USB Electrical Tests, thus failing Certification + * + * (b) Some (many) devices actually need more than 20 ms of resume signalling, + * and while we can argue that's against the USB Specification, we don't have + * control over which devices a certification laboratory will be using for + * certification. If CertLab uses a device which was tested against Windows and + * that happens to have relaxed resume signalling rules, we might fall into + * situations where we fail interoperability and electrical tests. + * + * In order to avoid both conditions, we're using a 40 ms resume timeout, which + * should cope with both LPJ calibration errors and devices not following every + * detail of the USB Specification. + */ +#define USB_RESUME_TIMEOUT 40 /* ms */ + /** * struct usb_interface_cache - long-term representation of a device interface * @num_altsetting: number of altsettings defined. From 3a8c01bc40fd66c07e8e943dd36dc9bc2090d6aa Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 13 Feb 2015 14:58:53 -0600 Subject: [PATCH 105/277] usb: host: r8a66597: use new USB_RESUME_TIMEOUT commit 7a606ac29752a3e571b83f9b3fceb1eaa1d37781 upstream. While this driver was already using a 50ms resume timeout, let's make sure everybody uses the same macro so it's easy to fix later should anything go wrong. It also gives a more "stable" expectation to Linux users. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/r8a66597-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index a6fd8f5371df..6656dfda5665 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -2301,7 +2301,7 @@ static int r8a66597_bus_resume(struct usb_hcd *hcd) rh->port &= ~USB_PORT_STAT_SUSPEND; rh->port |= USB_PORT_STAT_C_SUSPEND << 16; r8a66597_mdfy(r8a66597, RESUME, RESUME | UACT, dvstctr_reg); - msleep(50); + msleep(USB_RESUME_TIMEOUT); r8a66597_mdfy(r8a66597, UACT, RESUME | UACT, dvstctr_reg); } From 38faf1a5651345e72bf176a45877400329fc411d Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 13 Feb 2015 14:50:10 -0600 Subject: [PATCH 106/277] usb: host: isp116x: use new USB_RESUME_TIMEOUT commit 8c0ae6574ccfd3d619876a65829aad74c9d22ba5 upstream. Make sure we're using the new macro, so our resume signaling will always pass certification. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp116x-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index b64e661618bb..baf2807934c1 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1488,7 +1488,7 @@ static int isp116x_bus_resume(struct usb_hcd *hcd) spin_unlock_irq(&isp116x->lock); hcd->state = HC_STATE_RESUMING; - msleep(20); + msleep(USB_RESUME_TIMEOUT); /* Go operational */ spin_lock_irq(&isp116x->lock); From 181d303cb1c16ef27ad48314675cbe47e6e45ffb Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 13 Feb 2015 14:39:13 -0600 Subject: [PATCH 107/277] usb: host: xhci: use new USB_RESUME_TIMEOUT commit b9e451885deb6262dbaf5cd14aa77d192d9ac759 upstream. Make sure we're using the new macro, so our resume signaling will always pass certification. Acked-by: Mathias Nyman Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9948890ef93e..bc7a886e3c36 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1697,7 +1697,7 @@ static void handle_port_status(struct xhci_hcd *xhci, } else { xhci_dbg(xhci, "resume HS port %d\n", port_id); bus_state->resume_done[faked_port_index] = jiffies + - msecs_to_jiffies(20); + msecs_to_jiffies(USB_RESUME_TIMEOUT); set_bit(faked_port_index, &bus_state->resuming_ports); mod_timer(&hcd->rh_timer, bus_state->resume_done[faked_port_index]); From e0565f62a0e1ebfe6fa62e633f75c2ec8be522c9 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 13 Feb 2015 15:00:38 -0600 Subject: [PATCH 108/277] usb: host: sl811: use new USB_RESUME_TIMEOUT commit 08debfb13b199716da6153940c31968c556b195d upstream. Make sure we're using the new macro, so our resume signaling will always pass certification. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/sl811-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index b2ec7fe758dd..b4cad9346035 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -1251,7 +1251,7 @@ sl811h_hub_control( sl811_write(sl811, SL11H_CTLREG1, sl811->ctrl1); mod_timer(&sl811->timer, jiffies - + msecs_to_jiffies(20)); + + msecs_to_jiffies(USB_RESUME_TIMEOUT)); break; case USB_PORT_FEAT_POWER: port_power(sl811, 0); From e217fcc13682beef6eaafbb5a419e6b50ddf94b9 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 13 Feb 2015 15:38:33 -0600 Subject: [PATCH 109/277] usb: core: hub: use new USB_RESUME_TIMEOUT commit bbc78c07a51f6fd29c227b1220a9016e585358ba upstream. Make sure we're using the new macro, so our resume signaling will always pass certification. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index c9f56ffdba9a..11a073cda1d6 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3282,10 +3282,10 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) dev_dbg(hub->intfdev, "can't resume port %d, status %d\n", port1, status); } else { - /* drive resume for at least 20 msec */ + /* drive resume for USB_RESUME_TIMEOUT msec */ dev_dbg(&udev->dev, "usb %sresume\n", (PMSG_IS_AUTO(msg) ? "auto-" : "")); - msleep(25); + msleep(USB_RESUME_TIMEOUT); /* Virtual root hubs can trigger on GET_PORT_STATUS to * stop resume signaling. Then finish the resume From 6c6f8aac03406e6cba87267a6075b4e3340af892 Mon Sep 17 00:00:00 2001 From: Michael Gernoth Date: Thu, 9 Apr 2015 23:42:15 +0200 Subject: [PATCH 110/277] ALSA: emu10k1: don't deadlock in proc-functions commit 91bf0c2dcb935a87e5c0795f5047456b965fd143 upstream. The functions snd_emu10k1_proc_spdif_read and snd_emu1010_fpga_read acquire the emu_lock before accessing the FPGA. The function used to access the FPGA (snd_emu1010_fpga_read) also tries to take the emu_lock which causes a deadlock. Remove the outer locking in the proc-functions (guarding only the already safe fpga read) to prevent this deadlock. [removed superfluous flags variables too -- tiwai] Signed-off-by: Michael Gernoth Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/emu10k1/emuproc.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/sound/pci/emu10k1/emuproc.c b/sound/pci/emu10k1/emuproc.c index 2ca9f2e93139..53745f4c2bf5 100644 --- a/sound/pci/emu10k1/emuproc.c +++ b/sound/pci/emu10k1/emuproc.c @@ -241,31 +241,22 @@ static void snd_emu10k1_proc_spdif_read(struct snd_info_entry *entry, struct snd_emu10k1 *emu = entry->private_data; u32 value; u32 value2; - unsigned long flags; u32 rate; if (emu->card_capabilities->emu_model) { - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, 0x38, &value); - spin_unlock_irqrestore(&emu->emu_lock, flags); if ((value & 0x1) == 0) { - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, 0x2a, &value); snd_emu1010_fpga_read(emu, 0x2b, &value2); - spin_unlock_irqrestore(&emu->emu_lock, flags); rate = 0x1770000 / (((value << 5) | value2)+1); snd_iprintf(buffer, "ADAT Locked : %u\n", rate); } else { snd_iprintf(buffer, "ADAT Unlocked\n"); } - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, 0x20, &value); - spin_unlock_irqrestore(&emu->emu_lock, flags); if ((value & 0x4) == 0) { - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, 0x28, &value); snd_emu1010_fpga_read(emu, 0x29, &value2); - spin_unlock_irqrestore(&emu->emu_lock, flags); rate = 0x1770000 / (((value << 5) | value2)+1); snd_iprintf(buffer, "SPDIF Locked : %d\n", rate); } else { @@ -410,14 +401,11 @@ static void snd_emu_proc_emu1010_reg_read(struct snd_info_entry *entry, { struct snd_emu10k1 *emu = entry->private_data; u32 value; - unsigned long flags; int i; snd_iprintf(buffer, "EMU1010 Registers:\n\n"); for(i = 0; i < 0x40; i+=1) { - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, i, &value); - spin_unlock_irqrestore(&emu->emu_lock, flags); snd_iprintf(buffer, "%02X: %08X, %02X\n", i, value, (value >> 8) & 0x7f); } } From b9c4062783872c5e6e7cda86e0fb55b34be41943 Mon Sep 17 00:00:00 2001 From: Ulrik De Bie Date: Mon, 6 Apr 2015 15:35:38 -0700 Subject: [PATCH 111/277] Input: elantech - fix absolute mode setting on some ASUS laptops commit bd884149aca61de269fd9bad83fe2a4232ffab21 upstream. On ASUS TP500LN and X750JN, the touchpad absolute mode is reset each time set_rate is done. In order to fix this, we will verify the firmware version, and if it matches the one in those laptops, the set_rate function is overloaded with a function elantech_set_rate_restore_reg_07 that performs the set_rate with the original function, followed by a restore of reg_07 (the register that sets the absolute mode on elantech v4 hardware). Also the ASUS TP500LN and X750JN firmware version, capabilities, and button constellation is added to elantech.c Reported-and-tested-by: George Moutsopoulos Signed-off-by: Ulrik De Bie Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elantech.c | 22 ++++++++++++++++++++++ drivers/input/mouse/elantech.h | 1 + 2 files changed, 23 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 85e75239c814..1af7df263368 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -783,6 +783,21 @@ static psmouse_ret_t elantech_process_byte(struct psmouse *psmouse) return PSMOUSE_FULL_PACKET; } +/* + * This writes the reg_07 value again to the hardware at the end of every + * set_rate call because the register loses its value. reg_07 allows setting + * absolute mode on v4 hardware + */ +static void elantech_set_rate_restore_reg_07(struct psmouse *psmouse, + unsigned int rate) +{ + struct elantech_data *etd = psmouse->private; + + etd->original_set_rate(psmouse, rate); + if (elantech_write_reg(psmouse, 0x07, etd->reg_07)) + psmouse_err(psmouse, "restoring reg_07 failed\n"); +} + /* * Put the touchpad into absolute mode */ @@ -985,6 +1000,8 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse, * Asus K53SV 0x450f01 78, 15, 0c 2 hw buttons * Asus G46VW 0x460f02 00, 18, 0c 2 hw buttons * Asus G750JX 0x360f00 00, 16, 0c 2 hw buttons + * Asus TP500LN 0x381f17 10, 14, 0e clickpad + * Asus X750JN 0x381f17 10, 14, 0e clickpad * Asus UX31 0x361f00 20, 15, 0e clickpad * Asus UX32VD 0x361f02 00, 15, 0e clickpad * Avatar AVIU-145A2 0x361f00 ? clickpad @@ -1452,6 +1469,11 @@ int elantech_init(struct psmouse *psmouse) goto init_fail; } + if (etd->fw_version == 0x381f17) { + etd->original_set_rate = psmouse->set_rate; + psmouse->set_rate = elantech_set_rate_restore_reg_07; + } + if (elantech_set_input_params(psmouse)) { psmouse_err(psmouse, "failed to query touchpad range.\n"); goto init_fail; diff --git a/drivers/input/mouse/elantech.h b/drivers/input/mouse/elantech.h index c1c15ab6872d..13a12ccbff51 100644 --- a/drivers/input/mouse/elantech.h +++ b/drivers/input/mouse/elantech.h @@ -138,6 +138,7 @@ struct elantech_data { struct finger_pos mt[ETP_MAX_FINGERS]; unsigned char parity[256]; int (*send_cmd)(struct psmouse *psmouse, unsigned char c, unsigned char *param); + void (*original_set_rate)(struct psmouse *psmouse, unsigned int rate); }; #ifdef CONFIG_MOUSE_PS2_ELANTECH From 00c65c8a660bd5b5d4228137ff5be2654dd1840a Mon Sep 17 00:00:00 2001 From: Michael Davidson Date: Tue, 14 Apr 2015 15:47:38 -0700 Subject: [PATCH 112/277] fs/binfmt_elf.c: fix bug in loading of PIE binaries commit a87938b2e246b81b4fb713edb371a9fa3c5c3c86 upstream. With CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE enabled, and a normal top-down address allocation strategy, load_elf_binary() will attempt to map a PIE binary into an address range immediately below mm->mmap_base. Unfortunately, load_elf_ binary() does not take account of the need to allocate sufficient space for the entire binary which means that, while the first PT_LOAD segment is mapped below mm->mmap_base, the subsequent PT_LOAD segment(s) end up being mapped above mm->mmap_base into the are that is supposed to be the "gap" between the stack and the binary. Since the size of the "gap" on x86_64 is only guaranteed to be 128MB this means that binaries with large data segments > 128MB can end up mapping part of their data segment over their stack resulting in corruption of the stack (and the data segment once the binary starts to run). Any PIE binary with a data segment > 128MB is vulnerable to this although address randomization means that the actual gap between the stack and the end of the binary is normally greater than 128MB. The larger the data segment of the binary the higher the probability of failure. Fix this by calculating the total size of the binary in the same way as load_elf_interp(). Signed-off-by: Michael Davidson Cc: Alexander Viro Cc: Jiri Kosina Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_elf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3c4d8797ea9a..53f620a4350e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -756,6 +756,7 @@ static int load_elf_binary(struct linux_binprm *bprm) i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { int elf_prot = 0, elf_flags; unsigned long k, vaddr; + unsigned long total_size = 0; if (elf_ppnt->p_type != PT_LOAD) continue; @@ -820,10 +821,16 @@ static int load_elf_binary(struct linux_binprm *bprm) #else load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); #endif + total_size = total_mapping_size(elf_phdata, + loc->elf_ex.e_phnum); + if (!total_size) { + error = -EINVAL; + goto out_free_dentry; + } } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, - elf_prot, elf_flags, 0); + elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { send_sig(SIGKILL, current, 0); retval = IS_ERR((void *)error) ? From ddb56eac0e63d9eea725bbbebdb3d1df7e58242c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 16 Apr 2015 12:47:29 -0700 Subject: [PATCH 113/277] ptrace: fix race between ptrace_resume() and wait_task_stopped() commit b72c186999e689cb0b055ab1c7b3cd8fffbeb5ed upstream. ptrace_resume() is called when the tracee is still __TASK_TRACED. We set tracee->exit_code and then wake_up_state() changes tracee->state. If the tracer's sub-thread does wait() in between, task_stopped_code(ptrace => T) wrongly looks like another report from tracee. This confuses debugger, and since wait_task_stopped() clears ->exit_code the tracee can miss a signal. Test-case: #include #include #include #include #include #include int pid; void *waiter(void *arg) { int stat; for (;;) { assert(pid == wait(&stat)); assert(WIFSTOPPED(stat)); if (WSTOPSIG(stat) == SIGHUP) continue; assert(WSTOPSIG(stat) == SIGCONT); printf("ERR! extra/wrong report:%x\n", stat); } } int main(void) { pthread_t thread; pid = fork(); if (!pid) { assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0); for (;;) kill(getpid(), SIGHUP); } assert(pthread_create(&thread, NULL, waiter, NULL) == 0); for (;;) ptrace(PTRACE_CONT, pid, 0, SIGCONT); return 0; } Note for stable: the bug is very old, but without 9899d11f6544 "ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL" the fix should use lock_task_sighand(child). Signed-off-by: Oleg Nesterov Reported-by: Pavel Labath Tested-by: Pavel Labath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/ptrace.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index afadcf7b4a22..118323bc8529 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -720,6 +720,8 @@ static int ptrace_peek_siginfo(struct task_struct *child, static int ptrace_resume(struct task_struct *child, long request, unsigned long data) { + bool need_siglock; + if (!valid_signal(data)) return -EIO; @@ -747,8 +749,26 @@ static int ptrace_resume(struct task_struct *child, long request, user_disable_single_step(child); } + /* + * Change ->exit_code and ->state under siglock to avoid the race + * with wait_task_stopped() in between; a non-zero ->exit_code will + * wrongly look like another report from tracee. + * + * Note that we need siglock even if ->exit_code == data and/or this + * status was not reported yet, the new status must not be cleared by + * wait_task_stopped() after resume. + * + * If data == 0 we do not care if wait_task_stopped() reports the old + * status and clears the code too; this can't race with the tracee, it + * takes siglock after resume. + */ + need_siglock = data && !thread_group_empty(current); + if (need_siglock) + spin_lock_irq(&child->sighand->siglock); child->exit_code = data; wake_up_state(child, __TASK_TRACED); + if (need_siglock) + spin_unlock_irq(&child->sighand->siglock); return 0; } From 11f0198def5b4c143885f630bd964e3c2909783a Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 23 Mar 2015 18:14:10 -0500 Subject: [PATCH 114/277] rtlwifi: rtl8192cu: Add new USB ID commit 2f92b314f4daff2117847ac5343c54d3d041bf78 upstream. USB ID 2001:330d is used for a D-Link DWA-131. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8192cu/sw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index e7a2af3ad05a..8cf0d3919b98 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -369,6 +369,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = { {RTL_USB_DEVICE(0x2001, 0x3307, rtl92cu_hal_cfg)}, /*D-Link-Cameo*/ {RTL_USB_DEVICE(0x2001, 0x3309, rtl92cu_hal_cfg)}, /*D-Link-Alpha*/ {RTL_USB_DEVICE(0x2001, 0x330a, rtl92cu_hal_cfg)}, /*D-Link-Alpha*/ + {RTL_USB_DEVICE(0x2001, 0x330d, rtl92cu_hal_cfg)}, /*D-Link DWA-131 */ {RTL_USB_DEVICE(0x2019, 0xab2b, rtl92cu_hal_cfg)}, /*Planex -Abocom*/ {RTL_USB_DEVICE(0x20f4, 0x624d, rtl92cu_hal_cfg)}, /*TRENDNet*/ {RTL_USB_DEVICE(0x2357, 0x0100, rtl92cu_hal_cfg)}, /*TP-Link WN8200ND*/ From 4cbb68afc05fe04c35f1b742639a9e4660219cd0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 26 Mar 2015 02:16:06 +0100 Subject: [PATCH 115/277] rtlwifi: rtl8192cu: Add new device ID commit 9374e7d2fdcad3c36dafc8d3effd554bc702c4b6 upstream. Add new ID for ASUS N10 WiFi dongle. Signed-off-by: Marek Vasut Tested-by: Marek Vasut Cc: Larry Finger Cc: John W. Linville Acked-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8192cu/sw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index 8cf0d3919b98..7555095e0b74 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -313,6 +313,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = { {RTL_USB_DEVICE(0x07b8, 0x8188, rtl92cu_hal_cfg)}, /*Abocom - Abocom*/ {RTL_USB_DEVICE(0x07b8, 0x8189, rtl92cu_hal_cfg)}, /*Funai - Abocom*/ {RTL_USB_DEVICE(0x0846, 0x9041, rtl92cu_hal_cfg)}, /*NetGear WNA1000M*/ + {RTL_USB_DEVICE(0x0b05, 0x17ba, rtl92cu_hal_cfg)}, /*ASUS-Edimax*/ {RTL_USB_DEVICE(0x0bda, 0x5088, rtl92cu_hal_cfg)}, /*Thinkware-CC&C*/ {RTL_USB_DEVICE(0x0df6, 0x0052, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ {RTL_USB_DEVICE(0x0df6, 0x005c, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ From bd147d13c83f04522ef5693845926713bb10e360 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 May 2013 22:51:15 +0200 Subject: [PATCH 116/277] parport: disable PC-style parallel port support on cris commit cb1ff5f90e1550d5752521205506b99f1aa8b1e0 upstream. Signed-off-by: Geert Uytterhoeven Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/parport/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig index a50576081b34..dc82ef096f3b 100644 --- a/drivers/parport/Kconfig +++ b/drivers/parport/Kconfig @@ -36,7 +36,9 @@ if PARPORT config PARPORT_PC tristate "PC-style hardware" depends on (!SPARC64 || PCI) && !SPARC32 && !M32R && !FRV && !S390 && \ - (!M68K || ISA) && !MN10300 && !AVR32 && !BLACKFIN && !XTENSA + (!M68K || ISA) && !MN10300 && !AVR32 && !BLACKFIN && \ + !XTENSA && !CRIS + ---help--- You should say Y here if you have a PC-style parallel port. All IBM PC compatible computers and some Alphas have PC-style From 7a9a57cb8d0311a007c5e3f64b93bcf0d46038f2 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Fri, 30 Aug 2013 12:09:57 +0800 Subject: [PATCH 117/277] drivers: parport: Kconfig: exclude h8300 for PARPORT_PC commit d94bb2d756e525a7c67fa71762227533d48b03c9 upstream. h8300 does not support PARPORT_PC. The related error (with allmodconfig for h8300): CC [M] drivers/parport/parport_pc.o drivers/parport/parport_pc.c:67:25: fatal error: asm/parport.h: No such file or directory Signed-off-by: Chen Gang Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/parport/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig index dc82ef096f3b..70694ce38be2 100644 --- a/drivers/parport/Kconfig +++ b/drivers/parport/Kconfig @@ -37,7 +37,7 @@ config PARPORT_PC tristate "PC-style hardware" depends on (!SPARC64 || PCI) && !SPARC32 && !M32R && !FRV && !S390 && \ (!M68K || ISA) && !MN10300 && !AVR32 && !BLACKFIN && \ - !XTENSA && !CRIS + !XTENSA && !CRIS && !H8300 ---help--- You should say Y here if you have a PC-style parallel port. All From 55ce22eeb9e8090ccef3de9c8129046a4589de63 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 17 May 2013 11:04:44 +0200 Subject: [PATCH 118/277] console: Disable VGA text console support on cris commit 3535629264e69ddbec0bd44b6f9a119947fbe4e2 upstream. Signed-off-by: Geert Uytterhoeven Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index bc922c47d046..84f04d9461a9 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -6,7 +6,9 @@ menu "Console display driver support" config VGA_CONSOLE bool "VGA text console" if EXPERT || !X86 - depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) + depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && \ + !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && !CRIS && \ + (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) default y help Saying Y here will allow you to use Linux in text mode through a From d821f4be02a54eba7ef6d69f051f8a79aac09096 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 17 Dec 2013 23:37:01 +0000 Subject: [PATCH 119/277] video: vgacon: Don't build on arm64 commit ee23794b86689e655cedd616e98c03bc3c74f5ec upstream. arm64 is unlikely to have a VGA console and does not export screen_info causing build failures if the driver is build, for example in all*config. Add a dependency on !ARM64 to prevent this. This list is getting quite long, it may be easier to depend on a symbol which architectures that do support the driver can select. Signed-off-by: Mark Brown [tomi.valkeinen@ti.com: moved && to first modified line] Signed-off-by: Tomi Valkeinen Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index 84f04d9461a9..37e62c7b3273 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -8,7 +8,8 @@ config VGA_CONSOLE bool "VGA text console" if EXPERT || !X86 depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && \ !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && !CRIS && \ - (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) + (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \ + !ARM64 default y help Saying Y here will allow you to use Linux in text mode through a From 9818f16f1af710d6475ad73d589903914e4584cd Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Tue, 21 May 2013 10:46:05 +0100 Subject: [PATCH 120/277] arm64: kernel: compiling issue, need delete read_current_timer() commit 6916b14ea140ff5c915895eefe9431888a39a84d upstream. Under arm64, we will calibrate the delay loop statically using a known timer frequency, so delete read_current_timer(), or it will cause compiling issue with allmodconfig. The related error: ERROR: "read_current_timer" [lib/rbtree_test.ko] undefined! ERROR: "read_current_timer" [lib/interval_tree_test.ko] undefined! ERROR: "read_current_timer" [fs/ext4/ext4.ko] undefined! ERROR: "read_current_timer" [crypto/tcrypt.ko] undefined! Signed-off-by: Chen Gang Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/timex.h | 6 +++--- arch/arm64/kernel/time.c | 6 ------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h index b24a31a7e2c9..81a076eb37fa 100644 --- a/arch/arm64/include/asm/timex.h +++ b/arch/arm64/include/asm/timex.h @@ -16,14 +16,14 @@ #ifndef __ASM_TIMEX_H #define __ASM_TIMEX_H +#include + /* * Use the current timer as a cycle counter since this is what we use for * the delay loop. */ -#define get_cycles() ({ cycles_t c; read_current_timer(&c); c; }) +#define get_cycles() arch_counter_get_cntvct() #include -#define ARCH_HAS_READ_CURRENT_TIMER - #endif diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a551f88ae2c1..03dc3718eb13 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -68,12 +68,6 @@ unsigned long long notrace sched_clock(void) return arch_timer_read_counter() * sched_clock_mult; } -int read_current_timer(unsigned long *timer_value) -{ - *timer_value = arch_timer_read_counter(); - return 0; -} - void __init time_init(void) { u32 arch_timer_rate; From 72a2c0394991ca2785f4fda903aeec82d578376a Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 3 Apr 2015 10:46:58 -0400 Subject: [PATCH 121/277] ext4: make fsync to sync parent dir in no-journal for real this time commit e12fb97222fc41e8442896934f76d39ef99b590a upstream. Previously commit 14ece1028b3ed53ffec1b1213ffc6acaf79ad77c added a support for for syncing parent directory of newly created inodes to make sure that the inode is not lost after a power failure in no-journal mode. However this does not work in majority of cases, namely: - if the directory has inline data - if the directory is already indexed - if the directory already has at least one block and: - the new entry fits into it - or we've successfully converted it to indexed So in those cases we might lose the inode entirely even after fsync in the no-journal mode. This also includes ext2 default mode obviously. I've noticed this while running xfstest generic/321 and even though the test should fail (we need to run fsck after a crash in no-journal mode) I could not find a newly created entries even when if it was fsynced before. Fix this by adjusting the ext4_add_entry() successful exit paths to set the inode EXT4_STATE_NEWENTRY so that fsync has the chance to fsync the parent directory as well. Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: Frank Mayhar Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f1312173fa90..facf8590b714 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1880,7 +1880,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode) { struct inode *dir = dentry->d_parent->d_inode; - struct buffer_head *bh; + struct buffer_head *bh = NULL; struct ext4_dir_entry_2 *de; struct ext4_dir_entry_tail *t; struct super_block *sb; @@ -1905,14 +1905,14 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return retval; if (retval == 1) { retval = 0; - return retval; + goto out; } } if (is_dx(dir)) { retval = ext4_dx_add_entry(handle, dentry, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) - return retval; + goto out; ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); @@ -1924,14 +1924,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return PTR_ERR(bh); retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (retval != -ENOSPC) { - brelse(bh); - return retval; - } + if (retval != -ENOSPC) + goto out; if (blocks == 1 && !dx_fallback && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) - return make_indexed_dir(handle, dentry, inode, bh); + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { + retval = make_indexed_dir(handle, dentry, inode, bh); + bh = NULL; /* make_indexed_dir releases bh */ + goto out; + } brelse(bh); } bh = ext4_append(handle, dir, &block); @@ -1947,6 +1948,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, } retval = add_dirent_to_buf(handle, dentry, inode, de, bh); +out: brelse(bh); if (retval == 0) ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); From 903b1970c835f69a1e2f1bde5ac446691b396a5c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 14 Apr 2015 07:51:03 +1000 Subject: [PATCH 122/277] powerpc/perf: Cap 64bit userspace backtraces to PERF_MAX_STACK_DEPTH commit 9a5cbce421a283e6aea3c4007f141735bf9da8c3 upstream. We cap 32bit userspace backtraces to PERF_MAX_STACK_DEPTH (currently 127), but we forgot to do the same for 64bit backtraces. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/callchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 2396dda282cd..ead55351b254 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -243,7 +243,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); - for (;;) { + while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned long __user *) sp; if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) return; From bc7df9868f9e35cd883d7cb61bbf60de2413c450 Mon Sep 17 00:00:00 2001 From: Thomas D Date: Mon, 5 Jan 2015 21:37:23 +0100 Subject: [PATCH 123/277] tools/power turbostat: Use $(CURDIR) instead of $(PWD) and add support for O= option in Makefile commit f82263c6989c31ae9b94cecddffb29dcbec38710 upstream. Since commit ee0778a30153 ("tools/power: turbostat: make Makefile a bit more capable") turbostat's Makefile is using [...] BUILD_OUTPUT := $(PWD) [...] which obviously causes trouble when building "turbostat" with make -C /usr/src/linux/tools/power/x86/turbostat ARCH=x86 turbostat because GNU make does not update nor guarantee that $PWD is set. This patch changes the Makefile to use $CURDIR instead, which GNU make guarantees to set and update (i.e. when using "make -C ...") and also adds support for the O= option (see "make help" in your root of your kernel source tree for more details). Link: https://bugs.gentoo.org/show_bug.cgi?id=533918 Fixes: ee0778a30153 ("tools/power: turbostat: make Makefile a bit more capable") Signed-off-by: Thomas D. Cc: Mark Asselstine Signed-off-by: Len Brown Signed-off-by: Greg Kroah-Hartman --- tools/power/x86/turbostat/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index d1b3a361e526..4039854560d0 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,8 +1,12 @@ CC = $(CROSS_COMPILE)gcc -BUILD_OUTPUT := $(PWD) +BUILD_OUTPUT := $(CURDIR) PREFIX := /usr DESTDIR := +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif + turbostat : turbostat.c CFLAGS += -Wall CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' From ac38b131c6f9ccf52bf0687158540673da1d1f5b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Sat, 28 Feb 2015 02:23:25 -0800 Subject: [PATCH 124/277] UBI: account for bitflips in both the VID header and data commit 8eef7d70f7c6772c3490f410ee2bceab3b543fa1 upstream. We are completely discarding the earlier value of 'bitflips', which could reflect a bitflip found in ubi_io_read_vid_hdr(). Let's use the bitwise OR of header and data 'bitflip' statuses instead. Coverity CID #1226856 Signed-off-by: Brian Norris Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/attach.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index c071d410488f..79d69bd26dd2 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -408,7 +408,7 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, second_is_newer = !second_is_newer; } else { dbg_bld("PEB %d CRC is OK", pnum); - bitflips = !!err; + bitflips |= !!err; } mutex_unlock(&ubi->buf_mutex); From 5fd7a188f8471516981df3f6b061f7d2ea470616 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Sat, 28 Feb 2015 02:23:26 -0800 Subject: [PATCH 125/277] UBI: fix out of bounds write commit d74adbdb9abf0d2506a6c4afa534d894f28b763f upstream. If aeb->len >= vol->reserved_pebs, we should not be writing aeb into the PEB->LEB mapping. Caught by Coverity, CID #711212. Signed-off-by: Brian Norris Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/eba.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 0e11671dadc4..930cf2c77abb 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -1362,7 +1362,8 @@ int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai) * during re-size. */ ubi_move_aeb_to_list(av, aeb, &ai->erase); - vol->eba_tbl[aeb->lnum] = aeb->pnum; + else + vol->eba_tbl[aeb->lnum] = aeb->pnum; } } From 7e0a5b1f3d114523b282e71462dcf0cc6006a884 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Sat, 28 Feb 2015 02:23:27 -0800 Subject: [PATCH 126/277] UBI: initialize LEB number variable commit f16db8071ce18819fbd705ddcc91c6f392fb61f8 upstream. In some of the 'out_not_moved' error paths, lnum may be used uninitialized. Don't ignore the warning; let's fix it. This uninitialized variable doesn't have much visible effect in the end, since we just schedule the PEB for erasure, and its LEB number doesn't really matter (it just gets printed in debug messages). But let's get it straight anyway. Coverity CID #113449 Signed-off-by: Brian Norris Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 49e570abe58b..c08254016fe8 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -999,7 +999,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, int cancel) { int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0; - int vol_id = -1, uninitialized_var(lnum); + int vol_id = -1, lnum = -1; #ifdef CONFIG_MTD_UBI_FASTMAP int anchor = wrk->anchor; #endif From a4014203612e80fb6de1e0f3f70de84ad8ba75fa Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Sat, 28 Feb 2015 02:23:28 -0800 Subject: [PATCH 127/277] UBI: fix check for "too many bytes" commit 299d0c5b27346a77a0777c993372bf8777d4f2e5 upstream. The comparison from the previous line seems to have been erroneously (partially) copied-and-pasted onto the next. The second line should be checking req.bytes, not req.lnum. Coverity CID #139400 Signed-off-by: Brian Norris [rw: Fixed comparison] Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/cdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 4f02848bb2bc..fc764e7976bd 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -475,7 +475,7 @@ static long vol_cdev_ioctl(struct file *file, unsigned int cmd, /* Validate the request */ err = -EINVAL; if (req.lnum < 0 || req.lnum >= vol->reserved_pebs || - req.bytes < 0 || req.lnum >= vol->usable_leb_size) + req.bytes < 0 || req.bytes > vol->usable_leb_size) break; err = get_exclusive(desc); From 7c906f0661e74cb0b1b8872f256e1f0014c82fd4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 27 Mar 2015 00:27:18 -0700 Subject: [PATCH 128/277] scsi: storvsc: Fix a bug in copy_from_bounce_buffer() commit 8de580742fee8bc34d116f57a20b22b9a5f08403 upstream. We may exit this function without properly freeing up the maapings we may have acquired. Fix the bug. Signed-off-by: K. Y. Srinivasan Reviewed-by: Long Li Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index a8990783ba66..913b91c78a22 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -631,21 +631,22 @@ static unsigned int copy_to_bounce_buffer(struct scatterlist *orig_sgl, if (bounce_sgl[j].length == PAGE_SIZE) { /* full..move to next entry */ sg_kunmap_atomic(bounce_addr); + bounce_addr = 0; j++; - - /* if we need to use another bounce buffer */ - if (srclen || i != orig_sgl_count - 1) - bounce_addr = sg_kmap_atomic(bounce_sgl,j); - - } else if (srclen == 0 && i == orig_sgl_count - 1) { - /* unmap the last bounce that is < PAGE_SIZE */ - sg_kunmap_atomic(bounce_addr); } + + /* if we need to use another bounce buffer */ + if (srclen && bounce_addr == 0) + bounce_addr = sg_kmap_atomic(bounce_sgl, j); + } sg_kunmap_atomic(src_addr - orig_sgl[i].offset); } + if (bounce_addr) + sg_kunmap_atomic(bounce_addr); + local_irq_restore(flags); return total_copied; From 1c9b773d9183be9a8f36a9646910cc37d7578757 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Apr 2015 22:23:54 -0700 Subject: [PATCH 129/277] drivers: parport: Kconfig: exclude arm64 for PARPORT_PC Fix build problem seen with arm64:allmodconfig. drivers/parport/parport_pc.c:67:25: fatal error: asm/parport.h: No such file or directory arm64 does not support PARPORT_PC. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/parport/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig index 70694ce38be2..46d2de24bf3e 100644 --- a/drivers/parport/Kconfig +++ b/drivers/parport/Kconfig @@ -37,7 +37,7 @@ config PARPORT_PC tristate "PC-style hardware" depends on (!SPARC64 || PCI) && !SPARC32 && !M32R && !FRV && !S390 && \ (!M68K || ISA) && !MN10300 && !AVR32 && !BLACKFIN && \ - !XTENSA && !CRIS && !H8300 + !XTENSA && !CRIS && !H8300 && !ARM64 ---help--- You should say Y here if you have a PC-style parallel port. All From 27b22d0124993f4c321a3462946608dfd3512cda Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 13 Apr 2015 11:48:58 +0800 Subject: [PATCH 130/277] ACPICA: Utilities: split IO address types from data type models. commit 2b8760100e1de69b6ff004c986328a82947db4ad upstream. ACPICA commit aacf863cfffd46338e268b7415f7435cae93b451 It is reported that on a physically 64-bit addressed machine, 32-bit kernel can trigger crashes in accessing the memory regions that are beyond the 32-bit boundary. The region field's start address should still be 32-bit compliant, but after a calculation (adding some offsets), it may exceed the 32-bit boundary. This case is rare and buggy, but there are real BIOSes leaked with such issues (see References below). This patch fixes this gap by always defining IO addresses as 64-bit, and allows OSPMs to optimize it for a real 32-bit machine to reduce the size of the internal objects. Internal acpi_physical_address usages in the structures that can be fixed by this change include: 1. struct acpi_object_region: acpi_physical_address address; 2. struct acpi_address_range: acpi_physical_address start_address; acpi_physical_address end_address; 3. struct acpi_mem_space_context; acpi_physical_address address; 4. struct acpi_table_desc acpi_physical_address address; See known issues 1 for other usages. Note that acpi_io_address which is used for ACPI_PROCESSOR may also suffer from same problem, so this patch changes it accordingly. For iasl, it will enforce acpi_physical_address as 32-bit to generate 32-bit OSPM compatible tables on 32-bit platforms, we need to define ACPI_32BIT_PHYSICAL_ADDRESS for it in acenv.h. Known issues: 1. Cleanup of mapped virtual address In struct acpi_mem_space_context, acpi_physical_address is used as a virtual address: acpi_physical_address mapped_physical_address; It is better to introduce acpi_virtual_address or use acpi_size instead. This patch doesn't make such a change. Because this should be done along with a change to acpi_os_map_memory()/acpi_os_unmap_memory(). There should be no functional problem to leave this unchanged except that only this structure is enlarged unexpectedly. Link: https://github.com/acpica/acpica/commit/aacf863c Reference: https://bugzilla.kernel.org/show_bug.cgi?id=87971 Reference: https://bugzilla.kernel.org/show_bug.cgi?id=79501 Reported-and-tested-by: Paul Menzel Reported-and-tested-by: Sial Nije Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/acpi/actypes.h | 20 ++++++++++++++++++++ include/acpi/platform/acenv.h | 1 + 2 files changed, 21 insertions(+) diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index a64adcc29ae5..f819e813c8ac 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -198,9 +198,29 @@ typedef int INT32; typedef s32 acpi_native_int; typedef u32 acpi_size; + +#ifdef ACPI_32BIT_PHYSICAL_ADDRESS + +/* + * OSPMs can define this to shrink the size of the structures for 32-bit + * none PAE environment. ASL compiler may always define this to generate + * 32-bit OSPM compliant tables. + */ typedef u32 acpi_io_address; typedef u32 acpi_physical_address; +#else /* ACPI_32BIT_PHYSICAL_ADDRESS */ + +/* + * It is reported that, after some calculations, the physical addresses can + * wrap over the 32-bit boundary on 32-bit PAE environment. + * https://bugzilla.kernel.org/show_bug.cgi?id=87971 + */ +typedef u64 acpi_io_address; +typedef u64 acpi_physical_address; + +#endif /* ACPI_32BIT_PHYSICAL_ADDRESS */ + #define ACPI_MAX_PTR ACPI_UINT32_MAX #define ACPI_SIZE_MAX ACPI_UINT32_MAX diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index ef04b36ca6ed..f7db107abb04 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -76,6 +76,7 @@ #define ACPI_LARGE_NAMESPACE_NODE #define ACPI_DATA_TABLE_DISASSEMBLY #define ACPI_SINGLE_THREADED +#define ACPI_32BIT_PHYSICAL_ADDRESS #endif /* acpi_exec configuration. Multithreaded with full AML debugger */ From e86de8b5eaf5415ab76cce33de00ad21e78c83e7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 27 Feb 2015 06:28:00 +0300 Subject: [PATCH 131/277] xtensa: xtfpga: fix hardware lockup caused by LCD driver commit 4949009eb8d40a441dcddcd96e101e77d31cf1b2 upstream. LCD driver is always built for the XTFPGA platform, but its base address is not configurable, and is wrong for ML605/KC705. Its initialization locks up KC705 board hardware. Make the whole driver optional, and its base address and bus width configurable. Implement 4-bit bus access method. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/Kconfig | 30 ++++++++++ arch/xtensa/platforms/xtfpga/Makefile | 3 +- .../xtfpga/include/platform/hardware.h | 3 - .../platforms/xtfpga/include/platform/lcd.h | 15 +++++ arch/xtensa/platforms/xtfpga/lcd.c | 55 ++++++++++++------- 5 files changed, 81 insertions(+), 25 deletions(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 0a1b95f81a32..2b086a6ae6c7 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -287,6 +287,36 @@ menu "Executable file formats" source "fs/Kconfig.binfmt" +config XTFPGA_LCD + bool "Enable XTFPGA LCD driver" + depends on XTENSA_PLATFORM_XTFPGA + default n + help + There's a 2x16 LCD on most of XTFPGA boards, kernel may output + progress messages there during bootup/shutdown. It may be useful + during board bringup. + + If unsure, say N. + +config XTFPGA_LCD_BASE_ADDR + hex "XTFPGA LCD base address" + depends on XTFPGA_LCD + default "0x0d0c0000" + help + Base address of the LCD controller inside KIO region. + Different boards from XTFPGA family have LCD controller at different + addresses. Please consult prototyping user guide for your board for + the correct address. Wrong address here may lead to hardware lockup. + +config XTFPGA_LCD_8BIT_ACCESS + bool "Use 8-bit access to XTFPGA LCD" + depends on XTFPGA_LCD + default n + help + LCD may be connected with 4- or 8-bit interface, 8-bit access may + only be used with 8-bit interface. Please consult prototyping user + guide for your board for the correct interface width. + endmenu source "net/Kconfig" diff --git a/arch/xtensa/platforms/xtfpga/Makefile b/arch/xtensa/platforms/xtfpga/Makefile index b9ae206340cd..7839d38b2337 100644 --- a/arch/xtensa/platforms/xtfpga/Makefile +++ b/arch/xtensa/platforms/xtfpga/Makefile @@ -6,4 +6,5 @@ # # Note 2! The CFLAGS definitions are in the main makefile... -obj-y = setup.o lcd.o +obj-y += setup.o +obj-$(CONFIG_XTFPGA_LCD) += lcd.o diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h index 4416773cbde5..b39fbcf5c611 100644 --- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h +++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h @@ -44,9 +44,6 @@ /* UART */ #define DUART16552_PADDR (XCHAL_KIO_PADDR + 0x0D050020) -/* LCD instruction and data addresses. */ -#define LCD_INSTR_ADDR ((char *)IOADDR(0x0D040000)) -#define LCD_DATA_ADDR ((char *)IOADDR(0x0D040004)) /* Misc. */ #define XTFPGA_FPGAREGS_VADDR IOADDR(0x0D020000) diff --git a/arch/xtensa/platforms/xtfpga/include/platform/lcd.h b/arch/xtensa/platforms/xtfpga/include/platform/lcd.h index 0e435645af5a..4c8541ed1139 100644 --- a/arch/xtensa/platforms/xtfpga/include/platform/lcd.h +++ b/arch/xtensa/platforms/xtfpga/include/platform/lcd.h @@ -11,10 +11,25 @@ #ifndef __XTENSA_XTAVNET_LCD_H #define __XTENSA_XTAVNET_LCD_H +#ifdef CONFIG_XTFPGA_LCD /* Display string STR at position POS on the LCD. */ void lcd_disp_at_pos(char *str, unsigned char pos); /* Shift the contents of the LCD display left or right. */ void lcd_shiftleft(void); void lcd_shiftright(void); +#else +static inline void lcd_disp_at_pos(char *str, unsigned char pos) +{ +} + +static inline void lcd_shiftleft(void) +{ +} + +static inline void lcd_shiftright(void) +{ +} +#endif + #endif diff --git a/arch/xtensa/platforms/xtfpga/lcd.c b/arch/xtensa/platforms/xtfpga/lcd.c index 2872301598df..4dc0c1b43f4b 100644 --- a/arch/xtensa/platforms/xtfpga/lcd.c +++ b/arch/xtensa/platforms/xtfpga/lcd.c @@ -1,50 +1,63 @@ /* - * Driver for the LCD display on the Tensilica LX60 Board. + * Driver for the LCD display on the Tensilica XTFPGA board family. + * http://www.mytechcorp.com/cfdata/productFile/File1/MOC-16216B-B-A0A04.pdf * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. * * Copyright (C) 2001, 2006 Tensilica Inc. + * Copyright (C) 2015 Cadence Design Systems Inc. */ -/* - * - * FIXME: this code is from the examples from the LX60 user guide. - * - * The lcd_pause function does busy waiting, which is probably not - * great. Maybe the code could be changed to use kernel timers, or - * change the hardware to not need to wait. - */ - +#include #include #include #include #include -#include -#define LCD_PAUSE_ITERATIONS 4000 +/* LCD instruction and data addresses. */ +#define LCD_INSTR_ADDR ((char *)IOADDR(CONFIG_XTFPGA_LCD_BASE_ADDR)) +#define LCD_DATA_ADDR (LCD_INSTR_ADDR + 4) + #define LCD_CLEAR 0x1 #define LCD_DISPLAY_ON 0xc /* 8bit and 2 lines display */ #define LCD_DISPLAY_MODE8BIT 0x38 +#define LCD_DISPLAY_MODE4BIT 0x28 #define LCD_DISPLAY_POS 0x80 #define LCD_SHIFT_LEFT 0x18 #define LCD_SHIFT_RIGHT 0x1c +static void lcd_put_byte(u8 *addr, u8 data) +{ +#ifdef CONFIG_XTFPGA_LCD_8BIT_ACCESS + ACCESS_ONCE(*addr) = data; +#else + ACCESS_ONCE(*addr) = data & 0xf0; + ACCESS_ONCE(*addr) = (data << 4) & 0xf0; +#endif +} + static int __init lcd_init(void) { - *LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT; + ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; mdelay(5); - *LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT; + ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; udelay(200); - *LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT; + ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; udelay(50); - *LCD_INSTR_ADDR = LCD_DISPLAY_ON; +#ifndef CONFIG_XTFPGA_LCD_8BIT_ACCESS + ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE4BIT; udelay(50); - *LCD_INSTR_ADDR = LCD_CLEAR; + lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT); + udelay(50); +#endif + lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_ON); + udelay(50); + lcd_put_byte(LCD_INSTR_ADDR, LCD_CLEAR); mdelay(10); lcd_disp_at_pos("XTENSA LINUX", 0); return 0; @@ -52,10 +65,10 @@ static int __init lcd_init(void) void lcd_disp_at_pos(char *str, unsigned char pos) { - *LCD_INSTR_ADDR = LCD_DISPLAY_POS | pos; + lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_POS | pos); udelay(100); while (*str != 0) { - *LCD_DATA_ADDR = *str; + lcd_put_byte(LCD_DATA_ADDR, *str); udelay(200); str++; } @@ -63,13 +76,13 @@ void lcd_disp_at_pos(char *str, unsigned char pos) void lcd_shiftleft(void) { - *LCD_INSTR_ADDR = LCD_SHIFT_LEFT; + lcd_put_byte(LCD_INSTR_ADDR, LCD_SHIFT_LEFT); udelay(50); } void lcd_shiftright(void) { - *LCD_INSTR_ADDR = LCD_SHIFT_RIGHT; + lcd_put_byte(LCD_INSTR_ADDR, LCD_SHIFT_RIGHT); udelay(50); } From a2ed2ed6fe49a3ffe9fba835c3a11c0e73d0c4cc Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 27 Feb 2015 11:02:38 +0300 Subject: [PATCH 132/277] xtensa: provide __NR_sync_file_range2 instead of __NR_sync_file_range commit 01e84c70fe40c8111f960987bcf7f931842e6d07 upstream. xtensa actually uses sync_file_range2 implementation, so it should define __NR_sync_file_range2 as other architectures that use that function. That fixes userspace interface (that apparently never worked) and avoids special-casing xtensa in libc implementations. See the thread ending at http://lists.busybox.net/pipermail/uclibc/2015-February/048833.html for more details. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/uapi/asm/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index 513effd48060..d07c1886bc8f 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -715,7 +715,7 @@ __SYSCALL(323, sys_process_vm_writev, 6) __SYSCALL(324, sys_name_to_handle_at, 5) #define __NR_open_by_handle_at 325 __SYSCALL(325, sys_open_by_handle_at, 3) -#define __NR_sync_file_range 326 +#define __NR_sync_file_range2 326 __SYSCALL(326, sys_sync_file_range2, 6) #define __NR_perf_event_open 327 __SYSCALL(327, sys_perf_event_open, 5) From b020a4676df1d550f893801f6c722845c2f6d5b3 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 27 Feb 2015 11:26:04 -0800 Subject: [PATCH 133/277] Drivers: hv: vmbus: Fix a bug in the error path in vmbus_open() commit 40384e4bbeb9f2651fe9bffc0062d9f31ef625bf upstream. Correctly rollback state if the failure occurs after we have handed over the ownership of the buffer to the host. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 92f34de7aee9..05e6a7d13d4e 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -169,7 +169,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, GFP_KERNEL); if (!open_info) { err = -ENOMEM; - goto error0; + goto error_gpadl; } init_completion(&open_info->waitevent); @@ -185,7 +185,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, if (userdatalen > MAX_USER_DEFINED_BYTES) { err = -EINVAL; - goto error0; + goto error_gpadl; } if (userdatalen) @@ -226,6 +226,9 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, list_del(&open_info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +error_gpadl: + vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); + error0: free_pages((unsigned long)out, get_order(send_ringbuffer_size + recv_ringbuffer_size)); From 62d37cc41178d2f0f8df6785e7a89e123154b278 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 15 Apr 2015 22:16:01 -0700 Subject: [PATCH 134/277] mvsas: fix panic on expander attached SATA devices commit 56cbd0ccc1b508de19561211d7ab9e1c77e6b384 upstream. mvsas is giving a General protection fault when it encounters an expander attached ATA device. Analysis of mvs_task_prep_ata() shows that the driver is assuming all ATA devices are locally attached and obtaining the phy mask by indexing the local phy table (in the HBA structure) with the phy id. Since expanders have many more phys than the HBA, this is causing the index into the HBA phy table to overflow and returning rubbish as the pointer. mvs_task_prep_ssp() instead does the phy mask using the port properties. Mirror this in mvs_task_prep_ata() to fix the panic. Reported-by: Adam Talbot Tested-by: Adam Talbot Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mvsas/mv_sas.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index c9e244984e30..fa50c7dc3d3e 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -441,14 +441,11 @@ static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag) static int mvs_task_prep_ata(struct mvs_info *mvi, struct mvs_task_exec_info *tei) { - struct sas_ha_struct *sha = mvi->sas; struct sas_task *task = tei->task; struct domain_device *dev = task->dev; struct mvs_device *mvi_dev = dev->lldd_dev; struct mvs_cmd_hdr *hdr = tei->hdr; struct asd_sas_port *sas_port = dev->port; - struct sas_phy *sphy = dev->phy; - struct asd_sas_phy *sas_phy = sha->sas_phy[sphy->number]; struct mvs_slot_info *slot; void *buf_prd; u32 tag = tei->tag, hdr_tag; @@ -468,7 +465,7 @@ static int mvs_task_prep_ata(struct mvs_info *mvi, slot->tx = mvi->tx_prod; del_q = TXQ_MODE_I | tag | (TXQ_CMD_STP << TXQ_CMD_SHIFT) | - (MVS_PHY_ID << TXQ_PHY_SHIFT) | + ((sas_port->phy_mask & TXQ_PHY_MASK) << TXQ_PHY_SHIFT) | (mvi_dev->taskfileset << TXQ_SRS_SHIFT); mvi->tx[mvi->tx_prod] = cpu_to_le32(del_q); From 035d9f212c511009f13a6b26d9a3c2777d414804 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 10 Mar 2015 11:37:14 -0300 Subject: [PATCH 135/277] stk1160: Make sure current buffer is released commit aeff09276748b66072f2db2e668cec955cf41959 upstream. The available (i.e. not used) buffers are returned by stk1160_clear_queue(), on the stop_streaming() path. However, this is insufficient and the current buffer must be released as well. Fix it. Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/stk1160/stk1160-v4l.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/stk1160/stk1160-v4l.c b/drivers/media/usb/stk1160/stk1160-v4l.c index a59153d2f8bf..518a5299ff0b 100644 --- a/drivers/media/usb/stk1160/stk1160-v4l.c +++ b/drivers/media/usb/stk1160/stk1160-v4l.c @@ -245,6 +245,11 @@ static int stk1160_stop_streaming(struct stk1160 *dev) if (mutex_lock_interruptible(&dev->v4l_lock)) return -ERESTARTSYS; + /* + * Once URBs are cancelled, the URB complete handler + * won't be running. This is required to safely release the + * current buffer (dev->isoc_ctl.buf). + */ stk1160_cancel_isoc(dev); /* @@ -665,8 +670,16 @@ void stk1160_clear_queue(struct stk1160 *dev) stk1160_info("buffer [%p/%d] aborted\n", buf, buf->vb.v4l2_buf.index); } - /* It's important to clear current buffer */ - dev->isoc_ctl.buf = NULL; + + /* It's important to release the current buffer */ + if (dev->isoc_ctl.buf) { + buf = dev->isoc_ctl.buf; + dev->isoc_ctl.buf = NULL; + + vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR); + stk1160_info("buffer [%p/%d] aborted\n", + buf, buf->vb.v4l2_buf.index); + } spin_unlock_irqrestore(&dev->buf_lock, flags); } From b55c80ba21ce7cd6aed4323c2b86552434b44fd6 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 13 Apr 2015 14:56:22 +0200 Subject: [PATCH 136/277] IB/core: disallow registering 0-sized memory region commit 8abaae62f3fdead8f4ce0ab46b4ab93dee39bab2 upstream. If ib_umem_get() is called with a size equal to 0 and an non-page aligned address, one page will be pinned and a 0-sized umem will be returned to the caller. This should not be allowed: it's not expected for a memory region to have a size equal to 0. This patch adds a check to explicitly refuse to register a 0-sized region. Link: http://mid.gmane.org/cover.1428929103.git.ydroneaud@opteya.com Cc: Shachar Raindel Cc: Jack Morgenstein Cc: Or Gerlitz Signed-off-by: Yann Droneaud Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/umem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 055ebebc07dd..dccb9aac35c3 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -94,6 +94,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + if (!size) + return ERR_PTR(-EINVAL); + /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. From d016c609f3165cfca74504c55ae55b2030d8a9e3 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 13 Apr 2015 14:56:23 +0200 Subject: [PATCH 137/277] IB/core: don't disallow registering region starting at 0x0 commit 66578b0b2f69659f00b6169e6fe7377c4b100d18 upstream. In a call to ib_umem_get(), if address is 0x0 and size is already page aligned, check added in commit 8494057ab5e4 ("IB/uverbs: Prevent integer overflow in ib_umem_get address arithmetic") will refuse to register a memory region that could otherwise be valid (provided vm.mmap_min_addr sysctl and mmap_low_allowed SELinux knobs allow userspace to map something at address 0x0). This patch allows back such registration: ib_umem_get() should probably don't care of the base address provided it can be pinned with get_user_pages(). There's two possible overflows, in (addr + size) and in PAGE_ALIGN(addr + size), this patch keep ensuring none of them happen while allowing to pin memory at address 0x0. Anyway, the case of size equal 0 is no more (partially) handled as 0-length memory region are disallowed by an earlier check. Link: http://mid.gmane.org/cover.1428929103.git.ydroneaud@opteya.com Cc: Shachar Raindel Cc: Jack Morgenstein Cc: Or Gerlitz Signed-off-by: Yann Droneaud Reviewed-by: Sagi Grimberg Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/umem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index dccb9aac35c3..c1fef27010d4 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -101,8 +101,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. */ - if ((PAGE_ALIGN(addr + size) <= size) || - (PAGE_ALIGN(addr + size) <= addr)) + if (((addr + size) < addr) || + PAGE_ALIGN(addr + size) < (addr + size)) return ERR_PTR(-EINVAL); if (!can_do_mlock()) From 90615370309b5bb32962b50a822730dda467e7f5 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 2 Apr 2015 13:39:05 +0300 Subject: [PATCH 138/277] IB/mlx4: Fix WQE LSO segment calculation commit ca9b590caa17bcbbea119594992666e96cde9c2f upstream. The current code decreases from the mss size (which is the gso_size from the kernel skb) the size of the packet headers. It shouldn't do that because the mss that comes from the stack (e.g IPoIB) includes only the tcp payload without the headers. The result is indication to the HW that each packet that the HW sends is smaller than what it could be, and too many packets will be sent for big messages. An easy way to demonstrate one more aspect of the problem is by configuring the ipoib mtu to be less than 2*hlen (2*56) and then run app sending big TCP messages. This will tell the HW to send packets with giant (negative value which under unsigned arithmetics becomes a huge positive one) length and the QP moves to SQE state. Fixes: b832be1e4007 ('IB/mlx4: Add IPoIB LSO support') Reported-by: Matthew Finlay Signed-off-by: Erez Shitrit Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/qp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 4f10af2905b5..262a18437ceb 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2174,8 +2174,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); - *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | - wr->wr.ud.hlen); + *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen); *lso_seg_len = halign; return 0; } From 0001a0ca47f3d5e7c97ae4e984166175b863c698 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 15 Apr 2015 19:18:39 +0100 Subject: [PATCH 139/277] i2c: core: Export bus recovery functions commit c1c21f4e60ed4523292f1a89ff45a208bddd3849 upstream. Current -next fails to link an ARM allmodconfig because drivers that use the core recovery functions can be built as modules but those functions are not exported: ERROR: "i2c_generic_gpio_recovery" [drivers/i2c/busses/i2c-davinci.ko] undefined! ERROR: "i2c_generic_scl_recovery" [drivers/i2c/busses/i2c-davinci.ko] undefined! ERROR: "i2c_recover_bus" [drivers/i2c/busses/i2c-davinci.ko] undefined! Add exports to fix this. Fixes: 5f9296ba21b3c (i2c: Add bus recovery infrastructure) Signed-off-by: Mark Brown Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 48e31ed69dbf..9d539cbfc833 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -206,6 +206,7 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap) adap->bus_recovery_info->set_scl(adap, 1); return i2c_generic_recovery(adap); } +EXPORT_SYMBOL_GPL(i2c_generic_scl_recovery); int i2c_generic_gpio_recovery(struct i2c_adapter *adap) { @@ -220,6 +221,7 @@ int i2c_generic_gpio_recovery(struct i2c_adapter *adap) return ret; } +EXPORT_SYMBOL_GPL(i2c_generic_gpio_recovery); int i2c_recover_bus(struct i2c_adapter *adap) { @@ -229,6 +231,7 @@ int i2c_recover_bus(struct i2c_adapter *adap) dev_dbg(&adap->dev, "Trying i2c bus recovery\n"); return adap->bus_recovery_info->recover_bus(adap); } +EXPORT_SYMBOL_GPL(i2c_recover_bus); static int i2c_device_probe(struct device *dev) { From 2b239a97fc65379521fc92586654bcf9a230d878 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Feb 2015 11:29:21 -0500 Subject: [PATCH 140/277] drm/radeon: fix doublescan modes (v2) commit fd99a0943ffaa0320ea4f69d09ed188f950c0432 upstream. Use the correct flags for atom. v2: handle DRM_MODE_FLAG_DBLCLK Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_crtc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 971dd8795b68..8ac333094991 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -312,8 +312,10 @@ atombios_set_crtc_dtd_timing(struct drm_crtc *crtc, misc |= ATOM_COMPOSITESYNC; if (mode->flags & DRM_MODE_FLAG_INTERLACE) misc |= ATOM_INTERLACE; - if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + if (mode->flags & DRM_MODE_FLAG_DBLCLK) misc |= ATOM_DOUBLE_CLOCK_MODE; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + misc |= ATOM_H_REPLICATIONBY2 | ATOM_V_REPLICATIONBY2; args.susModeMiscInfo.usAccess = cpu_to_le16(misc); args.ucCRTC = radeon_crtc->crtc_id; @@ -356,8 +358,10 @@ static void atombios_crtc_set_timing(struct drm_crtc *crtc, misc |= ATOM_COMPOSITESYNC; if (mode->flags & DRM_MODE_FLAG_INTERLACE) misc |= ATOM_INTERLACE; - if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + if (mode->flags & DRM_MODE_FLAG_DBLCLK) misc |= ATOM_DOUBLE_CLOCK_MODE; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + misc |= ATOM_H_REPLICATIONBY2 | ATOM_V_REPLICATIONBY2; args.susModeMiscInfo.usAccess = cpu_to_le16(misc); args.ucCRTC = radeon_crtc->crtc_id; From 10e30152633a23d3da2326ff23f4b0b5fc1e94ce Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 21 Apr 2015 09:49:11 -0700 Subject: [PATCH 141/277] drm/i915: cope with large i2c transfers commit 9535c4757b881e06fae72a857485ad57c422b8d2 upstream. The hardware, according to the specs, is limited to 256 byte transfers, and current driver has no protections in case users attempt to do larger transfers. The code will just stomp over status register and mayhem ensues. Let's split larger transfers into digestable chunks. Doing this allows Atmel MXT driver on Pixel 1 function properly (it hasn't since commit 9d8dc3e529a19e427fd379118acd132520935c5d "Input: atmel_mxt_ts - implement T44 message handling" which tries to consume multiple touchscreen/touchpad reports in a single transaction). Reviewed-by: Chris Wilson Signed-off-by: Dmitry Torokhov Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_i2c.c | 66 +++++++++++++++++++++++++++----- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7695b5dd9d2d..35287ab445cd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -909,6 +909,7 @@ #define GMBUS_CYCLE_INDEX (2<<25) #define GMBUS_CYCLE_STOP (4<<25) #define GMBUS_BYTE_COUNT_SHIFT 16 +#define GMBUS_BYTE_COUNT_MAX 256U #define GMBUS_SLAVE_INDEX_SHIFT 8 #define GMBUS_SLAVE_ADDR_SHIFT 1 #define GMBUS_SLAVE_READ (1<<0) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 639fe192997c..4a21e13cc58c 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -276,18 +276,17 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv) } static int -gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg, - u32 gmbus1_index) +gmbus_xfer_read_chunk(struct drm_i915_private *dev_priv, + unsigned short addr, u8 *buf, unsigned int len, + u32 gmbus1_index) { int reg_offset = dev_priv->gpio_mmio_base; - u16 len = msg->len; - u8 *buf = msg->buf; I915_WRITE(GMBUS1 + reg_offset, gmbus1_index | GMBUS_CYCLE_WAIT | (len << GMBUS_BYTE_COUNT_SHIFT) | - (msg->addr << GMBUS_SLAVE_ADDR_SHIFT) | + (addr << GMBUS_SLAVE_ADDR_SHIFT) | GMBUS_SLAVE_READ | GMBUS_SW_RDY); while (len) { int ret; @@ -309,11 +308,35 @@ gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg, } static int -gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) +gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg, + u32 gmbus1_index) +{ + u8 *buf = msg->buf; + unsigned int rx_size = msg->len; + unsigned int len; + int ret; + + do { + len = min(rx_size, GMBUS_BYTE_COUNT_MAX); + + ret = gmbus_xfer_read_chunk(dev_priv, msg->addr, + buf, len, gmbus1_index); + if (ret) + return ret; + + rx_size -= len; + buf += len; + } while (rx_size != 0); + + return 0; +} + +static int +gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, + unsigned short addr, u8 *buf, unsigned int len) { int reg_offset = dev_priv->gpio_mmio_base; - u16 len = msg->len; - u8 *buf = msg->buf; + unsigned int chunk_size = len; u32 val, loop; val = loop = 0; @@ -325,8 +348,8 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) I915_WRITE(GMBUS3 + reg_offset, val); I915_WRITE(GMBUS1 + reg_offset, GMBUS_CYCLE_WAIT | - (msg->len << GMBUS_BYTE_COUNT_SHIFT) | - (msg->addr << GMBUS_SLAVE_ADDR_SHIFT) | + (chunk_size << GMBUS_BYTE_COUNT_SHIFT) | + (addr << GMBUS_SLAVE_ADDR_SHIFT) | GMBUS_SLAVE_WRITE | GMBUS_SW_RDY); while (len) { int ret; @@ -343,6 +366,29 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) if (ret) return ret; } + + return 0; +} + +static int +gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) +{ + u8 *buf = msg->buf; + unsigned int tx_size = msg->len; + unsigned int len; + int ret; + + do { + len = min(tx_size, GMBUS_BYTE_COUNT_MAX); + + ret = gmbus_xfer_write_chunk(dev_priv, msg->addr, buf, len); + if (ret) + return ret; + + buf += len; + tx_size -= len; + } while (tx_size != 0); + return 0; } From 3b388f33a1fc643f5e9dc496c741eafdf6ebef49 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 24 Apr 2015 15:47:07 -0400 Subject: [PATCH 142/277] RCU pathwalk breakage when running into a symlink overmounting something commit 3cab989afd8d8d1bc3d99fef0e7ed87c31e7b647 upstream. Calling unlazy_walk() in walk_component() and do_last() when we find a symlink that needs to be followed doesn't acquire a reference to vfsmount. That's fine when the symlink is on the same vfsmount as the parent directory (which is almost always the case), but it's not always true - one _can_ manage to bind a symlink on top of something. And in such cases we end up with excessive mntput(). Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f7c4393f8535..036c21246d6a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1542,7 +1542,8 @@ static inline int walk_component(struct nameidata *nd, struct path *path, if (should_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { - if (unlikely(unlazy_walk(nd, path->dentry))) { + if (unlikely(nd->path.mnt != path->mnt || + unlazy_walk(nd, path->dentry))) { err = -ECHILD; goto out_err; } @@ -2824,7 +2825,8 @@ static int do_last(struct nameidata *nd, struct path *path, if (should_follow_link(inode, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { - if (unlikely(unlazy_walk(nd, path->dentry))) { + if (unlikely(nd->path.mnt != path->mnt || + unlazy_walk(nd, path->dentry))) { error = -ECHILD; goto out; } From 61ea92b94820a4728f6ad1316a22d9dc0b9b4289 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Tue, 13 Jan 2015 13:16:18 -0800 Subject: [PATCH 143/277] ksoftirqd: Enable IRQs and call cond_resched() before poking RCU commit 28423ad283d5348793b0c45cc9b1af058e776fd6 upstream. While debugging an issue with excessive softirq usage, I encountered the following note in commit 3e339b5dae24a706 ("softirq: Use hotplug thread infrastructure"): [ paulmck: Call rcu_note_context_switch() with interrupts enabled. ] ...but despite this note, the patch still calls RCU with IRQs disabled. This seemingly innocuous change caused a significant regression in softirq CPU usage on the sending side of a large TCP transfer (~1 GB/s): when introducing 0.01% packet loss, the softirq usage would jump to around 25%, spiking as high as 50%. Before the change, the usage would never exceed 5%. Moving the call to rcu_note_context_switch() after the cond_sched() call, as it was originally before the hotplug patch, completely eliminated this problem. Signed-off-by: Calvin Owens Signed-off-by: Paul E. McKenney Signed-off-by: Mike Galbraith Signed-off-by: Greg Kroah-Hartman --- kernel/softirq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 787b3a032429..21956f00cb51 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -774,9 +774,13 @@ static void run_ksoftirqd(unsigned int cpu) local_irq_disable(); if (local_softirq_pending()) { __do_softirq(); - rcu_note_context_switch(cpu); local_irq_enable(); cond_resched(); + + preempt_disable(); + rcu_note_context_switch(cpu); + preempt_enable(); + return; } local_irq_enable(); From 9923e74aefabc4a73e572dc16e2999625885b1e7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 26 Feb 2015 05:35:41 +0000 Subject: [PATCH 144/277] e1000: add dummy allocator to fix race condition between mtu change and netpoll commit 08e8331654d1d7b2c58045e549005bc356aa7810 upstream. There is a race condition between e1000_change_mtu's cleanups and netpoll, when we change the MTU across jumbo size: Changing MTU frees all the rx buffers: e1000_change_mtu -> e1000_down -> e1000_clean_all_rx_rings -> e1000_clean_rx_ring Then, close to the end of e1000_change_mtu: pr_info -> ... -> netpoll_poll_dev -> e1000_clean -> e1000_clean_rx_irq -> e1000_alloc_rx_buffers -> e1000_alloc_frag And when we come back to do the rest of the MTU change: e1000_up -> e1000_configure -> e1000_configure_rx -> e1000_alloc_jumbo_rx_buffers alloc_jumbo finds the buffers already != NULL, since data (shared with page in e1000_rx_buffer->rxbuf) has been re-alloc'd, but it's garbage, or at least not what is expected when in jumbo state. This results in an unusable adapter (packets don't get through), and a NULL pointer dereference on the next call to e1000_clean_rx_ring (other mtu change, link down, shutdown): BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] put_compound_page+0x7e/0x330 [...] Call Trace: [] put_page+0x55/0x60 [] e1000_clean_rx_ring+0x134/0x200 [] e1000_clean_all_rx_rings+0x45/0x60 [] e1000_down+0x1c0/0x1d0 [] ? deactivate_slab+0x7f0/0x840 [] e1000_change_mtu+0xdc/0x170 [] dev_set_mtu+0xa0/0x140 [] do_setlink+0x218/0xac0 [] ? nla_parse+0xb9/0x120 [] rtnl_newlink+0x6d0/0x890 [] ? kvm_clock_read+0x20/0x40 [] ? sched_clock_cpu+0xa8/0x100 [] rtnetlink_rcv_msg+0x92/0x260 By setting the allocator to a dummy version, netpoll can't mess up our rx buffers. The allocator is set back to a sane value in e1000_configure_rx. Fixes: edbbb3ca1077 ("e1000: implement jumbo receive with partial descriptors") Signed-off-by: Sabrina Dubroca Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000/e1000_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 59ad007dd5aa..a978fc82ceb5 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -144,6 +144,11 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int *work_done, int work_to_do); +static void e1000_alloc_dummy_rx_buffers(struct e1000_adapter *adapter, + struct e1000_rx_ring *rx_ring, + int cleaned_count) +{ +} static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int cleaned_count); @@ -3555,8 +3560,11 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) msleep(1); /* e1000_down has a dependency on max_frame_size */ hw->max_frame_size = max_frame; - if (netif_running(netdev)) + if (netif_running(netdev)) { + /* prevent buffers from being reallocated */ + adapter->alloc_rx_buf = e1000_alloc_dummy_rx_buffers; e1000_down(adapter); + } /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN * means we reserve 2 more, this pushes us to allocate from the next From e60e4dc082ca9c63d2113be4be5ac4cf3fd2f2a8 Mon Sep 17 00:00:00 2001 From: mancha security Date: Wed, 18 Mar 2015 18:47:25 +0100 Subject: [PATCH 145/277] lib: memzero_explicit: use barrier instead of OPTIMIZER_HIDE_VAR commit 0b053c9518292705736329a8fe20ef4686ffc8e9 upstream. OPTIMIZER_HIDE_VAR(), as defined when using gcc, is insufficient to ensure protection from dead store optimization. For the random driver and crypto drivers, calls are emitted ... $ gdb vmlinux (gdb) disassemble memzero_explicit Dump of assembler code for function memzero_explicit: 0xffffffff813a18b0 <+0>: push %rbp 0xffffffff813a18b1 <+1>: mov %rsi,%rdx 0xffffffff813a18b4 <+4>: xor %esi,%esi 0xffffffff813a18b6 <+6>: mov %rsp,%rbp 0xffffffff813a18b9 <+9>: callq 0xffffffff813a7120 0xffffffff813a18be <+14>: pop %rbp 0xffffffff813a18bf <+15>: retq End of assembler dump. (gdb) disassemble extract_entropy [...] 0xffffffff814a5009 <+313>: mov %r12,%rdi 0xffffffff814a500c <+316>: mov $0xa,%esi 0xffffffff814a5011 <+321>: callq 0xffffffff813a18b0 0xffffffff814a5016 <+326>: mov -0x48(%rbp),%rax [...] ... but in case in future we might use facilities such as LTO, then OPTIMIZER_HIDE_VAR() is not sufficient to protect gcc from a possible eviction of the memset(). We have to use a compiler barrier instead. Minimal test example when we assume memzero_explicit() would *not* be a call, but would have been *inlined* instead: static inline void memzero_explicit(void *s, size_t count) { memset(s, 0, count); } int main(void) { char buff[20]; snprintf(buff, sizeof(buff) - 1, "test"); printf("%s", buff); memzero_explicit(buff, sizeof(buff)); return 0; } With := OPTIMIZER_HIDE_VAR(): (gdb) disassemble main Dump of assembler code for function main: [...] 0x0000000000400464 <+36>: callq 0x400410 0x0000000000400469 <+41>: xor %eax,%eax 0x000000000040046b <+43>: add $0x28,%rsp 0x000000000040046f <+47>: retq End of assembler dump. With := barrier(): (gdb) disassemble main Dump of assembler code for function main: [...] 0x0000000000400464 <+36>: callq 0x400410 0x0000000000400469 <+41>: movq $0x0,(%rsp) 0x0000000000400471 <+49>: movq $0x0,0x8(%rsp) 0x000000000040047a <+58>: movl $0x0,0x10(%rsp) 0x0000000000400482 <+66>: xor %eax,%eax 0x0000000000400484 <+68>: add $0x28,%rsp 0x0000000000400488 <+72>: retq End of assembler dump. As can be seen, movq, movq, movl are being emitted inlined via memset(). Reference: http://thread.gmane.org/gmane.linux.kernel.cryptoapi/13764/ Fixes: d4c5efdb9777 ("random: add and use memzero_explicit() for clearing data") Cc: Theodore Ts'o Signed-off-by: mancha security Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- lib/string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/string.c b/lib/string.c index 43d0781daf47..cb9ea2181557 100644 --- a/lib/string.c +++ b/lib/string.c @@ -598,7 +598,7 @@ EXPORT_SYMBOL(memset); void memzero_explicit(void *s, size_t count) { memset(s, 0, count); - OPTIMIZER_HIDE_VAR(s); + barrier(); } EXPORT_SYMBOL(memzero_explicit); From 7725bb06eba0b24a925c0739c117c907d6c59613 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 13 Mar 2015 15:17:14 +0800 Subject: [PATCH 146/277] wl18xx: show rx_frames_per_rates as an array as it really is commit a3fa71c40f1853d0c27e8f5bc01a722a705d9682 upstream. In struct wl18xx_acx_rx_rate_stat, rx_frames_per_rates field is an array, not a number. This means WL18XX_DEBUGFS_FWSTATS_FILE can't be used to display this field in debugfs (it would display a pointer, not the actual data). Use WL18XX_DEBUGFS_FWSTATS_FILE_ARRAY instead. This bug has been found by adding a __printf attribute to wl1271_format_buffer. gcc complained about "format '%u' expects argument of type 'unsigned int', but argument 5 has type 'u32 *'". Fixes: c5d94169e818 ("wl18xx: use new fw stats structures") Signed-off-by: Nicolas Iooss Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ti/wl18xx/debugfs.c | 2 +- drivers/net/wireless/ti/wlcore/debugfs.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ti/wl18xx/debugfs.c b/drivers/net/wireless/ti/wl18xx/debugfs.c index 7f1669cdea09..779dc2b2ca75 100644 --- a/drivers/net/wireless/ti/wl18xx/debugfs.c +++ b/drivers/net/wireless/ti/wl18xx/debugfs.c @@ -136,7 +136,7 @@ WL18XX_DEBUGFS_FWSTATS_FILE(rx_filter, protection_filter, "%u"); WL18XX_DEBUGFS_FWSTATS_FILE(rx_filter, accum_arp_pend_requests, "%u"); WL18XX_DEBUGFS_FWSTATS_FILE(rx_filter, max_arp_queue_dep, "%u"); -WL18XX_DEBUGFS_FWSTATS_FILE(rx_rate, rx_frames_per_rates, "%u"); +WL18XX_DEBUGFS_FWSTATS_FILE_ARRAY(rx_rate, rx_frames_per_rates, 50); WL18XX_DEBUGFS_FWSTATS_FILE_ARRAY(aggr_size, tx_agg_vs_rate, AGGR_STATS_TX_AGG*AGGR_STATS_TX_RATE); diff --git a/drivers/net/wireless/ti/wlcore/debugfs.h b/drivers/net/wireless/ti/wlcore/debugfs.h index f7381dd69009..1bce4325e86b 100644 --- a/drivers/net/wireless/ti/wlcore/debugfs.h +++ b/drivers/net/wireless/ti/wlcore/debugfs.h @@ -26,8 +26,8 @@ #include "wlcore.h" -int wl1271_format_buffer(char __user *userbuf, size_t count, - loff_t *ppos, char *fmt, ...); +__printf(4, 5) int wl1271_format_buffer(char __user *userbuf, size_t count, + loff_t *ppos, char *fmt, ...); int wl1271_debugfs_init(struct wl1271 *wl); void wl1271_debugfs_exit(struct wl1271 *wl); From 781dd2886e87d5f978a16082c878afe86df24093 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Sat, 7 Mar 2015 03:39:05 -0600 Subject: [PATCH 147/277] C6x: time: Ensure consistency in __init commit f4831605f2dacd12730fe73961c77253cc2ea425 upstream. time_init invokes timer64_init (which is __init annotation) since all of these are invoked at init time, lets maintain consistency by ensuring time_init is marked appropriately as well. This fixes the following warning with CONFIG_DEBUG_SECTION_MISMATCH=y WARNING: vmlinux.o(.text+0x3bfc): Section mismatch in reference from the function time_init() to the function .init.text:timer64_init() The function time_init() references the function __init timer64_init(). This is often because time_init lacks a __init annotation or the annotation of timer64_init is wrong. Fixes: 546a39546c64 ("C6X: time management") Signed-off-by: Nishanth Menon Signed-off-by: Mark Salter Signed-off-by: Greg Kroah-Hartman --- arch/c6x/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/c6x/kernel/time.c b/arch/c6x/kernel/time.c index 356ee84cad95..04845aaf5985 100644 --- a/arch/c6x/kernel/time.c +++ b/arch/c6x/kernel/time.c @@ -49,7 +49,7 @@ u64 sched_clock(void) return (tsc * sched_clock_multiplier) >> SCHED_CLOCK_SHIFT; } -void time_init(void) +void __init time_init(void) { u64 tmp = (u64)NSEC_PER_SEC << SCHED_CLOCK_SHIFT; From b0635e862c10d47ee5f13d0422f7bc9da718bab4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Apr 2015 12:48:35 -0700 Subject: [PATCH 148/277] memstick: mspro_block: add missing curly braces commit 13f6b191aaa11c7fd718d35a0c565f3c16bc1d99 upstream. Using the indenting we can see the curly braces were obviously intended. This is a static checker fix, but my guess is that we don't read enough bytes, because we don't calculate "t_len" correctly. Fixes: f1d82698029b ('memstick: use fully asynchronous request processing') Signed-off-by: Dan Carpenter Cc: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/memstick/core/mspro_block.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index f4176ca3a794..cdd61ab5c2b5 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -758,7 +758,7 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error) if (error || (card->current_mrq.tpc == MSPRO_CMD_STOP)) { if (msb->data_dir == READ) { - for (cnt = 0; cnt < msb->current_seg; cnt++) + for (cnt = 0; cnt < msb->current_seg; cnt++) { t_len += msb->req_sg[cnt].length / msb->page_size; @@ -766,6 +766,7 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error) t_len += msb->current_page - 1; t_len *= msb->page_size; + } } } else t_len = blk_rq_bytes(msb->block_req); From e034445e41bcaa968e8dc7721f5a581d5db34bff Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 9 Oct 2014 15:30:30 -0700 Subject: [PATCH 149/277] nosave: consolidate __nosave_{begin,end} in commit 7f8998c7aef3ac9c5f3f2943e083dfa6302e90d0 upstream. The different architectures used their own (and different) declarations: extern __visible const void __nosave_begin, __nosave_end; extern const void __nosave_begin, __nosave_end; extern long __nosave_begin, __nosave_end; Consolidate them using the first variant in . Signed-off-by: Geert Uytterhoeven Cc: Russell King Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Guan Xuetao Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/suspend.h | 7 ------- arch/mips/power/cpu.c | 2 +- arch/powerpc/kernel/suspend.c | 4 +--- arch/s390/kernel/suspend.c | 6 +----- arch/sh/include/asm/sections.h | 1 - arch/sparc/power/hibernate.c | 4 +--- arch/unicore32/include/mach/pm.h | 3 --- arch/unicore32/kernel/hibernate.c | 1 + arch/x86/power/hibernate_32.c | 4 +--- arch/x86/power/hibernate_64.c | 4 +--- include/asm-generic/sections.h | 4 ++++ 11 files changed, 11 insertions(+), 29 deletions(-) delete mode 100644 arch/mips/include/asm/suspend.h diff --git a/arch/mips/include/asm/suspend.h b/arch/mips/include/asm/suspend.h deleted file mode 100644 index 3adac3b53d19..000000000000 --- a/arch/mips/include/asm/suspend.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_SUSPEND_H -#define __ASM_SUSPEND_H - -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - -#endif /* __ASM_SUSPEND_H */ diff --git a/arch/mips/power/cpu.c b/arch/mips/power/cpu.c index 521e5963df05..2129e67723ff 100644 --- a/arch/mips/power/cpu.c +++ b/arch/mips/power/cpu.c @@ -7,7 +7,7 @@ * Author: Hu Hongbing * Wu Zhangjin */ -#include +#include #include #include diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index 0167d53da30c..a531154cc0f3 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -9,9 +9,7 @@ #include #include - -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; +#include /* * pfn_is_nosave - check if given pfn is in the 'nosave' section diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index f176bc83cc8d..a3ab6798ce68 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -9,13 +9,9 @@ #include #include #include +#include #include -/* - * References to section boundaries - */ -extern const void __nosave_begin, __nosave_end; - /* * The restore of the saved pages in an hibernation image will set * the change and referenced bits in the storage key for each page. diff --git a/arch/sh/include/asm/sections.h b/arch/sh/include/asm/sections.h index 1b6199740e98..7a99e6af6372 100644 --- a/arch/sh/include/asm/sections.h +++ b/arch/sh/include/asm/sections.h @@ -3,7 +3,6 @@ #include -extern long __nosave_begin, __nosave_end; extern long __machvec_start, __machvec_end; extern char __uncached_start, __uncached_end; extern char __start_eh_frame[], __stop_eh_frame[]; diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c index 42b0b8ce699a..17bd2e167e07 100644 --- a/arch/sparc/power/hibernate.c +++ b/arch/sparc/power/hibernate.c @@ -9,11 +9,9 @@ #include #include #include +#include #include -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - struct saved_context saved_context; /* diff --git a/arch/unicore32/include/mach/pm.h b/arch/unicore32/include/mach/pm.h index 4dcd34ae194c..77b522694e74 100644 --- a/arch/unicore32/include/mach/pm.h +++ b/arch/unicore32/include/mach/pm.h @@ -36,8 +36,5 @@ extern int puv3_pm_enter(suspend_state_t state); /* Defined in hibernate_asm.S */ extern int restore_image(pgd_t *resume_pg_dir, struct pbe *restore_pblist); -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - extern struct pbe *restore_pblist; #endif diff --git a/arch/unicore32/kernel/hibernate.c b/arch/unicore32/kernel/hibernate.c index d75ef8b6cb56..9969ec374abb 100644 --- a/arch/unicore32/kernel/hibernate.c +++ b/arch/unicore32/kernel/hibernate.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "mach/pm.h" diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 7d28c885d238..291226b952a9 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -13,13 +13,11 @@ #include #include #include +#include /* Defined in hibernate_asm_32.S */ extern int restore_image(void); -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - /* Pointer to the temporary resume page tables */ pgd_t *resume_pg_dir; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index a0fde91c16cf..8ecaed127634 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -17,11 +17,9 @@ #include #include #include +#include #include -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - /* Defined in hibernate_asm_64.S */ extern int restore_image(void); diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index c1a1216e29ce..87b27263f5e2 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -3,6 +3,8 @@ /* References to section boundaries */ +#include + extern char _text[], _stext[], _etext[]; extern char _data[], _sdata[], _edata[]; extern char __bss_start[], __bss_stop[]; @@ -18,6 +20,8 @@ extern char __start_rodata[], __end_rodata[]; /* Start and end of .ctors section - used for constructor calls. */ extern char __ctors_start[], __ctors_end[]; +extern __visible const void __nosave_begin, __nosave_end; + /* function descriptor handling (if any). Override * in asm/sections.h */ #ifndef dereference_function_descriptor From e6095e729fde00eff27d6a04b1173340b20d274f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 4 May 2015 21:42:41 -0700 Subject: [PATCH 150/277] s390: Fix build error s390 images fail to build in 3.10 with arch/s390/kernel/suspend.c: In function 'pfn_is_nosave': arch/s390/kernel/suspend.c:147:10: error: 'ipl_info' undeclared arch/s390/kernel/suspend.c:147:27: error: 'IPL_TYPE_NSS' undeclared due to a missing include file. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/suspend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index a3ab6798ce68..58cbb75e89e9 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -11,6 +11,7 @@ #include #include #include +#include /* * The restore of the saved pages in an hibernation image will set From 9182148a5315d4b1de68ac74fd54cbb5da5a3703 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 May 2015 21:56:44 +0200 Subject: [PATCH 151/277] Linux 3.10.77 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 019a6a4b386d..923ad8a64e3b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 76 +SUBLEVEL = 77 EXTRAVERSION = NAME = TOSSUG Baby Fish From b1d07441d04e4116638a1fc162cb7c6e55b2915a Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Fri, 8 May 2015 12:04:18 +0100 Subject: [PATCH 152/277] gator: Version 5.21.1 Signed-off-by: Drew Richardson Signed-off-by: Jon Medhurst --- drivers/gator/{LICENSE => COPYING} | 0 drivers/gator/Makefile | 18 +- drivers/gator/gator.h | 41 +-- drivers/gator/gator_annotate.c | 2 +- drivers/gator/gator_annotate_kernel.c | 2 +- drivers/gator/gator_backtrace.c | 2 +- drivers/gator/gator_buffer.c | 2 +- drivers/gator/gator_buffer_write.c | 2 +- drivers/gator/gator_cookies.c | 2 +- drivers/gator/gator_events_armv6.c | 2 +- drivers/gator/gator_events_armv7.c | 2 +- drivers/gator/gator_events_block.c | 2 +- drivers/gator/gator_events_irq.c | 2 +- drivers/gator/gator_events_l2c-310.c | 2 +- drivers/gator/gator_events_mali_4xx.c | 2 +- drivers/gator/gator_events_mali_4xx.h | 2 +- drivers/gator/gator_events_mali_common.c | 2 +- drivers/gator/gator_events_mali_common.h | 2 +- drivers/gator/gator_events_mali_midgard.c | 2 +- drivers/gator/gator_events_mali_midgard_hw.c | 31 +- .../gator/gator_events_mali_midgard_hw_test.c | 2 +- drivers/gator/gator_events_meminfo.c | 5 +- drivers/gator/gator_events_mmapped.c | 2 +- drivers/gator/gator_events_net.c | 2 +- drivers/gator/gator_events_perf_pmu.c | 47 ++- drivers/gator/gator_events_sched.c | 2 +- drivers/gator/gator_events_scorpion.c | 2 +- drivers/gator/gator_hrtimer_gator.c | 2 +- drivers/gator/gator_iks.c | 2 +- drivers/gator/gator_main.c | 42 ++- drivers/gator/gator_marshaling.c | 16 +- drivers/gator/gator_trace_gpu.c | 2 +- drivers/gator/gator_trace_power.c | 10 +- drivers/gator/gator_trace_sched.c | 2 +- drivers/gator/mali/mali_kbase_gator_api.h | 219 ----------- .../mali/mali_mjollnir_profiling_gator_api.h | 2 +- .../mali/mali_utgard_profiling_gator_api.h | 2 +- drivers/gator/mali_midgard.mk | 1 + tools/gator/daemon/Android.mk | 4 +- tools/gator/daemon/AnnotateListener.cpp | 30 +- tools/gator/daemon/AnnotateListener.h | 16 +- tools/gator/daemon/Application.mk | 2 +- tools/gator/daemon/Buffer.cpp | 77 ++-- tools/gator/daemon/Buffer.h | 25 +- tools/gator/daemon/CCNDriver.cpp | 5 +- tools/gator/daemon/CCNDriver.h | 2 +- tools/gator/daemon/COPYING | 339 ++++++++++++++++++ tools/gator/daemon/CPUFreqDriver.cpp | 58 --- tools/gator/daemon/CPUFreqDriver.h | 34 -- tools/gator/daemon/CapturedXML.cpp | 5 +- tools/gator/daemon/CapturedXML.h | 2 +- tools/gator/daemon/Child.cpp | 49 +-- tools/gator/daemon/Child.h | 2 +- tools/gator/daemon/Command.cpp | 12 +- tools/gator/daemon/Command.h | 2 +- tools/gator/daemon/Config.h | 2 +- tools/gator/daemon/ConfigurationXML.cpp | 10 +- tools/gator/daemon/ConfigurationXML.h | 2 +- tools/gator/daemon/Counter.h | 2 +- tools/gator/daemon/DiskIODriver.cpp | 20 +- tools/gator/daemon/DiskIODriver.h | 2 +- tools/gator/daemon/Driver.cpp | 2 +- tools/gator/daemon/Driver.h | 2 +- tools/gator/daemon/DriverSource.cpp | 33 +- tools/gator/daemon/DriverSource.h | 2 +- tools/gator/daemon/DynBuf.cpp | 16 +- tools/gator/daemon/DynBuf.h | 2 +- tools/gator/daemon/EventsXML.cpp | 179 ++++++++- tools/gator/daemon/EventsXML.h | 9 +- tools/gator/daemon/ExternalSource.cpp | 59 +-- tools/gator/daemon/ExternalSource.h | 3 +- tools/gator/daemon/FSDriver.cpp | 32 +- tools/gator/daemon/FSDriver.h | 2 +- tools/gator/daemon/Fifo.cpp | 6 +- tools/gator/daemon/Fifo.h | 2 +- tools/gator/daemon/FtraceDriver.cpp | 119 ++++-- tools/gator/daemon/FtraceDriver.h | 4 +- tools/gator/daemon/FtraceSource.cpp | 56 ++- tools/gator/daemon/FtraceSource.h | 2 +- tools/gator/daemon/HwmonDriver.cpp | 158 ++++---- tools/gator/daemon/HwmonDriver.h | 2 +- tools/gator/daemon/KMod.cpp | 6 +- tools/gator/daemon/KMod.h | 2 +- tools/gator/daemon/LocalCapture.cpp | 8 +- tools/gator/daemon/LocalCapture.h | 2 +- tools/gator/daemon/Logging.cpp | 10 +- tools/gator/daemon/Logging.h | 16 +- tools/gator/daemon/MaliVideoDriver.cpp | 4 +- tools/gator/daemon/MaliVideoDriver.h | 2 +- tools/gator/daemon/MemInfoDriver.cpp | 4 +- tools/gator/daemon/MemInfoDriver.h | 2 +- tools/gator/daemon/Monitor.cpp | 20 +- tools/gator/daemon/Monitor.h | 2 +- tools/gator/daemon/NetDriver.cpp | 6 +- tools/gator/daemon/NetDriver.h | 2 +- tools/gator/daemon/OlySocket.cpp | 51 +-- tools/gator/daemon/OlySocket.h | 8 +- tools/gator/daemon/OlyUtility.cpp | 2 +- tools/gator/daemon/OlyUtility.h | 2 +- tools/gator/daemon/PerfBuffer.cpp | 131 +++++-- tools/gator/daemon/PerfBuffer.h | 3 +- tools/gator/daemon/PerfDriver.cpp | 180 ++++++---- tools/gator/daemon/PerfDriver.h | 8 +- tools/gator/daemon/PerfGroup.cpp | 238 ++++++++---- tools/gator/daemon/PerfGroup.h | 23 +- tools/gator/daemon/PerfSource.cpp | 230 ++++++------ tools/gator/daemon/PerfSource.h | 5 +- tools/gator/daemon/Proc.cpp | 62 ++-- tools/gator/daemon/Proc.h | 2 +- tools/gator/daemon/Sender.cpp | 8 +- tools/gator/daemon/Sender.h | 2 +- tools/gator/daemon/SessionData.cpp | 71 ++-- tools/gator/daemon/SessionData.h | 11 +- tools/gator/daemon/SessionXML.cpp | 8 +- tools/gator/daemon/SessionXML.h | 2 +- tools/gator/daemon/Setup.cpp | 202 ++++++++--- tools/gator/daemon/Setup.h | 4 +- tools/gator/daemon/Source.cpp | 4 +- tools/gator/daemon/Source.h | 2 +- tools/gator/daemon/StreamlineSetup.cpp | 22 +- tools/gator/daemon/StreamlineSetup.h | 2 +- tools/gator/daemon/UEvent.cpp | 9 +- tools/gator/daemon/UEvent.h | 2 +- tools/gator/daemon/UserSpaceSource.cpp | 37 +- tools/gator/daemon/UserSpaceSource.h | 2 +- tools/gator/daemon/c++.cpp | 2 +- tools/gator/daemon/common.mk | 9 +- tools/gator/daemon/defaults.xml | 21 +- tools/gator/daemon/escape.c | 2 +- tools/gator/daemon/events-ARM11.xml | 2 +- tools/gator/daemon/events-CCI-400.xml | 4 +- tools/gator/daemon/events-CCI-500.xml | 75 ++++ tools/gator/daemon/events-Cortex-A15.xml | 100 +++--- tools/gator/daemon/events-Cortex-A17.xml | 79 ++-- tools/gator/daemon/events-Cortex-A5.xml | 52 +-- tools/gator/daemon/events-Cortex-A53.xml | 117 +++--- tools/gator/daemon/events-Cortex-A57.xml | 58 +-- tools/gator/daemon/events-Cortex-A7.xml | 65 ++-- tools/gator/daemon/events-Cortex-A72.xml | 87 +++++ tools/gator/daemon/events-Cortex-A8.xml | 44 +-- tools/gator/daemon/events-Cortex-A9.xml | 66 ++-- tools/gator/daemon/events-Filesystem.xml | 3 +- tools/gator/daemon/events-L2C-310.xml | 4 +- tools/gator/daemon/events-Linux.xml | 4 +- tools/gator/daemon/events-Mali-4xx.xml | 4 +- tools/gator/daemon/events-Mali-Midgard.xml | 17 +- tools/gator/daemon/events-Mali-Midgard_hw.xml | 75 ++-- tools/gator/daemon/events-Mali-T60x_hw.xml | 66 ++-- tools/gator/daemon/events-Mali-T62x_hw.xml | 65 ++-- tools/gator/daemon/events-Mali-T72x_hw.xml | 56 +-- tools/gator/daemon/events-Mali-T76x_hw.xml | 67 ++-- tools/gator/daemon/events-Mali-T82x_hw.xml | 108 ++++++ tools/gator/daemon/events-Mali-T83x_hw.xml | 108 ++++++ tools/gator/daemon/events-Mali-T86x_hw.xml | 117 ++++++ tools/gator/daemon/events-Mali-T88x_hw.xml | 117 ++++++ tools/gator/daemon/events-Other.xml | 33 ++ tools/gator/daemon/events-ftrace.xml | 21 +- tools/gator/daemon/main.cpp | 100 ++++-- 158 files changed, 3276 insertions(+), 1767 deletions(-) rename drivers/gator/{LICENSE => COPYING} (100%) delete mode 100644 drivers/gator/mali/mali_kbase_gator_api.h create mode 100644 tools/gator/daemon/COPYING delete mode 100644 tools/gator/daemon/CPUFreqDriver.cpp delete mode 100644 tools/gator/daemon/CPUFreqDriver.h create mode 100644 tools/gator/daemon/events-CCI-500.xml create mode 100644 tools/gator/daemon/events-Cortex-A72.xml create mode 100644 tools/gator/daemon/events-Mali-T82x_hw.xml create mode 100644 tools/gator/daemon/events-Mali-T83x_hw.xml create mode 100644 tools/gator/daemon/events-Mali-T86x_hw.xml create mode 100644 tools/gator/daemon/events-Mali-T88x_hw.xml create mode 100644 tools/gator/daemon/events-Other.xml diff --git a/drivers/gator/LICENSE b/drivers/gator/COPYING similarity index 100% rename from drivers/gator/LICENSE rename to drivers/gator/COPYING diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile index 28d2070b11d5..d14e2a02fee0 100644 --- a/drivers/gator/Makefile +++ b/drivers/gator/Makefile @@ -63,6 +63,22 @@ gator-$(CONFIG_ARM) += gator_events_armv6.o \ gator-$(CONFIG_ARM64) += +$(obj)/gator_main.o: $(obj)/gator_src_md5.h + +clean-files := gator_src_md5.h + +# Note, in the recipe below we use "cd $(srctree) && cd $(src)" rather than +# "cd $(srctree)/$(src)" because under DKMS $(src) is an absolute path, and we +# can't just use $(src) because for normal kernel builds this is relative to +# $(srctree) + + chk_events.h = : + quiet_chk_events.h = echo ' CHK $@' +silent_chk_events.h = : +$(obj)/gator_src_md5.h: FORCE + @$($(quiet)chk_events.h) + $(Q)cd $(srctree) && cd $(src) ; $(CONFIG_SHELL) -c "echo 'static char *gator_src_md5 = \"'\`ls *.c *.h mali/*.h | grep -Ev '^(gator_src_md5\.c|gator\.mod\.c)$$' | LC_ALL=C sort | xargs cat | md5sum | cut -b 1-32\`'\";'" > $(abspath $@) + else all: @@ -73,7 +89,7 @@ all: $(error) clean: - rm -f *.o .*.cmd modules.order Module.symvers gator.ko gator.mod.c + rm -f *.o .*.cmd gator_src_md5.h modules.order Module.symvers gator.ko gator.mod.c rm -rf .tmp_versions endif diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h index 5cc73a388c4f..202eb41c485b 100644 --- a/drivers/gator/gator.h +++ b/drivers/gator/gator.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,25 +21,26 @@ #define GATOR_IKS_SUPPORT defined(CONFIG_BL_SWITCHER) /* cpu ids */ -#define ARM1136 0xb36 -#define ARM1156 0xb56 -#define ARM1176 0xb76 -#define ARM11MPCORE 0xb02 -#define CORTEX_A5 0xc05 -#define CORTEX_A7 0xc07 -#define CORTEX_A8 0xc08 -#define CORTEX_A9 0xc09 -#define CORTEX_A15 0xc0f -#define CORTEX_A17 0xc0e -#define SCORPION 0x00f -#define SCORPIONMP 0x02d -#define KRAITSIM 0x049 -#define KRAIT 0x04d -#define KRAIT_S4_PRO 0x06f -#define CORTEX_A53 0xd03 -#define CORTEX_A57 0xd07 -#define AARCH64 0xd0f -#define OTHER 0xfff +#define ARM1136 0x41b36 +#define ARM1156 0x41b56 +#define ARM1176 0x41b76 +#define ARM11MPCORE 0x41b02 +#define CORTEX_A5 0x41c05 +#define CORTEX_A7 0x41c07 +#define CORTEX_A8 0x41c08 +#define CORTEX_A9 0x41c09 +#define CORTEX_A15 0x41c0f +#define CORTEX_A12 0x41c0d +#define CORTEX_A17 0x41c0e +#define SCORPION 0x5100f +#define SCORPIONMP 0x5102d +#define KRAITSIM 0x51049 +#define KRAIT 0x5104d +#define KRAIT_S4_PRO 0x5106f +#define CORTEX_A53 0x41d03 +#define CORTEX_A57 0x41d07 +#define CORTEX_A72 0x41d08 +#define OTHER 0xfffff /* gpu enums */ #define MALI_4xx 1 diff --git a/drivers/gator/gator_annotate.c b/drivers/gator/gator_annotate.c index ff9a3cef7b2e..cc9ae02e5fba 100644 --- a/drivers/gator/gator_annotate.c +++ b/drivers/gator/gator_annotate.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_annotate_kernel.c b/drivers/gator/gator_annotate_kernel.c index 69471f99e5fb..54e8e86e34cf 100644 --- a/drivers/gator/gator_annotate_kernel.c +++ b/drivers/gator/gator_annotate_kernel.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2012-2014. All rights reserved. + * Copyright (C) ARM Limited 2012-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c index 76c941d009a9..5557ec0b29ca 100644 --- a/drivers/gator/gator_backtrace.c +++ b/drivers/gator/gator_backtrace.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_buffer.c b/drivers/gator/gator_buffer.c index 910d5aa15066..f335457638ae 100644 --- a/drivers/gator/gator_buffer.c +++ b/drivers/gator/gator_buffer.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_buffer_write.c b/drivers/gator/gator_buffer_write.c index 654ec606cfad..b731e6a414d2 100644 --- a/drivers/gator/gator_buffer_write.c +++ b/drivers/gator/gator_buffer_write.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c index c43cce815226..9bd4c8b98c9e 100644 --- a/drivers/gator/gator_cookies.c +++ b/drivers/gator/gator_cookies.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c index a157a0013302..178397033e2c 100644 --- a/drivers/gator/gator_events_armv6.c +++ b/drivers/gator/gator_events_armv6.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c index 09c94220114c..e1f6a5fa9997 100644 --- a/drivers/gator/gator_events_armv7.c +++ b/drivers/gator/gator_events_armv7.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c index a352a54afa02..b3467b133712 100644 --- a/drivers/gator/gator_events_block.c +++ b/drivers/gator/gator_events_block.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c index 5221aac581b3..81b976a9b282 100644 --- a/drivers/gator/gator_events_irq.c +++ b/drivers/gator/gator_events_irq.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c index 73aaac32327e..063a06079c74 100644 --- a/drivers/gator/gator_events_l2c-310.c +++ b/drivers/gator/gator_events_l2c-310.c @@ -1,7 +1,7 @@ /** * l2c310 (L2 Cache Controller) event counters for gator * - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c index 9cf43fe2c29b..423b4e08e8cc 100644 --- a/drivers/gator/gator_events_mali_4xx.c +++ b/drivers/gator/gator_events_mali_4xx.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_mali_4xx.h b/drivers/gator/gator_events_mali_4xx.h index 976ca8c4cfa1..8f6a870e6d0c 100644 --- a/drivers/gator/gator_events_mali_4xx.h +++ b/drivers/gator/gator_events_mali_4xx.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2011-2014. All rights reserved. + * Copyright (C) ARM Limited 2011-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c index 1af87d649afe..7741f2575542 100644 --- a/drivers/gator/gator_events_mali_common.c +++ b/drivers/gator/gator_events_mali_common.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2012-2014. All rights reserved. + * Copyright (C) ARM Limited 2012-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h index e7082e62fe88..a4fc9d7d4cf6 100644 --- a/drivers/gator/gator_events_mali_common.h +++ b/drivers/gator/gator_events_mali_common.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2012-2014. All rights reserved. + * Copyright (C) ARM Limited 2012-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_mali_midgard.c b/drivers/gator/gator_events_mali_midgard.c index 0aec906d7ae5..3b0963a8de21 100644 --- a/drivers/gator/gator_events_mali_midgard.c +++ b/drivers/gator/gator_events_mali_midgard.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2011-2014. All rights reserved. + * Copyright (C) ARM Limited 2011-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_mali_midgard_hw.c b/drivers/gator/gator_events_mali_midgard_hw.c index c8065da56815..7e1eee30026d 100644 --- a/drivers/gator/gator_events_mali_midgard_hw.c +++ b/drivers/gator/gator_events_mali_midgard_hw.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2012-2014. All rights reserved. + * Copyright (C) ARM Limited 2012-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -18,7 +18,7 @@ /* Mali Midgard DDK includes */ #if defined(MALI_SIMPLE_API) /* Header with wrapper functions to kbase structures and functions */ -#include "mali/mali_kbase_gator_api.h" +#include "mali_kbase_gator_api.h" #elif defined(MALI_DIR_MIDGARD) /* New DDK Directory structure with kernel/drivers/gpu/arm/midgard */ #include "mali_linux_trace.h" @@ -40,6 +40,10 @@ #error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3/r4 DDK, or 3 for r5 and later DDK). #endif +#if !defined(CONFIG_MALI_GATOR_SUPPORT) +#error CONFIG_MALI_GATOR_SUPPORT is required for GPU activity and software counters +#endif + #include "gator_events_mali_common.h" /* @@ -748,12 +752,12 @@ static int read_counter(const int cnt, const int len, const struct mali_counter { const int block = GET_HW_BLOCK(cnt); const int counter_offset = GET_COUNTER_OFFSET(cnt); + u32 value = 0; #if MALI_DDK_GATOR_API_VERSION == 3 const char *block_base_address = (char *)in_out_info->kernel_dump_buffer; int i; int shader_core_count = 0; - u32 value = 0; for (i = 0; i < in_out_info->nr_hwc_blocks; i++) { if (block == in_out_info->hwc_layout[i]) { @@ -766,6 +770,12 @@ static int read_counter(const int cnt, const int len, const struct mali_counter if (shader_core_count > 1) value /= shader_core_count; #else + const unsigned int vithar_blocks[] = { + 0x700, /* VITHAR_JOB_MANAGER, Block 0 */ + 0x400, /* VITHAR_TILER, Block 1 */ + 0x000, /* VITHAR_SHADER_CORE, Block 2 */ + 0x500 /* VITHAR_MEMORY_SYSTEM, Block 3 */ + }; const char *block_base_address = (char *)kernel_dump_buffer + vithar_blocks[block]; /* If counter belongs to shader block need to take into account all cores */ @@ -831,31 +841,22 @@ static int read(int **buffer, bool sched_switch) * Only process hardware counters if at least one of the hardware counters is enabled. */ if (num_hardware_counters_enabled > 0) { -#if MALI_DDK_GATOR_API_VERSION != 3 - const unsigned int vithar_blocks[] = { - 0x700, /* VITHAR_JOB_MANAGER, Block 0 */ - 0x400, /* VITHAR_TILER, Block 1 */ - 0x000, /* VITHAR_SHADER_CORE, Block 2 */ - 0x500 /* VITHAR_MEMORY_SYSTEM, Block 3 */ - }; -#endif - #if MALI_DDK_GATOR_API_VERSION == 3 if (!handles) return -1; /* Mali symbols can be called safely since a kbcontext is valid */ - if (kbase_gator_instr_hwcnt_dump_complete_symbol(handles, &success) == MALI_TRUE) { + if (kbase_gator_instr_hwcnt_dump_complete_symbol(handles, &success)) { #else if (!kbcontext) return -1; /* Mali symbols can be called safely since a kbcontext is valid */ - if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) { + if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success)) { #endif kbase_device_busy = false; - if (success == MALI_TRUE) { + if (success) { /* Cycle through hardware counters and accumulate totals */ for (cnt = 0; cnt < number_of_hardware_counters; cnt++) { const struct mali_counter *counter = &counters[cnt]; diff --git a/drivers/gator/gator_events_mali_midgard_hw_test.c b/drivers/gator/gator_events_mali_midgard_hw_test.c index 31a91e1c72b2..87c569cabf53 100644 --- a/drivers/gator/gator_events_mali_midgard_hw_test.c +++ b/drivers/gator/gator_events_mali_midgard_hw_test.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2012-2014. All rights reserved. + * Copyright (C) ARM Limited 2012-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c index c625ac5af9cd..985b312ab92b 100644 --- a/drivers/gator/gator_events_meminfo.c +++ b/drivers/gator/gator_events_meminfo.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -58,6 +58,8 @@ static ulong proc_enabled[PROC_COUNT]; static ulong proc_keys[PROC_COUNT]; static DEFINE_PER_CPU(long long, proc_buffer[2 * (PROC_COUNT + 3)]); +static void do_read(void); + #if USE_THREAD static int gator_meminfo_func(void *data); @@ -177,6 +179,7 @@ static int gator_events_meminfo_start(void) if (GATOR_REGISTER_TRACE(mm_page_alloc)) goto mm_page_alloc_exit; + do_read(); #if USE_THREAD /* Start worker thread */ gator_meminfo_run = true; diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c index 6b2af995ed41..7b517611f8ec 100644 --- a/drivers/gator/gator_events_mmapped.c +++ b/drivers/gator/gator_events_mmapped.c @@ -1,7 +1,7 @@ /* * Example events provider * - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_net.c b/drivers/gator/gator_events_net.c index d21b4db7b77c..1e36731479d2 100644 --- a/drivers/gator/gator_events_net.c +++ b/drivers/gator/gator_events_net.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c index 47cf278e508b..f6b4f18ad11a 100644 --- a/drivers/gator/gator_events_perf_pmu.c +++ b/drivers/gator/gator_events_perf_pmu.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -23,11 +23,13 @@ extern bool event_based_sampling; /* Maximum number of per-core counters - currently reserves enough space for two full hardware PMUs for big.LITTLE */ #define CNTMAX 16 #define CCI_400 4 +#define CCI_500 8 #define CCN_5XX 8 /* Maximum number of uncore counters */ /* + 1 for the cci-400 cycles counter */ +/* cci-500 has no cycles counter */ /* + 1 for the CCN-5xx cycles counter */ -#define UCCNT (CCI_400 + 1 + CCN_5XX + 1) +#define UCCNT (CCI_400 + 1 + CCI_500 + CCN_5XX + 1) /* Default to 0 if unable to probe the revision which was the previous behavior */ #define DEFAULT_CCI_REVISION 0 @@ -58,9 +60,9 @@ static struct gator_attr uc_attrs[UCCNT]; static int uc_attr_count; struct gator_event { - int curr; - int prev; - int prev_delta; + uint32_t curr; + uint32_t prev; + uint32_t prev_delta; bool zero; struct perf_event *pevent; struct perf_event_attr *pevent_attr; @@ -315,7 +317,7 @@ static void gator_events_perf_pmu_stop(void) static void __read(int *const len, int cpu, struct gator_attr *const attr, struct gator_event *const event) { - int delta; + uint32_t delta; struct perf_event *const ev = event->pevent; if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) { @@ -341,8 +343,6 @@ static void __read(int *const len, int cpu, struct gator_attr *const attr, struc event->prev_delta = delta; event->prev = event->curr; per_cpu(perf_cnt, cpu)[(*len)++] = attr->key; - if (delta < 0) - delta *= -1; per_cpu(perf_cnt, cpu)[(*len)++] = delta; } } @@ -436,13 +436,15 @@ static int probe_cci_revision(void) #endif -static void gator_events_perf_pmu_uncore_init(const char *const name, const int type, const int count) +static void gator_events_perf_pmu_uncore_init(const char *const name, const int type, const int count, const bool has_cycles_counter) { int cnt; - snprintf(uc_attrs[uc_attr_count].name, sizeof(uc_attrs[uc_attr_count].name), "%s_ccnt", name); - uc_attrs[uc_attr_count].type = type; - ++uc_attr_count; + if (has_cycles_counter) { + snprintf(uc_attrs[uc_attr_count].name, sizeof(uc_attrs[uc_attr_count].name), "%s_ccnt", name); + uc_attrs[uc_attr_count].type = type; + ++uc_attr_count; + } for (cnt = 0; cnt < count; ++cnt, ++uc_attr_count) { struct gator_attr *const attr = &uc_attrs[uc_attr_count]; @@ -452,7 +454,7 @@ static void gator_events_perf_pmu_uncore_init(const char *const name, const int } } -static void gator_events_perf_pmu_cci_init(const int type) +static void gator_events_perf_pmu_cci_400_init(const int type) { const char *cci_name; @@ -468,7 +470,7 @@ static void gator_events_perf_pmu_cci_init(const int type) return; } - gator_events_perf_pmu_uncore_init(cci_name, type, CCI_400); + gator_events_perf_pmu_uncore_init(cci_name, type, CCI_400, true); } static void gator_events_perf_pmu_cpu_init(const struct gator_cpu *const gator_cpu, const int type) @@ -535,9 +537,11 @@ int gator_events_perf_pmu_init(void) if (pe->pmu != NULL && type == pe->pmu->type) { if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0 || strcmp("CCI_400-r1", pe->pmu->name) == 0) { - gator_events_perf_pmu_cci_init(type); + gator_events_perf_pmu_cci_400_init(type); + } else if (strcmp("CCI_500", pe->pmu->name) == 0) { + gator_events_perf_pmu_uncore_init("CCI_500", type, CCI_500, false); } else if (strcmp("ccn", pe->pmu->name) == 0) { - gator_events_perf_pmu_uncore_init("ARM_CCN_5XX", type, CCN_5XX); + gator_events_perf_pmu_uncore_init("ARM_CCN_5XX", type, CCN_5XX, true); } else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) { found_cpu = true; gator_events_perf_pmu_cpu_init(gator_cpu, type); @@ -549,10 +553,15 @@ int gator_events_perf_pmu_init(void) } if (!found_cpu) { - const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(gator_cpuid()); + const struct gator_cpu *gator_cpu = gator_find_cpu_by_cpuid(gator_cpuid()); - if (gator_cpu == NULL) - return -1; + if (gator_cpu == NULL) { + gator_cpu = gator_find_cpu_by_cpuid(OTHER); + if (gator_cpu == NULL) { + pr_err("gator: Didn't find cpu\n"); + return -1; + } + } gator_events_perf_pmu_cpu_init(gator_cpu, PERF_TYPE_RAW); } diff --git a/drivers/gator/gator_events_sched.c b/drivers/gator/gator_events_sched.c index 637107d6af1d..463d83496073 100644 --- a/drivers/gator/gator_events_sched.c +++ b/drivers/gator/gator_events_sched.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c index 49219362db09..b51dcd39a8e1 100644 --- a/drivers/gator/gator_events_scorpion.c +++ b/drivers/gator/gator_events_scorpion.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2011-2014. All rights reserved. + * Copyright (C) ARM Limited 2011-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_hrtimer_gator.c b/drivers/gator/gator_hrtimer_gator.c index c1525e10a8da..36961f85b62a 100644 --- a/drivers/gator/gator_hrtimer_gator.c +++ b/drivers/gator/gator_hrtimer_gator.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2011-2014. All rights reserved. + * Copyright (C) ARM Limited 2011-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c index fb78c10fd987..80535c7b2b6e 100644 --- a/drivers/gator/gator_iks.c +++ b/drivers/gator/gator_iks.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c index 30bf60d95286..affa1dc312e4 100644 --- a/drivers/gator/gator_main.c +++ b/drivers/gator/gator_main.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -8,7 +8,7 @@ */ /* This version must match the gator daemon version */ -#define PROTOCOL_VERSION 20 +#define PROTOCOL_VERSION 21 static unsigned long gator_protocol_version = PROTOCOL_VERSION; #include @@ -28,6 +28,7 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION; #include #include "gator.h" +#include "gator_src_md5.h" #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) #error kernels prior to 2.6.32 are not supported @@ -92,21 +93,17 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION; /* Name Frame Messages */ #define MESSAGE_COOKIE 1 #define MESSAGE_THREAD_NAME 2 -#define MESSAGE_LINK 4 /* Scheduler Trace Frame Messages */ #define MESSAGE_SCHED_SWITCH 1 #define MESSAGE_SCHED_EXIT 2 -/* Idle Frame Messages */ -#define MESSAGE_IDLE_ENTER 1 -#define MESSAGE_IDLE_EXIT 2 - /* Summary Frame Messages */ #define MESSAGE_SUMMARY 1 #define MESSAGE_CORE_NAME 3 /* Activity Frame Messages */ +#define MESSAGE_LINK 1 #define MESSAGE_SWITCH 2 #define MESSAGE_EXIT 3 @@ -267,6 +264,9 @@ GATOR_EVENTS_LIST * Misc ******************************************************************************/ +MODULE_PARM_DESC(gator_src_md5, "Gator driver source code md5sum"); +module_param_named(src_md5, gator_src_md5, charp, 0444); + static const struct gator_cpu gator_cpus[] = { { .cpuid = ARM1136, @@ -331,6 +331,13 @@ static const struct gator_cpu gator_cpus[] = { .dt_name = "arm,cortex-a15", .pmnc_counters = 6, }, + { + .cpuid = CORTEX_A12, + .core_name = "Cortex-A17", + .pmnc_name = "ARMv7_Cortex_A17", + .dt_name = "arm,cortex-a17", + .pmnc_counters = 6, + }, { .cpuid = CORTEX_A17, .core_name = "Cortex-A17", @@ -383,9 +390,10 @@ static const struct gator_cpu gator_cpus[] = { .pmnc_counters = 6, }, { - .cpuid = AARCH64, - .core_name = "AArch64", - .pmnc_name = "ARM_AArch64", + .cpuid = CORTEX_A72, + .core_name = "Cortex-A72", + .pmnc_name = "ARM_Cortex-A72", + .dt_name = "arm,cortex-a72", .pmnc_counters = 6, }, { @@ -443,7 +451,7 @@ u32 gator_cpuid(void) #else asm volatile("mrs %0, midr_el1" : "=r" (val)); #endif - return (val >> 4) & 0xfff; + return ((val & 0xff000000) >> 12) | ((val & 0xfff0) >> 4); #else return OTHER; #endif @@ -597,7 +605,7 @@ static void gator_send_core_name(const int cpu, const u32 cpuid) if (cpuid == -1) snprintf(core_name_buf, sizeof(core_name_buf), "Unknown"); else - snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid); + snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.5x)", cpuid); core_name = core_name_buf; } @@ -729,11 +737,11 @@ static void gator_emit_perf_time(u64 time) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) if (time >= gator_sync_time) { - int cpu = get_physical_cpu(); - marshal_event_single64(0, -1, local_clock()); gator_sync_time += NSEC_PER_SEC; - gator_commit_buffer(cpu, COUNTER_BUF, time); + if (gator_live_rate <= 0) { + gator_commit_buffer(get_physical_cpu(), COUNTER_BUF, time); + } } #endif } @@ -867,7 +875,9 @@ static void gator_summary(void) marshal_summary(timestamp, uptime, gator_monotonic_started, uname_buf); gator_sync_time = 0; - gator_emit_perf_time(gator_monotonic_started); + gator_emit_perf_time(gator_monotonic_started); + /* Always flush COUNTER_BUF so that the initial perf_time is received before it's used */ + gator_commit_buffer(get_physical_cpu(), COUNTER_BUF, 0); preempt_enable(); } diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c index 0d1167643642..f5b81843d1c4 100644 --- a/drivers/gator/gator_marshaling.c +++ b/drivers/gator/gator_marshaling.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2012-2014. All rights reserved. + * Copyright (C) ARM Limited 2012-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -107,16 +107,16 @@ static void marshal_link(int cookie, int tgid, int pid) local_irq_save(flags); time = gator_get_time(); - if (buffer_check_space(cpu, NAME_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) { - gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_LINK); - gator_buffer_write_packed_int64(cpu, NAME_BUF, time); - gator_buffer_write_packed_int(cpu, NAME_BUF, cookie); - gator_buffer_write_packed_int(cpu, NAME_BUF, tgid); - gator_buffer_write_packed_int(cpu, NAME_BUF, pid); + if (buffer_check_space(cpu, ACTIVITY_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, MESSAGE_LINK); + gator_buffer_write_packed_int64(cpu, ACTIVITY_BUF, time); + gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, cookie); + gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, tgid); + gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, pid); } local_irq_restore(flags); /* Check and commit; commit is set to occur once buffer is 3/4 full */ - buffer_check(cpu, NAME_BUF, time); + buffer_check(cpu, ACTIVITY_BUF, time); } static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, u64 time) diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c index 5de9152e365a..d9b82ee1857f 100644 --- a/drivers/gator/gator_trace_gpu.c +++ b/drivers/gator/gator_trace_gpu.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/gator_trace_power.c b/drivers/gator/gator_trace_power.c index 46e04b29a187..aaa8f8636c83 100644 --- a/drivers/gator/gator_trace_power.c +++ b/drivers/gator/gator_trace_power.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2011-2014. All rights reserved. + * Copyright (C) ARM Limited 2011-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -80,13 +80,7 @@ GATOR_DEFINE_PROBE(cpu_idle, TP_PROTO(unsigned int state, unsigned int cpu)) return; if (implements_wfi()) { - if (state == PWR_EVENT_EXIT) { - /* transition from wfi to non-wfi */ - marshal_idle(cpu, MESSAGE_IDLE_EXIT); - } else { - /* transition from non-wfi to wfi */ - marshal_idle(cpu, MESSAGE_IDLE_ENTER); - } + marshal_idle(cpu, state); } per_cpu(idle_prev_state, cpu) = state; diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c index 6d7cbd7348e1..ad7c39e14a17 100644 --- a/drivers/gator/gator_trace_sched.c +++ b/drivers/gator/gator_trace_sched.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/mali/mali_kbase_gator_api.h b/drivers/gator/mali/mali_kbase_gator_api.h deleted file mode 100644 index 5ed069797e36..000000000000 --- a/drivers/gator/mali/mali_kbase_gator_api.h +++ /dev/null @@ -1,219 +0,0 @@ -/** - * Copyright (C) ARM Limited 2014. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#ifndef _KBASE_GATOR_API_H_ -#define _KBASE_GATOR_API_H_ - -/** - * @brief This file describes the API used by Gator to collect hardware counters data from a Mali device. - */ - -/* This define is used by the gator kernel module compile to select which DDK - * API calling convention to use. If not defined (legacy DDK) gator assumes - * version 1. The version to DDK release mapping is: - * Version 1 API: DDK versions r1px, r2px - * Version 2 API: DDK versions r3px, r4px - * Version 3 API: DDK version r5p0 and newer - * - * API Usage - * ========= - * - * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter - * names for the GPU present in this device. - * - * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for - * the counters you want enabled. The enables can all be set for simplicity in - * most use cases, but disabling some will let you minimize bandwidth impact. - * - * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a - * counter context. On successful return the DDK will have populated the - * structure with a variety of useful information. - * - * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a - * counter dump. If this returns a non-zero value the request has been queued, - * otherwise the driver has been unable to do so (typically because of another - * user of the instrumentation exists concurrently). - * - * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the previously - * requested dump has been succesful. If this returns non-zero the counter dump - * has resolved, but the value of *success must also be tested as the dump - * may have not been successful. If it returns zero the counter dump was - * abandoned due to the device being busy (typically because of another - * user of the instrumentation exists concurrently). - * - * 6] Process the counters stored in the buffer pointed to by ... - * - * kbase_gator_hwcnt_info->kernel_dump_buffer - * - * In pseudo code you can find all of the counters via this approach: - * - * - * hwcnt_info # pointer to kbase_gator_hwcnt_info structure - * hwcnt_name # pointer to name list - * - * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer - * - * # Iterate over each 64-counter block in this GPU configuration - * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) { - * hwc_type type = hwcnt_info->hwc_layout[i]; - * - * # Skip reserved type blocks - they contain no counters at all - * if( type == RESERVED_BLOCK ) { - * continue; - * } - * - * size_t name_offset = type * 64; - * size_t data_offset = i * 64; - * - * # Iterate over the names of the counters in this block type - * for( j = 0; j < 64; j++) { - * const char * name = hwcnt_name[name_offset+j]; - * - * # Skip empty name strings - there is no counter here - * if( name[0] == '\0' ) { - * continue; - * } - * - * u32 data = hwcnt_data[data_offset+j]; - * - * printk( "COUNTER: %s DATA: %u\n", name, data ); - * } - * } - * - * - * Note that in most implementations you typically want to either SUM or - * AVERAGE multiple instances of the same counter if, for example, you have - * multiple shader cores or multiple L2 caches. The most sensible view for - * analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU - * counters. - * - * 7] Goto 4, repeating until you want to stop collecting counters. - * - * 8] Release the dump resources by calling kbase_gator_hwcnt_term(). - * - * 9] Release the name table resources by calling kbase_gator_hwcnt_term_names(). - * This function must only be called if init_names() returned a non-NULL value. - **/ - -#define MALI_DDK_GATOR_API_VERSION 3 - -#if !defined(MALI_TRUE) - #define MALI_TRUE ((uint32_t)1) -#endif - -#if !defined(MALI_FALSE) - #define MALI_FALSE ((uint32_t)0) -#endif - -enum hwc_type { - JM_BLOCK = 0, - TILER_BLOCK, - SHADER_BLOCK, - MMU_L2_BLOCK, - RESERVED_BLOCK -}; - -struct kbase_gator_hwcnt_info { - - /* Passed from Gator to kbase */ - - /* the bitmask of enabled hardware counters for each counter block */ - uint16_t bitmask[4]; - - /* Passed from kbase to Gator */ - - /* ptr to counter dump memory */ - void *kernel_dump_buffer; - - /* size of counter dump memory */ - uint32_t size; - - /* the ID of the Mali device */ - uint32_t gpu_id; - - /* the number of shader cores in the GPU */ - uint32_t nr_cores; - - /* the number of core groups */ - uint32_t nr_core_groups; - - /* the memory layout of the performance counters */ - enum hwc_type *hwc_layout; - - /* the total number of hardware couter blocks */ - uint32_t nr_hwc_blocks; -}; - -/** - * @brief Opaque block of Mali data which Gator needs to return to the API later. - */ -struct kbase_gator_hwcnt_handles; - -/** - * @brief Initialize the resources Gator needs for performance profiling. - * - * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali - * specific information that will be returned to Gator. On entry Gator must have populated the - * 'bitmask' field with the counters it wishes to enable for each class of counter block. - * Each entry in the array corresponds to a single counter class based on the "hwc_type" - * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables - * the first 4 counters in the block, and so on). See the GPU counter array as returned by - * kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU. - * - * @return Pointer to an opaque handle block on success, NULL on error. - */ -extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info); - -/** - * @brief Free all resources once Gator has finished using performance counters. - * - * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the - * Mali specific information that will be returned to Gator. - * @param opaque_handles A wrapper structure for kbase structures. - */ -extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles); - -/** - * @brief Poll whether a counter dump is successful. - * - * @param opaque_handles A wrapper structure for kbase structures. - * @param[out] success Non-zero on success, zero on failure. - * - * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a - * completed dump may not have dumped succesfully, so the caller must test for both - * a completed and successful dump before processing counters. - */ -extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success); - -/** - * @brief Request the generation of a new counter dump. - * - * @param opaque_handles A wrapper structure for kbase structures. - * - * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise. - */ -extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles); - -/** - * @brief This function is used to fetch the names table based on the Mali device in use. - * - * @param[out] total_number_of_counters The total number of counters short names in the Mali devices' list. - * - * @return Pointer to an array of strings of length *total_number_of_counters. - */ -extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_number_of_counters); - -/** - * @brief This function is used to terminate the use of the names table. - * - * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value. - */ -extern void kbase_gator_hwcnt_term_names(void); - -#endif diff --git a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h index 2bc0b037eee6..a5d165157396 100644 --- a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h +++ b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/mali/mali_utgard_profiling_gator_api.h b/drivers/gator/mali/mali_utgard_profiling_gator_api.h index d6465312628e..f550490c2c39 100644 --- a/drivers/gator/mali/mali_utgard_profiling_gator_api.h +++ b/drivers/gator/mali/mali_utgard_profiling_gator_api.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gator/mali_midgard.mk b/drivers/gator/mali_midgard.mk index 1b784d5c3d58..b0076c22da6b 100644 --- a/drivers/gator/mali_midgard.mk +++ b/drivers/gator/mali_midgard.mk @@ -23,6 +23,7 @@ endif ifneq ($(wildcard $(DDK_DIR)/drivers/gpu/arm/midgard/mali_kbase_gator_api.h),) EXTRA_CFLAGS += -DMALI_SIMPLE_API=1 +EXTRA_CFLAGS += -I$(DDK_DIR)/drivers/gpu/arm/midgard endif UMP_DIR = $(DDK_DIR)/include/linux diff --git a/tools/gator/daemon/Android.mk b/tools/gator/daemon/Android.mk index 970ac6946150..68f4a8397379 100644 --- a/tools/gator/daemon/Android.mk +++ b/tools/gator/daemon/Android.mk @@ -1,13 +1,12 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -XML_H := $(shell cd $(LOCAL_PATH) && make events_xml.h defaults_xml.h) +XML_H := $(shell cd $(LOCAL_PATH) && make events_xml.h defaults_xml.h SrcMd5.cpp) LOCAL_SRC_FILES := \ AnnotateListener.cpp \ Buffer.cpp \ CCNDriver.cpp \ - CPUFreqDriver.cpp \ CapturedXML.cpp \ Child.cpp \ Command.cpp \ @@ -43,6 +42,7 @@ LOCAL_SRC_FILES := \ SessionXML.cpp \ Setup.cpp \ Source.cpp \ + SrcMd5.cpp \ StreamlineSetup.cpp \ UEvent.cpp \ UserSpaceSource.cpp \ diff --git a/tools/gator/daemon/AnnotateListener.cpp b/tools/gator/daemon/AnnotateListener.cpp index 50110b4dc84c..5966cbea8d13 100644 --- a/tools/gator/daemon/AnnotateListener.cpp +++ b/tools/gator/daemon/AnnotateListener.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -12,36 +12,56 @@ #include "OlySocket.h" +static const char STREAMLINE_ANNOTATE_PARENT[] = "\0streamline-annotate-parent"; + struct AnnotateClient { AnnotateClient *next; int fd; }; -AnnotateListener::AnnotateListener() : mClients(NULL), mSock(NULL) { +AnnotateListener::AnnotateListener() : mClients(NULL), mSock(NULL), mUds(NULL) { } AnnotateListener::~AnnotateListener() { close(); + delete mUds; delete mSock; } void AnnotateListener::setup() { mSock = new OlyServerSocket(8082); + mUds = new OlyServerSocket(STREAMLINE_ANNOTATE_PARENT, sizeof(STREAMLINE_ANNOTATE_PARENT), true); } -int AnnotateListener::getFd() { +int AnnotateListener::getSockFd() { return mSock->getFd(); } -void AnnotateListener::handle() { +void AnnotateListener::handleSock() { AnnotateClient *const client = new AnnotateClient(); client->fd = mSock->acceptConnection(); client->next = mClients; mClients = client; } +int AnnotateListener::getUdsFd() { + return mUds->getFd(); +} + +void AnnotateListener::handleUds() { + AnnotateClient *const client = new AnnotateClient(); + client->fd = mUds->acceptConnection(); + client->next = mClients; + mClients = client; +} + void AnnotateListener::close() { - mSock->closeServerSocket(); + if (mUds != NULL) { + mUds->closeServerSocket(); + } + if (mSock != NULL) { + mSock->closeServerSocket(); + } while (mClients != NULL) { ::close(mClients->fd); AnnotateClient *next = mClients->next; diff --git a/tools/gator/daemon/AnnotateListener.h b/tools/gator/daemon/AnnotateListener.h index cdefef12db22..6bc747d42d18 100644 --- a/tools/gator/daemon/AnnotateListener.h +++ b/tools/gator/daemon/AnnotateListener.h @@ -1,12 +1,15 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -class AnnotateClient; +#ifndef ANNOTATELISTENER_H +#define ANNOTATELISTENER_H + +struct AnnotateClient; class OlyServerSocket; class AnnotateListener { @@ -15,17 +18,22 @@ class AnnotateListener { ~AnnotateListener(); void setup(); - int getFd(); + int getSockFd(); + int getUdsFd(); - void handle(); + void handleSock(); + void handleUds(); void close(); void signal(); private: AnnotateClient *mClients; OlyServerSocket *mSock; + OlyServerSocket *mUds; // Intentionally unimplemented AnnotateListener(const AnnotateListener &); AnnotateListener &operator=(const AnnotateListener &); }; + +#endif // ANNOTATELISTENER_H diff --git a/tools/gator/daemon/Application.mk b/tools/gator/daemon/Application.mk index 3ada471cac19..8b0a7882e938 100644 --- a/tools/gator/daemon/Application.mk +++ b/tools/gator/daemon/Application.mk @@ -1,3 +1,3 @@ -APP_PLATFORM := android-8 +APP_PLATFORM := android-9 # Replace armeabi-v7a with arm64-v8a to build an arm64 gatord or with armeabi to build an ARM11 gatord APP_ABI := armeabi-v7a diff --git a/tools/gator/daemon/Buffer.cpp b/tools/gator/daemon/Buffer.cpp index 8fa628015069..c4ced9f607f9 100644 --- a/tools/gator/daemon/Buffer.cpp +++ b/tools/gator/daemon/Buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,17 +13,19 @@ #include "SessionData.h" #define mask (mSize - 1) +#define FRAME_HEADER_SIZE 3 enum { - CODE_PEA = 1, - CODE_KEYS = 2, - CODE_FORMAT = 3, - CODE_MAPS = 4, - CODE_COMM = 5, - CODE_KEYS_OLD = 6, - CODE_ONLINE_CPU = 7, - CODE_OFFLINE_CPU = 8, - CODE_KALLSYMS = 9, + CODE_PEA = 1, + CODE_KEYS = 2, + CODE_FORMAT = 3, + CODE_MAPS = 4, + CODE_COMM = 5, + CODE_KEYS_OLD = 6, + CODE_ONLINE_CPU = 7, + CODE_OFFLINE_CPU = 8, + CODE_KALLSYMS = 9, + CODE_COUNTERS = 10, }; // Summary Frame Messages @@ -47,7 +49,7 @@ enum { Buffer::Buffer(const int32_t core, const int32_t buftype, const int size, sem_t *const readerSem) : mBuf(new char[size]), mReaderSem(readerSem), mCommitTime(gSessionData->mLiveRate), mSize(size), mReadPos(0), mWritePos(0), mCommitPos(0), mAvailable(true), mIsDone(false), mCore(core), mBufType(buftype) { if ((mSize & mask) != 0) { - logg->logError(__FILE__, __LINE__, "Buffer size is not a power of 2"); + logg->logError("Buffer size is not a power of 2"); handleException(); } sem_init(&mWriterSem, 0, 0); @@ -141,7 +143,7 @@ int Buffer::contiguousSpaceAvailable() const { } } -void Buffer::commit(const uint64_t time) { +void Buffer::commit(const uint64_t time, const bool force) { // post-populate the length, which does not include the response type length nor the length itself, i.e. only the length of the payload const int typeLength = gSessionData->mLocalCapture ? 0 : 1; int length = mWritePos - mCommitPos; @@ -149,6 +151,10 @@ void Buffer::commit(const uint64_t time) { length += mSize; } length = length - typeLength - sizeof(int32_t); + if (!force && !mIsDone && length <= FRAME_HEADER_SIZE) { + // Nothing to write, only the frame header is present + return; + } for (size_t byte = 0; byte < sizeof(int32_t); byte++) { mBuf[(mCommitPos + typeLength + byte) & mask] = (length >> byte * 8) & 0xFF; } @@ -317,7 +323,7 @@ void Buffer::event64(const int key, const int64_t value) { } } -void Buffer::pea(const uint64_t currTime, const struct perf_event_attr *const pea, int key) { +void Buffer::marshalPea(const uint64_t currTime, const struct perf_event_attr *const pea, int key) { while (!checkSpace(2 * MAXSIZE_PACK32 + pea->size)) { sem_wait(&mWriterSem); } @@ -327,7 +333,7 @@ void Buffer::pea(const uint64_t currTime, const struct perf_event_attr *const pe check(currTime); } -void Buffer::keys(const uint64_t currTime, const int count, const __u64 *const ids, const int *const keys) { +void Buffer::marshalKeys(const uint64_t currTime, const int count, const __u64 *const ids, const int *const keys) { while (!checkSpace(2 * MAXSIZE_PACK32 + count * (MAXSIZE_PACK32 + MAXSIZE_PACK64))) { sem_wait(&mWriterSem); } @@ -340,7 +346,7 @@ void Buffer::keys(const uint64_t currTime, const int count, const __u64 *const i check(currTime); } -void Buffer::keysOld(const uint64_t currTime, const int keyCount, const int *const keys, const int bytes, const char *const buf) { +void Buffer::marshalKeysOld(const uint64_t currTime, const int keyCount, const int *const keys, const int bytes, const char *const buf) { while (!checkSpace((2 + keyCount) * MAXSIZE_PACK32 + bytes)) { sem_wait(&mWriterSem); } @@ -353,7 +359,7 @@ void Buffer::keysOld(const uint64_t currTime, const int keyCount, const int *con check(currTime); } -void Buffer::format(const uint64_t currTime, const int length, const char *const format) { +void Buffer::marshalFormat(const uint64_t currTime, const int length, const char *const format) { while (!checkSpace(MAXSIZE_PACK32 + length + 1)) { sem_wait(&mWriterSem); } @@ -362,7 +368,7 @@ void Buffer::format(const uint64_t currTime, const int length, const char *const check(currTime); } -void Buffer::maps(const uint64_t currTime, const int pid, const int tid, const char *const maps) { +void Buffer::marshalMaps(const uint64_t currTime, const int pid, const int tid, const char *const maps) { const int mapsLen = strlen(maps) + 1; while (!checkSpace(3 * MAXSIZE_PACK32 + mapsLen)) { sem_wait(&mWriterSem); @@ -374,7 +380,7 @@ void Buffer::maps(const uint64_t currTime, const int pid, const int tid, const c check(currTime); } -void Buffer::comm(const uint64_t currTime, const int pid, const int tid, const char *const image, const char *const comm) { +void Buffer::marshalComm(const uint64_t currTime, const int pid, const int tid, const char *const image, const char *const comm) { const int imageLen = strlen(image) + 1; const int commLen = strlen(comm) + 1; while (!checkSpace(3 * MAXSIZE_PACK32 + imageLen + commLen)) { @@ -388,27 +394,27 @@ void Buffer::comm(const uint64_t currTime, const int pid, const int tid, const c check(currTime); } -void Buffer::onlineCPU(const uint64_t currTime, const uint64_t time, const int cpu) { +void Buffer::onlineCPU(const uint64_t currTime, const int cpu) { while (!checkSpace(MAXSIZE_PACK32 + MAXSIZE_PACK64)) { sem_wait(&mWriterSem); } packInt(CODE_ONLINE_CPU); - packInt64(time); + packInt64(currTime); packInt(cpu); check(currTime); } -void Buffer::offlineCPU(const uint64_t currTime, const uint64_t time, const int cpu) { +void Buffer::offlineCPU(const uint64_t currTime, const int cpu) { while (!checkSpace(MAXSIZE_PACK32 + MAXSIZE_PACK64)) { sem_wait(&mWriterSem); } packInt(CODE_OFFLINE_CPU); - packInt64(time); + packInt64(currTime); packInt(cpu); check(currTime); } -void Buffer::kallsyms(const uint64_t currTime, const char *const kallsyms) { +void Buffer::marshalKallsyms(const uint64_t currTime, const char *const kallsyms) { const int kallsymsLen = strlen(kallsyms) + 1; while (!checkSpace(3 * MAXSIZE_PACK32 + kallsymsLen)) { sem_wait(&mWriterSem); @@ -418,6 +424,31 @@ void Buffer::kallsyms(const uint64_t currTime, const char *const kallsyms) { check(currTime); } +void Buffer::perfCounterHeader(const uint64_t time) { + while (!checkSpace(MAXSIZE_PACK32 + MAXSIZE_PACK64)) { + sem_wait(&mWriterSem); + } + packInt(CODE_COUNTERS); + packInt64(time); +} + +void Buffer::perfCounter(const int core, const int key, const int64_t value) { + while (!checkSpace(2*MAXSIZE_PACK32 + MAXSIZE_PACK64)) { + sem_wait(&mWriterSem); + } + packInt(core); + packInt(key); + packInt64(value); +} + +void Buffer::perfCounterFooter(const uint64_t currTime) { + while (!checkSpace(MAXSIZE_PACK32)) { + sem_wait(&mWriterSem); + } + packInt(-1); + check(currTime); +} + void Buffer::setDone() { mIsDone = true; commit(0); diff --git a/tools/gator/daemon/Buffer.h b/tools/gator/daemon/Buffer.h index 6cffd8e39a36..13c44e1fd359 100644 --- a/tools/gator/daemon/Buffer.h +++ b/tools/gator/daemon/Buffer.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -36,7 +36,7 @@ class Buffer { int bytesAvailable() const; int contiguousSpaceAvailable() const; - void commit(const uint64_t time); + void commit(const uint64_t time, const bool force = false); void check(const uint64_t time); // Summary messages @@ -50,15 +50,18 @@ class Buffer { void event64(int key, int64_t value); // Perf Attrs messages - void pea(const uint64_t currTime, const struct perf_event_attr *const pea, int key); - void keys(const uint64_t currTime, const int count, const __u64 *const ids, const int *const keys); - void keysOld(const uint64_t currTime, const int keyCount, const int *const keys, const int bytes, const char *const buf); - void format(const uint64_t currTime, const int length, const char *const format); - void maps(const uint64_t currTime, const int pid, const int tid, const char *const maps); - void comm(const uint64_t currTime, const int pid, const int tid, const char *const image, const char *const comm); - void onlineCPU(const uint64_t currTime, const uint64_t time, const int cpu); - void offlineCPU(const uint64_t currTime, const uint64_t time, const int cpu); - void kallsyms(const uint64_t currTime, const char *const kallsyms); + void marshalPea(const uint64_t currTime, const struct perf_event_attr *const pea, int key); + void marshalKeys(const uint64_t currTime, const int count, const __u64 *const ids, const int *const keys); + void marshalKeysOld(const uint64_t currTime, const int keyCount, const int *const keys, const int bytes, const char *const buf); + void marshalFormat(const uint64_t currTime, const int length, const char *const format); + void marshalMaps(const uint64_t currTime, const int pid, const int tid, const char *const maps); + void marshalComm(const uint64_t currTime, const int pid, const int tid, const char *const image, const char *const comm); + void onlineCPU(const uint64_t currTime, const int cpu); + void offlineCPU(const uint64_t currTime, const int cpu); + void marshalKallsyms(const uint64_t currTime, const char *const kallsyms); + void perfCounterHeader(const uint64_t time); + void perfCounter(const int core, const int key, const int64_t value); + void perfCounterFooter(const uint64_t currTime); void setDone(); bool isDone() const; diff --git a/tools/gator/daemon/CCNDriver.cpp b/tools/gator/daemon/CCNDriver.cpp index dd1a2b133842..d77513acc4e4 100644 --- a/tools/gator/daemon/CCNDriver.cpp +++ b/tools/gator/daemon/CCNDriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -28,7 +28,6 @@ static const char TAG_OPTION_SET[] = "option_set"; static const char ATTR_AVERAGE_SELECTION[] = "average_selection"; static const char ATTR_COUNTER[] = "counter"; -static const char ATTR_COUNTER_SET[] = "counter_set"; static const char ATTR_COUNT[] = "count"; static const char ATTR_DESCRIPTION[] = "description"; static const char ATTR_DISPLAY[] = "display"; @@ -110,7 +109,7 @@ void CCNDriver::readEvents(mxml_node_t *const) { int type; if (DriverSource::readIntDriver("/sys/bus/event_source/devices/ccn/type", &type) != 0) { - logg->logError(__FILE__, __LINE__, "Unable to read CCN-5xx type"); + logg->logError("Unable to read CCN-5xx type"); handleException(); } diff --git a/tools/gator/daemon/CCNDriver.h b/tools/gator/daemon/CCNDriver.h index fb4c717e969a..06ac33f07a36 100644 --- a/tools/gator/daemon/CCNDriver.h +++ b/tools/gator/daemon/CCNDriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/COPYING b/tools/gator/daemon/COPYING new file mode 100644 index 000000000000..d159169d1050 --- /dev/null +++ b/tools/gator/daemon/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/tools/gator/daemon/CPUFreqDriver.cpp b/tools/gator/daemon/CPUFreqDriver.cpp deleted file mode 100644 index 41f9d6f2b3f4..000000000000 --- a/tools/gator/daemon/CPUFreqDriver.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include "CPUFreqDriver.h" - -#include "Buffer.h" -#include "DriverSource.h" -#include "Logging.h" -#include "SessionData.h" - -CPUFreqDriver::CPUFreqDriver() : mPrev() { -} - -CPUFreqDriver::~CPUFreqDriver() { -} - -void CPUFreqDriver::readEvents(mxml_node_t *const) { - // Only for use with perf - if (!gSessionData->perf.isSetup()) { - return; - } - - setCounters(new DriverCounter(getCounters(), strdup("Linux_power_cpu_freq"))); -} - -void CPUFreqDriver::read(Buffer *const buffer) { - char buf[64]; - const DriverCounter *const counter = getCounters(); - if ((counter == NULL) || !counter->isEnabled()) { - return; - } - - const int key = getCounters()->getKey(); - bool resetCores = false; - for (int i = 0; i < gSessionData->mCores; ++i) { - snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%i/cpufreq/cpuinfo_cur_freq", i); - int64_t freq; - if (DriverSource::readInt64Driver(buf, &freq) != 0) { - freq = 0; - } - if (mPrev[i] != freq) { - mPrev[i] = freq; - // Change cores - buffer->event64(2, i); - resetCores = true; - buffer->event64(key, 1000*freq); - } - } - if (resetCores) { - // Revert cores, UserSpaceSource is all on core 0 - buffer->event64(2, 0); - } -} diff --git a/tools/gator/daemon/CPUFreqDriver.h b/tools/gator/daemon/CPUFreqDriver.h deleted file mode 100644 index ad8c9aaa9e7d..000000000000 --- a/tools/gator/daemon/CPUFreqDriver.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef CPUFREQDRIVER_H -#define CPUFREQDRIVER_H - -#include "Config.h" -#include "Driver.h" - -class CPUFreqDriver : public PolledDriver { -private: - typedef PolledDriver super; - -public: - CPUFreqDriver(); - ~CPUFreqDriver(); - - void readEvents(mxml_node_t *const root); - void read(Buffer *const buffer); - -private: - int64_t mPrev[NR_CPUS]; - - // Intentionally unimplemented - CPUFreqDriver(const CPUFreqDriver &); - CPUFreqDriver &operator=(const CPUFreqDriver &); -}; - -#endif // CPUFREQDRIVER_H diff --git a/tools/gator/daemon/CapturedXML.cpp b/tools/gator/daemon/CapturedXML.cpp index 0b5802c893bb..1854c77dcb0e 100644 --- a/tools/gator/daemon/CapturedXML.cpp +++ b/tools/gator/daemon/CapturedXML.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -34,7 +34,6 @@ mxml_node_t* CapturedXML::getTree(bool includeTime) { mxmlElementSetAttr(captured, "version", "1"); if (gSessionData->perf.isSetup()) { mxmlElementSetAttr(captured, "type", "Perf"); - mxmlElementSetAttr(captured, "perf_beta", "yes"); } mxmlElementSetAttrf(captured, "protocol", "%d", PROTOCOL_VERSION); if (includeTime) { // Send the following only after the capture is complete @@ -98,7 +97,7 @@ void CapturedXML::write(char* path) { char* xml = getXML(true); if (util->writeToDisk(file, xml) < 0) { - logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify the path.", file); + logg->logError("Error writing %s\nPlease verify the path.", file); handleException(); } diff --git a/tools/gator/daemon/CapturedXML.h b/tools/gator/daemon/CapturedXML.h index b704f6e53bb5..69d80c09b514 100644 --- a/tools/gator/daemon/CapturedXML.h +++ b/tools/gator/daemon/CapturedXML.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Child.cpp b/tools/gator/daemon/Child.cpp index 6b5bbb3bf6af..a19e9cf86805 100644 --- a/tools/gator/daemon/Child.cpp +++ b/tools/gator/daemon/Child.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -150,13 +150,12 @@ static void *senderThread(void *) { prctl(PR_SET_NAME, (unsigned long)&"gatord-sender", 0, 0, 0); sem_wait(&haltPipeline); - while (!primarySource->isDone() || - !externalSource->isDone() || + while (!externalSource->isDone() || (userSpaceSource != NULL && !userSpaceSource->isDone()) || - (ftraceSource != NULL && !ftraceSource->isDone())) { + (ftraceSource != NULL && !ftraceSource->isDone()) || + !primarySource->isDone()) { sem_wait(&senderSem); - primarySource->write(sender); externalSource->write(sender); if (userSpaceSource != NULL) { userSpaceSource->write(sender); @@ -164,6 +163,7 @@ static void *senderThread(void *) { if (ftraceSource != NULL) { ftraceSource->write(sender); } + primarySource->write(sender); } // write end-of-capture sequence @@ -232,7 +232,7 @@ void Child::run() { sender = new Sender(socket); if (mNumConnections > 1) { - logg->logError(__FILE__, __LINE__, "Session already in progress"); + logg->logError("Session already in progress"); handleException(); } @@ -267,7 +267,7 @@ void Child::run() { char* xmlString; xmlString = util->readFromDisk(gSessionData->mSessionXMLPath); if (xmlString == 0) { - logg->logError(__FILE__, __LINE__, "Unable to read session xml file: %s", gSessionData->mSessionXMLPath); + logg->logError("Unable to read session xml file: %s", gSessionData->mSessionXMLPath); handleException(); } gSessionData->parseSessionXML(xmlString); @@ -280,16 +280,27 @@ void Child::run() { } if (gSessionData->kmod.isMaliCapture() && (gSessionData->mSampleRate == 0)) { - logg->logError(__FILE__, __LINE__, "Mali counters are not supported with Sample Rate: None."); + logg->logError("Mali counters are not supported with Sample Rate: None."); handleException(); } + // Initialize ftrace source before child as it's slow and dependens on nothing else + // If initialized later, us gator with ftrace has time sync issues + if (gSessionData->ftraceDriver.countersEnabled()) { + ftraceSource = new FtraceSource(&senderSem); + if (!ftraceSource->prepare()) { + logg->logError("Unable to prepare userspace source for capture"); + handleException(); + } + ftraceSource->start(); + } + // Must be after session XML is parsed if (!primarySource->prepare()) { if (gSessionData->perf.isSetup()) { - logg->logError(__FILE__, __LINE__, "Unable to prepare gator driver for capture"); + logg->logError("Unable to communicate with the perf API, please ensure that CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER are enabled. Please refer to README_Streamline.txt for more information."); } else { - logg->logError(__FILE__, __LINE__, "Unable to communicate with the perf API, please ensure that CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER are enabled. Please refer to README_Streamline.txt for more information."); + logg->logError("Unable to prepare gator driver for capture"); } handleException(); } @@ -300,7 +311,7 @@ void Child::run() { // Must be initialized before senderThread is started as senderThread checks externalSource externalSource = new ExternalSource(&senderSem); if (!externalSource->prepare()) { - logg->logError(__FILE__, __LINE__, "Unable to prepare external source for capture"); + logg->logError("Unable to prepare external source for capture"); handleException(); } externalSource->start(); @@ -324,21 +335,12 @@ void Child::run() { if (startUSSource) { userSpaceSource = new UserSpaceSource(&senderSem); if (!userSpaceSource->prepare()) { - logg->logError(__FILE__, __LINE__, "Unable to prepare userspace source for capture"); + logg->logError("Unable to prepare userspace source for capture"); handleException(); } userSpaceSource->start(); } - if (gSessionData->ftraceDriver.countersEnabled()) { - ftraceSource = new FtraceSource(&senderSem); - if (!ftraceSource->prepare()) { - logg->logError(__FILE__, __LINE__, "Unable to prepare userspace source for capture"); - handleException(); - } - ftraceSource->start(); - } - if (gSessionData->mAllowCommands && (gSessionData->mCaptureCommand != NULL)) { pthread_t thread; if (pthread_create(&thread, NULL, commandThread, NULL)) { @@ -347,7 +349,7 @@ void Child::run() { } if (!thread_creation_success) { - logg->logError(__FILE__, __LINE__, "Failed to create gator threads"); + logg->logError("Failed to create gator threads"); handleException(); } @@ -357,6 +359,7 @@ void Child::run() { // Start profiling primarySource->run(); + // Wait for the other threads to exit if (ftraceSource != NULL) { ftraceSource->join(); } @@ -364,8 +367,6 @@ void Child::run() { userSpaceSource->join(); } externalSource->join(); - - // Wait for the other threads to exit pthread_join(senderThreadID, NULL); // Shutting down the connection should break the stop thread which is stalling on the socket recv() function diff --git a/tools/gator/daemon/Child.h b/tools/gator/daemon/Child.h index cc78202ceb5c..a6c54db70a70 100644 --- a/tools/gator/daemon/Child.h +++ b/tools/gator/daemon/Child.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Command.cpp b/tools/gator/daemon/Command.cpp index 28d73cf5a905..0a6e3b9901ee 100644 --- a/tools/gator/daemon/Command.cpp +++ b/tools/gator/daemon/Command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -39,7 +39,7 @@ static int getUid(const char *const name, char *const shPath, const char *const const int pid = fork(); if (pid < 0) { - logg->logError(__FILE__, __LINE__, "fork failed"); + logg->logError("fork failed"); handleException(); } if (pid == 0) { @@ -94,7 +94,7 @@ void *commandThread(void *) { const char *const name = gSessionData->mCaptureUser == NULL ? "nobody" : gSessionData->mCaptureUser; const int uid = getUid(name); if (uid < 0) { - logg->logError(__FILE__, __LINE__, "Unable to lookup the user %s, please double check that the user exists", name); + logg->logError("Unable to look up the user %s, please double check that the user exists", name); handleException(); } @@ -103,13 +103,13 @@ void *commandThread(void *) { char buf[128]; int pipefd[2]; if (pipe_cloexec(pipefd) != 0) { - logg->logError(__FILE__, __LINE__, "pipe failed"); + logg->logError("pipe failed"); handleException(); } const int pid = fork(); if (pid < 0) { - logg->logError(__FILE__, __LINE__, "fork failed"); + logg->logError("fork failed"); handleException(); } if (pid == 0) { @@ -163,7 +163,7 @@ void *commandThread(void *) { close(pipefd[1]); const ssize_t bytes = read(pipefd[0], buf, sizeof(buf)); if (bytes > 0) { - logg->logError(__FILE__, __LINE__, buf); + logg->logError("%s", buf); handleException(); } close(pipefd[0]); diff --git a/tools/gator/daemon/Command.h b/tools/gator/daemon/Command.h index 17244b7aaebc..2838adcec661 100644 --- a/tools/gator/daemon/Command.h +++ b/tools/gator/daemon/Command.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Config.h b/tools/gator/daemon/Config.h index bee383a1c797..eb31556e6426 100644 --- a/tools/gator/daemon/Config.h +++ b/tools/gator/daemon/Config.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/ConfigurationXML.cpp b/tools/gator/daemon/ConfigurationXML.cpp index 6590dd389196..be224a4f2b1f 100644 --- a/tools/gator/daemon/ConfigurationXML.cpp +++ b/tools/gator/daemon/ConfigurationXML.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -106,7 +106,7 @@ void ConfigurationXML::validate(void) { const Counter & counter = gSessionData->mCounters[i]; if (counter.isEnabled()) { if (strcmp(counter.getType(), "") == 0) { - logg->logError(__FILE__, __LINE__, "Invalid required attribute in configuration.xml:\n counter=\"%s\"\n event=%d\n", counter.getType(), counter.getEvent()); + logg->logError("Invalid required attribute in configuration.xml:\n counter=\"%s\"\n event=%d\n", counter.getType(), counter.getEvent()); handleException(); } @@ -116,7 +116,7 @@ void ConfigurationXML::validate(void) { if (counter2.isEnabled()) { // check if the types are the same if (strcmp(counter.getType(), counter2.getType()) == 0) { - logg->logError(__FILE__, __LINE__, "Duplicate performance counter type in configuration.xml: %s", counter.getType()); + logg->logError("Duplicate performance counter type in configuration.xml: %s", counter.getType()); handleException(); } } @@ -169,7 +169,7 @@ void ConfigurationXML::configurationTag(mxml_node_t *node) { for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) { if (driver->claimCounter(counter)) { if (counter.getDriver() != NULL) { - logg->logError(__FILE__, __LINE__, "More than one driver has claimed %s:%i", counter.getType(), counter.getEvent()); + logg->logError("More than one driver has claimed %s:%i", counter.getType(), counter.getEvent()); handleException(); } counter.setDriver(driver); @@ -210,7 +210,7 @@ void ConfigurationXML::remove() { getPath(path); if (::remove(path) != 0) { - logg->logError(__FILE__, __LINE__, "Invalid configuration.xml file detected and unable to delete it. To resolve, delete configuration.xml on disk"); + logg->logError("Invalid configuration.xml file detected and unable to delete it. To resolve, delete configuration.xml on disk"); handleException(); } logg->logMessage("Invalid configuration.xml file detected and removed"); diff --git a/tools/gator/daemon/ConfigurationXML.h b/tools/gator/daemon/ConfigurationXML.h index efa415e508b6..a986ce99381b 100644 --- a/tools/gator/daemon/ConfigurationXML.h +++ b/tools/gator/daemon/ConfigurationXML.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Counter.h b/tools/gator/daemon/Counter.h index 5202aa046362..a4c22f571342 100644 --- a/tools/gator/daemon/Counter.h +++ b/tools/gator/daemon/Counter.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/DiskIODriver.cpp b/tools/gator/daemon/DiskIODriver.cpp index 5deb0f375f3a..af62bb9a95bd 100644 --- a/tools/gator/daemon/DiskIODriver.cpp +++ b/tools/gator/daemon/DiskIODriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -67,7 +67,7 @@ void DiskIODriver::doRead() { } if (!mBuf.read("/proc/diskstats")) { - logg->logError(__FILE__, __LINE__, "Unable to read /proc/diskstats"); + logg->logError("Unable to read /proc/diskstats"); handleException(); } @@ -76,9 +76,9 @@ void DiskIODriver::doRead() { char *lastName = NULL; int lastNameLen = -1; - char *start = mBuf.getBuf(); - while (*start != '\0') { - char *end = strchr(start, '\n'); + char *line = mBuf.getBuf(); + while (*line != '\0') { + char *end = strchr(line, '\n'); if (end != NULL) { *end = '\0'; } @@ -87,15 +87,15 @@ void DiskIODriver::doRead() { int nameEnd = -1; int64_t readBytes = -1; int64_t writeBytes = -1; - const int count = sscanf(start, "%*d %*d %n%*s%n %*u %*u %" SCNu64 " %*u %*u %*u %" SCNu64, &nameStart, &nameEnd, &readBytes, &writeBytes); + const int count = sscanf(line, "%*d %*d %n%*s%n %*u %*u %" SCNu64 " %*u %*u %*u %" SCNu64, &nameStart, &nameEnd, &readBytes, &writeBytes); if (count != 2) { - logg->logError(__FILE__, __LINE__, "Unable to parse /proc/diskstats"); + logg->logError("Unable to parse /proc/diskstats"); handleException(); } // Skip partitions which are identified if the name is a substring of the last non-partition - if ((lastName == NULL) || (strncmp(lastName, start + nameStart, lastNameLen) != 0)) { - lastName = start + nameStart; + if ((lastName == NULL) || (strncmp(lastName, line + nameStart, lastNameLen) != 0)) { + lastName = line + nameStart; lastNameLen = nameEnd - nameStart; mReadBytes += readBytes; mWriteBytes += writeBytes; @@ -104,7 +104,7 @@ void DiskIODriver::doRead() { if (end == NULL) { break; } - start = end + 1; + line = end + 1; } } diff --git a/tools/gator/daemon/DiskIODriver.h b/tools/gator/daemon/DiskIODriver.h index d0db18c77d04..6ecda5afc25a 100644 --- a/tools/gator/daemon/DiskIODriver.h +++ b/tools/gator/daemon/DiskIODriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Driver.cpp b/tools/gator/daemon/Driver.cpp index 275da31c7a0d..72c731424bde 100644 --- a/tools/gator/daemon/Driver.cpp +++ b/tools/gator/daemon/Driver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Driver.h b/tools/gator/daemon/Driver.h index 72870e3dbca1..19ec12775a39 100644 --- a/tools/gator/daemon/Driver.h +++ b/tools/gator/daemon/Driver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/DriverSource.cpp b/tools/gator/daemon/DriverSource.cpp index 7f299b646952..34920cee92fb 100644 --- a/tools/gator/daemon/DriverSource.cpp +++ b/tools/gator/daemon/DriverSource.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -32,7 +32,7 @@ DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mBuffer(NULL mBuffer = new Buffer(0, FRAME_PERF_ATTRS, 4*1024*1024, senderSem); if (readIntDriver("/dev/gator/version", &driver_version) == -1) { - logg->logError(__FILE__, __LINE__, "Error reading gator driver version"); + logg->logError("Error reading gator driver version"); handleException(); } @@ -40,7 +40,7 @@ DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mBuffer(NULL if (driver_version != PROTOCOL_VERSION) { if ((driver_version > PROTOCOL_DEV) || (PROTOCOL_VERSION > PROTOCOL_DEV)) { // One of the mismatched versions is development version - logg->logError(__FILE__, __LINE__, + logg->logError( "DEVELOPMENT BUILD MISMATCH: gator driver version \"%d\" is not in sync with gator daemon version \"%d\".\n" ">> The following must be synchronized from engineering repository:\n" ">> * gator driver\n" @@ -49,7 +49,7 @@ DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mBuffer(NULL handleException(); } else { // Release version mismatch - logg->logError(__FILE__, __LINE__, + logg->logError( "gator driver version \"%d\" is different than gator daemon version \"%d\".\n" ">> Please upgrade the driver and daemon to the latest versions.", driver_version, PROTOCOL_VERSION); handleException(); @@ -58,7 +58,7 @@ DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mBuffer(NULL int enable = -1; if (readIntDriver("/dev/gator/enable", &enable) != 0 || enable != 0) { - logg->logError(__FILE__, __LINE__, "Driver already enabled, possibly a session is already in progress."); + logg->logError("Driver already enabled, possibly a session is already in progress."); handleException(); } @@ -68,7 +68,7 @@ DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mBuffer(NULL } if (readIntDriver("/dev/gator/buffer_size", &mBufferSize) || mBufferSize <= 0) { - logg->logError(__FILE__, __LINE__, "Unable to read the driver buffer size"); + logg->logError("Unable to read the driver buffer size"); handleException(); } } @@ -99,10 +99,11 @@ void DriverSource::bootstrapThread() { DynBuf printb; DynBuf b1; DynBuf b2; - const uint64_t currTime = getTime(); + // MonotonicStarted may not be not assigned yet + const uint64_t currTime = 0;//getTime() - gSessionData->mMonotonicStarted; if (!readProcComms(currTime, mBuffer, &printb, &b1, &b2)) { - logg->logError(__FILE__, __LINE__, "readProcComms failed"); + logg->logError("readProcComms failed"); handleException(); } @@ -124,33 +125,33 @@ void DriverSource::run() { // Set the maximum backtrace depth if (writeReadDriver("/dev/gator/backtrace_depth", &gSessionData->mBacktraceDepth)) { - logg->logError(__FILE__, __LINE__, "Unable to set the driver backtrace depth"); + logg->logError("Unable to set the driver backtrace depth"); handleException(); } // open the buffer which calls userspace_buffer_open() in the driver mBufferFD = open("/dev/gator/buffer", O_RDONLY | O_CLOEXEC); if (mBufferFD < 0) { - logg->logError(__FILE__, __LINE__, "The gator driver did not set up properly. Please view the linux console or dmesg log for more information on the failure."); + logg->logError("The gator driver did not set up properly. Please view the linux console or dmesg log for more information on the failure."); handleException(); } // set the tick rate of the profiling timer if (writeReadDriver("/dev/gator/tick", &gSessionData->mSampleRate) != 0) { - logg->logError(__FILE__, __LINE__, "Unable to set the driver tick"); + logg->logError("Unable to set the driver tick"); handleException(); } // notify the kernel of the response type int response_type = gSessionData->mLocalCapture ? 0 : RESPONSE_APC_DATA; if (writeDriver("/dev/gator/response_type", response_type)) { - logg->logError(__FILE__, __LINE__, "Unable to write the response type"); + logg->logError("Unable to write the response type"); handleException(); } // Set the live rate if (writeReadDriver("/dev/gator/live_rate", &gSessionData->mLiveRate)) { - logg->logError(__FILE__, __LINE__, "Unable to set the driver live rate"); + logg->logError("Unable to set the driver live rate"); handleException(); } @@ -158,7 +159,7 @@ void DriverSource::run() { // This command makes the driver start profiling by calling gator_op_start() in the driver if (writeDriver("/dev/gator/enable", "1") != 0) { - logg->logError(__FILE__, __LINE__, "The gator driver did not start properly. Please view the linux console or dmesg log for more information on the failure."); + logg->logError("The gator driver did not start properly. Please view the linux console or dmesg log for more information on the failure."); handleException(); } @@ -168,7 +169,7 @@ void DriverSource::run() { pthread_t bootstrapThreadID; if (pthread_create(&bootstrapThreadID, NULL, bootstrapThreadStatic, this) != 0) { - logg->logError(__FILE__, __LINE__, "Unable to start the gator_bootstrap thread"); + logg->logError("Unable to start the gator_bootstrap thread"); handleException(); } @@ -190,7 +191,7 @@ void DriverSource::run() { // In one shot mode, stop collection once all the buffers are filled if (gSessionData->mOneShot && gSessionData->mSessionIsActive) { if (bytesCollected == -1 || mFifo->willFill(bytesCollected)) { - logg->logMessage("One shot"); + logg->logMessage("One shot (gator.ko)"); child->endSession(); } } diff --git a/tools/gator/daemon/DriverSource.h b/tools/gator/daemon/DriverSource.h index ec27b0815bbf..32d983d6d036 100644 --- a/tools/gator/daemon/DriverSource.h +++ b/tools/gator/daemon/DriverSource.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/DynBuf.cpp b/tools/gator/daemon/DynBuf.cpp index df20713ad63c..690cbcb94293 100644 --- a/tools/gator/daemon/DynBuf.cpp +++ b/tools/gator/daemon/DynBuf.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -42,7 +42,7 @@ bool DynBuf::read(const char *const path) { const int fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { - logg->logMessage("%s(%s:%i): open failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("open failed"); return false; } @@ -52,14 +52,14 @@ bool DynBuf::read(const char *const path) { const size_t minCapacity = length + MIN_BUFFER_FREE + 1; if (capacity < minCapacity) { if (resize(minCapacity) != 0) { - logg->logMessage("%s(%s:%i): DynBuf::resize failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::resize failed"); goto fail; } } const ssize_t bytes = ::read(fd, buf + length, capacity - length - 1); if (bytes < 0) { - logg->logMessage("%s(%s:%i): read failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("read failed"); goto fail; } else if (bytes == 0) { break; @@ -105,7 +105,7 @@ bool DynBuf::printf(const char *format, ...) { if (capacity <= 0) { if (resize(2 * MIN_BUFFER_FREE) != 0) { - logg->logMessage("%s(%s:%i): DynBuf::resize failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::resize failed"); return false; } } @@ -114,13 +114,13 @@ bool DynBuf::printf(const char *format, ...) { int bytes = vsnprintf(buf, capacity, format, ap); va_end(ap); if (bytes < 0) { - logg->logMessage("%s(%s:%i): fsnprintf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("fsnprintf failed"); return false; } if (static_cast(bytes) > capacity) { if (resize(bytes + 1) != 0) { - logg->logMessage("%s(%s:%i): DynBuf::resize failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::resize failed"); return false; } @@ -128,7 +128,7 @@ bool DynBuf::printf(const char *format, ...) { bytes = vsnprintf(buf, capacity, format, ap); va_end(ap); if (bytes < 0) { - logg->logMessage("%s(%s:%i): fsnprintf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("fsnprintf failed"); return false; } } diff --git a/tools/gator/daemon/DynBuf.h b/tools/gator/daemon/DynBuf.h index 2f4554ab2e49..da83cd65ca3e 100644 --- a/tools/gator/daemon/DynBuf.h +++ b/tools/gator/daemon/DynBuf.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/EventsXML.cpp b/tools/gator/daemon/EventsXML.cpp index d905bbabe988..cec08d5a6fff 100644 --- a/tools/gator/daemon/EventsXML.cpp +++ b/tools/gator/daemon/EventsXML.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,10 +13,35 @@ #include "OlyUtility.h" #include "SessionData.h" +class XMLList { +public: + XMLList(XMLList *const prev, mxml_node_t *const node) : mPrev(prev), mNode(node) {} + + XMLList *getPrev() { return mPrev; } + mxml_node_t *getNode() const { return mNode; } + void setNode(mxml_node_t *const node) { mNode = node; } + + static void free(XMLList *list) { + while (list != NULL) { + XMLList *prev = list->getPrev(); + delete list; + list = prev; + } + } + +private: + XMLList *const mPrev; + mxml_node_t *mNode; + + // Intentionally unimplemented + XMLList(const XMLList &); + XMLList &operator=(const XMLList &); +}; + mxml_node_t *EventsXML::getTree() { #include "events_xml.h" // defines and initializes char events_xml[] and int events_xml_len char path[PATH_MAX]; - mxml_node_t *xml; + mxml_node_t *xml = NULL; FILE *fl; // Avoid unused variable warning @@ -25,19 +50,147 @@ mxml_node_t *EventsXML::getTree() { // Load the provided or default events xml if (gSessionData->mEventsXMLPath) { strncpy(path, gSessionData->mEventsXMLPath, PATH_MAX); - } else { - util->getApplicationFullPath(path, PATH_MAX); - strncat(path, "events.xml", PATH_MAX - strlen(path) - 1); + fl = fopen(path, "r"); + if (fl) { + xml = mxmlLoadFile(NULL, fl, MXML_NO_CALLBACK); + fclose(fl); + } } - fl = fopen(path, "r"); - if (fl) { - xml = mxmlLoadFile(NULL, fl, MXML_NO_CALLBACK); - fclose(fl); - } else { + if (xml == NULL) { logg->logMessage("Unable to locate events.xml, using default"); xml = mxmlLoadString(NULL, (const char *)events_xml, MXML_NO_CALLBACK); } + // Append additional events XML + if (gSessionData->mEventsXMLAppend) { + fl = fopen(gSessionData->mEventsXMLAppend, "r"); + if (fl == NULL) { + logg->logError("Unable to open additional events XML %s", gSessionData->mEventsXMLAppend); + handleException(); + } + mxml_node_t *append = mxmlLoadFile(NULL, fl, MXML_NO_CALLBACK); + fclose(fl); + + mxml_node_t *events = mxmlFindElement(xml, xml, "events", NULL, NULL, MXML_DESCEND); + if (!events) { + logg->logError("Unable to find node in the events.xml, please ensure the first two lines of events XML starts with:\n" + "\n" + ""); + handleException(); + } + + XMLList *categoryList = NULL; + XMLList *eventList = NULL; + { + // Make list of all categories in xml + mxml_node_t *node = xml; + while (true) { + node = mxmlFindElement(node, xml, "category", NULL, NULL, MXML_DESCEND); + if (node == NULL) { + break; + } + categoryList = new XMLList(categoryList, node); + } + + // Make list of all events in xml + node = xml; + while (true) { + node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND); + if (node == NULL) { + break; + } + eventList = new XMLList(eventList, node); + } + } + + // Handle events + for (mxml_node_t *node = mxmlFindElement(append, append, "event", NULL, NULL, MXML_DESCEND), + *next = mxmlFindElement(node, append, "event", NULL, NULL, MXML_DESCEND); + node != NULL; + node = next, next = mxmlFindElement(node, append, "event", NULL, NULL, MXML_DESCEND)) { + const char *const category = mxmlElementGetAttr(mxmlGetParent(node), "name"); + const char *const title = mxmlElementGetAttr(node, "title"); + const char *const name = mxmlElementGetAttr(node, "name"); + if (category == NULL || title == NULL || name == NULL) { + logg->logError("Not all event XML nodes have the required title and name and parent name attributes"); + handleException(); + } + + // Replace any duplicate events + for (XMLList *event = eventList; event != NULL; event = event->getPrev()) { + const char *const category2 = mxmlElementGetAttr(mxmlGetParent(event->getNode()), "name"); + const char *const title2 = mxmlElementGetAttr(event->getNode(), "title"); + const char *const name2 = mxmlElementGetAttr(event->getNode(), "name"); + if (category2 == NULL || title2 == NULL || name2 == NULL) { + logg->logError("Not all event XML nodes have the required title and name and parent name attributes"); + handleException(); + } + + if (strcmp(category, category2) == 0 && strcmp(title, title2) == 0 && strcmp(name, name2) == 0) { + logg->logMessage("Replacing counter %s %s: %s", category, title, name); + mxml_node_t *parent = mxmlGetParent(event->getNode()); + mxmlDelete(event->getNode()); + mxmlAdd(parent, MXML_ADD_AFTER, MXML_ADD_TO_PARENT, node); + event->setNode(node); + break; + } + } + } + + // Handle categories + for (mxml_node_t *node = strcmp(mxmlGetElement(append), "category") == 0 ? append : mxmlFindElement(append, append, "category", NULL, NULL, MXML_DESCEND), + *next = mxmlFindElement(node, append, "category", NULL, NULL, MXML_DESCEND); + node != NULL; + node = next, next = mxmlFindElement(node, append, "category", NULL, NULL, MXML_DESCEND)) { + // After replacing duplicate events, a category may be empty + if (mxmlGetFirstChild(node) == NULL) { + continue; + } + + const char *const name = mxmlElementGetAttr(node, "name"); + if (name == NULL) { + logg->logError("Not all event XML categories have the required name attribute"); + handleException(); + } + + // Merge identically named categories + bool merged = false; + for (XMLList *category = categoryList; category != NULL; category = category->getPrev()) { + const char *const name2 = mxmlElementGetAttr(category->getNode(), "name"); + if (name2 == NULL) { + logg->logError("Not all event XML categories have the required name attribute"); + handleException(); + } + + if (strcmp(name, name2) == 0) { + logg->logMessage("Merging category %s", name); + while (true) { + mxml_node_t *child = mxmlGetFirstChild(node); + if (child == NULL) { + break; + } + mxmlAdd(category->getNode(), MXML_ADD_AFTER, mxmlGetLastChild(category->getNode()), child); + } + merged = true; + break; + } + } + + if (merged) { + continue; + } + + // Add new categories + logg->logMessage("Appending category %s", name); + mxmlAdd(events, MXML_ADD_AFTER, mxmlGetLastChild(events), node); + } + + XMLList::free(eventList); + XMLList::free(categoryList); + + mxmlDelete(append); + } + return xml; } @@ -47,7 +200,9 @@ char *EventsXML::getXML() { // Add dynamic events from the drivers mxml_node_t *events = mxmlFindElement(xml, xml, "events", NULL, NULL, MXML_DESCEND); if (!events) { - logg->logError(__FILE__, __LINE__, "Unable to find node in the events.xml"); + logg->logError("Unable to find node in the events.xml, please ensure the first two lines of events XML are:\n" + "\n" + ""); handleException(); } for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) { @@ -68,7 +223,7 @@ void EventsXML::write(const char *path) { char *buf = getXML(); if (util->writeToDisk(file, buf) < 0) { - logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify the path.", file); + logg->logError("Error writing %s\nPlease verify the path.", file); handleException(); } diff --git a/tools/gator/daemon/EventsXML.h b/tools/gator/daemon/EventsXML.h index ff7a02fd3c78..2b38fa4364e0 100644 --- a/tools/gator/daemon/EventsXML.h +++ b/tools/gator/daemon/EventsXML.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,9 +13,16 @@ class EventsXML { public: + EventsXML() {} + mxml_node_t *getTree(); char *getXML(); void write(const char* path); + +private: + // Intentionally unimplemented + EventsXML(const EventsXML &); + EventsXML &operator=(const EventsXML &); }; #endif // EVENTS_XML diff --git a/tools/gator/daemon/ExternalSource.cpp b/tools/gator/daemon/ExternalSource.cpp index 8f5e6b684c53..8d71b6de3d5e 100644 --- a/tools/gator/daemon/ExternalSource.cpp +++ b/tools/gator/daemon/ExternalSource.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -12,10 +12,14 @@ #include #include +#include "Child.h" #include "Logging.h" #include "OlySocket.h" #include "SessionData.h" +extern Child *child; + +static const char STREAMLINE_ANNOTATE[] = "\0streamline-annotate"; static const char MALI_VIDEO[] = "\0mali-video"; static const char MALI_VIDEO_STARTUP[] = "\0mali-video-startup"; static const char MALI_VIDEO_V1[] = "MALI_VIDEO 1\n"; @@ -40,7 +44,7 @@ static bool setNonblock(const int fd) { return true; } -ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 128*1024, senderSem), mMonitor(), mMveStartupUds(MALI_VIDEO_STARTUP, sizeof(MALI_VIDEO_STARTUP)), mMaliStartupUds(MALI_GRAPHICS_STARTUP, sizeof(MALI_GRAPHICS_STARTUP)), mAnnotate(8083), mInterruptFd(-1), mMaliUds(-1), mMveUds(-1) { +ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 128*1024, senderSem), mMonitor(), mMveStartupUds(MALI_VIDEO_STARTUP, sizeof(MALI_VIDEO_STARTUP)), mMaliStartupUds(MALI_GRAPHICS_STARTUP, sizeof(MALI_GRAPHICS_STARTUP)), mAnnotate(8083), mAnnotateUds(STREAMLINE_ANNOTATE, sizeof(STREAMLINE_ANNOTATE), true), mInterruptFd(-1), mMaliUds(-1), mMveUds(-1) { sem_init(&mBufferSem, 0, 0); } @@ -49,18 +53,22 @@ ExternalSource::~ExternalSource() { void ExternalSource::waitFor(const int bytes) { while (mBuffer.bytesAvailable() <= bytes) { + if (gSessionData->mOneShot && gSessionData->mSessionIsActive) { + logg->logMessage("One shot (external)"); + child->endSession(); + } sem_wait(&mBufferSem); } } void ExternalSource::configureConnection(const int fd, const char *const handshake, size_t size) { if (!setNonblock(fd)) { - logg->logError(__FILE__, __LINE__, "Unable to set nonblock on fh"); + logg->logError("Unable to set nonblock on fh"); handleException(); } if (!mMonitor.add(fd)) { - logg->logError(__FILE__, __LINE__, "Unable to add fh to monitor"); + logg->logError("Unable to add fh to monitor"); handleException(); } @@ -68,7 +76,7 @@ void ExternalSource::configureConnection(const int fd, const char *const handsha waitFor(Buffer::MAXSIZE_PACK32 + size - 1); mBuffer.packInt(fd); mBuffer.writeBytes(handshake, size - 1); - mBuffer.commit(1); + mBuffer.commit(1, true); } bool ExternalSource::connectMali() { @@ -106,6 +114,7 @@ bool ExternalSource::prepare() { !setNonblock(mMveStartupUds.getFd()) || !mMonitor.add(mMveStartupUds.getFd()) || !setNonblock(mMaliStartupUds.getFd()) || !mMonitor.add(mMaliStartupUds.getFd()) || !setNonblock(mAnnotate.getFd()) || !mMonitor.add(mAnnotate.getFd()) || + !setNonblock(mAnnotateUds.getFd()) || !mMonitor.add(mAnnotateUds.getFd()) || false) { return false; } @@ -122,18 +131,21 @@ void ExternalSource::run() { prctl(PR_SET_NAME, (unsigned long)&"gatord-external", 0, 0, 0); if (pipe_cloexec(pipefd) != 0) { - logg->logError(__FILE__, __LINE__, "pipe failed"); + logg->logError("pipe failed"); handleException(); } mInterruptFd = pipefd[1]; if (!mMonitor.add(pipefd[0])) { - logg->logError(__FILE__, __LINE__, "Monitor::add failed"); + logg->logError("Monitor::add failed"); handleException(); } // Notify annotate clients to retry connecting to gatord - gSessionData->annotateListener.signal(); + uint64_t val = 1; + if (::write(gSessionData->mAnnotateStart, &val, sizeof(val)) != sizeof(val)) { + logg->logMessage("Writing to annotate pipe failed"); + } while (gSessionData->mSessionIsActive) { struct epoll_event events[16]; @@ -141,11 +153,11 @@ void ExternalSource::run() { while (sem_trywait(&mBufferSem) == 0); int ready = mMonitor.wait(events, ARRAY_LENGTH(events), -1); if (ready < 0) { - logg->logError(__FILE__, __LINE__, "Monitor::wait failed"); + logg->logError("Monitor::wait failed"); handleException(); } - const uint64_t currTime = getTime(); + const uint64_t currTime = getTime() - gSessionData->mMonotonicStarted; for (int i = 0; i < ready; ++i) { const int fd = events[i].data.fd; @@ -155,7 +167,7 @@ void ExternalSource::run() { // Don't read from this connection, establish a new connection to Mali-V500 close(client); if (!connectMve()) { - logg->logError(__FILE__, __LINE__, "Unable to configure incoming Mali video connection"); + logg->logError("Unable to configure incoming Mali video connection"); handleException(); } } else if (fd == mMaliStartupUds.getFd()) { @@ -164,13 +176,19 @@ void ExternalSource::run() { // Don't read from this connection, establish a new connection to Mali Graphics close(client); if (!connectMali()) { - logg->logError(__FILE__, __LINE__, "Unable to configure incoming Mali graphics connection"); + logg->logError("Unable to configure incoming Mali graphics connection"); handleException(); } } else if (fd == mAnnotate.getFd()) { int client = mAnnotate.acceptConnection(); if (!setNonblock(client) || !mMonitor.add(client)) { - logg->logError(__FILE__, __LINE__, "Unable to set socket options on incoming annotation connection"); + logg->logError("Unable to set socket options on incoming annotation connection"); + handleException(); + } + } else if (fd == mAnnotateUds.getFd()) { + int client = mAnnotateUds.acceptConnection(); + if (!setNonblock(client) || !mMonitor.add(client)) { + logg->logError("Unable to set socket options on incoming annotation connection"); handleException(); } } else if (fd == pipefd[0]) { @@ -190,28 +208,29 @@ void ExternalSource::run() { if (bytes < 0) { if (errno == EAGAIN) { // Nothing left to read - mBuffer.commit(currTime); + mBuffer.commit(currTime, true); break; } // Something else failed, close the socket - mBuffer.commit(currTime); + mBuffer.commit(currTime, true); mBuffer.packInt(-1); mBuffer.packInt(fd); - mBuffer.commit(currTime); + // Here and other commits, always force-flush the buffer as this frame don't work like others + mBuffer.commit(currTime, true); close(fd); break; } else if (bytes == 0) { // The other side is closed - mBuffer.commit(currTime); + mBuffer.commit(currTime, true); mBuffer.packInt(-1); mBuffer.packInt(fd); - mBuffer.commit(currTime); + mBuffer.commit(currTime, true); close(fd); break; } mBuffer.advanceWrite(bytes); - mBuffer.commit(currTime); + mBuffer.commit(currTime, true); // Short reads also mean nothing is left to read if (bytes < contiguous) { @@ -238,7 +257,7 @@ void ExternalSource::interrupt() { int8_t c = 0; // Write to the pipe to wake the monitor which will cause mSessionIsActive to be reread if (::write(mInterruptFd, &c, sizeof(c)) != sizeof(c)) { - logg->logError(__FILE__, __LINE__, "write failed"); + logg->logError("write failed"); handleException(); } } diff --git a/tools/gator/daemon/ExternalSource.h b/tools/gator/daemon/ExternalSource.h index 919e75e8a41a..25ae7cd4c4ab 100644 --- a/tools/gator/daemon/ExternalSource.h +++ b/tools/gator/daemon/ExternalSource.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -41,6 +41,7 @@ class ExternalSource : public Source { OlyServerSocket mMveStartupUds; OlyServerSocket mMaliStartupUds; OlyServerSocket mAnnotate; + OlyServerSocket mAnnotateUds; int mInterruptFd; int mMaliUds; int mMveUds; diff --git a/tools/gator/daemon/FSDriver.cpp b/tools/gator/daemon/FSDriver.cpp index dd8eb804dc99..afac9dfb6761 100644 --- a/tools/gator/daemon/FSDriver.cpp +++ b/tools/gator/daemon/FSDriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -42,7 +42,7 @@ FSCounter::FSCounter(DriverCounter *next, char *name, char *path, const char *re if (result != 0) { char buf[128]; regerror(result, &mReg, buf, sizeof(buf)); - logg->logError(__FILE__, __LINE__, "Invalid regex '%s': %s", regex, buf); + logg->logError("Invalid regex '%s': %s", regex, buf); handleException(); } } @@ -79,21 +79,19 @@ int64_t FSCounter::read() { regmatch_t match[2]; int result = regexec(&mReg, buf, 2, match, 0); if (result != 0) { - regerror(result, &mReg, buf, sizeof(buf)); - logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", mPath, buf); - handleException(); + // No match + return 0; } if (match[1].rm_so < 0) { - logg->logError(__FILE__, __LINE__, "Parsing %s failed", mPath); - handleException(); - } - - errno = 0; - value = strtoll(buf + match[1].rm_so, NULL, 0); - if (errno != 0) { - logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", mPath, strerror(errno)); - handleException(); + value = 1; + } else { + errno = 0; + value = strtoll(buf + match[1].rm_so, NULL, 0); + if (errno != 0) { + logg->logError("Parsing %s failed: %s", mPath, strerror(errno)); + handleException(); + } } } else { if (DriverSource::readInt64Driver(mPath, &value) != 0) { @@ -103,7 +101,7 @@ int64_t FSCounter::read() { return value; fail: - logg->logError(__FILE__, __LINE__, "Unable to read %s", mPath); + logg->logError("Unable to read %s", mPath); handleException(); } @@ -126,7 +124,7 @@ void FSDriver::readEvents(mxml_node_t *const xml) { } if (counter[0] == '/') { - logg->logError(__FILE__, __LINE__, "Old style filesystem counter (%s) detected, please create a new unique counter value and move the filename into the path attribute, see events-Filesystem.xml for examples", counter); + logg->logError("Old style filesystem counter (%s) detected, please create a new unique counter value and move the filename into the path attribute, see events-Filesystem.xml for examples", counter); handleException(); } @@ -136,7 +134,7 @@ void FSDriver::readEvents(mxml_node_t *const xml) { const char *path = mxmlElementGetAttr(node, "path"); if (path == NULL) { - logg->logError(__FILE__, __LINE__, "The filesystem counter %s is missing the required path attribute", counter); + logg->logError("The filesystem counter %s is missing the required path attribute", counter); handleException(); } const char *regex = mxmlElementGetAttr(node, "regex"); diff --git a/tools/gator/daemon/FSDriver.h b/tools/gator/daemon/FSDriver.h index a7dc8b4df9dd..63a4e90a2b46 100644 --- a/tools/gator/daemon/FSDriver.h +++ b/tools/gator/daemon/FSDriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Fifo.cpp b/tools/gator/daemon/Fifo.cpp index 41275fd287b8..8d3b9ff8d385 100644 --- a/tools/gator/daemon/Fifo.cpp +++ b/tools/gator/daemon/Fifo.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -24,12 +24,12 @@ Fifo::Fifo(int singleBufferSize, int bufferSize, sem_t* readerSem) { mEnd = false; if (mBuffer == NULL) { - logg->logError(__FILE__, __LINE__, "failed to allocate %d bytes", bufferSize + singleBufferSize); + logg->logError("failed to allocate %d bytes", bufferSize + singleBufferSize); handleException(); } if (sem_init(&mWaitForSpaceSem, 0, 0)) { - logg->logError(__FILE__, __LINE__, "sem_init() failed"); + logg->logError("sem_init() failed"); handleException(); } } diff --git a/tools/gator/daemon/Fifo.h b/tools/gator/daemon/Fifo.h index 21c8d8580391..01fa11b2f2c4 100644 --- a/tools/gator/daemon/Fifo.h +++ b/tools/gator/daemon/Fifo.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/FtraceDriver.cpp b/tools/gator/daemon/FtraceDriver.cpp index b156f1c0b8b4..98bd0a5d9eec 100644 --- a/tools/gator/daemon/FtraceDriver.cpp +++ b/tools/gator/daemon/FtraceDriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,56 +9,80 @@ #include "FtraceDriver.h" #include +#include +#include "DriverSource.h" #include "Logging.h" +#include "Setup.h" class FtraceCounter : public DriverCounter { public: - FtraceCounter(DriverCounter *next, char *name, const char *regex); + FtraceCounter(DriverCounter *next, char *name, const char *regex, const char *enable); ~FtraceCounter(); + void prepare(); int read(const char *const line, int64_t *values); + void stop(); private: - regex_t reg; + regex_t mReg; + char *const mEnable; + int mWasEnabled; // Intentionally unimplemented FtraceCounter(const FtraceCounter &); FtraceCounter &operator=(const FtraceCounter &); }; -FtraceCounter::FtraceCounter(DriverCounter *next, char *name, const char *regex) : DriverCounter(next, name) { - int result = regcomp(®, regex, REG_EXTENDED); +FtraceCounter::FtraceCounter(DriverCounter *next, char *name, const char *regex, const char *enable) : DriverCounter(next, name), mEnable(enable == NULL ? NULL : strdup(enable)) { + int result = regcomp(&mReg, regex, REG_EXTENDED); if (result != 0) { char buf[128]; - regerror(result, ®, buf, sizeof(buf)); - logg->logError(__FILE__, __LINE__, "Invalid regex '%s': %s", regex, buf); + regerror(result, &mReg, buf, sizeof(buf)); + logg->logError("Invalid regex '%s': %s", regex, buf); handleException(); } } FtraceCounter::~FtraceCounter() { - regfree(®); + regfree(&mReg); + if (mEnable != NULL) { + free(mEnable); + } +} + +void FtraceCounter::prepare() { + if (mEnable == NULL) { + return; + } + + char buf[1<<10]; + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/enable", mEnable); + if ((DriverSource::readIntDriver(buf, &mWasEnabled) != 0) || + (DriverSource::writeDriver(buf, 1) != 0)) { + logg->logError("Unable to read or write to %s", buf); + handleException(); + } } int FtraceCounter::read(const char *const line, int64_t *values) { regmatch_t match[2]; - int result = regexec(®, line, 2, match, 0); + int result = regexec(&mReg, line, 2, match, 0); if (result != 0) { // No match return 0; } + int64_t value; if (match[1].rm_so < 0) { - logg->logError(__FILE__, __LINE__, "Parsing %s failed", getName()); - handleException(); - } - - errno = 0; - int64_t value = strtoll(line + match[1].rm_so, NULL, 0); - if (errno != 0) { - logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", getName(), strerror(errno)); - handleException(); + value = 1; + } else { + errno = 0; + value = strtoll(line + match[1].rm_so, NULL, 0); + if (errno != 0) { + logg->logError("Parsing %s failed: %s", getName(), strerror(errno)); + handleException(); + } } values[0] = getKey(); @@ -67,6 +91,16 @@ int FtraceCounter::read(const char *const line, int64_t *values) { return 1; } +void FtraceCounter::stop() { + if (mEnable == NULL) { + return; + } + + char buf[1<<10]; + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/enable", mEnable); + DriverSource::writeDriver(buf, mWasEnabled); +} + FtraceDriver::FtraceDriver() : mValues(NULL) { } @@ -75,6 +109,19 @@ FtraceDriver::~FtraceDriver() { } void FtraceDriver::readEvents(mxml_node_t *const xml) { + // Check the kernel version + int release[3]; + if (!getLinuxVersion(release)) { + logg->logError("getLinuxVersion failed"); + handleException(); + } + + // The perf clock was added in 3.10 + if (KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 10, 0)) { + logg->logMessage("Unsupported kernel version, to use ftrace please upgrade to Linux 3.10 or later"); + return; + } + mxml_node_t *node = xml; int count = 0; while (true) { @@ -93,16 +140,37 @@ void FtraceDriver::readEvents(mxml_node_t *const xml) { const char *regex = mxmlElementGetAttr(node, "regex"); if (regex == NULL) { - logg->logError(__FILE__, __LINE__, "The regex counter %s is missing the required regex attribute", counter); + logg->logError("The regex counter %s is missing the required regex attribute", counter); handleException(); } - setCounters(new FtraceCounter(getCounters(), strdup(counter), regex)); - ++count; + bool addCounter = true; + const char *enable = mxmlElementGetAttr(node, "enable"); + if (enable != NULL) { + char buf[1<<10]; + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/enable", enable); + if (access(buf, W_OK) != 0) { + logg->logMessage("Disabling counter %s, %s not found", counter, buf); + addCounter = false; + } + } + if (addCounter) { + setCounters(new FtraceCounter(getCounters(), strdup(counter), regex, enable)); + ++count; + } } mValues = new int64_t[2*count]; } +void FtraceDriver::prepare() { + for (FtraceCounter *counter = static_cast(getCounters()); counter != NULL; counter = static_cast(counter->getNext())) { + if (!counter->isEnabled()) { + continue; + } + counter->prepare(); + } +} + int FtraceDriver::read(const char *line, int64_t **buf) { int count = 0; @@ -116,3 +184,12 @@ int FtraceDriver::read(const char *line, int64_t **buf) { *buf = mValues; return count; } + +void FtraceDriver::stop() { + for (FtraceCounter *counter = static_cast(getCounters()); counter != NULL; counter = static_cast(counter->getNext())) { + if (!counter->isEnabled()) { + continue; + } + counter->stop(); + } +} diff --git a/tools/gator/daemon/FtraceDriver.h b/tools/gator/daemon/FtraceDriver.h index 5f958bec672c..b79dc9149d6a 100644 --- a/tools/gator/daemon/FtraceDriver.h +++ b/tools/gator/daemon/FtraceDriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -18,7 +18,9 @@ class FtraceDriver : public SimpleDriver { void readEvents(mxml_node_t *const xml); + void prepare(); int read(const char *line, int64_t **buf); + void stop(); private: int64_t *mValues; diff --git a/tools/gator/daemon/FtraceSource.cpp b/tools/gator/daemon/FtraceSource.cpp index 521633357417..14a48b3b870c 100644 --- a/tools/gator/daemon/FtraceSource.cpp +++ b/tools/gator/daemon/FtraceSource.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,10 +14,13 @@ #include #include +#include "Child.h" #include "DriverSource.h" #include "Logging.h" #include "SessionData.h" +extern Child *child; + static void handler(int signum) { (void)signum; @@ -35,18 +38,20 @@ bool FtraceSource::prepare() { act.sa_handler = handler; act.sa_flags = (int)SA_RESETHAND; if (sigaction(SIGUSR1, &act, NULL) != 0) { - logg->logError(__FILE__, __LINE__, "sigaction failed: %s\n", strerror(errno)); + logg->logError("sigaction failed: %s\n", strerror(errno)); handleException(); } } + gSessionData->ftraceDriver.prepare(); + if (DriverSource::readIntDriver("/sys/kernel/debug/tracing/tracing_on", &mTracingOn)) { - logg->logError(__FILE__, __LINE__, "Unable to read if ftrace is enabled"); + logg->logError("Unable to read if ftrace is enabled"); handleException(); } if (DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", "0") != 0) { - logg->logError(__FILE__, __LINE__, "Unable to turn ftrace off before truncating the buffer"); + logg->logError("Unable to turn ftrace off before truncating the buffer"); handleException(); } @@ -54,20 +59,20 @@ bool FtraceSource::prepare() { int fd; fd = open("/sys/kernel/debug/tracing/trace", O_WRONLY | O_TRUNC | O_CLOEXEC, 0666); if (fd < 0) { - logg->logError(__FILE__, __LINE__, "Unable truncate ftrace buffer: %s", strerror(errno)); + logg->logError("Unable truncate ftrace buffer: %s", strerror(errno)); handleException(); } close(fd); } if (DriverSource::writeDriver("/sys/kernel/debug/tracing/trace_clock", "perf") != 0) { - logg->logError(__FILE__, __LINE__, "Unable to switch ftrace to the perf clock, please ensure you are running Linux 3.10 or later"); + logg->logError("Unable to switch ftrace to the perf clock, please ensure you are running Linux 3.10 or later"); handleException(); } mFtraceFh = fopen_cloexec("/sys/kernel/debug/tracing/trace_pipe", "rb"); if (mFtraceFh == NULL) { - logg->logError(__FILE__, __LINE__, "Unable to open trace_pipe"); + logg->logError("Unable to open trace_pipe"); handleException(); } @@ -79,10 +84,25 @@ void FtraceSource::run() { mTid = syscall(__NR_gettid); if (DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", "1") != 0) { - logg->logError(__FILE__, __LINE__, "Unable to turn ftrace on"); + logg->logError("Unable to turn ftrace on"); handleException(); } + // Wait until monotonicStarted is set before sending data + int64_t monotonicStarted = 0; + while (monotonicStarted <= 0 && gSessionData->mSessionIsActive) { + usleep(10); + + if (gSessionData->perf.isSetup()) { + monotonicStarted = gSessionData->mMonotonicStarted; + } else { + if (DriverSource::readInt64Driver("/dev/gator/started", &monotonicStarted) == -1) { + logg->logError("Error reading gator driver start time"); + handleException(); + } + } + } + while (gSessionData->mSessionIsActive) { char buf[1<<12]; @@ -91,22 +111,26 @@ void FtraceSource::run() { // Interrupted by interrupt - likely user request to terminate break; } - logg->logError(__FILE__, __LINE__, "Unable read trace data: %s", strerror(errno)); + logg->logError("Unable read trace data: %s", strerror(errno)); handleException(); } - const uint64_t currTime = getTime(); + const uint64_t currTime = getTime() - gSessionData->mMonotonicStarted; char *const colon = strstr(buf, ": "); if (colon == NULL) { - logg->logError(__FILE__, __LINE__, "Unable find colon: %s", buf); + if (strstr(buf, " [LOST ") != NULL) { + logg->logError("Ftrace events lost, aborting the capture. It is recommended to discard this report and collect a new capture. If this error occurs often, please reduce the number of ftrace counters selected or the amount of ftrace events generated."); + } else { + logg->logError("Unable to find colon: %s", buf); + } handleException(); } *colon = '\0'; char *const space = strrchr(buf, ' '); if (space == NULL) { - logg->logError(__FILE__, __LINE__, "Unable find space: %s", buf); + logg->logError("Unable to find space: %s", buf); handleException(); } *colon = ':'; @@ -117,7 +141,7 @@ void FtraceSource::run() { errno = 0; const long long time = strtod(space, NULL) * 1000000000; if (errno != 0) { - logg->logError(__FILE__, __LINE__, "Unable to parse time: %s", strerror(errno)); + logg->logError("Unable to parse time: %s", strerror(errno)); handleException(); } mBuffer.event64(-1, time); @@ -127,6 +151,11 @@ void FtraceSource::run() { } mBuffer.check(currTime); + + if (gSessionData->mOneShot && gSessionData->mSessionIsActive && (mBuffer.bytesAvailable() <= 0)) { + logg->logMessage("One shot (ftrace)"); + child->endSession(); + } } } @@ -136,6 +165,7 @@ void FtraceSource::run() { DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", mTracingOn); fclose(mFtraceFh); DriverSource::writeDriver("/sys/kernel/debug/tracing/trace_clock", "local"); + gSessionData->ftraceDriver.stop(); } void FtraceSource::interrupt() { diff --git a/tools/gator/daemon/FtraceSource.h b/tools/gator/daemon/FtraceSource.h index 2391b881494e..bc068d266654 100644 --- a/tools/gator/daemon/FtraceSource.h +++ b/tools/gator/daemon/FtraceSource.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/HwmonDriver.cpp b/tools/gator/daemon/HwmonDriver.cpp index 9d161ae5ac56..d8353b006e63 100644 --- a/tools/gator/daemon/HwmonDriver.cpp +++ b/tools/gator/daemon/HwmonDriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -23,7 +23,7 @@ static sensors_subfeature_type getInput(const sensors_feature_type type) { case SENSORS_FEATURE_CURR: return SENSORS_SUBFEATURE_CURR_INPUT; case SENSORS_FEATURE_HUMIDITY: return SENSORS_SUBFEATURE_HUMIDITY_INPUT; default: - logg->logError(__FILE__, __LINE__, "Unsupported hwmon feature %i", type); + logg->logError("Unsupported hwmon feature %i", type); handleException(); } }; @@ -33,112 +33,112 @@ public: HwmonCounter(DriverCounter *next, char *const name, const sensors_chip_name *chip, const sensors_feature *feature); ~HwmonCounter(); - const char *getLabel() const { return label; } - const char *getTitle() const { return title; } - bool isDuplicate() const { return duplicate; } - const char *getDisplay() const { return display; } - const char *getCounterClass() const { return counter_class; } - const char *getUnit() const { return unit; } - int getModifier() const { return modifier; } + const char *getLabel() const { return mLabel; } + const char *getTitle() const { return mTitle; } + bool isDuplicate() const { return mDuplicate; } + const char *getDisplay() const { return mDisplay; } + const char *getCounterClass() const { return mCounterClass; } + const char *getUnit() const { return mUnit; } + double getMultiplier() const { return mMultiplier; } int64_t read(); private: void init(const sensors_chip_name *chip, const sensors_feature *feature); - const sensors_chip_name *chip; - const sensors_feature *feature; - char *label; - const char *title; - const char *display; - const char *counter_class; - const char *unit; - double previous_value; - int modifier; - int monotonic: 1, - duplicate : 1; + const sensors_chip_name *mChip; + const sensors_feature *mFeature; + char *mLabel; + const char *mTitle; + const char *mDisplay; + const char *mCounterClass; + const char *mUnit; + double mPreviousValue; + double mMultiplier; + int mMonotonic: 1, + mDuplicate : 1; // Intentionally unimplemented HwmonCounter(const HwmonCounter &); HwmonCounter &operator=(const HwmonCounter &); }; -HwmonCounter::HwmonCounter(DriverCounter *next, char *const name, const sensors_chip_name *chip, const sensors_feature *feature) : DriverCounter(next, name), chip(chip), feature(feature), duplicate(false) { - label = sensors_get_label(chip, feature); +HwmonCounter::HwmonCounter(DriverCounter *next, char *const name, const sensors_chip_name *const chip, const sensors_feature *feature) : DriverCounter(next, name), mChip(chip), mFeature(feature), mDuplicate(false) { + mLabel = sensors_get_label(mChip, mFeature); - switch (feature->type) { + switch (mFeature->type) { case SENSORS_FEATURE_IN: - title = "Voltage"; - display = "maximum"; - counter_class = "absolute"; - unit = "V"; - modifier = 1000; - monotonic = false; + mTitle = "Voltage"; + mDisplay = "maximum"; + mCounterClass = "absolute"; + mUnit = "V"; + mMultiplier = 0.001; + mMonotonic = false; break; case SENSORS_FEATURE_FAN: - title = "Fan"; - display = "average"; - counter_class = "absolute"; - unit = "RPM"; - modifier = 1; - monotonic = false; + mTitle = "Fan"; + mDisplay = "average"; + mCounterClass = "absolute"; + mUnit = "RPM"; + mMultiplier = 1.0; + mMonotonic = false; break; case SENSORS_FEATURE_TEMP: - title = "Temperature"; - display = "maximum"; - counter_class = "absolute"; - unit = "°C"; - modifier = 1000; - monotonic = false; + mTitle = "Temperature"; + mDisplay = "maximum"; + mCounterClass = "absolute"; + mUnit = "°C"; + mMultiplier = 0.001; + mMonotonic = false; break; case SENSORS_FEATURE_POWER: - title = "Power"; - display = "maximum"; - counter_class = "absolute"; - unit = "W"; - modifier = 1000000; - monotonic = false; + mTitle = "Power"; + mDisplay = "maximum"; + mCounterClass = "absolute"; + mUnit = "W"; + mMultiplier = 0.000001; + mMonotonic = false; break; case SENSORS_FEATURE_ENERGY: - title = "Energy"; - display = "accumulate"; - counter_class = "delta"; - unit = "J"; - modifier = 1000000; - monotonic = true; + mTitle = "Energy"; + mDisplay = "accumulate"; + mCounterClass = "delta"; + mUnit = "J"; + mMultiplier = 0.000001; + mMonotonic = true; break; case SENSORS_FEATURE_CURR: - title = "Current"; - display = "maximum"; - counter_class = "absolute"; - unit = "A"; - modifier = 1000; - monotonic = false; + mTitle = "Current"; + mDisplay = "maximum"; + mCounterClass = "absolute"; + mUnit = "A"; + mMultiplier = 0.001; + mMonotonic = false; break; case SENSORS_FEATURE_HUMIDITY: - title = "Humidity"; - display = "average"; - counter_class = "absolute"; - unit = "%"; - modifier = 1000; - monotonic = false; + mTitle = "Humidity"; + mDisplay = "average"; + mCounterClass = "absolute"; + mUnit = "%"; + mMultiplier = 0.001; + mMonotonic = false; break; default: - logg->logError(__FILE__, __LINE__, "Unsupported hwmon feature %i", feature->type); + logg->logError("Unsupported hwmon feature %i", mFeature->type); handleException(); } for (HwmonCounter * counter = static_cast(next); counter != NULL; counter = static_cast(counter->getNext())) { - if (strcmp(label, counter->getLabel()) == 0 && strcmp(title, counter->getTitle()) == 0) { - duplicate = true; - counter->duplicate = true; + if (strcmp(mLabel, counter->getLabel()) == 0 && strcmp(mTitle, counter->getTitle()) == 0) { + mDuplicate = true; + counter->mDuplicate = true; break; } } } HwmonCounter::~HwmonCounter() { - free((void *)label); + free((void *)mLabel); } int64_t HwmonCounter::read() { @@ -147,19 +147,19 @@ int64_t HwmonCounter::read() { const sensors_subfeature *subfeature; // Keep in sync with the read check in HwmonDriver::readEvents - subfeature = sensors_get_subfeature(chip, feature, getInput(feature->type)); + subfeature = sensors_get_subfeature(mChip, mFeature, getInput(mFeature->type)); if (!subfeature) { - logg->logError(__FILE__, __LINE__, "No input value for hwmon sensor %s", label); + logg->logError("No input value for hwmon sensor %s", mLabel); handleException(); } - if (sensors_get_value(chip, subfeature->number, &value) != 0) { - logg->logError(__FILE__, __LINE__, "Can't get input value for hwmon sensor %s", label); + if (sensors_get_value(mChip, subfeature->number, &value) != 0) { + logg->logError("Can't get input value for hwmon sensor %s", mLabel); handleException(); } - result = (monotonic ? value - previous_value : value); - previous_value = value; + result = (mMonotonic ? value - mPreviousValue : value); + mPreviousValue = value; return result; } @@ -209,7 +209,7 @@ void HwmonDriver::readEvents(mxml_node_t *const) { void HwmonDriver::writeEvents(mxml_node_t *root) const { root = mxmlNewElement(root, "category"); - mxmlElementSetAttr(root, "name", "hwmon"); + mxmlElementSetAttr(root, "name", "Hardware Monitor"); char buf[1024]; for (HwmonCounter *counter = static_cast(getCounters()); counter != NULL; counter = static_cast(counter->getNext())) { @@ -224,8 +224,8 @@ void HwmonDriver::writeEvents(mxml_node_t *root) const { mxmlElementSetAttr(node, "display", counter->getDisplay()); mxmlElementSetAttr(node, "class", counter->getCounterClass()); mxmlElementSetAttr(node, "units", counter->getUnit()); - if (counter->getModifier() != 1) { - mxmlElementSetAttrf(node, "modifier", "%d", counter->getModifier()); + if (counter->getMultiplier() != 1.0) { + mxmlElementSetAttrf(node, "multiplier", "%lf", counter->getMultiplier()); } if (strcmp(counter->getDisplay(), "average") == 0 || strcmp(counter->getDisplay(), "maximum") == 0) { mxmlElementSetAttr(node, "average_selection", "yes"); diff --git a/tools/gator/daemon/HwmonDriver.h b/tools/gator/daemon/HwmonDriver.h index f28d825e3b7b..f15d557051dc 100644 --- a/tools/gator/daemon/HwmonDriver.h +++ b/tools/gator/daemon/HwmonDriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/KMod.cpp b/tools/gator/daemon/KMod.cpp index fe9dc6a7e4f7..e33b499d831b 100644 --- a/tools/gator/daemon/KMod.cpp +++ b/tools/gator/daemon/KMod.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -83,13 +83,13 @@ void KMod::setupCounter(Counter &counter) { if (access(text, F_OK) == 0) { int count = counter.getCount(); if (DriverSource::writeReadDriver(text, &count) && counter.getCount() > 0) { - logg->logError(__FILE__, __LINE__, "Cannot enable EBS for %s:%i with a count of %d\n", counter.getType(), counter.getEvent(), counter.getCount()); + logg->logError("Cannot enable EBS for %s:%i with a count of %d\n", counter.getType(), counter.getEvent(), counter.getCount()); handleException(); } counter.setCount(count); } else if (counter.getCount() > 0) { ConfigurationXML::remove(); - logg->logError(__FILE__, __LINE__, "Event Based Sampling is only supported with kernel versions 3.0.0 and higher with CONFIG_PERF_EVENTS=y, and CONFIG_HW_PERF_EVENTS=y. The invalid configuration.xml has been removed.\n"); + logg->logError("Event Based Sampling is only supported with kernel versions 3.0.0 and higher with CONFIG_PERF_EVENTS=y, and CONFIG_HW_PERF_EVENTS=y. The invalid configuration.xml has been removed.\n"); handleException(); } } diff --git a/tools/gator/daemon/KMod.h b/tools/gator/daemon/KMod.h index 900a60e87d24..7f06b4b34b5d 100644 --- a/tools/gator/daemon/KMod.h +++ b/tools/gator/daemon/KMod.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/LocalCapture.cpp b/tools/gator/daemon/LocalCapture.cpp index d2a4b799d7ac..56899871c03a 100644 --- a/tools/gator/daemon/LocalCapture.cpp +++ b/tools/gator/daemon/LocalCapture.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -27,7 +27,7 @@ LocalCapture::~LocalCapture() {} void LocalCapture::createAPCDirectory(char* target_path) { gSessionData->mAPCDir = createUniqueDirectory(target_path, ".apc"); if ((removeDirAndAllContents(gSessionData->mAPCDir) != 0 || mkdir(gSessionData->mAPCDir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) != 0)) { - logg->logError(__FILE__, __LINE__, "Unable to create directory %s", gSessionData->mAPCDir); + logg->logError("Unable to create directory %s", gSessionData->mAPCDir); handleException(); } } @@ -40,7 +40,7 @@ void LocalCapture::write(char* string) { // Write the file if (util->writeToDisk(file, string) < 0) { - logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify the path.", file); + logg->logError("Error writing %s\nPlease verify the path.", file); handleException(); } @@ -55,7 +55,7 @@ char* LocalCapture::createUniqueDirectory(const char* initialPath, const char* e // Ensure the path is an absolute path, i.e. starts with a slash if (initialPath == 0 || strlen(initialPath) == 0) { - logg->logError(__FILE__, __LINE__, "Missing -o command line option required for a local capture."); + logg->logError("Missing -o command line option required for a local capture."); handleException(); } else if (initialPath[0] != '/') { if (getcwd(path, PATH_MAX) == 0) { diff --git a/tools/gator/daemon/LocalCapture.h b/tools/gator/daemon/LocalCapture.h index 25d281f8328b..807f49d16845 100644 --- a/tools/gator/daemon/LocalCapture.h +++ b/tools/gator/daemon/LocalCapture.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Logging.cpp b/tools/gator/daemon/Logging.cpp index 41ffa1a45151..8846622d950f 100644 --- a/tools/gator/daemon/Logging.cpp +++ b/tools/gator/daemon/Logging.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -39,12 +39,12 @@ Logging::Logging(bool debug) { Logging::~Logging() { } -void Logging::logError(const char* file, int line, const char* fmt, ...) { +void Logging::_logError(const char *function, const char *file, int line, const char *fmt, ...) { va_list args; MUTEX_LOCK(); if (mDebug) { - snprintf(mErrBuf, sizeof(mErrBuf), "ERROR[%s:%d]: ", file, line); + snprintf(mErrBuf, sizeof(mErrBuf), "ERROR: %s(%s:%i): ", function, file, line); } else { mErrBuf[0] = 0; } @@ -59,12 +59,12 @@ void Logging::logError(const char* file, int line, const char* fmt, ...) { MUTEX_UNLOCK(); } -void Logging::logMessage(const char* fmt, ...) { +void Logging::_logMessage(const char *function, const char *file, int line, const char *fmt, ...) { if (mDebug) { va_list args; MUTEX_LOCK(); - strcpy(mLogBuf, "INFO: "); + snprintf(mLogBuf, sizeof(mLogBuf), "INFO: %s(%s:%i): ", function, file, line); va_start(args, fmt); vsnprintf(mLogBuf + strlen(mLogBuf), sizeof(mLogBuf) - 2 - strlen(mLogBuf), fmt, args); // subtract 2 for \n and \0 diff --git a/tools/gator/daemon/Logging.h b/tools/gator/daemon/Logging.h index 09e93ff13f7a..a7b45239f5d8 100644 --- a/tools/gator/daemon/Logging.h +++ b/tools/gator/daemon/Logging.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,10 +17,14 @@ class Logging { public: Logging(bool debug); ~Logging(); - void logError(const char* file, int line, const char* fmt, ...); - void logMessage(const char* fmt, ...); - char* getLastError() {return mErrBuf;} - char* getLastMessage() {return mLogBuf;} +#define logError(...) _logError(__func__, __FILE__, __LINE__, __VA_ARGS__) + __attribute__ ((format (printf, 5, 6))) + void _logError(const char *function, const char *file, int line, const char *fmt, ...); +#define logMessage(...) _logMessage(__func__, __FILE__, __LINE__, __VA_ARGS__) + __attribute__ ((format (printf, 5, 6))) + void _logMessage(const char *function, const char *file, int line, const char *fmt, ...); + char *getLastError() {return mErrBuf;} + char *getLastMessage() {return mLogBuf;} private: char mErrBuf[4096]; // Arbitrarily large buffer to hold a string @@ -29,7 +33,7 @@ class Logging { pthread_mutex_t mLoggingMutex; }; -extern Logging* logg; +extern Logging *logg; extern void handleException() __attribute__ ((noreturn)); diff --git a/tools/gator/daemon/MaliVideoDriver.cpp b/tools/gator/daemon/MaliVideoDriver.cpp index 5eef2643ab15..2db332d3e2e4 100644 --- a/tools/gator/daemon/MaliVideoDriver.cpp +++ b/tools/gator/daemon/MaliVideoDriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -109,7 +109,7 @@ static bool writeAll(const int mveUds, const char *const buf, const int pos) { while (written < pos) { size_t bytes = ::write(mveUds, buf + written, pos - written); if (bytes <= 0) { - logg->logMessage("%s(%s:%i): write failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("write failed"); return false; } written += bytes; diff --git a/tools/gator/daemon/MaliVideoDriver.h b/tools/gator/daemon/MaliVideoDriver.h index 204a57a447ac..35b0558e390b 100644 --- a/tools/gator/daemon/MaliVideoDriver.h +++ b/tools/gator/daemon/MaliVideoDriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/MemInfoDriver.cpp b/tools/gator/daemon/MemInfoDriver.cpp index cce15c16fcdc..6818b978dc5b 100644 --- a/tools/gator/daemon/MemInfoDriver.cpp +++ b/tools/gator/daemon/MemInfoDriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -59,7 +59,7 @@ void MemInfoDriver::read(Buffer *const buffer) { } if (!mBuf.read("/proc/meminfo")) { - logg->logError(__FILE__, __LINE__, "Failed to read /proc/meminfo"); + logg->logError("Failed to read /proc/meminfo"); handleException(); } diff --git a/tools/gator/daemon/MemInfoDriver.h b/tools/gator/daemon/MemInfoDriver.h index eb1b0417f309..ffeaf3009e93 100644 --- a/tools/gator/daemon/MemInfoDriver.h +++ b/tools/gator/daemon/MemInfoDriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Monitor.cpp b/tools/gator/daemon/Monitor.cpp index 74f22ee29fec..04288872f515 100644 --- a/tools/gator/daemon/Monitor.cpp +++ b/tools/gator/daemon/Monitor.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -38,17 +38,17 @@ bool Monitor::init() { mFd = epoll_create(16); #endif if (mFd < 0) { - logg->logMessage("%s(%s:%i): epoll_create1 failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("epoll_create1 failed"); return false; } #ifndef EPOLL_CLOEXEC - int fdf = fcntl(mFd, F_GETFD); - if ((fdf == -1) || (fcntl(mFd, F_SETFD, fdf | FD_CLOEXEC) != 0)) { - logg->logMessage("%s(%s:%i): fcntl failed", __FUNCTION__, __FILE__, __LINE__); - ::close(mFd); - return -1; - } + int fdf = fcntl(mFd, F_GETFD); + if ((fdf == -1) || (fcntl(mFd, F_SETFD, fdf | FD_CLOEXEC) != 0)) { + logg->logMessage("fcntl failed"); + ::close(mFd); + return -1; + } #endif return true; @@ -60,7 +60,7 @@ bool Monitor::add(const int fd) { event.data.fd = fd; event.events = EPOLLIN; if (epoll_ctl(mFd, EPOLL_CTL_ADD, fd, &event) != 0) { - logg->logMessage("%s(%s:%i): epoll_ctl failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("epoll_ctl failed"); return false; } @@ -74,7 +74,7 @@ int Monitor::wait(struct epoll_event *const events, int maxevents, int timeout) if (errno == EINTR) { result = 0; } else { - logg->logMessage("%s(%s:%i): epoll_wait failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("epoll_wait failed"); } } diff --git a/tools/gator/daemon/Monitor.h b/tools/gator/daemon/Monitor.h index 7194e0e4ca50..55368fca3e0a 100644 --- a/tools/gator/daemon/Monitor.h +++ b/tools/gator/daemon/Monitor.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/NetDriver.cpp b/tools/gator/daemon/NetDriver.cpp index e75c0695779a..56b25e0cfa63 100644 --- a/tools/gator/daemon/NetDriver.cpp +++ b/tools/gator/daemon/NetDriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -108,7 +108,7 @@ bool NetDriver::doRead() { void NetDriver::start() { if (!doRead()) { - logg->logError(__FILE__, __LINE__, "Unable to read network stats"); + logg->logError("Unable to read network stats"); handleException(); } // Initialize previous values @@ -122,7 +122,7 @@ void NetDriver::start() { void NetDriver::read(Buffer *const buffer) { if (!doRead()) { - logg->logError(__FILE__, __LINE__, "Unable to read network stats"); + logg->logError("Unable to read network stats"); handleException(); } super::read(buffer); diff --git a/tools/gator/daemon/NetDriver.h b/tools/gator/daemon/NetDriver.h index 50ff850bfc6d..5f722800f75f 100644 --- a/tools/gator/daemon/NetDriver.h +++ b/tools/gator/daemon/NetDriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/OlySocket.cpp b/tools/gator/daemon/OlySocket.cpp index aa0ce4929916..078d20209226 100644 --- a/tools/gator/daemon/OlySocket.cpp +++ b/tools/gator/daemon/OlySocket.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,6 +19,7 @@ #include #include #include +#include #endif #include "Logging.h" @@ -78,7 +79,7 @@ OlyServerSocket::OlyServerSocket(int port) { #ifdef WIN32 WSADATA wsaData; if (WSAStartup(0x0202, &wsaData) != 0) { - logg->logError(__FILE__, __LINE__, "Windows socket initialization failed"); + logg->logError("Windows socket initialization failed"); handleException(); } #endif @@ -97,11 +98,11 @@ OlySocket::OlySocket(int socketID) : mSocketID(socketID) { __a > __b ? __b : __a; \ }) -OlyServerSocket::OlyServerSocket(const char* path, const size_t pathSize) { +OlyServerSocket::OlyServerSocket(const char* path, const size_t pathSize, const bool calculateAddrlen) { // Create socket mFDServer = socket_cloexec(PF_UNIX, SOCK_STREAM, 0); if (mFDServer < 0) { - logg->logError(__FILE__, __LINE__, "Error creating server socket"); + logg->logError("Error creating server socket"); handleException(); } @@ -113,19 +114,19 @@ OlyServerSocket::OlyServerSocket(const char* path, const size_t pathSize) { sockaddr.sun_path[sizeof(sockaddr.sun_path) - 1] = '\0'; // Bind the socket to an address - if (bind(mFDServer, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) { - logg->logError(__FILE__, __LINE__, "Binding of server socket failed."); + if (bind(mFDServer, (const struct sockaddr*)&sockaddr, calculateAddrlen ? offsetof(struct sockaddr_un, sun_path) + pathSize - 1 : sizeof(sockaddr)) < 0) { + logg->logError("Binding of server socket failed."); handleException(); } // Listen for connections on this socket if (listen(mFDServer, 1) < 0) { - logg->logError(__FILE__, __LINE__, "Listening of server socket failed"); + logg->logError("Listening of server socket failed"); handleException(); } } -int OlySocket::connect(const char* path, const size_t pathSize) { +int OlySocket::connect(const char* path, const size_t pathSize, const bool calculateAddrlen) { int fd = socket_cloexec(PF_UNIX, SOCK_STREAM, 0); if (fd < 0) { return -1; @@ -138,7 +139,7 @@ int OlySocket::connect(const char* path, const size_t pathSize) { memcpy(sockaddr.sun_path, path, MIN(pathSize, sizeof(sockaddr.sun_path))); sockaddr.sun_path[sizeof(sockaddr.sun_path) - 1] = '\0'; - if (::connect(fd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) { + if (::connect(fd, (const struct sockaddr*)&sockaddr, calculateAddrlen ? offsetof(struct sockaddr_un, sun_path) + pathSize - 1 : sizeof(sockaddr)) < 0) { close(fd); return -1; } @@ -174,11 +175,11 @@ void OlySocket::closeSocket() { } void OlyServerSocket::closeServerSocket() { - if (CLOSE_SOCKET(mFDServer) != 0) { - logg->logError(__FILE__, __LINE__, "Failed to close server socket."); + if (mFDServer > 0 && CLOSE_SOCKET(mFDServer) != 0) { + logg->logError("Failed to close server socket."); handleException(); } - mFDServer = 0; + mFDServer = -1; } void OlyServerSocket::createServerSocket(int port) { @@ -190,7 +191,7 @@ void OlyServerSocket::createServerSocket(int port) { family = AF_INET; mFDServer = socket_cloexec(PF_INET, SOCK_STREAM, IPPROTO_TCP); if (mFDServer < 0) { - logg->logError(__FILE__, __LINE__, "Error creating server socket"); + logg->logError("Error creating server socket"); handleException(); } } @@ -198,10 +199,16 @@ void OlyServerSocket::createServerSocket(int port) { // Enable address reuse, another solution would be to create the server socket once and only close it when the object exits int on = 1; if (setsockopt(mFDServer, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on)) != 0) { - logg->logError(__FILE__, __LINE__, "Setting server socket options failed"); + logg->logError("Setting server socket reuse option failed"); handleException(); } + // Listen on both IPv4 and IPv6 + on = 0; + if (setsockopt(mFDServer, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&on, sizeof(on)) != 0) { + logg->logMessage("setsockopt IPV6_V6ONLY failed"); + } + // Create sockaddr_in structure, ensuring non-populated fields are zero struct sockaddr_in6 sockaddr; memset((void*)&sockaddr, 0, sizeof(sockaddr)); @@ -211,13 +218,13 @@ void OlyServerSocket::createServerSocket(int port) { // Bind the socket to an address if (bind(mFDServer, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) { - logg->logError(__FILE__, __LINE__, "Binding of server socket failed.\nIs an instance already running?"); + logg->logError("Binding of server socket on port %i failed.\nIs an instance already running or is another application using that port?", port); handleException(); } // Listen for connections on this socket if (listen(mFDServer, 1) < 0) { - logg->logError(__FILE__, __LINE__, "Listening of server socket failed"); + logg->logError("Listening of server socket failed"); handleException(); } } @@ -227,14 +234,14 @@ void OlyServerSocket::createServerSocket(int port) { int OlyServerSocket::acceptConnection() { int socketID; if (mFDServer <= 0) { - logg->logError(__FILE__, __LINE__, "Attempting multiple connections on a single connection server socket or attempting to accept on a client socket"); + logg->logError("Attempting multiple connections on a single connection server socket or attempting to accept on a client socket"); handleException(); } // Accept a connection, note that this call blocks until a client connects socketID = accept_cloexec(mFDServer, NULL, NULL); if (socketID < 0) { - logg->logError(__FILE__, __LINE__, "Socket acceptance failed"); + logg->logError("Socket acceptance failed"); handleException(); } return socketID; @@ -248,7 +255,7 @@ void OlySocket::send(const char* buffer, int size) { while (size > 0) { int n = ::send(mSocketID, buffer, size, 0); if (n < 0) { - logg->logError(__FILE__, __LINE__, "Socket send error"); + logg->logError("Socket send error"); handleException(); } size -= n; @@ -264,7 +271,7 @@ int OlySocket::receive(char* buffer, int size) { int bytes = recv(mSocketID, buffer, size, 0); if (bytes < 0) { - logg->logError(__FILE__, __LINE__, "Socket receive error"); + logg->logError("Socket receive error"); handleException(); } else if (bytes == 0) { logg->logMessage("Socket disconnected"); @@ -279,7 +286,7 @@ int OlySocket::receiveNBytes(char* buffer, int size) { while (size > 0 && buffer != NULL) { bytes = recv(mSocketID, buffer, size, 0); if (bytes < 0) { - logg->logError(__FILE__, __LINE__, "Socket receive error"); + logg->logError("Socket receive error"); handleException(); } else if (bytes == 0) { logg->logMessage("Socket disconnected"); @@ -304,7 +311,7 @@ int OlySocket::receiveString(char* buffer, int size) { // Receive a single character int bytes = recv(mSocketID, &buffer[bytes_received], 1, 0); if (bytes < 0) { - logg->logError(__FILE__, __LINE__, "Socket receive error"); + logg->logError("Socket receive error"); handleException(); } else if (bytes == 0) { logg->logMessage("Socket disconnected"); diff --git a/tools/gator/daemon/OlySocket.h b/tools/gator/daemon/OlySocket.h index 6b53b01fc3ee..757a2522f926 100644 --- a/tools/gator/daemon/OlySocket.h +++ b/tools/gator/daemon/OlySocket.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -12,7 +12,7 @@ #include #ifdef WIN32 -typedef socklen_t int; +typedef int socklen_t; #else #include #endif @@ -20,7 +20,7 @@ typedef socklen_t int; class OlySocket { public: #ifndef WIN32 - static int connect(const char* path, const size_t pathSize); + static int connect(const char* path, const size_t pathSize, const bool calculateAddrlen = false); #endif OlySocket(int socketID); @@ -43,7 +43,7 @@ class OlyServerSocket { public: OlyServerSocket(int port); #ifndef WIN32 - OlyServerSocket(const char* path, const size_t pathSize); + OlyServerSocket(const char* path, const size_t pathSize, const bool calculateAddrlen = false); #endif ~OlyServerSocket(); diff --git a/tools/gator/daemon/OlyUtility.cpp b/tools/gator/daemon/OlyUtility.cpp index 45340a27d9fa..6f40168c1ee0 100644 --- a/tools/gator/daemon/OlyUtility.cpp +++ b/tools/gator/daemon/OlyUtility.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/OlyUtility.h b/tools/gator/daemon/OlyUtility.h index 1d26beb596fa..1525081a97a3 100644 --- a/tools/gator/daemon/OlyUtility.h +++ b/tools/gator/daemon/OlyUtility.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/PerfBuffer.cpp b/tools/gator/daemon/PerfBuffer.cpp index f127c996d43b..3b9da1dc6592 100644 --- a/tools/gator/daemon/PerfBuffer.cpp +++ b/tools/gator/daemon/PerfBuffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -35,14 +35,14 @@ PerfBuffer::~PerfBuffer() { bool PerfBuffer::useFd(const int cpu, const int fd) { if (mFds[cpu] < 0) { if (mBuf[cpu] != MAP_FAILED) { - logg->logMessage("%s(%s:%i): cpu %i already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__, cpu); + logg->logMessage("cpu %i already online or not correctly cleaned up", cpu); return false; } // The buffer isn't mapped yet mBuf[cpu] = mmap(NULL, gSessionData->mPageSize + BUF_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (mBuf[cpu] == MAP_FAILED) { - logg->logMessage("%s(%s:%i): mmap failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("mmap failed"); return false; } mFds[cpu] = fd; @@ -50,17 +50,17 @@ bool PerfBuffer::useFd(const int cpu, const int fd) { // Check the version struct perf_event_mmap_page *pemp = static_cast(mBuf[cpu]); if (pemp->compat_version != 0) { - logg->logMessage("%s(%s:%i): Incompatible perf_event_mmap_page compat_version", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("Incompatible perf_event_mmap_page compat_version"); return false; } } else { if (mBuf[cpu] == MAP_FAILED) { - logg->logMessage("%s(%s:%i): cpu already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("cpu already online or not correctly cleaned up"); return false; } if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, mFds[cpu]) < 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("ioctl failed"); return false; } } @@ -79,8 +79,8 @@ bool PerfBuffer::isEmpty() { if (mBuf[cpu] != MAP_FAILED) { // Take a snapshot of the positions struct perf_event_mmap_page *pemp = static_cast(mBuf[cpu]); - const __u64 head = pemp->data_head; - const __u64 tail = pemp->data_tail; + const __u64 head = ACCESS_ONCE(pemp->data_head); + const __u64 tail = ACCESS_ONCE(pemp->data_tail); if (head != tail) { return false; @@ -91,42 +91,105 @@ bool PerfBuffer::isEmpty() { return true; } -static void compressAndSend(const int cpu, const __u64 head, __u64 tail, const uint8_t *const b, Sender *const sender) { - // Pick a big size but something smaller than the chunkSize in Sender::writeData which is 100k - char buf[1<<16]; - int writePos = 0; - const int typeLength = gSessionData->mLocalCapture ? 0 : 1; +bool PerfBuffer::isFull() { + for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) { + if (mBuf[cpu] != MAP_FAILED) { + // Take a snapshot of the positions + struct perf_event_mmap_page *pemp = static_cast(mBuf[cpu]); + const __u64 head = ACCESS_ONCE(pemp->data_head); - while (head > tail) { - writePos = 0; - if (!gSessionData->mLocalCapture) { - buf[writePos++] = RESPONSE_APC_DATA; + if (head + 2000 <= (unsigned int)BUF_SIZE) { + return true; + } } - // Reserve space for size - writePos += sizeof(uint32_t); - Buffer::packInt(buf, sizeof(buf), writePos, FRAME_PERF); - Buffer::packInt(buf, sizeof(buf), writePos, cpu); + } + + return false; +} + +class PerfFrame { +public: + PerfFrame(Sender *const sender) : mSender(sender), mWritePos(-1), mCpuSizePos(-1) {} + + void add(const int cpu, const __u64 head, __u64 tail, const uint8_t *const b) { + cpuHeader(cpu); while (head > tail) { const int count = reinterpret_cast(b + (tail & BUF_MASK))->size/sizeof(uint64_t); // Can this whole message be written as Streamline assumes events are not split between frames - if (sizeof(buf) <= writePos + count*Buffer::MAXSIZE_PACK64) { - break; + if (sizeof(mBuf) <= mWritePos + count*Buffer::MAXSIZE_PACK64) { + send(); + cpuHeader(cpu); } for (int i = 0; i < count; ++i) { // Must account for message size - Buffer::packInt64(buf, sizeof(buf), writePos, *reinterpret_cast(b + (tail & BUF_MASK))); + Buffer::packInt64(mBuf, sizeof(mBuf), mWritePos, *reinterpret_cast(b + (tail & BUF_MASK))); tail += sizeof(uint64_t); } } - - // Write size - Buffer::writeLEInt(reinterpret_cast(buf + typeLength), writePos - typeLength - sizeof(uint32_t)); - sender->writeData(buf, writePos, RESPONSE_APC_DATA); } -} + + void send() { + if (mWritePos > 0) { + writeFrameSize(); + mSender->writeData(mBuf, mWritePos, RESPONSE_APC_DATA); + mWritePos = -1; + mCpuSizePos = -1; + } + } + +private: + void writeFrameSize() { + writeCpuSize(); + const int typeLength = gSessionData->mLocalCapture ? 0 : 1; + Buffer::writeLEInt(reinterpret_cast(mBuf + typeLength), mWritePos - typeLength - sizeof(uint32_t)); + } + + void frameHeader() { + if (mWritePos < 0) { + mWritePos = 0; + mCpuSizePos = -1; + if (!gSessionData->mLocalCapture) { + mBuf[mWritePos++] = RESPONSE_APC_DATA; + } + // Reserve space for frame size + mWritePos += sizeof(uint32_t); + Buffer::packInt(mBuf, sizeof(mBuf), mWritePos, FRAME_PERF); + } + } + + void writeCpuSize() { + if (mCpuSizePos >= 0) { + Buffer::writeLEInt(reinterpret_cast(mBuf + mCpuSizePos), mWritePos - mCpuSizePos - sizeof(uint32_t)); + } + } + + void cpuHeader(const int cpu) { + if (sizeof(mBuf) <= mWritePos + Buffer::MAXSIZE_PACK32 + sizeof(uint32_t)) { + send(); + } + frameHeader(); + writeCpuSize(); + Buffer::packInt(mBuf, sizeof(mBuf), mWritePos, cpu); + mCpuSizePos = mWritePos; + // Reserve space for cpu size + mWritePos += sizeof(uint32_t); + } + + // Pick a big size but something smaller than the chunkSize in Sender::writeData which is 100k + char mBuf[1<<16]; + Sender *const mSender; + int mWritePos; + int mCpuSizePos; + + // Intentionally unimplemented + PerfFrame(const PerfFrame &); + PerfFrame& operator=(const PerfFrame &); +}; bool PerfBuffer::send(Sender *const sender) { + PerfFrame frame(sender); + for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) { if (mBuf[cpu] == MAP_FAILED) { continue; @@ -134,12 +197,12 @@ bool PerfBuffer::send(Sender *const sender) { // Take a snapshot of the positions struct perf_event_mmap_page *pemp = static_cast(mBuf[cpu]); - const __u64 head = pemp->data_head; - const __u64 tail = pemp->data_tail; + const __u64 head = ACCESS_ONCE(pemp->data_head); + const __u64 tail = ACCESS_ONCE(pemp->data_tail); if (head > tail) { const uint8_t *const b = static_cast(mBuf[cpu]) + gSessionData->mPageSize; - compressAndSend(cpu, head, tail, b, sender); + frame.add(cpu, head, tail, b); // Update tail with the data read pemp->data_tail = head; @@ -150,9 +213,11 @@ bool PerfBuffer::send(Sender *const sender) { mBuf[cpu] = MAP_FAILED; mDiscard[cpu] = false; mFds[cpu] = -1; - logg->logMessage("%s(%s:%i): Unmaped cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu); + logg->logMessage("Unmaped cpu %i", cpu); } } + frame.send(); + return true; } diff --git a/tools/gator/daemon/PerfBuffer.h b/tools/gator/daemon/PerfBuffer.h index 25a10625a9e8..a2d0e9b44725 100644 --- a/tools/gator/daemon/PerfBuffer.h +++ b/tools/gator/daemon/PerfBuffer.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -24,6 +24,7 @@ class PerfBuffer { bool useFd(const int cpu, const int fd); void discard(const int cpu); bool isEmpty(); + bool isFull(); bool send(Sender *const sender); private: diff --git a/tools/gator/daemon/PerfDriver.cpp b/tools/gator/daemon/PerfDriver.cpp index ee90284cee41..796ee7564c66 100644 --- a/tools/gator/daemon/PerfDriver.cpp +++ b/tools/gator/daemon/PerfDriver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -40,24 +40,25 @@ struct gator_cpu { // From gator_main.c static const struct gator_cpu gator_cpus[] = { - { 0xb36, "ARM1136", "ARM_ARM11", 3 }, - { 0xb56, "ARM1156", "ARM_ARM11", 3 }, - { 0xb76, "ARM1176", "ARM_ARM11", 3 }, - { 0xb02, "ARM11MPCore", "ARM_ARM11MPCore", 3 }, - { 0xc05, "Cortex-A5", "ARMv7_Cortex_A5", 2 }, - { 0xc07, "Cortex-A7", "ARMv7_Cortex_A7", 4 }, - { 0xc08, "Cortex-A8", "ARMv7_Cortex_A8", 4 }, - { 0xc09, "Cortex-A9", "ARMv7_Cortex_A9", 6 }, - { 0xc0f, "Cortex-A15", "ARMv7_Cortex_A15", 6 }, - { 0xc0e, "Cortex-A17", "ARMv7_Cortex_A17", 6 }, - { 0x00f, "Scorpion", "Scorpion", 4 }, - { 0x02d, "ScorpionMP", "ScorpionMP", 4 }, - { 0x049, "KraitSIM", "Krait", 4 }, - { 0x04d, "Krait", "Krait", 4 }, - { 0x06f, "Krait S4 Pro", "Krait", 4 }, - { 0xd03, "Cortex-A53", "ARM_Cortex-A53", 6 }, - { 0xd07, "Cortex-A57", "ARM_Cortex-A57", 6 }, - { 0xd0f, "AArch64", "ARM_AArch64", 6 }, + { 0x41b36, "ARM1136", "ARM_ARM11", 3 }, + { 0x41b56, "ARM1156", "ARM_ARM11", 3 }, + { 0x41b76, "ARM1176", "ARM_ARM11", 3 }, + { 0x41b02, "ARM11MPCore", "ARM_ARM11MPCore", 3 }, + { 0x41c05, "Cortex-A5", "ARMv7_Cortex_A5", 2 }, + { 0x41c07, "Cortex-A7", "ARMv7_Cortex_A7", 4 }, + { 0x41c08, "Cortex-A8", "ARMv7_Cortex_A8", 4 }, + { 0x41c09, "Cortex-A9", "ARMv7_Cortex_A9", 6 }, + { 0x41c0f, "Cortex-A15", "ARMv7_Cortex_A15", 6 }, + { 0x41c0d, "Cortex-A17", "ARMv7_Cortex_A17", 6 }, + { 0x41c0e, "Cortex-A17", "ARMv7_Cortex_A17", 6 }, + { 0x5100f, "Scorpion", "Scorpion", 4 }, + { 0x5102d, "ScorpionMP", "ScorpionMP", 4 }, + { 0x51049, "KraitSIM", "Krait", 4 }, + { 0x5104d, "Krait", "Krait", 4 }, + { 0x5106f, "Krait S4 Pro", "Krait", 4 }, + { 0x41d03, "Cortex-A53", "ARM_Cortex-A53", 6 }, + { 0x41d07, "Cortex-A57", "ARM_Cortex-A57", 6 }, + { 0x41d08, "Cortex-A72", "ARM_Cortex-A72", 6 }, }; static const char OLD_PMU_PREFIX[] = "ARMv7 Cortex-"; @@ -69,17 +70,19 @@ struct uncore_counter { // gatorfs event name const char *const gatorName; const int count; + const bool hasCyclesCounter; }; static const struct uncore_counter uncore_counters[] = { - { "CCI_400", "CCI_400", 4 }, - { "CCI_400-r1", "CCI_400-r1", 4 }, - { "ccn", "ARM_CCN_5XX", 8 }, + { "CCI_400", "CCI_400", 4, true }, + { "CCI_400-r1", "CCI_400-r1", 4, true }, + { "CCI_500", "CCI_500", 8, false }, + { "ccn", "ARM_CCN_5XX", 8, true }, }; class PerfCounter : public DriverCounter { public: - PerfCounter(DriverCounter *next, const char *name, uint32_t type, uint64_t config, bool perCpu) : DriverCounter(next, name), mType(type), mCount(0), mConfig(config), mPerCpu(perCpu) {} + PerfCounter(DriverCounter *next, const char *name, uint32_t type, uint64_t config, uint64_t sampleType, uint64_t flags) : DriverCounter(next, name), mType(type), mConfig(config), mSampleType(sampleType), mFlags(flags), mCount(0) {} ~PerfCounter() { } @@ -89,13 +92,41 @@ public: void setCount(const int count) { mCount = count; } uint64_t getConfig() const { return mConfig; } void setConfig(const uint64_t config) { mConfig = config; } - bool isPerCpu() const { return mPerCpu; } + uint64_t getSampleType() const { return mSampleType; } + uint64_t getFlags() const { return mFlags; } + virtual void read(Buffer *const, const int) {} private: const uint32_t mType; - int mCount; uint64_t mConfig; - bool mPerCpu; + const uint64_t mSampleType; + const uint64_t mFlags; + int mCount; + + // Intentionally undefined + PerfCounter(const PerfCounter &); + PerfCounter &operator=(const PerfCounter &); +}; + +class CPUFreqDriver : public PerfCounter { +public: + CPUFreqDriver(DriverCounter *next, uint64_t id) : PerfCounter(next, "Linux_power_cpu_freq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_RAW, PERF_GROUP_LEADER | PERF_GROUP_PER_CPU) {} + + void read(Buffer *const buffer, const int cpu) { + char buf[64]; + + snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%i/cpufreq/cpuinfo_cur_freq", cpu); + int64_t freq; + if (DriverSource::readInt64Driver(buf, &freq) != 0) { + freq = 0; + } + buffer->perfCounter(cpu, getKey(), 1000*freq); + } + +private: + // Intentionally undefined + CPUFreqDriver(const CPUFreqDriver &); + CPUFreqDriver &operator=(const CPUFreqDriver &); }; PerfDriver::PerfDriver() : mIsSetup(false), mLegacySupport(false) { @@ -108,27 +139,32 @@ void PerfDriver::addCpuCounters(const char *const counterName, const int type, c int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1; char *name = new char[len]; snprintf(name, len, "%s_ccnt", counterName); - setCounters(new PerfCounter(getCounters(), name, type, -1, true)); + setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU)); for (int j = 0; j < numCounters; ++j) { len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1; name = new char[len]; snprintf(name, len, "%s_cnt%d", counterName, j); - setCounters(new PerfCounter(getCounters(), name, type, -1, true)); + setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU)); } } -void PerfDriver::addUncoreCounters(const char *const counterName, const int type, const int numCounters) { - int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1; - char *name = new char[len]; - snprintf(name, len, "%s_ccnt", counterName); - setCounters(new PerfCounter(getCounters(), name, type, -1, false)); +void PerfDriver::addUncoreCounters(const char *const counterName, const int type, const int numCounters, const bool hasCyclesCounter) { + int len; + char *name; + + if (hasCyclesCounter) { + len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1; + name = new char[len]; + snprintf(name, len, "%s_ccnt", counterName); + setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, 0)); + } for (int j = 0; j < numCounters; ++j) { len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1; name = new char[len]; snprintf(name, len, "%s_cnt%d", counterName, j); - setCounters(new PerfCounter(getCounters(), name, type, -1, false)); + setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, 0)); } } @@ -136,18 +172,18 @@ bool PerfDriver::setup() { // Check the kernel version int release[3]; if (!getLinuxVersion(release)) { - logg->logMessage("%s(%s:%i): getLinuxVersion failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("getLinuxVersion failed"); return false; } if (KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 4, 0)) { - logg->logMessage("%s(%s:%i): Unsupported kernel version", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("Unsupported kernel version"); return false; } mLegacySupport = KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 12, 0); if (access(EVENTS_PATH, R_OK) != 0) { - logg->logMessage("%s(%s:%i): " EVENTS_PATH " does not exist, is CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER enabled?", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage(EVENTS_PATH " does not exist, is CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER enabled?"); return false; } @@ -155,7 +191,7 @@ bool PerfDriver::setup() { bool foundCpu = false; DIR *dir = opendir(PERF_DEVICES); if (dir == NULL) { - logg->logMessage("%s(%s:%i): opendif failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("opendir failed"); return false; } @@ -198,7 +234,7 @@ bool PerfDriver::setup() { } logg->logMessage("Adding uncore counters for %s", uncore_counters[i].gatorName); - addUncoreCounters(uncore_counters[i].gatorName, type, uncore_counters[i].count); + addUncoreCounters(uncore_counters[i].gatorName, type, uncore_counters[i].count, uncore_counters[i].hasCyclesCounter); } } closedir(dir); @@ -216,13 +252,11 @@ bool PerfDriver::setup() { } } - /* if (!foundCpu) { - // If all else fails, use the perf architected counters - // 9 because that's how many are in events-Perf-Hardware.xml - assume they can all be enabled at once - addCpuCounters("Perf_Hardware", PERF_TYPE_HARDWARE, 9); + // If all else fails, use the ARM architected counters + logg->logMessage("Using Other cpu"); + addCpuCounters("Other", PERF_TYPE_RAW, 6); } - */ // Add supported software counters long long id; @@ -230,20 +264,25 @@ bool PerfDriver::setup() { id = getTracepointId("irq/softirq_exit", &printb); if (id >= 0) { - setCounters(new PerfCounter(getCounters(), "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id, true)); + setCounters(new PerfCounter(getCounters(), "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU)); } id = getTracepointId("irq/irq_handler_exit", &printb); if (id >= 0) { - setCounters(new PerfCounter(getCounters(), "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id, true)); + setCounters(new PerfCounter(getCounters(), "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU)); } id = getTracepointId(SCHED_SWITCH, &printb); if (id >= 0) { - setCounters(new PerfCounter(getCounters(), "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id, true)); + setCounters(new PerfCounter(getCounters(), "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU)); } - setCounters(new PerfCounter(getCounters(), "Linux_cpu_wait_contention", TYPE_DERIVED, -1, false)); + id = getTracepointId(CPU_FREQUENCY, &printb); + if (id >= 0) { + setCounters(new CPUFreqDriver(getCounters(), id)); + } + + setCounters(new PerfCounter(getCounters(), "Linux_cpu_wait_contention", TYPE_DERIVED, -1, 0, 0)); //Linux_cpu_wait_io @@ -254,7 +293,7 @@ bool PerfDriver::setup() { bool PerfDriver::summary(Buffer *const buffer) { struct utsname utsname; if (uname(&utsname) != 0) { - logg->logMessage("%s(%s:%i): uname failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("uname failed"); return false; } @@ -263,25 +302,26 @@ bool PerfDriver::summary(Buffer *const buffer) { struct timespec ts; if (clock_gettime(CLOCK_REALTIME, &ts) != 0) { - logg->logMessage("%s(%s:%i): clock_gettime failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("clock_gettime failed"); return false; } const int64_t timestamp = (int64_t)ts.tv_sec * NS_PER_S + ts.tv_nsec; const uint64_t monotonicStarted = getTime(); gSessionData->mMonotonicStarted = monotonicStarted; + const uint64_t currTime = 0;//getTime() - gSessionData->mMonotonicStarted; - buffer->summary(monotonicStarted, timestamp, monotonicStarted, monotonicStarted, buf); + buffer->summary(currTime, timestamp, monotonicStarted, monotonicStarted, buf); for (int i = 0; i < gSessionData->mCores; ++i) { - coreName(monotonicStarted, buffer, i); + coreName(currTime, buffer, i); } - buffer->commit(monotonicStarted); + buffer->commit(currTime); return true; } -void PerfDriver::coreName(const uint32_t startTime, Buffer *const buffer, const int cpu) { +void PerfDriver::coreName(const uint64_t currTime, Buffer *const buffer, const int cpu) { // Don't send information on a cpu we know nothing about if (gSessionData->mCpuIds[cpu] == -1) { return; @@ -293,8 +333,8 @@ void PerfDriver::coreName(const uint32_t startTime, Buffer *const buffer, const break; } } - if (gator_cpus[j].cpuid == gSessionData->mCpuIds[cpu]) { - buffer->coreName(startTime, cpu, gSessionData->mCpuIds[cpu], gator_cpus[j].core_name); + if (j < ARRAY_LENGTH(gator_cpus) && gator_cpus[j].cpuid == gSessionData->mCpuIds[cpu]) { + buffer->coreName(currTime, cpu, gSessionData->mCpuIds[cpu], gator_cpus[j].core_name); } else { char buf[32]; if (gSessionData->mCpuIds[cpu] == -1) { @@ -302,7 +342,7 @@ void PerfDriver::coreName(const uint32_t startTime, Buffer *const buffer, const } else { snprintf(buf, sizeof(buf), "Unknown (0x%.3x)", gSessionData->mCpuIds[cpu]); } - buffer->coreName(startTime, cpu, gSessionData->mCpuIds[cpu], buf); + buffer->coreName(currTime, cpu, gSessionData->mCpuIds[cpu], buf); } } @@ -325,8 +365,17 @@ void PerfDriver::setupCounter(Counter &counter) { bool PerfDriver::enable(const uint64_t currTime, PerfGroup *const group, Buffer *const buffer) const { for (PerfCounter *counter = static_cast(getCounters()); counter != NULL; counter = static_cast(counter->getNext())) { if (counter->isEnabled() && (counter->getType() != TYPE_DERIVED)) { - if (!group->add(currTime, buffer, counter->getKey(), counter->getType(), counter->getConfig(), counter->getCount(), counter->getCount() > 0 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP : 0, counter->isPerCpu() ? PERF_GROUP_PER_CPU : 0)) { - logg->logMessage("%s(%s:%i): PerfGroup::add failed", __FUNCTION__, __FILE__, __LINE__); + int count = counter->getCount(); + uint64_t sampleType = counter->getSampleType(); + if (sampleType & PERF_SAMPLE_RAW) { + // If raw is enabled, every sample is needed + count = 1; + } + if (!group->add(currTime, buffer, counter->getKey(), counter->getType(), counter->getConfig(), count, + // use getCount instead of count as EBS counters need TID and IP but RAW tracepoints don't + (counter->getCount() > 0 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP : 0) | sampleType, + counter->getFlags())) { + logg->logMessage("PerfGroup::add failed"); return false; } } @@ -335,15 +384,24 @@ bool PerfDriver::enable(const uint64_t currTime, PerfGroup *const group, Buffer return true; } +void PerfDriver::read(Buffer *const buffer, const int cpu) { + for (PerfCounter *counter = static_cast(getCounters()); counter != NULL; counter = static_cast(counter->getNext())) { + if (!counter->isEnabled()) { + continue; + } + counter->read(buffer, cpu); + } +} + long long PerfDriver::getTracepointId(const char *const name, DynBuf *const printb) { if (!printb->printf(EVENTS_PATH "/%s/id", name)) { - logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::printf failed"); return -1; } int64_t result; if (DriverSource::readInt64Driver(printb->getBuf(), &result) != 0) { - logg->logMessage("%s(%s:%i): DriverSource::readInt64Driver failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DriverSource::readInt64Driver failed"); return -1; } diff --git a/tools/gator/daemon/PerfDriver.h b/tools/gator/daemon/PerfDriver.h index 846203a9e18b..95b42bfa30c0 100644 --- a/tools/gator/daemon/PerfDriver.h +++ b/tools/gator/daemon/PerfDriver.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,6 +19,7 @@ #define SCHED_SWITCH "sched/sched_switch" #define CPU_IDLE "power/cpu_idle" +#define CPU_FREQUENCY "power/cpu_frequency" class Buffer; class DynBuf; @@ -33,18 +34,19 @@ class PerfDriver : public SimpleDriver { bool setup(); bool summary(Buffer *const buffer); - void coreName(const uint32_t startTime, Buffer *const buffer, const int cpu); + void coreName(const uint64_t currTime, Buffer *const buffer, const int cpu); bool isSetup() const { return mIsSetup; } void setupCounter(Counter &counter); bool enable(const uint64_t currTime, PerfGroup *const group, Buffer *const buffer) const; + void read(Buffer *const buffer, const int cpu); static long long getTracepointId(const char *const name, DynBuf *const printb); private: void addCpuCounters(const char *const counterName, const int type, const int numCounters); - void addUncoreCounters(const char *const counterName, const int type, const int numCounters); + void addUncoreCounters(const char *const counterName, const int type, const int numCounters, const bool hasCyclesCounter); bool mIsSetup; bool mLegacySupport; diff --git a/tools/gator/daemon/PerfGroup.cpp b/tools/gator/daemon/PerfGroup.cpp index 4fd960a9058c..cfc62e4cc77e 100644 --- a/tools/gator/daemon/PerfGroup.cpp +++ b/tools/gator/daemon/PerfGroup.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,17 +16,21 @@ #include #include "Buffer.h" +#include "DynBuf.h" #include "Logging.h" #include "Monitor.h" #include "PerfBuffer.h" #include "SessionData.h" +static const int schedSwitchKey = getEventKey(); +static const int clockKey = getEventKey(); + #define DEFAULT_PEA_ARGS(pea, additionalSampleType) \ pea.size = sizeof(pea); \ /* Emit time, read_format below, group leader id, and raw tracepoint info */ \ pea.sample_type = (gSessionData->perf.getLegacySupport() \ - ? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_ID \ - : PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_IDENTIFIER ) | additionalSampleType; \ + ? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_ID \ + : PERF_SAMPLE_IDENTIFIER ) | PERF_SAMPLE_TIME | additionalSampleType; \ /* Emit emit value in group format */ \ pea.read_format = PERF_FORMAT_ID | PERF_FORMAT_GROUP; \ /* start out disabled */ \ @@ -49,11 +53,12 @@ static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t p return fd; } -PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb) { +PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb), mSchedSwitchId(-1) { memset(&mAttrs, 0, sizeof(mAttrs)); - memset(&mPerCpu, 0, sizeof(mPerCpu)); + memset(&mFlags, 0, sizeof(mFlags)); memset(&mKeys, -1, sizeof(mKeys)); memset(&mFds, -1, sizeof(mFds)); + memset(&mLeaders, -1, sizeof(mLeaders)); } PerfGroup::~PerfGroup() { @@ -64,7 +69,7 @@ PerfGroup::~PerfGroup() { } } -bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) { +int PerfGroup::doAdd(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) { int i; for (i = 0; i < ARRAY_LENGTH(mKeys); ++i) { if (mKeys[i] < 0) { @@ -73,8 +78,8 @@ bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key } if (i >= ARRAY_LENGTH(mKeys)) { - logg->logMessage("%s(%s:%i): Too many counters", __FUNCTION__, __FILE__, __LINE__); - return false; + logg->logMessage("Too many counters"); + return -1; } DEFAULT_PEA_ARGS(mAttrs[i], sampleType); @@ -82,121 +87,230 @@ bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key mAttrs[i].config = config; mAttrs[i].sample_period = sample; // always be on the CPU but only a group leader can be pinned - mAttrs[i].pinned = (i == 0 ? 1 : 0); + mAttrs[i].pinned = (flags & PERF_GROUP_LEADER ? 1 : 0); mAttrs[i].mmap = (flags & PERF_GROUP_MMAP ? 1 : 0); mAttrs[i].comm = (flags & PERF_GROUP_COMM ? 1 : 0); mAttrs[i].freq = (flags & PERF_GROUP_FREQ ? 1 : 0); mAttrs[i].task = (flags & PERF_GROUP_TASK ? 1 : 0); mAttrs[i].sample_id_all = (flags & PERF_GROUP_SAMPLE_ID_ALL ? 1 : 0); - mPerCpu[i] = (flags & PERF_GROUP_PER_CPU); + mFlags[i] = flags; mKeys[i] = key; - buffer->pea(currTime, &mAttrs[i], key); + buffer->marshalPea(currTime, &mAttrs[i], key); + + return i; +} + +/* Counters from different hardware PMUs need to be in different + * groups. Software counters can be in the same group as the CPU and + * should be marked as PERF_GROUP_CPU. The big and little clusters can + * be in the same group as only one or the other will be available on + * a given CPU. + */ +int PerfGroup::getEffectiveType(const int type, const int flags) { + const int effectiveType = flags & PERF_GROUP_CPU ? (int)PERF_TYPE_HARDWARE : type; + if (effectiveType >= ARRAY_LENGTH(mLeaders)) { + logg->logError("perf type is too large, please increase the size of PerfGroup::mLeaders"); + handleException(); + } + return effectiveType; +} + +bool PerfGroup::createCpuGroup(const uint64_t currTime, Buffer *const buffer) { + if (mSchedSwitchId < 0) { + DynBuf b; + mSchedSwitchId = PerfDriver::getTracepointId(SCHED_SWITCH, &b); + if (mSchedSwitchId < 0) { + logg->logMessage("Unable to read sched_switch id"); + return false; + } + } + + mLeaders[PERF_TYPE_HARDWARE] = doAdd(currTime, buffer, schedSwitchKey, PERF_TYPE_TRACEPOINT, mSchedSwitchId, 1, PERF_SAMPLE_READ | PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL | PERF_GROUP_PER_CPU | PERF_GROUP_LEADER | PERF_GROUP_CPU); + if (mLeaders[PERF_TYPE_HARDWARE] < 0) { + return false; + } + + if (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && doAdd(currTime, buffer, clockKey, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU) < 0) { + return false; + } return true; } +bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) { + const int effectiveType = getEffectiveType(type, flags); + + // Does a group exist for this already? + if (!(flags & PERF_GROUP_LEADER) && mLeaders[effectiveType] < 0) { + // Create it + if (effectiveType == PERF_TYPE_HARDWARE) { + if (!createCpuGroup(currTime, buffer)) { + return false; + } + } else { + // Non-CPU PMUs are sampled every 100ms for Sample Rate: None and EBS, otherwise they would never be sampled + const uint64_t timeout = gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS ? 1000000000UL / gSessionData->mSampleRate : 100000000UL; + // PERF_SAMPLE_TID | PERF_SAMPLE_IP aren't helpful on non-CPU or 'uncore' PMUs - which CPU is the right one to sample? But removing it causes problems, remove it later. + mLeaders[effectiveType] = doAdd(currTime, buffer, clockKey, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, timeout, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_LEADER); + if (mLeaders[effectiveType] < 0) { + return false; + } + } + } + + if (!(flags & PERF_GROUP_LEADER) && effectiveType != PERF_TYPE_HARDWARE && (flags & PERF_GROUP_PER_CPU)) { + logg->logError("'uncore' counters are not permitted to be per-cpu"); + handleException(); + } + + return doAdd(currTime, buffer, key, type, config, sample, sampleType, flags) >= 0; +} + int PerfGroup::prepareCPU(const int cpu, Monitor *const monitor) { - logg->logMessage("%s(%s:%i): Onlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu); + logg->logMessage("Onlining cpu %i", cpu); for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { if (mKeys[i] < 0) { continue; } - if ((cpu != 0) && !mPerCpu[i]) { + if ((cpu != 0) && !(mFlags[i] & PERF_GROUP_PER_CPU)) { continue; } - const int offset = i * gSessionData->mCores; - if (mFds[cpu + offset] >= 0) { - logg->logMessage("%s(%s:%i): cpu already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__); + const int offset = i * gSessionData->mCores + cpu; + if (mFds[offset] >= 0) { + logg->logMessage("cpu already online or not correctly cleaned up"); return PG_FAILURE; } - logg->logMessage("%s(%s:%i): perf_event_open cpu: %i type: %lli config: %lli sample: %lli sample_type: 0x%llx pinned: %i mmap: %i comm: %i freq: %i task: %i sample_id_all: %i", __FUNCTION__, __FILE__, __LINE__, cpu, (long long)mAttrs[i].type, (long long)mAttrs[i].config, (long long)mAttrs[i].sample_period, (long long)mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all); - mFds[cpu + offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, i == 0 ? -1 : mFds[cpu], i == 0 ? 0 : PERF_FLAG_FD_OUTPUT); - if (mFds[cpu + offset] < 0) { - logg->logMessage("%s(%s:%i): failed %s", __FUNCTION__, __FILE__, __LINE__, strerror(errno)); + logg->logMessage("perf_event_open cpu: %i type: %i config: %lli sample: %lli sample_type: 0x%llx pinned: %lli mmap: %lli comm: %lli freq: %lli task: %lli sample_id_all: %lli", cpu, mAttrs[i].type, mAttrs[i].config, mAttrs[i].sample_period, mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all); + mFds[offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, mAttrs[i].pinned ? -1 : mFds[mLeaders[getEffectiveType(mAttrs[i].type, mFlags[i])] * gSessionData->mCores + cpu], mAttrs[i].pinned ? 0 : PERF_FLAG_FD_OUTPUT); + if (mFds[offset] < 0) { + logg->logMessage("failed %s", strerror(errno)); if (errno == ENODEV) { + // The core is offline return PG_CPU_OFFLINE; } +#ifndef USE_STRICTER_CHECK continue; +#else + if (errno == ENOENT) { + // This event doesn't apply to this CPU but should apply to a different one, ex bL + continue; + } + logg->logMessage("perf_event_open failed"); + return PG_FAILURE; +#endif } - if (!mPb->useFd(cpu, mFds[cpu + offset])) { - logg->logMessage("%s(%s:%i): PerfBuffer::useFd failed", __FUNCTION__, __FILE__, __LINE__); + if (!mPb->useFd(cpu, mFds[offset])) { + logg->logMessage("PerfBuffer::useFd failed"); return PG_FAILURE; } - if (!monitor->add(mFds[cpu + offset])) { - logg->logMessage("%s(%s:%i): Monitor::add failed", __FUNCTION__, __FILE__, __LINE__); - return PG_FAILURE; + if (!monitor->add(mFds[offset])) { + logg->logMessage("Monitor::add failed"); + return PG_FAILURE; } } return PG_SUCCESS; } -int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool start, Buffer *const buffer) { - __u64 ids[ARRAY_LENGTH(mKeys)]; - int coreKeys[ARRAY_LENGTH(mKeys)]; - int idCount = 0; - - for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { - const int fd = mFds[cpu + i * gSessionData->mCores]; - if (fd < 0) { - continue; - } - - coreKeys[idCount] = mKeys[i]; - if (!gSessionData->perf.getLegacySupport() && ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0 && - // Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works - ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); - return 0; - } - ++idCount; +static bool readAndSend(const uint64_t currTime, Buffer *const buffer, const int fd, const int keyCount, const int *const keys) { + char buf[1024]; + ssize_t bytes = read(fd, buf, sizeof(buf)); + if (bytes < 0) { + logg->logMessage("read failed"); + return false; } + buffer->marshalKeysOld(currTime, keyCount, keys, bytes, buf); + + return true; +} + +int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool enable, Buffer *const buffer) { + bool addedEvents = false; if (!gSessionData->perf.getLegacySupport()) { - buffer->keys(currTime, idCount, ids, coreKeys); - } else { - char buf[1024]; - ssize_t bytes = read(mFds[cpu], buf, sizeof(buf)); - if (bytes < 0) { - logg->logMessage("read failed"); - return 0; + int idCount = 0; + int coreKeys[ARRAY_LENGTH(mKeys)]; + __u64 ids[ARRAY_LENGTH(mKeys)]; + + for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { + const int fd = mFds[cpu + i * gSessionData->mCores]; + if (fd < 0) { + continue; + } + + coreKeys[idCount] = mKeys[i]; + if (ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0 && + // Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works + ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) { + logg->logMessage("ioctl failed"); + return 0; + } + ++idCount; + addedEvents = true; + } + + buffer->marshalKeys(currTime, idCount, ids, coreKeys); + } else { + int idCounts[ARRAY_LENGTH(mLeaders)] = { 0 }; + int coreKeys[ARRAY_LENGTH(mLeaders)][ARRAY_LENGTH(mKeys)]; + for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { + const int fd = mFds[cpu + i * gSessionData->mCores]; + if (fd < 0) { + continue; + } + + const int effectiveType = getEffectiveType(mAttrs[i].type, mFlags[i]); + if (mAttrs[i].pinned && mLeaders[effectiveType] != i) { + if (!readAndSend(currTime, buffer, fd, 1, mKeys + i)) { + return 0; + } + } else { + coreKeys[effectiveType][idCounts[effectiveType]] = mKeys[i]; + ++idCounts[effectiveType]; + addedEvents = true; + } + } + + for (int i = 0; i < ARRAY_LENGTH(mLeaders); ++i) { + if (idCounts[i] > 0 && !readAndSend(currTime, buffer, mFds[mLeaders[i] * gSessionData->mCores + cpu], idCounts[i], coreKeys[i])) { + return 0; + } } - buffer->keysOld(currTime, idCount, coreKeys, bytes, buf); } - if (start) { + if (enable) { for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { int offset = i * gSessionData->mCores + cpu; if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_ENABLE, 0) < 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("ioctl failed"); return 0; } } } - if (idCount == 0) { - logg->logMessage("%s(%s:%i): no events came online", __FUNCTION__, __FILE__, __LINE__); + if (!addedEvents) { + logg->logMessage("no events came online"); } - return idCount; + return 1; } bool PerfGroup::offlineCPU(const int cpu) { - logg->logMessage("%s(%s:%i): Offlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu); + logg->logMessage("Offlining cpu %i", cpu); - for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { + for (int i = ARRAY_LENGTH(mKeys) - 1; i >= 0; --i) { int offset = i * gSessionData->mCores + cpu; if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_DISABLE, 0) < 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("ioctl failed"); return false; } } @@ -204,7 +318,7 @@ bool PerfGroup::offlineCPU(const int cpu) { // Mark the buffer so that it will be released next time it's read mPb->discard(cpu); - for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { + for (int i = ARRAY_LENGTH(mKeys) - 1; i >= 0; --i) { if (mKeys[i] < 0) { continue; } @@ -222,7 +336,7 @@ bool PerfGroup::offlineCPU(const int cpu) { bool PerfGroup::start() { for (int pos = 0; pos < ARRAY_LENGTH(mFds); ++pos) { if (mFds[pos] >= 0 && ioctl(mFds[pos], PERF_EVENT_IOC_ENABLE, 0) < 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("ioctl failed"); goto fail; } } diff --git a/tools/gator/daemon/PerfGroup.h b/tools/gator/daemon/PerfGroup.h index f7b3d725bac7..f30d3a6a9c68 100644 --- a/tools/gator/daemon/PerfGroup.h +++ b/tools/gator/daemon/PerfGroup.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -27,6 +27,8 @@ enum PerfGroupFlags { PERF_GROUP_TASK = 1 << 3, PERF_GROUP_SAMPLE_ID_ALL = 1 << 4, PERF_GROUP_PER_CPU = 1 << 5, + PERF_GROUP_LEADER = 1 << 6, + PERF_GROUP_CPU = 1 << 7, }; enum { @@ -40,22 +42,29 @@ class PerfGroup { PerfGroup(PerfBuffer *const pb); ~PerfGroup(); + bool createCpuGroup(const uint64_t currTime, Buffer *const buffer); bool add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags); // Safe to call concurrently int prepareCPU(const int cpu, Monitor *const monitor); // Not safe to call concurrently. Returns the number of events enabled - int onlineCPU(const uint64_t currTime, const int cpu, const bool start, Buffer *const buffer); + int onlineCPU(const uint64_t currTime, const int cpu, const bool enable, Buffer *const buffer); bool offlineCPU(int cpu); bool start(); void stop(); private: - // +1 for the group leader - struct perf_event_attr mAttrs[MAX_PERFORMANCE_COUNTERS + 1]; - bool mPerCpu[MAX_PERFORMANCE_COUNTERS + 1]; - int mKeys[MAX_PERFORMANCE_COUNTERS + 1]; - int mFds[NR_CPUS * (MAX_PERFORMANCE_COUNTERS + 1)]; + int getEffectiveType(const int type, const int flags); + int doAdd(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags); + + // 2* to be conservative for sched_switch, cpu_idle, hrtimer and non-CPU groups + struct perf_event_attr mAttrs[2*MAX_PERFORMANCE_COUNTERS]; PerfBuffer *const mPb; + int mFlags[2*MAX_PERFORMANCE_COUNTERS]; + int mKeys[2*MAX_PERFORMANCE_COUNTERS]; + int mFds[NR_CPUS * (2*MAX_PERFORMANCE_COUNTERS)]; + // Offset in mAttrs, mFlags and mKeys of the group leaders for each perf type + int mLeaders[16]; + int mSchedSwitchId; // Intentionally undefined PerfGroup(const PerfGroup &); diff --git a/tools/gator/daemon/PerfSource.cpp b/tools/gator/daemon/PerfSource.cpp index 193b7789a290..2c45de8e06e9 100644 --- a/tools/gator/daemon/PerfSource.cpp +++ b/tools/gator/daemon/PerfSource.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -31,16 +31,18 @@ extern Child *child; +static const int cpuIdleKey = getEventKey(); + static bool sendTracepointFormat(const uint64_t currTime, Buffer *const buffer, const char *const name, DynBuf *const printb, DynBuf *const b) { if (!printb->printf(EVENTS_PATH "/%s/format", name)) { - logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::printf failed"); return false; } if (!b->read(printb->getBuf())) { - logg->logMessage("%s(%s:%i): DynBuf::read failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::read failed"); return false; } - buffer->format(currTime, b->getLength(), b->getBuf()); + buffer->marshalFormat(currTime, b->getLength(), b->getBuf()); return true; } @@ -58,18 +60,18 @@ static void *syncFunc(void *arg) { sigset_t set; if (sigfillset(&set) != 0) { - logg->logError(__FILE__, __LINE__, "sigfillset failed"); + logg->logError("sigfillset failed"); handleException(); } if ((err = pthread_sigmask(SIG_SETMASK, &set, NULL)) != 0) { - logg->logError(__FILE__, __LINE__, "pthread_sigmask failed"); + logg->logError("pthread_sigmask failed"); handleException(); } } for (;;) { if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) { - logg->logError(__FILE__, __LINE__, "clock_gettime failed"); + logg->logError("clock_gettime failed"); handleException(); } const int64_t currTime = ts.tv_sec * NS_PER_S + ts.tv_nsec; @@ -95,7 +97,7 @@ static void *syncFunc(void *arg) static long getMaxCoreNum() { DIR *dir = opendir("/sys/devices/system/cpu"); if (dir == NULL) { - logg->logError(__FILE__, __LINE__, "Unable to determine the number of cores on the target, opendir failed"); + logg->logError("Unable to determine the number of cores on the target, opendir failed"); handleException(); } @@ -114,22 +116,22 @@ static long getMaxCoreNum() { closedir(dir); if (maxCoreNum < 1) { - logg->logError(__FILE__, __LINE__, "Unable to determine the number of cores on the target, no cpu# directories found"); + logg->logError("Unable to determine the number of cores on the target, no cpu# directories found"); handleException(); } if (maxCoreNum >= NR_CPUS) { - logg->logError(__FILE__, __LINE__, "Too many cores on the target, please increase NR_CPUS in Config.h"); + logg->logError("Too many cores on the target, please increase NR_CPUS in Config.h"); handleException(); } return maxCoreNum; } -PerfSource::PerfSource(sem_t *senderSem, sem_t *startProfile) : mSummary(0, FRAME_SUMMARY, 1024, senderSem), mBuffer(0, FRAME_PERF_ATTRS, 1024*1024, senderSem), mCountersBuf(), mCountersGroup(&mCountersBuf), mIdleGroup(&mCountersBuf), mMonitor(), mUEvent(), mSenderSem(senderSem), mStartProfile(startProfile), mInterruptFd(-1), mIsDone(false) { +PerfSource::PerfSource(sem_t *senderSem, sem_t *startProfile) : mSummary(0, FRAME_SUMMARY, 1024, senderSem), mBuffer(NULL), mCountersBuf(), mCountersGroup(&mCountersBuf), mMonitor(), mUEvent(), mSenderSem(senderSem), mStartProfile(startProfile), mInterruptFd(-1), mIsDone(false) { long l = sysconf(_SC_PAGE_SIZE); if (l < 0) { - logg->logError(__FILE__, __LINE__, "Unable to obtain the page size"); + logg->logError("Unable to obtain the page size"); handleException(); } gSessionData->mPageSize = static_cast(l); @@ -137,15 +139,18 @@ PerfSource::PerfSource(sem_t *senderSem, sem_t *startProfile) : mSummary(0, FRAM } PerfSource::~PerfSource() { + delete mBuffer; } bool PerfSource::prepare() { DynBuf printb; DynBuf b1; - long long schedSwitchId; long long cpuIdleId; - const uint64_t currTime = getTime(); + // MonotonicStarted has not yet been assigned! + const uint64_t currTime = 0;//getTime() - gSessionData->mMonotonicStarted; + + mBuffer = new Buffer(0, FRAME_PERF_ATTRS, gSessionData->mTotalBufferSize*1024*1024, mSenderSem); // Reread cpuinfo since cores may have changed since startup gSessionData->readCpuInfo(); @@ -155,72 +160,59 @@ bool PerfSource::prepare() { || !mUEvent.init() || !mMonitor.add(mUEvent.getFd()) - || (schedSwitchId = PerfDriver::getTracepointId(SCHED_SWITCH, &printb)) < 0 - || !sendTracepointFormat(currTime, &mBuffer, SCHED_SWITCH, &printb, &b1) + || !sendTracepointFormat(currTime, mBuffer, SCHED_SWITCH, &printb, &b1) || (cpuIdleId = PerfDriver::getTracepointId(CPU_IDLE, &printb)) < 0 - || !sendTracepointFormat(currTime, &mBuffer, CPU_IDLE, &printb, &b1) + || !sendTracepointFormat(currTime, mBuffer, CPU_IDLE, &printb, &b1) - // Only want RAW but not IP on sched_switch and don't want TID on SAMPLE_ID - || !mCountersGroup.add(currTime, &mBuffer, 100/**/, PERF_TYPE_TRACEPOINT, schedSwitchId, 1, PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL | PERF_GROUP_PER_CPU) - || !mIdleGroup.add(currTime, &mBuffer, 101/**/, PERF_TYPE_TRACEPOINT, cpuIdleId, 1, PERF_SAMPLE_RAW, PERF_GROUP_PER_CPU) + || !sendTracepointFormat(currTime, mBuffer, CPU_FREQUENCY, &printb, &b1) - // Only want TID and IP but not RAW on timer - || (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && !mCountersGroup.add(currTime, &mBuffer, 102/**/, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP, PERF_GROUP_PER_CPU)) + || !mCountersGroup.createCpuGroup(currTime, mBuffer) + || !mCountersGroup.add(currTime, mBuffer, cpuIdleKey, PERF_TYPE_TRACEPOINT, cpuIdleId, 1, PERF_SAMPLE_RAW, PERF_GROUP_LEADER | PERF_GROUP_PER_CPU) - || !gSessionData->perf.enable(currTime, &mCountersGroup, &mBuffer) + || !gSessionData->perf.enable(currTime, &mCountersGroup, mBuffer) || 0) { - logg->logMessage("%s(%s:%i): perf setup failed, are you running Linux 3.4 or later?", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("perf setup failed, are you running Linux 3.4 or later?"); return false; } for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) { const int result = mCountersGroup.prepareCPU(cpu, &mMonitor); if ((result != PG_SUCCESS) && (result != PG_CPU_OFFLINE)) { - logg->logError(__FILE__, __LINE__, "PerfGroup::prepareCPU on mCountersGroup failed"); - handleException(); - } - } - for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) { - const int result = mIdleGroup.prepareCPU(cpu, &mMonitor); - if ((result != PG_SUCCESS) && (result != PG_CPU_OFFLINE)) { - logg->logError(__FILE__, __LINE__, "PerfGroup::prepareCPU on mIdleGroup failed"); + logg->logError("PerfGroup::prepareCPU on mCountersGroup failed"); handleException(); } } int numEvents = 0; for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) { - numEvents += mCountersGroup.onlineCPU(currTime, cpu, false, &mBuffer); - } - for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) { - numEvents += mIdleGroup.onlineCPU(currTime, cpu, false, &mBuffer); + numEvents += mCountersGroup.onlineCPU(currTime, cpu, false, mBuffer); } if (numEvents <= 0) { - logg->logMessage("%s(%s:%i): PerfGroup::onlineCPU failed on all cores", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("PerfGroup::onlineCPU failed on all cores"); return false; } // Send the summary right before the start so that the monotonic delta is close to the start time if (!gSessionData->perf.summary(&mSummary)) { - logg->logError(__FILE__, __LINE__, "PerfDriver::summary failed", __FUNCTION__, __FILE__, __LINE__); - handleException(); + logg->logError("PerfDriver::summary failed"); + handleException(); } // Start the timer thread to used to sync perf and monotonic raw times pthread_t syncThread; if (pthread_create(&syncThread, NULL, syncFunc, NULL)) { - logg->logError(__FILE__, __LINE__, "pthread_create failed", __FUNCTION__, __FILE__, __LINE__); - handleException(); + logg->logError("pthread_create failed"); + handleException(); } struct sched_param param; param.sched_priority = sched_get_priority_max(SCHED_FIFO); if (pthread_setschedparam(syncThread, SCHED_FIFO | SCHED_RESET_ON_FORK, ¶m) != 0) { - logg->logError(__FILE__, __LINE__, "pthread_setschedparam failed"); - handleException(); + logg->logError("pthread_setschedparam failed"); + handleException(); } - mBuffer.commit(currTime); + mBuffer->commit(currTime); return true; } @@ -240,18 +232,17 @@ void *procFunc(void *arg) { // Gator runs at a high priority, reset the priority to the default if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) { - logg->logError(__FILE__, __LINE__, "setpriority failed"); + logg->logError("setpriority failed"); handleException(); } if (!readProcMaps(args->mCurrTime, args->mBuffer, &printb, &b)) { - logg->logError(__FILE__, __LINE__, "readProcMaps failed"); + logg->logError("readProcMaps failed"); handleException(); } - args->mBuffer->commit(args->mCurrTime); if (!readKallsyms(args->mCurrTime, args->mBuffer, &args->mIsDone)) { - logg->logError(__FILE__, __LINE__, "readKallsyms failed"); + logg->logError("readKallsyms failed"); handleException(); } args->mBuffer->commit(args->mCurrTime); @@ -266,67 +257,72 @@ void PerfSource::run() { pthread_t procThread; ProcThreadArgs procThreadArgs; - { - DynBuf printb; - DynBuf b1; - DynBuf b2; - - const uint64_t currTime = getTime(); - - // Start events before reading proc to avoid race conditions - if (!mCountersGroup.start() || !mIdleGroup.start()) { - logg->logError(__FILE__, __LINE__, "PerfGroup::start failed", __FUNCTION__, __FILE__, __LINE__); - handleException(); - } - - if (!readProcComms(currTime, &mBuffer, &printb, &b1, &b2)) { - logg->logError(__FILE__, __LINE__, "readProcComms failed"); - handleException(); - } - mBuffer.commit(currTime); - - // Postpone reading kallsyms as on android adb gets too backed up and data is lost - procThreadArgs.mBuffer = &mBuffer; - procThreadArgs.mCurrTime = currTime; - procThreadArgs.mIsDone = false; - if (pthread_create(&procThread, NULL, procFunc, &procThreadArgs)) { - logg->logError(__FILE__, __LINE__, "pthread_create failed", __FUNCTION__, __FILE__, __LINE__); - handleException(); - } - } - if (pipe_cloexec(pipefd) != 0) { - logg->logError(__FILE__, __LINE__, "pipe failed"); + logg->logError("pipe failed"); handleException(); } mInterruptFd = pipefd[1]; if (!mMonitor.add(pipefd[0])) { - logg->logError(__FILE__, __LINE__, "Monitor::add failed"); + logg->logError("Monitor::add failed"); handleException(); } - int timeout = -1; - if (gSessionData->mLiveRate > 0) { - timeout = gSessionData->mLiveRate/NS_PER_MS; + { + DynBuf printb; + DynBuf b1; + DynBuf b2; + + const uint64_t currTime = getTime() - gSessionData->mMonotonicStarted; + + // Start events before reading proc to avoid race conditions + if (!mCountersGroup.start()) { + logg->logError("PerfGroup::start failed"); + handleException(); + } + + mBuffer->perfCounterHeader(currTime); + for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) { + gSessionData->perf.read(mBuffer, cpu); + } + mBuffer->perfCounterFooter(currTime); + + if (!readProcComms(currTime, mBuffer, &printb, &b1, &b2)) { + logg->logError("readProcComms failed"); + handleException(); + } + mBuffer->commit(currTime); + + // Postpone reading kallsyms as on android adb gets too backed up and data is lost + procThreadArgs.mBuffer = mBuffer; + procThreadArgs.mCurrTime = currTime; + procThreadArgs.mIsDone = false; + if (pthread_create(&procThread, NULL, procFunc, &procThreadArgs)) { + logg->logError("pthread_create failed"); + handleException(); + } } sem_post(mStartProfile); + const uint64_t NO_RATE = ~0ULL; + const uint64_t rate = gSessionData->mLiveRate > 0 && gSessionData->mSampleRate > 0 ? gSessionData->mLiveRate : NO_RATE; + uint64_t nextTime = 0; + int timeout = rate != NO_RATE ? 0 : -1; while (gSessionData->mSessionIsActive) { // +1 for uevents, +1 for pipe struct epoll_event events[NR_CPUS + 2]; int ready = mMonitor.wait(events, ARRAY_LENGTH(events), timeout); if (ready < 0) { - logg->logError(__FILE__, __LINE__, "Monitor::wait failed"); + logg->logError("Monitor::wait failed"); handleException(); } - const uint64_t currTime = getTime(); + const uint64_t currTime = getTime() - gSessionData->mMonotonicStarted; for (int i = 0; i < ready; ++i) { if (events[i].data.fd == mUEvent.getFd()) { if (!handleUEvent(currTime)) { - logg->logError(__FILE__, __LINE__, "PerfSource::handleUEvent failed"); + logg->logError("PerfSource::handleUEvent failed"); handleException(); } break; @@ -337,18 +333,24 @@ void PerfSource::run() { sem_post(mSenderSem); // In one shot mode, stop collection once all the buffers are filled - // Assume timeout == 0 in this case - if (gSessionData->mOneShot && gSessionData->mSessionIsActive) { - logg->logMessage("%s(%s:%i): One shot", __FUNCTION__, __FILE__, __LINE__); + if (gSessionData->mOneShot && gSessionData->mSessionIsActive && ((mSummary.bytesAvailable() <= 0) || (mBuffer->bytesAvailable() <= 0) || mCountersBuf.isFull())) { + logg->logMessage("One shot (perf)"); child->endSession(); } + + if (rate != NO_RATE) { + while (currTime > nextTime) { + nextTime += rate; + } + // + NS_PER_MS - 1 to ensure always rounding up + timeout = max(0, (int)((nextTime + NS_PER_MS - 1 - getTime() + gSessionData->mMonotonicStarted)/NS_PER_MS)); + } } procThreadArgs.mIsDone = true; pthread_join(procThread, NULL); - mIdleGroup.stop(); mCountersGroup.stop(); - mBuffer.setDone(); + mBuffer->setDone(); mIsDone = true; // send a notification that data is ready @@ -362,57 +364,53 @@ void PerfSource::run() { bool PerfSource::handleUEvent(const uint64_t currTime) { UEventResult result; if (!mUEvent.read(&result)) { - logg->logMessage("%s(%s:%i): UEvent::Read failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("UEvent::Read failed"); return false; } if (strcmp(result.mSubsystem, "cpu") == 0) { if (strncmp(result.mDevPath, CPU_DEVPATH, sizeof(CPU_DEVPATH) - 1) != 0) { - logg->logMessage("%s(%s:%i): Unexpected cpu DEVPATH format", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("Unexpected cpu DEVPATH format"); return false; } char *endptr; errno = 0; int cpu = strtol(result.mDevPath + sizeof(CPU_DEVPATH) - 1, &endptr, 10); if (errno != 0 || *endptr != '\0') { - logg->logMessage("%s(%s:%i): strtol failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("strtol failed"); return false; } if (cpu >= gSessionData->mCores) { - logg->logError(__FILE__, __LINE__, "Only %i cores are expected but core %i reports %s", gSessionData->mCores, cpu, result.mAction); + logg->logError("Only %i cores are expected but core %i reports %s", gSessionData->mCores, cpu, result.mAction); handleException(); } if (strcmp(result.mAction, "online") == 0) { - mBuffer.onlineCPU(currTime, currTime - gSessionData->mMonotonicStarted, cpu); + mBuffer->onlineCPU(currTime, cpu); // Only call onlineCPU if prepareCPU succeeded - bool result = false; + bool ret = false; int err = mCountersGroup.prepareCPU(cpu, &mMonitor); if (err == PG_CPU_OFFLINE) { - result = true; + ret = true; } else if (err == PG_SUCCESS) { - if (mCountersGroup.onlineCPU(currTime, cpu, true, &mBuffer)) { - err = mIdleGroup.prepareCPU(cpu, &mMonitor); - if (err == PG_CPU_OFFLINE) { - result = true; - } else if (err == PG_SUCCESS) { - if (mIdleGroup.onlineCPU(currTime, cpu, true, &mBuffer)) { - result = true; - } - } + if (mCountersGroup.onlineCPU(currTime, cpu, true, mBuffer) > 0) { + mBuffer->perfCounterHeader(currTime); + gSessionData->perf.read(mBuffer, cpu); + mBuffer->perfCounterFooter(currTime); + ret = true; } } - mBuffer.commit(currTime); + mBuffer->commit(currTime); gSessionData->readCpuInfo(); gSessionData->perf.coreName(currTime, &mSummary, cpu); mSummary.commit(currTime); - return result; + return ret; } else if (strcmp(result.mAction, "offline") == 0) { - const bool result = mCountersGroup.offlineCPU(cpu) && mIdleGroup.offlineCPU(cpu); - mBuffer.offlineCPU(currTime, currTime - gSessionData->mMonotonicStarted, cpu); - return result; + const bool ret = mCountersGroup.offlineCPU(cpu); + mBuffer->offlineCPU(currTime, cpu); + return ret; } } @@ -424,14 +422,14 @@ void PerfSource::interrupt() { int8_t c = 0; // Write to the pipe to wake the monitor which will cause mSessionIsActive to be reread if (::write(mInterruptFd, &c, sizeof(c)) != sizeof(c)) { - logg->logError(__FILE__, __LINE__, "write failed"); + logg->logError("write failed"); handleException(); } } } bool PerfSource::isDone () { - return mBuffer.isDone() && mIsDone && mCountersBuf.isEmpty(); + return mBuffer->isDone() && mIsDone && mCountersBuf.isEmpty(); } void PerfSource::write (Sender *sender) { @@ -439,11 +437,11 @@ void PerfSource::write (Sender *sender) { mSummary.write(sender); gSessionData->mSentSummary = true; } - if (!mBuffer.isDone()) { - mBuffer.write(sender); + if (!mBuffer->isDone()) { + mBuffer->write(sender); } if (!mCountersBuf.send(sender)) { - logg->logError(__FILE__, __LINE__, "PerfBuffer::send failed"); + logg->logError("PerfBuffer::send failed"); handleException(); } } diff --git a/tools/gator/daemon/PerfSource.h b/tools/gator/daemon/PerfSource.h index ce1eafe8e953..feec1c269922 100644 --- a/tools/gator/daemon/PerfSource.h +++ b/tools/gator/daemon/PerfSource.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -36,10 +36,9 @@ class PerfSource : public Source { bool handleUEvent(const uint64_t currTime); Buffer mSummary; - Buffer mBuffer; + Buffer *mBuffer; PerfBuffer mCountersBuf; PerfGroup mCountersGroup; - PerfGroup mIdleGroup; Monitor mMonitor; UEvent mUEvent; sem_t *const mSenderSem; diff --git a/tools/gator/daemon/Proc.cpp b/tools/gator/daemon/Proc.cpp index e6b26b1199fa..4ba59b632836 100644 --- a/tools/gator/daemon/Proc.cpp +++ b/tools/gator/daemon/Proc.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -31,20 +31,20 @@ struct ProcStat { static bool readProcStat(ProcStat *const ps, const char *const pathname, DynBuf *const b) { if (!b->read(pathname)) { - logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the thread exited", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::read failed, likely because the thread exited"); // This is not a fatal error - the thread just doesn't exist any more return true; } char *comm = strchr(b->getBuf(), '('); if (comm == NULL) { - logg->logMessage("%s(%s:%i): parsing stat failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("parsing stat failed"); return false; } ++comm; char *const str = strrchr(comm, ')'); if (str == NULL) { - logg->logMessage("%s(%s:%i): parsing stat failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("parsing stat failed"); return false; } *str = '\0'; @@ -53,7 +53,7 @@ static bool readProcStat(ProcStat *const ps, const char *const pathname, DynBuf const int count = sscanf(str + 2, " %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &ps->numThreads); if (count != 1) { - logg->logMessage("%s(%s:%i): sscanf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("sscanf failed"); return false; } @@ -65,7 +65,7 @@ static const char APP_PROCESS[] = "app_process"; static const char *readProcExe(DynBuf *const printb, const int pid, const int tid, DynBuf *const b) { if (tid == -1 ? !printb->printf("/proc/%i/exe", pid) : !printb->printf("/proc/%i/task/%i/exe", pid, tid)) { - logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::printf failed"); return NULL; } @@ -82,7 +82,7 @@ static const char *readProcExe(DynBuf *const printb, const int pid, const int ti // readlink /proc/[pid]/exe returns ENOENT for kernel threads image = "\0"; } else { - logg->logMessage("%s(%s:%i): DynBuf::readlink failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::readlink failed"); return NULL; } @@ -94,12 +94,12 @@ static const char *readProcExe(DynBuf *const printb, const int pid, const int ti if (tid == -1 ? !printb->printf("/proc/%i/cmdline", pid) : !printb->printf("/proc/%i/task/%i/cmdline", pid, tid)) { - logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::printf failed"); return NULL; } if (!b->read(printb->getBuf())) { - logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the thread exited", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::read failed, likely because the thread exited"); return NULL; } @@ -110,12 +110,12 @@ static bool readProcTask(const uint64_t currTime, Buffer *const buffer, const in bool result = false; if (!b1->printf("/proc/%i/task", pid)) { - logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::printf failed"); return result; } DIR *task = opendir(b1->getBuf()); if (task == NULL) { - logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("opendir failed"); // This is not a fatal error - the thread just doesn't exist any more return true; } @@ -130,22 +130,22 @@ static bool readProcTask(const uint64_t currTime, Buffer *const buffer, const in } if (!printb->printf("/proc/%i/task/%i/stat", pid, tid)) { - logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::printf failed"); goto fail; } ProcStat ps; if (!readProcStat(&ps, printb->getBuf(), b1)) { - logg->logMessage("%s(%s:%i): readProcStat failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("readProcStat failed"); goto fail; } const char *const image = readProcExe(printb, pid, tid, b2); if (image == NULL) { - logg->logMessage("%s(%s:%i): readImage failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("readImage failed"); goto fail; } - buffer->comm(currTime, pid, tid, image, ps.comm); + buffer->marshalComm(currTime, pid, tid, image, ps.comm); } result = true; @@ -161,7 +161,7 @@ bool readProcComms(const uint64_t currTime, Buffer *const buffer, DynBuf *const DIR *proc = opendir("/proc"); if (proc == NULL) { - logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("opendir failed"); return result; } @@ -175,26 +175,26 @@ bool readProcComms(const uint64_t currTime, Buffer *const buffer, DynBuf *const } if (!printb->printf("/proc/%i/stat", pid)) { - logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::printf failed"); goto fail; } ProcStat ps; if (!readProcStat(&ps, printb->getBuf(), b1)) { - logg->logMessage("%s(%s:%i): readProcStat failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("readProcStat failed"); goto fail; } if (ps.numThreads <= 1) { const char *const image = readProcExe(printb, pid, -1, b1); if (image == NULL) { - logg->logMessage("%s(%s:%i): readImage failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("readImage failed"); goto fail; } - buffer->comm(currTime, pid, pid, image, ps.comm); + buffer->marshalComm(currTime, pid, pid, image, ps.comm); } else { if (!readProcTask(currTime, buffer, pid, printb, b1, b2)) { - logg->logMessage("%s(%s:%i): readProcTask failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("readProcTask failed"); goto fail; } } @@ -213,7 +213,7 @@ bool readProcMaps(const uint64_t currTime, Buffer *const buffer, DynBuf *const p DIR *proc = opendir("/proc"); if (proc == NULL) { - logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("opendir failed"); return result; } @@ -227,16 +227,16 @@ bool readProcMaps(const uint64_t currTime, Buffer *const buffer, DynBuf *const p } if (!printb->printf("/proc/%i/maps", pid)) { - logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::printf failed"); goto fail; } if (!b->read(printb->getBuf())) { - logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the process exited", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("DynBuf::read failed, likely because the process exited"); // This is not a fatal error - the process just doesn't exist any more continue; } - buffer->maps(currTime, pid, pid, b->getBuf()); + buffer->marshalMaps(currTime, pid, pid, b->getBuf()); } result = true; @@ -251,7 +251,7 @@ bool readKallsyms(const uint64_t currTime, Buffer *const buffer, const bool *con int fd = ::open("/proc/kallsyms", O_RDONLY | O_CLOEXEC); if (fd < 0) { - logg->logMessage("%s(%s:%i): open failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("open failed"); return true; }; @@ -260,7 +260,7 @@ bool readKallsyms(const uint64_t currTime, Buffer *const buffer, const bool *con while (gSessionData->mSessionIsActive && !ACCESS_ONCE(*isDone)) { // Assert there is still space in the buffer if (sizeof(buf) - pos - 1 == 0) { - logg->logError(__FILE__, __LINE__, "no space left in buffer"); + logg->logError("no space left in buffer"); handleException(); } @@ -268,13 +268,13 @@ bool readKallsyms(const uint64_t currTime, Buffer *const buffer, const bool *con // -1 to reserve space for \0 const ssize_t bytes = ::read(fd, buf + pos, sizeof(buf) - pos - 1); if (bytes < 0) { - logg->logError(__FILE__, __LINE__, "read failed", __FUNCTION__, __FILE__, __LINE__); + logg->logError("read failed"); handleException(); } if (bytes == 0) { // Assert the buffer is empty if (pos != 0) { - logg->logError(__FILE__, __LINE__, "buffer not empty on eof"); + logg->logError("buffer not empty on eof"); handleException(); } break; @@ -288,13 +288,13 @@ bool readKallsyms(const uint64_t currTime, Buffer *const buffer, const bool *con if (buf[newline] == '\n') { const char was = buf[newline + 1]; buf[newline + 1] = '\0'; - buffer->kallsyms(currTime, buf); + buffer->marshalKallsyms(currTime, buf); // Sleep 3 ms to avoid sending out too much data too quickly usleep(3000); buf[0] = was; // Assert the memory regions do not overlap if (pos - newline >= newline + 1) { - logg->logError(__FILE__, __LINE__, "memcpy src and dst overlap"); + logg->logError("memcpy src and dst overlap"); handleException(); } if (pos - newline - 2 > 0) { diff --git a/tools/gator/daemon/Proc.h b/tools/gator/daemon/Proc.h index 2a1a7cbc1e99..fcc48c546d05 100644 --- a/tools/gator/daemon/Proc.h +++ b/tools/gator/daemon/Proc.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Sender.cpp b/tools/gator/daemon/Sender.cpp index 8a54a6678974..d7ad757165a2 100644 --- a/tools/gator/daemon/Sender.cpp +++ b/tools/gator/daemon/Sender.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -30,7 +30,7 @@ Sender::Sender(OlySocket* socket) { // Streamline will send data prior to the magic sequence for legacy support, which should be ignored for v4+ while (strcmp("STREAMLINE", streamline) != 0) { if (mDataSocket->receiveString(streamline, sizeof(streamline)) == -1) { - logg->logError(__FILE__, __LINE__, "Socket disconnected"); + logg->logError("Socket disconnected"); handleException(); } } @@ -67,7 +67,7 @@ void Sender::createDataFile(char* apcDir) { sprintf(mDataFileName, "%s/0000000000", apcDir); mDataFile = fopen_cloexec(mDataFileName, "wb"); if (!mDataFile) { - logg->logError(__FILE__, __LINE__, "Failed to open binary file: %s", mDataFileName); + logg->logError("Failed to open binary file: %s", mDataFileName); handleException(); } } @@ -120,7 +120,7 @@ void Sender::writeData(const char* data, int length, int type) { logg->logMessage("Writing data with length %d", length); // Send data to the data file if (fwrite(data, 1, length, mDataFile) != (unsigned int)length) { - logg->logError(__FILE__, __LINE__, "Failed writing binary file %s", mDataFileName); + logg->logError("Failed writing binary file %s", mDataFileName); handleException(); } } diff --git a/tools/gator/daemon/Sender.h b/tools/gator/daemon/Sender.h index 5aa911713820..8f542026e90d 100644 --- a/tools/gator/daemon/Sender.h +++ b/tools/gator/daemon/Sender.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/SessionData.cpp b/tools/gator/daemon/SessionData.cpp index 0e65d7842647..2b661bdf294c 100644 --- a/tools/gator/daemon/SessionData.cpp +++ b/tools/gator/daemon/SessionData.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,7 +13,6 @@ #include #include -#include "CPUFreqDriver.h" #include "DiskIODriver.h" #include "FSDriver.h" #include "HwmonDriver.h" @@ -31,8 +30,7 @@ SessionData::SessionData() { usDrivers[1] = new FSDriver(); usDrivers[2] = new MemInfoDriver(); usDrivers[3] = new NetDriver(); - usDrivers[4] = new CPUFreqDriver(); - usDrivers[5] = new DiskIODriver(); + usDrivers[4] = new DiskIODriver(); initialize(); } @@ -50,7 +48,7 @@ void SessionData::initialize() { // Share mCpuIds across all instances of gatord mCpuIds = (int *)mmap(NULL, cpuIdSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (mCpuIds == MAP_FAILED) { - logg->logError(__FILE__, __LINE__, "Unable to mmap shared memory for cpuids"); + logg->logError("Unable to mmap shared memory for cpuids"); handleException(); } memset(mCpuIds, -1, cpuIdSize); @@ -61,6 +59,7 @@ void SessionData::initialize() { mConfigurationXMLPath = NULL; mSessionXMLPath = NULL; mEventsXMLPath = NULL; + mEventsXMLAppend = NULL; mTargetPath = NULL; mAPCDir = NULL; mCaptureWorkingDir = NULL; @@ -75,6 +74,7 @@ void SessionData::initialize() { // sysconf(_SC_NPROCESSORS_CONF) is unreliable on 2.6 Android, get the value from the kernel module mCores = 1; mPageSize = 0; + mAnnotateStart = -1; } void SessionData::parseSessionXML(char* xmlString) { @@ -91,7 +91,7 @@ void SessionData::parseSessionXML(char* xmlString) { } else if (strcmp(session.parameters.sample_rate, "none") == 0) { mSampleRate = 0; } else { - logg->logError(__FILE__, __LINE__, "Invalid sample rate (%s) in session xml.", session.parameters.sample_rate); + logg->logError("Invalid sample rate (%s) in session xml.", session.parameters.sample_rate); handleException(); } mBacktraceDepth = session.parameters.call_stack_unwinding == true ? 128 : 0; @@ -108,7 +108,7 @@ void SessionData::parseSessionXML(char* xmlString) { } else if (strcmp(session.parameters.buffer_mode, "large") == 0) { mTotalBufferSize = 16; } else { - logg->logError(__FILE__, __LINE__, "Invalid value for buffer mode in session xml."); + logg->logError("Invalid value for buffer mode in session xml."); handleException(); } @@ -120,7 +120,7 @@ void SessionData::parseSessionXML(char* xmlString) { } if (!mAllowCommands && (mCaptureCommand != NULL)) { - logg->logError(__FILE__, __LINE__, "Running a command during a capture is not currently allowed. Please restart gatord with the -a flag."); + logg->logError("Running a command during a capture is not currently allowed. Please restart gatord with the -a flag."); handleException(); } } @@ -139,6 +139,20 @@ void SessionData::readModel() { fclose(fh); } +static void setImplementer(int &cpuId, const int implementer) { + if (cpuId == -1) { + cpuId = 0; + } + cpuId |= implementer << 12; +} + +static void setPart(int &cpuId, const int part) { + if (cpuId == -1) { + cpuId = 0; + } + cpuId |= part; +} + void SessionData::readCpuInfo() { char temp[256]; // arbitrarily large amount mMaxCpuId = -1; @@ -150,7 +164,7 @@ void SessionData::readCpuInfo() { return; } - bool foundCoreName = false; + bool foundCoreName = (strcmp(mCoreName, CORE_NAME_UNKNOWN) != 0); int processor = -1; while (fgets(temp, sizeof(temp), f)) { const size_t len = strlen(temp); @@ -166,10 +180,11 @@ void SessionData::readCpuInfo() { temp[len - 1] = '\0'; } - const bool foundHardware = strstr(temp, "Hardware") != 0; + const bool foundHardware = !foundCoreName && strstr(temp, "Hardware") != 0; + const bool foundCPUImplementer = strstr(temp, "CPU implementer") != 0; const bool foundCPUPart = strstr(temp, "CPU part") != 0; const bool foundProcessor = strstr(temp, "processor") != 0; - if (foundHardware || foundCPUPart || foundProcessor) { + if (foundHardware || foundCPUImplementer || foundCPUPart || foundProcessor) { char* position = strchr(temp, ':'); if (position == NULL || (unsigned int)(position - temp) + 2 >= strlen(temp)) { logg->logMessage("Unknown format of /proc/cpuinfo\n" @@ -178,22 +193,31 @@ void SessionData::readCpuInfo() { } position += 2; - if (foundHardware && (strcmp(mCoreName, CORE_NAME_UNKNOWN) == 0)) { + if (foundHardware) { strncpy(mCoreName, position, sizeof(mCoreName)); mCoreName[sizeof(mCoreName) - 1] = 0; // strncpy does not guarantee a null-terminated string foundCoreName = true; } - if (foundCPUPart) { - const int cpuId = strtol(position, NULL, 0); - // If this does not have the full topology in /proc/cpuinfo, mCpuIds[0] may not have the 1 CPU part emitted - this guarantees it's in mMaxCpuId - if (cpuId > mMaxCpuId) { - mMaxCpuId = cpuId; - } + if (foundCPUImplementer) { + const int implementer = strtol(position, NULL, 0); if (processor >= NR_CPUS) { logg->logMessage("Too many processors, please increase NR_CPUS"); } else if (processor >= 0) { - mCpuIds[processor] = cpuId; + setImplementer(mCpuIds[processor], implementer); + } else { + setImplementer(mMaxCpuId, implementer); + } + } + + if (foundCPUPart) { + const int cpuId = strtol(position, NULL, 0); + if (processor >= NR_CPUS) { + logg->logMessage("Too many processors, please increase NR_CPUS"); + } else if (processor >= 0) { + setPart(mCpuIds[processor], cpuId); + } else { + setPart(mMaxCpuId, cpuId); } } @@ -203,6 +227,13 @@ void SessionData::readCpuInfo() { } } + // If this does not have the full topology in /proc/cpuinfo, mCpuIds[0] may not have the 1 CPU part emitted - this guarantees it's in mMaxCpuId + for (int i = 0; i < NR_CPUS; ++i) { + if (mCpuIds[i] > mMaxCpuId) { + mMaxCpuId = mCpuIds[i]; + } + } + if (!foundCoreName) { logg->logMessage("Could not determine core name from /proc/cpuinfo\n" "The core name in the captured xml file will be 'unknown'."); @@ -213,7 +244,7 @@ void SessionData::readCpuInfo() { uint64_t getTime() { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) { - logg->logError(__FILE__, __LINE__, "Failed to get uptime"); + logg->logError("Failed to get uptime"); handleException(); } return (NS_PER_S*ts.tv_sec + ts.tv_nsec); diff --git a/tools/gator/daemon/SessionData.h b/tools/gator/daemon/SessionData.h index ed282af4a869..d0c8900317a5 100644 --- a/tools/gator/daemon/SessionData.h +++ b/tools/gator/daemon/SessionData.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -11,7 +11,6 @@ #include -#include "AnnotateListener.h" #include "Config.h" #include "Counter.h" #include "FtraceDriver.h" @@ -19,7 +18,7 @@ #include "MaliVideoDriver.h" #include "PerfDriver.h" -#define PROTOCOL_VERSION 20 +#define PROTOCOL_VERSION 21 // Differentiates development versions (timestamp) from release versions #define PROTOCOL_DEV 1000 @@ -43,18 +42,18 @@ class SessionData { void readModel(); void readCpuInfo(); - PolledDriver *usDrivers[6]; + PolledDriver *usDrivers[5]; KMod kmod; PerfDriver perf; MaliVideoDriver maliVideo; FtraceDriver ftraceDriver; - AnnotateListener annotateListener; char mCoreName[MAX_STRING_LEN]; struct ImageLinkList *mImages; char *mConfigurationXMLPath; char *mSessionXMLPath; char *mEventsXMLPath; + char *mEventsXMLAppend; char *mTargetPath; char *mAPCDir; char *mCaptureWorkingDir; @@ -81,6 +80,7 @@ class SessionData { int mPageSize; int *mCpuIds; int mMaxCpuId; + int mAnnotateStart; // PMU Counters int mCounterOverflow; @@ -93,6 +93,7 @@ class SessionData { }; extern SessionData* gSessionData; +extern const char *const gSrcMd5; uint64_t getTime(); int getEventKey(); diff --git a/tools/gator/daemon/SessionXML.cpp b/tools/gator/daemon/SessionXML.cpp index dea4c8f299ec..c638dea45912 100644 --- a/tools/gator/daemon/SessionXML.cpp +++ b/tools/gator/daemon/SessionXML.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -36,7 +36,7 @@ SessionXML::SessionXML(const char *str) { parameters.call_stack_unwinding = false; parameters.live_rate = 0; mSessionXML = str; - logg->logMessage(mSessionXML); + logg->logMessage("%s", mSessionXML); } SessionXML::~SessionXML() { @@ -55,7 +55,7 @@ void SessionXML::parse() { return; } - logg->logError(__FILE__, __LINE__, "No session tag found in the session.xml file"); + logg->logError("No session tag found in the session.xml file"); handleException(); } @@ -63,7 +63,7 @@ void SessionXML::sessionTag(mxml_node_t *tree, mxml_node_t *node) { int version = 0; if (mxmlElementGetAttr(node, ATTR_VERSION)) version = strtol(mxmlElementGetAttr(node, ATTR_VERSION), NULL, 10); if (version != 1) { - logg->logError(__FILE__, __LINE__, "Invalid session.xml version: %d", version); + logg->logError("Invalid session.xml version: %d", version); handleException(); } diff --git a/tools/gator/daemon/SessionXML.h b/tools/gator/daemon/SessionXML.h index 53965749c74b..2ba276a38021 100644 --- a/tools/gator/daemon/SessionXML.h +++ b/tools/gator/daemon/SessionXML.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/Setup.cpp b/tools/gator/daemon/Setup.cpp index d4ce0328c633..7dd83ceafcce 100644 --- a/tools/gator/daemon/Setup.cpp +++ b/tools/gator/daemon/Setup.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -15,7 +15,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -24,12 +26,17 @@ #include "Config.h" #include "DynBuf.h" #include "Logging.h" +#include "SessionData.h" + +#define GATOR_MSG "gator: " +#define GATOR_ERROR "gator: error: " +#define GATOR_CONFIRM "gator: confirm: " bool getLinuxVersion(int version[3]) { // Check the kernel version struct utsname utsname; if (uname(&utsname) != 0) { - logg->logMessage("%s(%s:%i): uname failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("uname failed"); return false; } @@ -57,7 +64,7 @@ static int pgrep_gator(DynBuf *const printb) { DIR *proc = opendir("/proc"); if (proc == NULL) { - logg->logError(__FILE__, __LINE__, "gator: error: opendir failed"); + logg->logError(GATOR_ERROR "opendir failed"); handleException(); } @@ -73,7 +80,7 @@ static int pgrep_gator(DynBuf *const printb) { } if (!printb->printf("/proc/%i/stat", pid)) { - logg->logError(__FILE__, __LINE__, "gator: error: DynBuf::printf failed"); + logg->logError(GATOR_ERROR "DynBuf::printf failed"); handleException(); } @@ -84,21 +91,35 @@ static int pgrep_gator(DynBuf *const printb) { char *comm = strchr(b.getBuf(), '('); if (comm == NULL) { - logg->logError(__FILE__, __LINE__, "gator: error: parsing stat begin failed"); + logg->logError(GATOR_ERROR "parsing stat comm begin failed"); handleException(); } ++comm; char *const str = strrchr(comm, ')'); if (str == NULL) { - logg->logError(__FILE__, __LINE__, "gator: error: parsing stat end failed"); + logg->logError(GATOR_ERROR "parsing stat comm end failed"); handleException(); } *str = '\0'; - if (strncmp(comm, "gator", 5) == 0) { - // Assume there is only one gator process - return pid; + if (strncmp(comm, "gator", 5) != 0) { + continue; } + + char state; + const int count = sscanf(str + 2, " %c ", &state); + if (count != 1) { + logg->logError(GATOR_ERROR "parsing stat state failed"); + handleException(); + } + + if (state == 'Z') { + // This gator is a zombie, ignore + continue; + } + + // Assume there is only one gator process + return pid; } closedir(proc); @@ -106,73 +127,106 @@ static int pgrep_gator(DynBuf *const printb) { return -1; } -int update(const char *const gatorPath) { - printf("gator: starting\n"); +static bool confirm(const char *const message) { + char buf[1<<10]; + + printf(GATOR_CONFIRM "%s\n", message); + while (fgets(buf, sizeof(buf), stdin) != NULL) { + if (strcmp(buf, "y\n") == 0) { + return true; + } + if (strcmp(buf, "n\n") == 0) { + return false; + } + // Ignore unrecognized input + } + + return false; +} + +void update(const char *const gatorPath) { + printf(GATOR_MSG "starting\n"); int version[3]; if (!getLinuxVersion(version)) { - logg->logError(__FILE__, __LINE__, "gator: error: getLinuxVersion failed"); + logg->logError(GATOR_ERROR "getLinuxVersion failed"); handleException(); } if (KERNEL_VERSION(version[0], version[1], version[2]) < KERNEL_VERSION(2, 6, 32)) { - logg->logError(__FILE__, __LINE__, "gator: error: Streamline can't automatically setup gator as this kernel version is not supported. Please upgrade the kernel on your device."); + logg->logError(GATOR_ERROR "Streamline can't automatically setup gator as this kernel version is not supported. Please upgrade the kernel on your device."); handleException(); } if (KERNEL_VERSION(version[0], version[1], version[2]) < KERNEL_VERSION(3, 4, 0)) { - logg->logError(__FILE__, __LINE__, "gator: error: Streamline can't automatically setup gator as gator.ko is required for this version of Linux. Please build gator.ko and gatord and install them on your device."); - handleException(); - } - - if (access("/sys/module/gator", F_OK) == 0) { - logg->logError(__FILE__, __LINE__, "gator: error: Streamline has detected that the gator kernel module is loaded on your device. Please build an updated version of gator.ko and gatord and install them on your device."); + logg->logError(GATOR_ERROR "Streamline can't automatically setup gator as gator.ko is required for this version of Linux. Please build gator.ko and gatord and install them on your device."); handleException(); } if (geteuid() != 0) { - printf("gator: trying sudo\n"); + printf(GATOR_MSG "trying sudo\n"); execlp("sudo", "sudo", gatorPath, "-u", NULL); // Streamline will provide the password if needed - printf("gator: trying su\n"); + printf(GATOR_MSG "trying su\n"); char buf[1<<10]; - snprintf(buf, sizeof(buf), "%s -u", gatorPath); - execlp("su", "su", "-", "-c", buf, NULL); + /* + * Different versions of su handle additional -c command line options differently and expect the + * arguments in different ways. Try both ways wrapped in a shell. + * + * Then invoke another shell after su as it avoids odd failures on some Android systems + */ + snprintf(buf, sizeof(buf), "su -c \"sh -c '%s -u'\" || su -c sh -c '%s -u'", gatorPath, gatorPath); + execlp("sh", "sh", "-c", buf, NULL); // Streamline will provide the password if needed - logg->logError(__FILE__, __LINE__, "gator: error: Streamline was unable to sudo to root on your device. Please double check passwords, ensure sudo or su work with this user or try a different username."); + logg->logError(GATOR_ERROR "Streamline was unable to sudo to root on your device. Please double check passwords, ensure sudo or su work with this user or try a different username."); handleException(); } - printf("gator: now root\n"); + printf(GATOR_MSG "now root\n"); + + if (access("/sys/module/gator", F_OK) == 0) { + if (!confirm("Streamline has detected that the gator kernel module is loaded on your device. Click yes to switch to user space gator, click no to abort the install.")) { + printf("gator: cancel\n"); + exit(-1); + } + } // setenforce 0 not needed for userspace gator // Kill existing gator - DynBuf gatorStatPath; - int gator_main = pgrep_gator(&gatorStatPath); + DynBuf printb; + int gator_main = pgrep_gator(&printb); if (gator_main > 0) { if (kill(gator_main, SIGTERM) != 0) { - logg->logError(__FILE__, __LINE__, "gator: error: kill SIGTERM failed"); + logg->logError(GATOR_ERROR "kill SIGTERM failed"); + handleException(); + } + if (!printb.printf("/proc/%i/exe", gator_main)) { + logg->logError(GATOR_ERROR "DynBuf::printf failed"); handleException(); } for (int i = 0; ; ++i) { - if (access(gatorStatPath.getBuf(), F_OK) != 0) { + // /proc//exe exists but will not be accessible for zombies + if (access(printb.getBuf(), F_OK) != 0) { break; } if (i == 5) { if (kill(gator_main, SIGKILL) != 0) { - logg->logError(__FILE__, __LINE__, "gator: error: kill SIGKILL failed"); + logg->logError(GATOR_ERROR "kill SIGKILL failed"); handleException(); } } else if (i >= 10) { - logg->logError(__FILE__, __LINE__, "gator: error: unable to kill running gator"); + logg->logError(GATOR_ERROR "unable to kill running gator"); handleException(); } sleep(1); } } - printf("gator: no gatord running\n"); + printf(GATOR_MSG "no gatord running\n"); + + umount("/dev/gator"); + syscall(__NR_delete_module, "gator", O_NONBLOCK); rename("gatord", "gatord.old"); rename("gator.ko", "gator.ko.old"); @@ -183,50 +237,88 @@ int update(const char *const gatorPath) { if (dot != NULL) { *dot = '\0'; if (rename(gatorPath, newGatorPath) != 0) { - logg->logError(__FILE__, __LINE__, "gator: error: rename failed"); + logg->logError(GATOR_ERROR "rename failed"); handleException(); } } - // Fork and start gatord (redirect stdout and stderr) + char buf[128]; + int pipefd[2]; + if (pipe_cloexec(pipefd) != 0) { + logg->logError(GATOR_ERROR "pipe failed"); + handleException(); + } + + // Fork and start gatord (redirect stdin, stdout and stderr so shell can close) int child = fork(); if (child < 0) { - logg->logError(__FILE__, __LINE__, "gator: error: fork failed"); + logg->logError(GATOR_ERROR "fork failed"); handleException(); } else if (child == 0) { - int inFd = open("/dev/null", O_RDONLY | O_CLOEXEC); + int inFd; + int outFd; + int errFd; + int result = -1; + + buf[0] = '\0'; + close(pipefd[0]); + + inFd = open("/dev/null", O_RDONLY | O_CLOEXEC); if (inFd < 0) { - logg->logError(__FILE__, __LINE__, "gator: error: open of /dev/null failed"); - handleException(); + snprintf(buf, sizeof(buf), GATOR_ERROR "open of /dev/null failed"); + goto fail_exit; } - int outFd = open("gatord.out", O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0600); + outFd = open("gatord.out", O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); if (outFd < 0) { - logg->logError(__FILE__, __LINE__, "gator: error: open of gatord.out failed"); - handleException(); + snprintf(buf, sizeof(buf), GATOR_ERROR "open of gatord.out failed"); + goto fail_exit; } - int errFd = open("gatord.err", O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0600); + errFd = open("gatord.err", O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); if (errFd < 0) { - logg->logError(__FILE__, __LINE__, "gator: error: open of gatord.err failed"); - handleException(); + snprintf(buf, sizeof(buf), GATOR_ERROR "open of gatord.err failed"); + goto fail_exit; } if (dup2(inFd, STDIN_FILENO) < 0) { - logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stdin failed"); - handleException(); + snprintf(buf, sizeof(buf), GATOR_ERROR "dup2 for stdin failed"); + goto fail_exit; } + fflush(stdout); if (dup2(outFd, STDOUT_FILENO) < 0) { - logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stdout failed"); - handleException(); + snprintf(buf, sizeof(buf), GATOR_ERROR "dup2 for stdout failed"); + goto fail_exit; } + fflush(stderr); if (dup2(errFd, STDERR_FILENO) < 0) { - logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stderr failed"); - handleException(); + snprintf(buf, sizeof(buf), GATOR_ERROR "dup2 for stderr failed"); + goto fail_exit; } - execlp(newGatorPath, newGatorPath, "-a", NULL); - logg->logError(__FILE__, __LINE__, "gator: error: execlp failed"); - handleException(); + + snprintf(buf, sizeof(buf), GATOR_MSG "done"); + result = 0; + + fail_exit: + if (buf[0] != '\0') { + const ssize_t bytes = write(pipefd[1], buf, sizeof(buf)); + // Can't do anything if this fails + (void)bytes; + } + close(pipefd[1]); + + if (result == 0) { + // Continue to execute gator normally + return; + } + exit(-1); } - printf("gator: done\n"); + close(pipefd[1]); + const ssize_t bytes = read(pipefd[0], buf, sizeof(buf)); + if (bytes > 0) { + logg->logError("%s", buf); + handleException(); + } + close(pipefd[0]); - return 0; + // Exit so parent shell can move on + exit(0); } diff --git a/tools/gator/daemon/Setup.h b/tools/gator/daemon/Setup.h index 280d61139784..427e71788c36 100644 --- a/tools/gator/daemon/Setup.h +++ b/tools/gator/daemon/Setup.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2014. All rights reserved. + * Copyright (C) ARM Limited 2014-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,6 +13,6 @@ #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) bool getLinuxVersion(int version[3]); -int update(const char *const gatorPath); +void update(const char *const gatorPath); #endif // SETUP_H diff --git a/tools/gator/daemon/Source.cpp b/tools/gator/daemon/Source.cpp index 60cf704e599b..64d6206895b0 100644 --- a/tools/gator/daemon/Source.cpp +++ b/tools/gator/daemon/Source.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -18,7 +18,7 @@ Source::~Source() { void Source::start() { if (pthread_create(&mThreadID, NULL, runStatic, this)) { - logg->logError(__FILE__, __LINE__, "Failed to create source thread"); + logg->logError("Failed to create source thread"); handleException(); } } diff --git a/tools/gator/daemon/Source.h b/tools/gator/daemon/Source.h index 56ac3d6e94f3..b9369be5198b 100644 --- a/tools/gator/daemon/Source.h +++ b/tools/gator/daemon/Source.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/StreamlineSetup.cpp b/tools/gator/daemon/StreamlineSetup.cpp index 2b61eaeb290d..e37f2712cd04 100644 --- a/tools/gator/daemon/StreamlineSetup.cpp +++ b/tools/gator/daemon/StreamlineSetup.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2011-2014. All rights reserved. + * Copyright (C) ARM Limited 2011-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -68,7 +68,7 @@ StreamlineSetup::StreamlineSetup(OlySocket* s) { sendData(NULL, 0, RESPONSE_ACK); break; default: - logg->logError(__FILE__, __LINE__, "Target error: Unknown command type, %d", type); + logg->logError("Target error: Unknown command type, %d", type); handleException(); } @@ -76,7 +76,7 @@ StreamlineSetup::StreamlineSetup(OlySocket* s) { } if (gSessionData->mCounterOverflow > 0) { - logg->logError(__FILE__, __LINE__, "Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow); + logg->logError("Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow); handleException(); } } @@ -96,7 +96,7 @@ char* StreamlineSetup::readCommand(int* command) { gSessionData->mWaitingOnCommand = false; if (response < 0) { - logg->logError(__FILE__, __LINE__, "Target error: Unexpected socket disconnect"); + logg->logError("Target error: Unexpected socket disconnect"); handleException(); } @@ -105,21 +105,21 @@ char* StreamlineSetup::readCommand(int* command) { // add artificial limit if ((length < 0) || length > 1024 * 1024) { - logg->logError(__FILE__, __LINE__, "Target error: Invalid length received, %d", length); + logg->logError("Target error: Invalid length received, %d", length); handleException(); } // allocate memory to contain the xml file, size of zero returns a zero size object data = (char*)calloc(length + 1, 1); if (data == NULL) { - logg->logError(__FILE__, __LINE__, "Unable to allocate memory for xml"); + logg->logError("Unable to allocate memory for xml"); handleException(); } // receive data response = mSocket->receiveNBytes(data, length); if (response < 0) { - logg->logError(__FILE__, __LINE__, "Target error: Unexpected socket disconnect"); + logg->logError("Target error: Unexpected socket disconnect"); handleException(); } @@ -222,7 +222,7 @@ void StreamlineSetup::sendDefaults() { // Artificial size restriction if (size > 1024*1024) { - logg->logError(__FILE__, __LINE__, "Corrupt default configuration file"); + logg->logError("Corrupt default configuration file"); handleException(); } @@ -241,7 +241,7 @@ void StreamlineSetup::sendCounters() { } if (count == 0) { - logg->logError(__FILE__, __LINE__, "No counters found, this could be because /dev/gator/events can not be read or because perf is not working correctly"); + logg->logError("No counters found, this could be because /dev/gator/events can not be read or because perf is not working correctly"); handleException(); } @@ -258,7 +258,7 @@ void StreamlineSetup::writeConfiguration(char* xml) { ConfigurationXML::getPath(path); if (util->writeToDisk(path, xml) < 0) { - logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify write permissions to this path.", path); + logg->logError("Error writing %s\nPlease verify write permissions to this path.", path); handleException(); } @@ -266,7 +266,7 @@ void StreamlineSetup::writeConfiguration(char* xml) { { ConfigurationXML configuration; } if (gSessionData->mCounterOverflow > 0) { - logg->logError(__FILE__, __LINE__, "Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow); + logg->logError("Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow); handleException(); } } diff --git a/tools/gator/daemon/StreamlineSetup.h b/tools/gator/daemon/StreamlineSetup.h index 623e14f2b64a..d8b162606436 100644 --- a/tools/gator/daemon/StreamlineSetup.h +++ b/tools/gator/daemon/StreamlineSetup.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/UEvent.cpp b/tools/gator/daemon/UEvent.cpp index f94a995393e8..6a69f5ab137e 100644 --- a/tools/gator/daemon/UEvent.cpp +++ b/tools/gator/daemon/UEvent.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -34,7 +34,7 @@ UEvent::~UEvent() { bool UEvent::init() { mFd = socket_cloexec(PF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT); if (mFd < 0) { - logg->logMessage("%s(%s:%i): socket failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("socket failed"); return false; } @@ -44,7 +44,7 @@ bool UEvent::init() { sockaddr.nl_groups = 1; // bitmask: (1 << 0) == kernel events, (1 << 1) == udev events sockaddr.nl_pid = 0; if (bind(mFd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) != 0) { - logg->logMessage("%s(%s:%i): bind failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("bind failed"); return false; } @@ -54,7 +54,7 @@ bool UEvent::init() { bool UEvent::read(UEventResult *const result) { ssize_t bytes = recv(mFd, result->mBuf, sizeof(result->mBuf), 0); if (bytes <= 0) { - logg->logMessage("%s(%s:%i): recv failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("recv failed"); return false; } @@ -64,6 +64,7 @@ bool UEvent::read(UEventResult *const result) { for (int pos = 0; pos < bytes; pos += strlen(result->mBuf + pos) + 1) { char *const str = result->mBuf + pos; + logg->logMessage("uevent + %i: %s", pos, str); if (strncmp(str, ACTION, sizeof(ACTION) - 1) == 0) { result->mAction = str + sizeof(ACTION) - 1; } else if (strncmp(str, DEVPATH, sizeof(DEVPATH) - 1) == 0) { diff --git a/tools/gator/daemon/UEvent.h b/tools/gator/daemon/UEvent.h index 2f7ef2c93f5d..4c00f6cff112 100644 --- a/tools/gator/daemon/UEvent.h +++ b/tools/gator/daemon/UEvent.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/UserSpaceSource.cpp b/tools/gator/daemon/UserSpaceSource.cpp index 4a9b22f4b555..f58f828e6e2e 100644 --- a/tools/gator/daemon/UserSpaceSource.cpp +++ b/tools/gator/daemon/UserSpaceSource.cpp @@ -1,13 +1,16 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define __STDC_FORMAT_MACROS + #include "UserSpaceSource.h" +#include #include #include @@ -35,45 +38,45 @@ void UserSpaceSource::run() { gSessionData->usDrivers[i]->start(); } - int64_t monotonic_started = 0; - while (monotonic_started <= 0) { + int64_t monotonicStarted = 0; + while (monotonicStarted <= 0 && gSessionData->mSessionIsActive) { usleep(10); if (gSessionData->perf.isSetup()) { - monotonic_started = gSessionData->mMonotonicStarted; + monotonicStarted = gSessionData->mMonotonicStarted; } else { - if (DriverSource::readInt64Driver("/dev/gator/started", &monotonic_started) == -1) { - logg->logError(__FILE__, __LINE__, "Error reading gator driver start time"); + if (DriverSource::readInt64Driver("/dev/gator/started", &monotonicStarted) == -1) { + logg->logError("Error reading gator driver start time"); handleException(); } - gSessionData->mMonotonicStarted = monotonic_started; + gSessionData->mMonotonicStarted = monotonicStarted; } } - uint64_t next_time = 0; + uint64_t nextTime = 0; while (gSessionData->mSessionIsActive) { - const uint64_t curr_time = getTime() - monotonic_started; + const uint64_t currTime = getTime() - monotonicStarted; // Sample ten times a second ignoring gSessionData->mSampleRate - next_time += NS_PER_S/10;//gSessionData->mSampleRate; - if (next_time < curr_time) { - logg->logMessage("Too slow, curr_time: %lli next_time: %lli", curr_time, next_time); - next_time = curr_time; + nextTime += NS_PER_S/10;//gSessionData->mSampleRate; + if (nextTime < currTime) { + logg->logMessage("Too slow, currTime: %" PRIi64 " nextTime: %" PRIi64, currTime, nextTime); + nextTime = currTime; } - if (mBuffer.eventHeader(curr_time)) { + if (mBuffer.eventHeader(currTime)) { for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) { gSessionData->usDrivers[i]->read(&mBuffer); } // Only check after writing all counters so that time and corresponding counters appear in the same frame - mBuffer.check(curr_time); + mBuffer.check(currTime); } - if (mBuffer.bytesAvailable() <= 0) { + if (gSessionData->mOneShot && gSessionData->mSessionIsActive && (mBuffer.bytesAvailable() <= 0)) { logg->logMessage("One shot (counters)"); child->endSession(); } - usleep((next_time - curr_time)/NS_PER_US); + usleep((nextTime - currTime)/NS_PER_US); } mBuffer.setDone(); diff --git a/tools/gator/daemon/UserSpaceSource.h b/tools/gator/daemon/UserSpaceSource.h index 9b3666016dc5..0038dcb4c3d5 100644 --- a/tools/gator/daemon/UserSpaceSource.h +++ b/tools/gator/daemon/UserSpaceSource.h @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/c++.cpp b/tools/gator/daemon/c++.cpp index 6041e5e96469..caf6f1efdcde 100644 --- a/tools/gator/daemon/c++.cpp +++ b/tools/gator/daemon/c++.cpp @@ -1,7 +1,7 @@ /** * Minimal set of C++ functions so that libstdc++ is not required * - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/common.mk b/tools/gator/daemon/common.mk index 769a92e51a35..0d5a0a835d2c 100644 --- a/tools/gator/daemon/common.mk +++ b/tools/gator/daemon/common.mk @@ -6,7 +6,7 @@ # -std=c++0x is the planned new c++ standard # -std=c++98 is the 1998 c++ standard CPPFLAGS += -O3 -Wall -fno-exceptions -pthread -MMD -DETCDIR=\"/etc\" -Ilibsensors -CXXFLAGS += -fno-rtti -Wextra # -Weffc++ +CXXFLAGS += -fno-rtti -Wextra -Wshadow # -Weffc++ ifeq ($(WERROR),1) CPPFLAGS += -Werror endif @@ -41,7 +41,10 @@ libsensors/conf-parse.c: ; %.o: %.cpp $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< -$(TARGET): $(CXX_SRC:%.cpp=%.o) $(C_SRC:%.c=%.o) +SrcMd5.cpp: $(wildcard *.cpp *.h mxml/*.c mxml/*.h libsensors/*.c libsensors/*.h) + echo 'extern const char *const gSrcMd5 = "'`ls $^ | grep -Ev '^(.*_xml\.h|$@)$$' | LC_ALL=C sort | xargs cat | md5sum | cut -b 1-32`'";' > $@ + +$(TARGET): $(CXX_SRC:%.cpp=%.o) $(C_SRC:%.c=%.o) SrcMd5.o $(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@ # Intentionally ignore CC as a native binary is required @@ -49,4 +52,4 @@ escape: escape.c gcc $^ -o $@ clean: - rm -f *.d *.o mxml/*.d mxml/*.o libsensors/*.d libsensors/*.o $(TARGET) escape events.xml events_xml.h defaults_xml.h + rm -f *.d *.o mxml/*.d mxml/*.o libsensors/*.d libsensors/*.o $(TARGET) escape events.xml events_xml.h defaults_xml.h SrcMd5.cpp diff --git a/tools/gator/daemon/defaults.xml b/tools/gator/daemon/defaults.xml index 086eca1e804e..31b127cdcfc6 100644 --- a/tools/gator/daemon/defaults.xml +++ b/tools/gator/daemon/defaults.xml @@ -44,6 +44,11 @@ + + + + + @@ -64,21 +69,5 @@ - - - - - - - - - - - - - - - - diff --git a/tools/gator/daemon/escape.c b/tools/gator/daemon/escape.c index 2b0863aaf425..99f434848cb3 100644 --- a/tools/gator/daemon/escape.c +++ b/tools/gator/daemon/escape.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/tools/gator/daemon/events-ARM11.xml b/tools/gator/daemon/events-ARM11.xml index 57e323546314..e481267b21ae 100644 --- a/tools/gator/daemon/events-ARM11.xml +++ b/tools/gator/daemon/events-ARM11.xml @@ -16,7 +16,7 @@ - + diff --git a/tools/gator/daemon/events-CCI-400.xml b/tools/gator/daemon/events-CCI-400.xml index 20002efd1543..40d91e582c19 100644 --- a/tools/gator/daemon/events-CCI-400.xml +++ b/tools/gator/daemon/events-CCI-400.xml @@ -1,5 +1,5 @@ - + - + \ No newline at end of file diff --git a/tools/gator/daemon/events-Mali-T62x_hw.xml b/tools/gator/daemon/events-Mali-T62x_hw.xml index 6ecc53c2ada1..4bc93068f75b 100644 --- a/tools/gator/daemon/events-Mali-T62x_hw.xml +++ b/tools/gator/daemon/events-Mali-T62x_hw.xml @@ -7,12 +7,13 @@ - - - - - - + + + + + + + @@ -78,32 +79,38 @@ - - - - - - - - - + + + + + + + + + + + + - - - - - - - - - - - - - + + + - + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/gator/daemon/events-Mali-T72x_hw.xml b/tools/gator/daemon/events-Mali-T72x_hw.xml index 5587534770c8..fd9cb0f16c6e 100644 --- a/tools/gator/daemon/events-Mali-T72x_hw.xml +++ b/tools/gator/daemon/events-Mali-T72x_hw.xml @@ -7,12 +7,13 @@ - - - - - - + + + + + + + @@ -67,29 +68,34 @@ - - - - - - - - - + + + + + + + + + + + + + - - - - - - - - - - + + + + + + + + + + + diff --git a/tools/gator/daemon/events-Mali-T76x_hw.xml b/tools/gator/daemon/events-Mali-T76x_hw.xml index be74c5a42624..94d059fc09dd 100644 --- a/tools/gator/daemon/events-Mali-T76x_hw.xml +++ b/tools/gator/daemon/events-Mali-T76x_hw.xml @@ -7,12 +7,13 @@ - - - - - - + + + + + + + @@ -78,31 +79,39 @@ - - - - - - - - - + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/gator/daemon/events-Mali-T82x_hw.xml b/tools/gator/daemon/events-Mali-T82x_hw.xml new file mode 100644 index 000000000000..5caa464a3078 --- /dev/null +++ b/tools/gator/daemon/events-Mali-T82x_hw.xml @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/gator/daemon/events-Mali-T83x_hw.xml b/tools/gator/daemon/events-Mali-T83x_hw.xml new file mode 100644 index 000000000000..39f7acf31798 --- /dev/null +++ b/tools/gator/daemon/events-Mali-T83x_hw.xml @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/gator/daemon/events-Mali-T86x_hw.xml b/tools/gator/daemon/events-Mali-T86x_hw.xml new file mode 100644 index 000000000000..6653543d5caa --- /dev/null +++ b/tools/gator/daemon/events-Mali-T86x_hw.xml @@ -0,0 +1,117 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/gator/daemon/events-Mali-T88x_hw.xml b/tools/gator/daemon/events-Mali-T88x_hw.xml new file mode 100644 index 000000000000..19385d19c9de --- /dev/null +++ b/tools/gator/daemon/events-Mali-T88x_hw.xml @@ -0,0 +1,117 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/gator/daemon/events-Other.xml b/tools/gator/daemon/events-Other.xml new file mode 100644 index 000000000000..8aec282b7e11 --- /dev/null +++ b/tools/gator/daemon/events-Other.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/gator/daemon/events-ftrace.xml b/tools/gator/daemon/events-ftrace.xml index 33ab7aab2196..ae5529f2678d 100644 --- a/tools/gator/daemon/events-ftrace.xml +++ b/tools/gator/daemon/events-ftrace.xml @@ -1,7 +1,22 @@ - - + + + + + + + + + + + + diff --git a/tools/gator/daemon/main.cpp b/tools/gator/daemon/main.cpp index fbce1e15d0d0..c68a892e74fc 100644 --- a/tools/gator/daemon/main.cpp +++ b/tools/gator/daemon/main.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2014. All rights reserved. + * Copyright (C) ARM Limited 2010-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,6 +19,7 @@ #include #include +#include "AnnotateListener.h" #include "CCNDriver.h" #include "Child.h" #include "EventsXML.h" @@ -133,7 +134,7 @@ public: memset(&mDstAns, 0, sizeof(mDstAns)); memcpy(mDstAns.rviHeader, "STR_ANS ", sizeof(mDstAns.rviHeader)); if (gethostname(mDstAns.dhcpName, sizeof(mDstAns.dhcpName) - 1) != 0) { - logg->logError(__FILE__, __LINE__, "gethostname failed"); + logg->logError("gethostname failed"); handleException(); } // Subvert the defaultGateway field for the port number @@ -156,7 +157,7 @@ public: addrlen = sizeof(sockaddr); read = recvfrom(mReq, &buf, sizeof(buf), 0, (struct sockaddr *)&sockaddr, &addrlen); if (read < 0) { - logg->logError(__FILE__, __LINE__, "recvfrom failed"); + logg->logError("recvfrom failed"); handleException(); } else if ((read == 12) && (memcmp(buf, DST_REQ, sizeof(DST_REQ)) == 0)) { // Don't care if sendto fails - gatord shouldn't exit because of it and Streamline will retry @@ -180,23 +181,29 @@ private: family = AF_INET; s = socket_cloexec(AF_INET, SOCK_DGRAM, IPPROTO_UDP); if (s == -1) { - logg->logError(__FILE__, __LINE__, "socket failed"); + logg->logError("socket failed"); handleException(); } } on = 1; if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on)) != 0) { - logg->logError(__FILE__, __LINE__, "setsockopt failed"); + logg->logError("setsockopt REUSEADDR failed"); handleException(); } + // Listen on both IPv4 and IPv6 + on = 0; + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&on, sizeof(on)) != 0) { + logg->logMessage("setsockopt IPV6_V6ONLY failed"); + } + memset((void*)&sockaddr, 0, sizeof(sockaddr)); sockaddr.sin6_family = family; sockaddr.sin6_port = htons(port); sockaddr.sin6_addr = in6addr_any; if (bind(s, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) { - logg->logError(__FILE__, __LINE__, "socket failed"); + logg->logError("socket failed"); handleException(); } @@ -252,7 +259,7 @@ static bool setupFilesystem(char* module) { // if still mounted if (access("/dev/gator/buffer", F_OK) == 0) { - logg->logError(__FILE__, __LINE__, "Unable to remove the running gator.ko. Manually remove the module or use the running module by not specifying one on the commandline"); + logg->logError("Unable to remove the running gator.ko. Manually remove the module or use the running module by not specifying one on the commandline"); handleException(); } } @@ -284,7 +291,7 @@ static bool setupFilesystem(char* module) { return false; } else { // gator location specified on the command line but it was not found - logg->logError(__FILE__, __LINE__, "gator module not found at %s", location); + logg->logError("gator module not found at %s", location); handleException(); } } @@ -296,13 +303,13 @@ static bool setupFilesystem(char* module) { snprintf(command, sizeof(command), "insmod %s >/dev/null 2>&1", location); if (system(command) != 0) { logg->logMessage("Unable to load gator.ko driver with command: %s", command); - logg->logError(__FILE__, __LINE__, "Unable to load (insmod) gator.ko driver:\n >>> gator.ko must be built against the current kernel version & configuration\n >>> See dmesg for more details"); + logg->logError("Unable to load (insmod) gator.ko driver:\n >>> gator.ko must be built against the current kernel version & configuration\n >>> See dmesg for more details"); handleException(); } } if (mountGatorFS() == -1) { - logg->logError(__FILE__, __LINE__, "Unable to mount the gator filesystem needed for profiling."); + logg->logError("Unable to mount the gator filesystem needed for profiling."); handleException(); } } @@ -326,7 +333,7 @@ static int shutdownFilesystem() { return 0; // success } -static const char OPTSTRING[] = "hvudap:s:c:e:m:o:"; +static const char OPTSTRING[] = "hvVudap:s:c:e:E:m:o:"; static bool hasDebugFlag(int argc, char** argv) { int c; @@ -368,11 +375,18 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) { case 'e': gSessionData->mEventsXMLPath = optarg; break; + case 'E': + gSessionData->mEventsXMLAppend = optarg; + break; case 'm': cmdline.module = optarg; break; case 'p': cmdline.port = strtol(optarg, NULL, 10); + if ((cmdline.port == 8082) || (cmdline.port == 8083)) { + logg->logError("Gator can't use port %i, as it already uses ports 8082 and 8083 for annotations. Please select a different port.", cmdline.port); + handleException(); + } break; case 's': gSessionData->mSessionXMLPath = optarg; @@ -388,10 +402,11 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) { break; case 'h': case '?': - logg->logError(__FILE__, __LINE__, + logg->logError( "%s. All parameters are optional:\n" - "-c config_xml path and filename of the configuration.xml to use\n" - "-e events_xml path and filename of the events.xml to use\n" + "-c config_xml path and filename of the configuration XML to use\n" + "-e events_xml path and filename of the events XML to use\n" + "-E events_xml path and filename of events XML to append\n" "-h this help page\n" "-m module path and filename of gator.ko\n" "-p port_number port upon which the server listens; default is 8080\n" @@ -399,12 +414,16 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) { "-o apc_dir path and name of the output for a local capture\n" "-v version information\n" "-d enable debug messages\n" - "-a allow the user user to provide a command to run at the start of a capture" + "-a allow the user to issue a command from Streamline" , version_string); handleException(); break; case 'v': - logg->logError(__FILE__, __LINE__, version_string); + logg->logError("%s", version_string); + handleException(); + break; + case 'V': + logg->logError("%s\nSRC_MD5: %s", version_string, gSrcMd5); handleException(); break; } @@ -412,35 +431,38 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) { // Error checking if (cmdline.port != DEFAULT_PORT && gSessionData->mSessionXMLPath != NULL) { - logg->logError(__FILE__, __LINE__, "Only a port or a session xml can be specified, not both"); + logg->logError("Only a port or a session xml can be specified, not both"); handleException(); } if (gSessionData->mTargetPath != NULL && gSessionData->mSessionXMLPath == NULL) { - logg->logError(__FILE__, __LINE__, "Missing -s command line option required for a local capture."); + logg->logError("Missing -s command line option required for a local capture."); handleException(); } if (optind < argc) { - logg->logError(__FILE__, __LINE__, "Unknown argument: %s. Use '-h' for help.", argv[optind]); + logg->logError("Unknown argument: %s. Use '-h' for help.", argv[optind]); handleException(); } return cmdline; } +static AnnotateListener annotateListener; + static void handleClient() { OlySocket client(sock->acceptConnection()); int pid = fork(); if (pid < 0) { // Error - logg->logError(__FILE__, __LINE__, "Fork process failed. Please power cycle the target device if this error persists."); + logg->logError("Fork process failed. Please power cycle the target device if this error persists."); } else if (pid == 0) { // Child sock->closeServerSocket(); udpListener.close(); monitor.close(); + annotateListener.close(); child = new Child(&client, numSessions + 1); child->run(); delete child; @@ -500,21 +522,23 @@ int main(int argc, char** argv) { struct cmdline_t cmdline = parseCommandLine(argc, argv); if (cmdline.update) { - return update(argv[0]); + update(argv[0]); + cmdline.update = false; + gSessionData->mAllowCommands = true; } // Verify root permissions uid_t euid = geteuid(); if (euid) { - logg->logError(__FILE__, __LINE__, "gatord must be launched with root privileges"); + logg->logError("gatord must be launched with root privileges"); handleException(); } // Call before setting up the SIGCHLD handler, as system() spawns child processes if (!setupFilesystem(cmdline.module)) { - logg->logMessage("Unable to setup gatorfs, trying perf"); + logg->logMessage("Unable to set up gatorfs, trying perf"); if (!gSessionData->perf.setup()) { - logg->logError(__FILE__, __LINE__, + logg->logError( "Unable to locate gator.ko driver:\n" " >>> gator.ko should be co-located with gatord in the same directory\n" " >>> OR insmod gator.ko prior to launching gatord\n" @@ -547,15 +571,23 @@ int main(int argc, char** argv) { child->run(); delete child; } else { - gSessionData->annotateListener.setup(); + annotateListener.setup(); + int pipefd[2]; + if (pipe_cloexec(pipefd) != 0) { + logg->logError("Unable to set up annotate pipe"); + handleException(); + } + gSessionData->mAnnotateStart = pipefd[1]; sock = new OlyServerSocket(cmdline.port); udpListener.setup(cmdline.port); if (!monitor.init() || !monitor.add(sock->getFd()) || !monitor.add(udpListener.getReq()) || - !monitor.add(gSessionData->annotateListener.getFd()) || + !monitor.add(annotateListener.getSockFd()) || + !monitor.add(annotateListener.getUdsFd()) || + !monitor.add(pipefd[0]) || false) { - logg->logError(__FILE__, __LINE__, "Monitor setup failed"); + logg->logError("Monitor setup failed"); handleException(); } // Forever loop, can be exited via a signal or exception @@ -564,7 +596,7 @@ int main(int argc, char** argv) { logg->logMessage("Waiting on connection..."); int ready = monitor.wait(events, ARRAY_LENGTH(events), -1); if (ready < 0) { - logg->logError(__FILE__, __LINE__, "Monitor::wait failed"); + logg->logError("Monitor::wait failed"); handleException(); } for (int i = 0; i < ready; ++i) { @@ -572,8 +604,16 @@ int main(int argc, char** argv) { handleClient(); } else if (events[i].data.fd == udpListener.getReq()) { udpListener.handle(); - } else if (events[i].data.fd == gSessionData->annotateListener.getFd()) { - gSessionData->annotateListener.handle(); + } else if (events[i].data.fd == annotateListener.getSockFd()) { + annotateListener.handleSock(); + } else if (events[i].data.fd == annotateListener.getUdsFd()) { + annotateListener.handleUds(); + } else if (events[i].data.fd == pipefd[0]) { + uint64_t val; + if (read(pipefd[0], &val, sizeof(val)) != sizeof(val)) { + logg->logMessage("Reading annotate pipe failed"); + } + annotateListener.signal(); } } } From 0c2fc7bf0a5466f059675633b6b179a118757a13 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 10 May 2012 17:35:03 +0100 Subject: [PATCH 153/277] gator: Add config for building the module in-tree Signed-off-by: Jon Medhurst --- drivers/Kconfig | 2 ++ drivers/Makefile | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/Kconfig b/drivers/Kconfig index 9953a42809ec..d27feb5460f3 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -166,4 +166,6 @@ source "drivers/ipack/Kconfig" source "drivers/reset/Kconfig" +source "drivers/gator/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 130abc1dfd65..092a62e79688 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -152,3 +152,5 @@ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_IPACK_BUS) += ipack/ obj-$(CONFIG_NTB) += ntb/ + +obj-$(CONFIG_GATOR) += gator/ From 7ac3eafc7692a85e46388680ff06a90af49cbc0b Mon Sep 17 00:00:00 2001 From: Marlies Ruck Date: Thu, 16 May 2013 14:30:39 -0400 Subject: [PATCH 154/277] Staging: Fixes string split across lines in zram Fixes the following checkpatch warning in zram_drv.c: WARNING: quoted string split across lines Signed-off-by: Marlies Ruck Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 596b3dd4c8e172db7806372c9d0347a4e7d28bc5) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index a333d44d0cff..2652dfac0b32 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -302,8 +302,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, handle = zs_malloc(meta->mem_pool, clen); if (!handle) { - pr_info("Error allocating memory for compressed " - "page: %u, size=%zu\n", index, clen); + pr_info("Error allocating memory for compressed page: %u, size=%zu\n", + index, clen); ret = -ENOMEM; goto out; } From f02a1549c748f57f7dadce5d10b4a23796584a72 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:28 +0800 Subject: [PATCH 155/277] zram: simplify and optimize dev_to_zram() Simplify and optimize dev_to_zram() without walking the zram_devices array. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 80de574dca050b734d8413a98a983fba3d06240b) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_sysfs.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c index dc76a3dba1b8..e239d9452726 100644 --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -30,18 +30,9 @@ static u64 zram_stat64_read(struct zram *zram, u64 *v) return val; } -static struct zram *dev_to_zram(struct device *dev) +static inline struct zram *dev_to_zram(struct device *dev) { - int i; - struct zram *zram = NULL; - - for (i = 0; i < zram_get_num_devices(); i++) { - zram = &zram_devices[i]; - if (disk_to_dev(zram->disk) == dev) - break; - } - - return zram; + return (struct zram *)dev_to_disk(dev)->private_data; } static ssize_t disksize_show(struct device *dev, From 72f7aaa7ded9c9be6d5ec8886ced1238909a1c7a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:29 +0800 Subject: [PATCH 156/277] zram: kill unused zram_get_num_devices() Now there's no caller of zram_get_num_devices(), so kill it. And change zram_devices to static because it's only used in zram_drv.c. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0f0e3ba346c8d8d2cb409b157df79805931a1c2c) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 7 +------ drivers/staging/zram/zram_drv.h | 2 -- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 2652dfac0b32..49f34b065181 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -37,7 +37,7 @@ /* Globals */ static int zram_major; -struct zram *zram_devices; +static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; @@ -679,11 +679,6 @@ static void destroy_device(struct zram *zram) blk_cleanup_queue(zram->queue); } -unsigned int zram_get_num_devices(void) -{ - return num_devices; -} - static int __init zram_init(void) { int ret, dev_id; diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index d542eee81357..b3a315d1eb23 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -110,8 +110,6 @@ struct zram { struct zram_stats stats; }; -extern struct zram *zram_devices; -unsigned int zram_get_num_devices(void); #ifdef CONFIG_SYSFS extern struct attribute_group zram_disk_attr_group; #endif From 728260fce381aa3a2b10a35d7ab727f9761f58bd Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:30 +0800 Subject: [PATCH 157/277] zram: optimize memory operations with clear_page()/copy_page() Some architectures provides architecture-specific, optimized version of clear_page()/copy_page(), which may have better performance than memset()/memcpy(). So use clear_page()/copy_page() to optimize zram performance if possible. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 42e99bd975fdd24d2bf1a24ebb8b0b42bab8ba65) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 49f34b065181..18a89863ae53 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -128,23 +128,26 @@ static void zram_free_page(struct zram *zram, size_t index) meta->table[index].size = 0; } +static inline int is_partial_io(struct bio_vec *bvec) +{ + return bvec->bv_len != PAGE_SIZE; +} + static void handle_zero_page(struct bio_vec *bvec) { struct page *page = bvec->bv_page; void *user_mem; user_mem = kmap_atomic(page); - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + if (is_partial_io(bvec)) + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + else + clear_page(user_mem); kunmap_atomic(user_mem); flush_dcache_page(page); } -static inline int is_partial_io(struct bio_vec *bvec) -{ - return bvec->bv_len != PAGE_SIZE; -} - static int zram_decompress_page(struct zram *zram, char *mem, u32 index) { int ret = LZO_E_OK; @@ -154,13 +157,13 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned long handle = meta->table[index].handle; if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { - memset(mem, 0, PAGE_SIZE); + clear_page(mem); return 0; } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); if (meta->table[index].size == PAGE_SIZE) - memcpy(mem, cmem, PAGE_SIZE); + copy_page(mem, cmem); else ret = lzo1x_decompress_safe(cmem, meta->table[index].size, mem, &clen); @@ -309,11 +312,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); - if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) + if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { src = kmap_atomic(page); - memcpy(cmem, src, clen); - if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) + copy_page(cmem, src); kunmap_atomic(src); + } else { + memcpy(cmem, src, clen); + } zs_unmap_object(meta->mem_pool, handle); From dc18dd5cb1f638af5fc0328677b5d96328769ca3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:31 +0800 Subject: [PATCH 158/277] zram: use atomic64_xxx() to replace zram_stat64_xxx() Use atomic64_xxx() to replace open-coded zram_stat64_xxx(). Some architectures have native support of atomic64 operations, so we can get rid of the spin_lock() in zram_stat64_xxx(). On the other hand, for platforms use generic version of atomic64 implement, it may cause an extra save/restore of the interrupt flag. So it's a tradeoff. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit da5cc7d338f97886ebf35be92995460289379b73) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 37 +++++++------------------------ drivers/staging/zram/zram_drv.h | 19 +++++++++------- drivers/staging/zram/zram_sysfs.c | 21 +++++------------- 3 files changed, 24 insertions(+), 53 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 18a89863ae53..97899eac15df 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -42,25 +42,6 @@ static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; -static void zram_stat64_add(struct zram *zram, u64 *v, u64 inc) -{ - spin_lock(&zram->stat64_lock); - *v = *v + inc; - spin_unlock(&zram->stat64_lock); -} - -static void zram_stat64_sub(struct zram *zram, u64 *v, u64 dec) -{ - spin_lock(&zram->stat64_lock); - *v = *v - dec; - spin_unlock(&zram->stat64_lock); -} - -static void zram_stat64_inc(struct zram *zram, u64 *v) -{ - zram_stat64_add(zram, v, 1); -} - static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { @@ -120,8 +101,7 @@ static void zram_free_page(struct zram *zram, size_t index) if (size <= PAGE_SIZE / 2) zram->stats.good_compress--; - zram_stat64_sub(zram, &zram->stats.compr_size, - meta->table[index].size); + atomic64_sub(meta->table[index].size, &zram->stats.compr_size); zram->stats.pages_stored--; meta->table[index].handle = 0; @@ -172,7 +152,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret != LZO_E_OK)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); - zram_stat64_inc(zram, &zram->stats.failed_reads); + atomic64_inc(&zram->stats.failed_reads); return ret; } @@ -326,7 +306,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, meta->table[index].size = clen; /* Update stats */ - zram_stat64_add(zram, &zram->stats.compr_size, clen); + atomic64_add(clen, &zram->stats.compr_size); zram->stats.pages_stored++; if (clen <= PAGE_SIZE / 2) zram->stats.good_compress++; @@ -336,7 +316,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, kfree(uncmem); if (ret) - zram_stat64_inc(zram, &zram->stats.failed_writes); + atomic64_inc(&zram->stats.failed_writes); return ret; } @@ -373,10 +353,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) switch (rw) { case READ: - zram_stat64_inc(zram, &zram->stats.num_reads); + atomic64_inc(&zram->stats.num_reads); break; case WRITE: - zram_stat64_inc(zram, &zram->stats.num_writes); + atomic64_inc(&zram->stats.num_writes); break; } @@ -456,7 +436,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) goto error; if (!valid_io_request(zram, bio)) { - zram_stat64_inc(zram, &zram->stats.invalid_io); + atomic64_inc(&zram->stats.invalid_io); goto error; } @@ -595,7 +575,7 @@ static void zram_slot_free_notify(struct block_device *bdev, down_write(&zram->lock); zram_free_page(zram, index); up_write(&zram->lock); - zram_stat64_inc(zram, &zram->stats.notify_free); + atomic64_inc(&zram->stats.notify_free); } static const struct block_device_operations zram_devops = { @@ -609,7 +589,6 @@ static int create_device(struct zram *zram, int device_id) init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); - spin_lock_init(&zram->stat64_lock); zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index b3a315d1eb23..11b09fc25953 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -69,14 +69,18 @@ struct table { u8 flags; } __aligned(4); +/* + * All 64bit fields should only be manipulated by 64bit atomic accessors. + * All modifications to 32bit counter should be protected by zram->lock. + */ struct zram_stats { - u64 compr_size; /* compressed size of pages stored */ - u64 num_reads; /* failed + successful */ - u64 num_writes; /* --do-- */ - u64 failed_reads; /* should NEVER! happen */ - u64 failed_writes; /* can happen when memory is too low */ - u64 invalid_io; /* non-page-aligned I/O requests */ - u64 notify_free; /* no. of swap slot free notifications */ + atomic64_t compr_size; /* compressed size of pages stored */ + atomic64_t num_reads; /* failed + successful */ + atomic64_t num_writes; /* --do-- */ + atomic64_t failed_reads; /* should NEVER! happen */ + atomic64_t failed_writes; /* can happen when memory is too low */ + atomic64_t invalid_io; /* non-page-aligned I/O requests */ + atomic64_t notify_free; /* no. of swap slot free notifications */ u32 pages_zero; /* no. of zero filled pages */ u32 pages_stored; /* no. of pages currently stored */ u32 good_compress; /* % of pages with compression ratio<=50% */ @@ -92,7 +96,6 @@ struct zram_meta { struct zram { struct zram_meta *meta; - spinlock_t stat64_lock; /* protect 64-bit stats */ struct rw_semaphore lock; /* protect compression buffers, table, * 32bit stat counters against concurrent * notifications, reads and writes */ diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c index e239d9452726..93a2f9cafd7c 100644 --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -19,17 +19,6 @@ #include "zram_drv.h" -static u64 zram_stat64_read(struct zram *zram, u64 *v) -{ - u64 val; - - spin_lock(&zram->stat64_lock); - val = *v; - spin_unlock(&zram->stat64_lock); - - return val; -} - static inline struct zram *dev_to_zram(struct device *dev) { return (struct zram *)dev_to_disk(dev)->private_data; @@ -116,7 +105,7 @@ static ssize_t num_reads_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.num_reads)); + (u64)atomic64_read(&zram->stats.num_reads)); } static ssize_t num_writes_show(struct device *dev, @@ -125,7 +114,7 @@ static ssize_t num_writes_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.num_writes)); + (u64)atomic64_read(&zram->stats.num_writes)); } static ssize_t invalid_io_show(struct device *dev, @@ -134,7 +123,7 @@ static ssize_t invalid_io_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.invalid_io)); + (u64)atomic64_read(&zram->stats.invalid_io)); } static ssize_t notify_free_show(struct device *dev, @@ -143,7 +132,7 @@ static ssize_t notify_free_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.notify_free)); + (u64)atomic64_read(&zram->stats.notify_free)); } static ssize_t zero_pages_show(struct device *dev, @@ -169,7 +158,7 @@ static ssize_t compr_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.compr_size)); + (u64)atomic64_read(&zram->stats.compr_size)); } static ssize_t mem_used_total_show(struct device *dev, From c5544682efcda470167318ac56559f023e578b09 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 22 Jun 2013 03:21:18 +0300 Subject: [PATCH 159/277] zram: remove zram_sysfs file (v2) Move zram sysfs code to zram drv and remove zram_sysfs.c file. This gives ability to make static a number of previously exported zram functions, used from zram sysfs, e.g. internal zram zram_meta_alloc/free(). We also can drop zram_drv wrapper functions, used from zram sysfs: e.g. zram_reset_device()/__zram_reset_device() pair. v2: as suggested by Greg K-H, move MODULE description to the bottom of the file. Signed-off-by: Sergey Senozhatsky Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9b3bb7abcdf2df0f1b2657e6cbc9d06bc2b3b36f) Signed-off-by: Alex Shi --- drivers/staging/zram/Makefile | 2 +- drivers/staging/zram/zram_drv.c | 516 ++++++++++++++++++++---------- drivers/staging/zram/zram_drv.h | 10 - drivers/staging/zram/zram_sysfs.c | 209 ------------ 4 files changed, 350 insertions(+), 387 deletions(-) delete mode 100644 drivers/staging/zram/zram_sysfs.c diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile index 7f4a3019e9c4..cb0f9ced6a93 100644 --- a/drivers/staging/zram/Makefile +++ b/drivers/staging/zram/Makefile @@ -1,3 +1,3 @@ -zram-y := zram_drv.o zram_sysfs.o +zram-y := zram_drv.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 97899eac15df..753877431b5f 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -42,6 +42,104 @@ static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +static inline struct zram *dev_to_zram(struct device *dev) +{ + return (struct zram *)dev_to_disk(dev)->private_data; +} + +static ssize_t disksize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", zram->disksize); +} + +static ssize_t initstate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->init_done); +} + +static ssize_t num_reads_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.num_reads)); +} + +static ssize_t num_writes_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.num_writes)); +} + +static ssize_t invalid_io_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.invalid_io)); +} + +static ssize_t notify_free_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.notify_free)); +} + +static ssize_t zero_pages_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->stats.pages_zero); +} + +static ssize_t orig_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)(zram->stats.pages_stored) << PAGE_SHIFT); +} + +static ssize_t compr_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.compr_size)); +} + +static ssize_t mem_used_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val = 0; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta = zram->meta; + + down_read(&zram->init_lock); + if (zram->init_done) + val = zs_get_total_size_bytes(meta->mem_pool); + up_read(&zram->init_lock); + + return sprintf(buf, "%llu\n", val); +} + static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { @@ -60,6 +158,97 @@ static void zram_clear_flag(struct zram_meta *meta, u32 index, meta->table[index].flags &= ~BIT(flag); } +static inline int is_partial_io(struct bio_vec *bvec) +{ + return bvec->bv_len != PAGE_SIZE; +} + +/* + * Check if request is within bounds and aligned on zram logical blocks. + */ +static inline int valid_io_request(struct zram *zram, struct bio *bio) +{ + u64 start, end, bound; + + /* unaligned request */ + if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) + return 0; + if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) + return 0; + + start = bio->bi_sector; + end = start + (bio->bi_size >> SECTOR_SHIFT); + bound = zram->disksize >> SECTOR_SHIFT; + /* out of range range */ + if (unlikely(start >= bound || end >= bound || start > end)) + return 0; + + /* I/O request is valid */ + return 1; +} + +static void zram_meta_free(struct zram_meta *meta) +{ + zs_destroy_pool(meta->mem_pool); + kfree(meta->compress_workmem); + free_pages((unsigned long)meta->compress_buffer, 1); + vfree(meta->table); + kfree(meta); +} + +static struct zram_meta *zram_meta_alloc(u64 disksize) +{ + size_t num_pages; + struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto out; + + meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + if (!meta->compress_workmem) + goto free_meta; + + meta->compress_buffer = + (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!meta->compress_buffer) { + pr_err("Error allocating compressor buffer space\n"); + goto free_workmem; + } + + num_pages = disksize >> PAGE_SHIFT; + meta->table = vzalloc(num_pages * sizeof(*meta->table)); + if (!meta->table) { + pr_err("Error allocating zram address table\n"); + goto free_buffer; + } + + meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); + if (!meta->mem_pool) { + pr_err("Error creating memory pool\n"); + goto free_table; + } + + return meta; + +free_table: + vfree(meta->table); +free_buffer: + free_pages((unsigned long)meta->compress_buffer, 1); +free_workmem: + kfree(meta->compress_workmem); +free_meta: + kfree(meta); + meta = NULL; +out: + return meta; +} + +static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +{ + if (*offset + bvec->bv_len >= PAGE_SIZE) + (*index)++; + *offset = (*offset + bvec->bv_len) % PAGE_SIZE; +} + static int page_zero_filled(void *ptr) { unsigned int pos; @@ -75,6 +264,21 @@ static int page_zero_filled(void *ptr) return 1; } +static void handle_zero_page(struct bio_vec *bvec) +{ + struct page *page = bvec->bv_page; + void *user_mem; + + user_mem = kmap_atomic(page); + if (is_partial_io(bvec)) + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + else + clear_page(user_mem); + kunmap_atomic(user_mem); + + flush_dcache_page(page); +} + static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; @@ -108,26 +312,6 @@ static void zram_free_page(struct zram *zram, size_t index) meta->table[index].size = 0; } -static inline int is_partial_io(struct bio_vec *bvec) -{ - return bvec->bv_len != PAGE_SIZE; -} - -static void handle_zero_page(struct bio_vec *bvec) -{ - struct page *page = bvec->bv_page; - void *user_mem; - - user_mem = kmap_atomic(page); - if (is_partial_io(bvec)) - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); - else - clear_page(user_mem); - kunmap_atomic(user_mem); - - flush_dcache_page(page); -} - static int zram_decompress_page(struct zram *zram, char *mem, u32 index) { int ret = LZO_E_OK; @@ -338,11 +522,117 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } -static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +static void zram_reset_device(struct zram *zram) { - if (*offset + bvec->bv_len >= PAGE_SIZE) - (*index)++; - *offset = (*offset + bvec->bv_len) % PAGE_SIZE; + size_t index; + struct zram_meta *meta; + + if (!zram->init_done) + return; + + meta = zram->meta; + zram->init_done = 0; + + /* Free all pages that are still in this zram device */ + for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { + unsigned long handle = meta->table[index].handle; + if (!handle) + continue; + + zs_free(meta->mem_pool, handle); + } + + zram_meta_free(zram->meta); + zram->meta = NULL; + /* Reset stats */ + memset(&zram->stats, 0, sizeof(zram->stats)); + + zram->disksize = 0; + set_capacity(zram->disk, 0); +} + +static void zram_init_device(struct zram *zram, struct zram_meta *meta) +{ + if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { + pr_info( + "There is little point creating a zram of greater than " + "twice the size of memory since we expect a 2:1 compression " + "ratio. Note that zram uses about 0.1%% of the size of " + "the disk when not in use so a huge zram is " + "wasteful.\n" + "\tMemory Size: %lu kB\n" + "\tSize you selected: %llu kB\n" + "Continuing anyway ...\n", + (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 + ); + } + + /* zram devices sort of resembles non-rotational disks */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); + + zram->meta = meta; + zram->init_done = 1; + + pr_debug("Initialization done!\n"); +} + +static ssize_t disksize_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + u64 disksize; + struct zram_meta *meta; + struct zram *zram = dev_to_zram(dev); + + disksize = memparse(buf, NULL); + if (!disksize) + return -EINVAL; + + disksize = PAGE_ALIGN(disksize); + meta = zram_meta_alloc(disksize); + down_write(&zram->init_lock); + if (zram->init_done) { + up_write(&zram->init_lock); + zram_meta_free(meta); + pr_info("Cannot change disksize for initialized device\n"); + return -EBUSY; + } + + zram->disksize = disksize; + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + zram_init_device(zram, meta); + up_write(&zram->init_lock); + + return len; +} + +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int ret; + unsigned short do_reset; + struct zram *zram; + struct block_device *bdev; + + zram = dev_to_zram(dev); + bdev = bdget_disk(zram->disk, 0); + + /* Do not reset an active device! */ + if (bdev->bd_holders) + return -EBUSY; + + ret = kstrtou16(buf, 10, &do_reset); + if (ret) + return ret; + + if (!do_reset) + return -EINVAL; + + /* Make sure all pending I/O is finished */ + if (bdev) + fsync_bdev(bdev); + + zram_reset_device(zram); + return len; } static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) @@ -400,30 +690,6 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) bio_io_error(bio); } -/* - * Check if request is within bounds and aligned on zram logical blocks. - */ -static inline int valid_io_request(struct zram *zram, struct bio *bio) -{ - u64 start, end, bound; - - /* unaligned request */ - if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) - return 0; - if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) - return 0; - - start = bio->bi_sector; - end = start + (bio->bi_size >> SECTOR_SHIFT); - bound = zram->disksize >> SECTOR_SHIFT; - /* out of range range */ - if (unlikely(start >= bound || end > bound || start > end)) - return 0; - - /* I/O request is valid */ - return 1; -} - /* * Handler function for all zram I/O requests. */ @@ -450,122 +716,6 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) bio_io_error(bio); } -static void __zram_reset_device(struct zram *zram) -{ - size_t index; - struct zram_meta *meta; - - if (!zram->init_done) - return; - - meta = zram->meta; - zram->init_done = 0; - - /* Free all pages that are still in this zram device */ - for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { - unsigned long handle = meta->table[index].handle; - if (!handle) - continue; - - zs_free(meta->mem_pool, handle); - } - - zram_meta_free(zram->meta); - zram->meta = NULL; - /* Reset stats */ - memset(&zram->stats, 0, sizeof(zram->stats)); - - zram->disksize = 0; - set_capacity(zram->disk, 0); -} - -void zram_reset_device(struct zram *zram) -{ - down_write(&zram->init_lock); - __zram_reset_device(zram); - up_write(&zram->init_lock); -} - -void zram_meta_free(struct zram_meta *meta) -{ - zs_destroy_pool(meta->mem_pool); - kfree(meta->compress_workmem); - free_pages((unsigned long)meta->compress_buffer, 1); - vfree(meta->table); - kfree(meta); -} - -struct zram_meta *zram_meta_alloc(u64 disksize) -{ - size_t num_pages; - struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); - if (!meta) - goto out; - - meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!meta->compress_workmem) - goto free_meta; - - meta->compress_buffer = - (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); - if (!meta->compress_buffer) { - pr_err("Error allocating compressor buffer space\n"); - goto free_workmem; - } - - num_pages = disksize >> PAGE_SHIFT; - meta->table = vzalloc(num_pages * sizeof(*meta->table)); - if (!meta->table) { - pr_err("Error allocating zram address table\n"); - goto free_buffer; - } - - meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); - if (!meta->mem_pool) { - pr_err("Error creating memory pool\n"); - goto free_table; - } - - return meta; - -free_table: - vfree(meta->table); -free_buffer: - free_pages((unsigned long)meta->compress_buffer, 1); -free_workmem: - kfree(meta->compress_workmem); -free_meta: - kfree(meta); - meta = NULL; -out: - return meta; -} - -void zram_init_device(struct zram *zram, struct zram_meta *meta) -{ - if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { - pr_info( - "There is little point creating a zram of greater than " - "twice the size of memory since we expect a 2:1 compression " - "ratio. Note that zram uses about 0.1%% of the size of " - "the disk when not in use so a huge zram is " - "wasteful.\n" - "\tMemory Size: %lu kB\n" - "\tSize you selected: %llu kB\n" - "Continuing anyway ...\n", - (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 - ); - } - - /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - - zram->meta = meta; - zram->init_done = 1; - - pr_debug("Initialization done!\n"); -} - static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { @@ -583,6 +733,38 @@ static const struct block_device_operations zram_devops = { .owner = THIS_MODULE }; +static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, + disksize_show, disksize_store); +static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); +static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); +static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); +static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); +static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); +static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); +static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); +static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); +static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); +static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); + +static struct attribute *zram_disk_attrs[] = { + &dev_attr_disksize.attr, + &dev_attr_initstate.attr, + &dev_attr_reset.attr, + &dev_attr_num_reads.attr, + &dev_attr_num_writes.attr, + &dev_attr_invalid_io.attr, + &dev_attr_notify_free.attr, + &dev_attr_zero_pages.attr, + &dev_attr_orig_data_size.attr, + &dev_attr_compr_data_size.attr, + &dev_attr_mem_used_total.attr, + NULL, +}; + +static struct attribute_group zram_disk_attr_group = { + .attrs = zram_disk_attrs, +}; + static int create_device(struct zram *zram, int device_id) { int ret = -ENOMEM; @@ -728,12 +910,12 @@ static void __exit zram_exit(void) pr_debug("Cleanup done!\n"); } -module_param(num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); - module_init(zram_init); module_exit(zram_exit); +module_param(num_devices, uint, 0); +MODULE_PARM_DESC(num_devices, "Number of zram devices"); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Nitin Gupta "); MODULE_DESCRIPTION("Compressed RAM Block Device"); diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index 11b09fc25953..9e57bfb29b4f 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -112,14 +112,4 @@ struct zram { struct zram_stats stats; }; - -#ifdef CONFIG_SYSFS -extern struct attribute_group zram_disk_attr_group; -#endif - -extern void zram_reset_device(struct zram *zram); -extern struct zram_meta *zram_meta_alloc(u64 disksize); -extern void zram_meta_free(struct zram_meta *meta); -extern void zram_init_device(struct zram *zram, struct zram_meta *meta); - #endif diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c deleted file mode 100644 index 93a2f9cafd7c..000000000000 --- a/drivers/staging/zram/zram_sysfs.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Compressed RAM block device - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - * - * Project home: http://compcache.googlecode.com/ - */ - -#include -#include -#include -#include - -#include "zram_drv.h" - -static inline struct zram *dev_to_zram(struct device *dev) -{ - return (struct zram *)dev_to_disk(dev)->private_data; -} - -static ssize_t disksize_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", zram->disksize); -} - -static ssize_t disksize_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - u64 disksize; - struct zram_meta *meta; - struct zram *zram = dev_to_zram(dev); - - disksize = memparse(buf, NULL); - if (!disksize) - return -EINVAL; - - disksize = PAGE_ALIGN(disksize); - meta = zram_meta_alloc(disksize); - down_write(&zram->init_lock); - if (zram->init_done) { - up_write(&zram->init_lock); - zram_meta_free(meta); - pr_info("Cannot change disksize for initialized device\n"); - return -EBUSY; - } - - zram->disksize = disksize; - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - zram_init_device(zram, meta); - up_write(&zram->init_lock); - - return len; -} - -static ssize_t initstate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->init_done); -} - -static ssize_t reset_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - int ret; - unsigned short do_reset; - struct zram *zram; - struct block_device *bdev; - - zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - - /* Do not reset an active device! */ - if (bdev->bd_holders) - return -EBUSY; - - ret = kstrtou16(buf, 10, &do_reset); - if (ret) - return ret; - - if (!do_reset) - return -EINVAL; - - /* Make sure all pending I/O is finished */ - if (bdev) - fsync_bdev(bdev); - - zram_reset_device(zram); - return len; -} - -static ssize_t num_reads_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_reads)); -} - -static ssize_t num_writes_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_writes)); -} - -static ssize_t invalid_io_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.invalid_io)); -} - -static ssize_t notify_free_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.notify_free)); -} - -static ssize_t zero_pages_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->stats.pages_zero); -} - -static ssize_t orig_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); -} - -static ssize_t compr_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.compr_size)); -} - -static ssize_t mem_used_total_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val = 0; - struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta = zram->meta; - - down_read(&zram->init_lock); - if (zram->init_done) - val = zs_get_total_size_bytes(meta->mem_pool); - up_read(&zram->init_lock); - - return sprintf(buf, "%llu\n", val); -} - -static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, - disksize_show, disksize_store); -static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); -static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); -static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); -static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); -static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); -static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); -static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); -static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); -static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); -static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); - -static struct attribute *zram_disk_attrs[] = { - &dev_attr_disksize.attr, - &dev_attr_initstate.attr, - &dev_attr_reset.attr, - &dev_attr_num_reads.attr, - &dev_attr_num_writes.attr, - &dev_attr_invalid_io.attr, - &dev_attr_notify_free.attr, - &dev_attr_zero_pages.attr, - &dev_attr_orig_data_size.attr, - &dev_attr_compr_data_size.attr, - &dev_attr_mem_used_total.attr, - NULL, -}; - -struct attribute_group zram_disk_attr_group = { - .attrs = zram_disk_attrs, -}; From 687f091b39ef238b975b6ad4e4e282ee9aade598 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 26 Jun 2013 15:28:39 +0300 Subject: [PATCH 160/277] staging: zram: protect zram_reset_device() call Commit 9b3bb7abcdf2df0f1b2657e6cbc9d06bc2b3b36f (remove zram_sysfs file (v2)) accidentally made zram_reset_device() racy. Protect zram_reset_device() call with zram->lock. Signed-off-by: Sergey Senozhatsky Acked-by: Jerome Marchand Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 644d478793c6594277f8ae76954da4ace7ac6f96) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 753877431b5f..c549e3940bcf 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -527,8 +527,11 @@ static void zram_reset_device(struct zram *zram) size_t index; struct zram_meta *meta; - if (!zram->init_done) + down_write(&zram->init_lock); + if (!zram->init_done) { + up_write(&zram->init_lock); return; + } meta = zram->meta; zram->init_done = 0; @@ -549,6 +552,7 @@ static void zram_reset_device(struct zram *zram) zram->disksize = 0; set_capacity(zram->disk, 0); + up_write(&zram->init_lock); } static void zram_init_device(struct zram *zram, struct zram_meta *meta) From 068e927e51265b16110951101506d112ef38fb36 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 12 Jul 2013 14:20:52 -0400 Subject: [PATCH 161/277] staging: zram: Add auto loading of module if user opens /dev/zram. Greg spotted that said driver is not subscribing to the automagic mechanism of auto-loading if a user tries to open /dev/zram. This fixes it. CC: Minchan Kim Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c70bda992c12e593e411c02a52e4bd6985407539) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index c549e3940bcf..77f40a7a6726 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -923,3 +923,4 @@ MODULE_PARM_DESC(num_devices, "Number of zram devices"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Nitin Gupta "); MODULE_DESCRIPTION("Compressed RAM Block Device"); +MODULE_ALIAS("devname:zram"); From 1da6589d327fdfbb2c91551ea9047e598b4115f5 Mon Sep 17 00:00:00 2001 From: Sunghan Suh Date: Wed, 3 Jul 2013 20:10:05 +0900 Subject: [PATCH 162/277] zram: prevent data loss in error cases of function zram_bvec_write() In function zram_bvec_write(), previous data at the index is already freed by function zram_free_page(). When failed to compress or zs_malloc, there is no way to restore old data. Therefore, free previous data when it's about to update. Also, no need to check whether table is not empty outside of function zram_free_page(), because the function properly checks inside. Signed-off-by: Sunghan Suh Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f40ac2ae1b506484dd9261a24bbf3e86b2206ff8) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 77f40a7a6726..84df3999d6af 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -418,14 +418,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - /* - * System overwrites unused sectors. Free memory associated - * with this sector now. - */ - if (meta->table[index].handle || - zram_test_flag(meta, index, ZRAM_ZERO)) - zram_free_page(zram, index); - user_mem = kmap_atomic(page); if (is_partial_io(bvec)) { @@ -439,6 +431,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); + /* Free memory associated with this sector now. */ + zram_free_page(zram, index); + zram->stats.pages_zero++; zram_set_flag(meta, index, ZRAM_ZERO); ret = 0; @@ -486,6 +481,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, zs_unmap_object(meta->mem_pool, handle); + /* + * Free memory associated with this sector + * before overwriting unused sectors. + */ + zram_free_page(zram, index); + meta->table[index].handle = handle; meta->table[index].size = clen; From b237ffc2780c1c6d082930835e383143a51ea405 Mon Sep 17 00:00:00 2001 From: Kumar Gaurav Date: Thu, 8 Aug 2013 23:53:24 +0530 Subject: [PATCH 163/277] Staging: zram: zram_drv.c: Fixed Error of trailing whitespace Fixed by removing trailing whitespace Signed-off-by: Kumar Gaurav Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a539c72a195c081d950475c2945cb82d80be9b66) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 84df3999d6af..35d536a11395 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -169,7 +169,7 @@ static inline int is_partial_io(struct bio_vec *bvec) static inline int valid_io_request(struct zram *zram, struct bio *bio) { u64 start, end, bound; - + /* unaligned request */ if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) return 0; From 5fc58bd448ae1b2bafd83fcb5e2c0d65fcee2c37 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 12 Aug 2013 15:13:55 +0900 Subject: [PATCH 164/277] zram: fix invalid memory access [1] tried to fix invalid memory access on zram->disk but it didn't fix properly because get_disk failed during module exit path. Actually, we don't need to reset zram->disk's capacity to zero in module exit path so that this patch introduces new argument "reset_capacity" on zram_reset_divice and it only reset it when reset_store is called. [1] 6030ea9b, zram: avoid invalid memory access in zram_exit() Cc: Nitin Gupta Cc: Jiang Liu Cc: stable@vger.kernel.org Signed-off-by: Minchan Kim Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2b86ab9cc29fcd435cde9378c3b9ffe8b5c76128) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 35d536a11395..255d512763f2 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -523,7 +523,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } -static void zram_reset_device(struct zram *zram) +static void zram_reset_device(struct zram *zram, bool reset_capacity) { size_t index; struct zram_meta *meta; @@ -552,7 +552,8 @@ static void zram_reset_device(struct zram *zram) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - set_capacity(zram->disk, 0); + if (reset_capacity) + set_capacity(zram->disk, 0); up_write(&zram->init_lock); } @@ -636,7 +637,7 @@ static ssize_t reset_store(struct device *dev, if (bdev) fsync_bdev(bdev); - zram_reset_device(zram); + zram_reset_device(zram, true); return len; } @@ -903,10 +904,12 @@ static void __exit zram_exit(void) for (i = 0; i < num_devices; i++) { zram = &zram_devices[i]; - get_disk(zram->disk); destroy_device(zram); - zram_reset_device(zram); - put_disk(zram->disk); + /* + * Shouldn't access zram->disk after destroy_device + * because destroy_device already released zram->disk. + */ + zram_reset_device(zram, false); } unregister_blkdev(zram_major, "zram"); From 92fe27ba3d29df5971bec2f2b55ff350194772a4 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 12 Aug 2013 15:13:56 +0900 Subject: [PATCH 165/277] zram: don't grab mutex in zram_slot_free_noity [1] introduced down_write in zram_slot_free_notify to prevent race between zram_slot_free_notify and zram_bvec_[read|write]. The race could happen if somebody who has right permission to open swap device is reading swap device while it is used by swap in parallel. However, zram_slot_free_notify is called with holding spin_lock of swap layer so we shouldn't avoid holing mutex. Otherwise, lockdep warns it. This patch adds new list to handle free slot and workqueue so zram_slot_free_notify just registers slot index to be freed and registers the request to workqueue. If workqueue is expired, it holds mutex_lock so there is no problem any more. If any I/O is issued, zram handles pending slot-free request caused by zram_slot_free_notify right before handling issued request because workqueue wouldn't be expired yet so zram I/O request handling function can miss it. Lastly, when zram is reset, flush_work could handle all of pending free request so we shouldn't have memory leak. NOTE: If zram_slot_free_notify's kmalloc with GFP_ATOMIC would be failed, the slot will be freed when next write I/O write the slot. [1] [57ab0485, zram: use zram->lock to protect zram_free_page() in swap free notify path] * from v2 * refactoring * from v1 * totally redesign Cc: Nitin Gupta Cc: Jiang Liu Cc: stable@vger.kernel.org Signed-off-by: Minchan Kim Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a0c516cbfc7452c8cbd564525fef66d9f20b46d1) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 60 +++++++++++++++++++++++++++++++-- drivers/staging/zram/zram_drv.h | 10 ++++++ 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 255d512763f2..3d08ff11e700 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -440,6 +440,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } + /* + * zram_slot_free_notify could miss free so that let's + * double check. + */ + if (unlikely(meta->table[index].handle || + zram_test_flag(meta, index, ZRAM_ZERO))) + zram_free_page(zram, index); + ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, meta->compress_workmem); @@ -505,6 +513,20 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } +static void handle_pending_slot_free(struct zram *zram) +{ + struct zram_slot_free *free_rq; + + spin_lock(&zram->slot_free_lock); + while (zram->slot_free_rq) { + free_rq = zram->slot_free_rq; + zram->slot_free_rq = free_rq->next; + zram_free_page(zram, free_rq->index); + kfree(free_rq); + } + spin_unlock(&zram->slot_free_lock); +} + static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio, int rw) { @@ -512,10 +534,12 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (rw == READ) { down_read(&zram->lock); + handle_pending_slot_free(zram); ret = zram_bvec_read(zram, bvec, index, offset, bio); up_read(&zram->lock); } else { down_write(&zram->lock); + handle_pending_slot_free(zram); ret = zram_bvec_write(zram, bvec, index, offset); up_write(&zram->lock); } @@ -528,6 +552,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) size_t index; struct zram_meta *meta; + flush_work(&zram->free_work); + down_write(&zram->init_lock); if (!zram->init_done) { up_write(&zram->init_lock); @@ -722,16 +748,40 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) bio_io_error(bio); } +static void zram_slot_free(struct work_struct *work) +{ + struct zram *zram; + + zram = container_of(work, struct zram, free_work); + down_write(&zram->lock); + handle_pending_slot_free(zram); + up_write(&zram->lock); +} + +static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) +{ + spin_lock(&zram->slot_free_lock); + free_rq->next = zram->slot_free_rq; + zram->slot_free_rq = free_rq; + spin_unlock(&zram->slot_free_lock); +} + static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { struct zram *zram; + struct zram_slot_free *free_rq; zram = bdev->bd_disk->private_data; - down_write(&zram->lock); - zram_free_page(zram, index); - up_write(&zram->lock); atomic64_inc(&zram->stats.notify_free); + + free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); + if (!free_rq) + return; + + free_rq->index = index; + add_slot_free(zram, free_rq); + schedule_work(&zram->free_work); } static const struct block_device_operations zram_devops = { @@ -778,6 +828,10 @@ static int create_device(struct zram *zram, int device_id) init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); + INIT_WORK(&zram->free_work, zram_slot_free); + spin_lock_init(&zram->slot_free_lock); + zram->slot_free_rq = NULL; + zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { pr_err("Error allocating disk queue for device %d\n", diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index 9e57bfb29b4f..97a3acf6ab76 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -94,11 +94,20 @@ struct zram_meta { struct zs_pool *mem_pool; }; +struct zram_slot_free { + unsigned long index; + struct zram_slot_free *next; +}; + struct zram { struct zram_meta *meta; struct rw_semaphore lock; /* protect compression buffers, table, * 32bit stat counters against concurrent * notifications, reads and writes */ + + struct work_struct free_work; /* handle pending free request */ + struct zram_slot_free *slot_free_rq; /* list head of free request */ + struct request_queue *queue; struct gendisk *disk; int init_done; @@ -109,6 +118,7 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ + spinlock_t slot_free_lock; struct zram_stats stats; }; From e2671233fa7df020b7ea720db33f1237c418f3ab Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Sep 2013 15:41:31 -0700 Subject: [PATCH 166/277] Revert "staging: zram: Add auto loading of module if user opens /dev/zram." This reverts commit c70bda992c12e593e411c02a52e4bd6985407539. It's incorrect, Kay writes: Please just remove it. "devname" is meant to be used for single-instance devices with a static dev_t, never for things like zramX. It will not do anything useful here, it does nothing really without a statically assigned dev_t, and it should not be used for devices of this kind anyway. Reported-by: Tom Gundersen Reported-by: Kay Sievers Cc: Minchan Kim Cc: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f0f65a95de2840db3fa61c953dca267e7b773168) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 3d08ff11e700..7d8ff31f67f2 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -981,4 +981,3 @@ MODULE_PARM_DESC(num_devices, "Number of zram devices"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Nitin Gupta "); MODULE_DESCRIPTION("Compressed RAM Block Device"); -MODULE_ALIAS("devname:zram"); From 922959916229ecc5f143b0fe66a9a8bbf4b55169 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Wed, 30 Oct 2013 18:43:32 +0530 Subject: [PATCH 167/277] Staging: zram: Fix variable dereferenced before check This patch fixes the following Smatch warning in zram_drv.c- drivers/staging/zram/zram_drv.c:899 destroy_device() warn: variable dereferenced before check 'zram->disk' (see line 896) Acked-by: Minchan Kim Acked-by: Jerome Marchand Signed-off-by: Rashika Kheria Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 59d3fe540454dd8fc48d4eda44e200f9c98bef10) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 7d8ff31f67f2..a125cfae6942 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -896,13 +896,10 @@ static void destroy_device(struct zram *zram) sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, &zram_disk_attr_group); - if (zram->disk) { - del_gendisk(zram->disk); - put_disk(zram->disk); - } + del_gendisk(zram->disk); + put_disk(zram->disk); - if (zram->queue) - blk_cleanup_queue(zram->queue); + blk_cleanup_queue(zram->queue); } static int __init zram_init(void) From 3c073fe1e7f533e087a8e2faaff31f8b02e6aed9 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Wed, 30 Oct 2013 18:36:32 +0530 Subject: [PATCH 168/277] Staging: zram: Fix access of NULL pointer This patch fixes the bug in reset_store caused by accessing NULL pointer. The bdev gets its value from bdget_disk() which could fail when memory pressure is severe and hence can return NULL because allocation of inode in bdget could fail. Hence, this patch introduces a check for bdev to prevent reference to a NULL pointer in the later part of the code. It also removes unnecessary check of bdev for fsync_bdev(). Cc: stable Acked-by: Jerome Marchand Signed-off-by: Rashika Kheria Acked-by: Minchan Kim Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 46a51c80216cb891f271ad021f59009f34677499) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index a125cfae6942..206f59d9a7a8 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -648,6 +648,9 @@ static ssize_t reset_store(struct device *dev, zram = dev_to_zram(dev); bdev = bdget_disk(zram->disk, 0); + if (!bdev) + return -ENOMEM; + /* Do not reset an active device! */ if (bdev->bd_holders) return -EBUSY; @@ -660,8 +663,7 @@ static ssize_t reset_store(struct device *dev, return -EINVAL; /* Make sure all pending I/O is finished */ - if (bdev) - fsync_bdev(bdev); + fsync_bdev(bdev); zram_reset_device(zram, true); return len; From 2b299eb831324558d68be874fa1c9d65a8cfe5f3 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Sun, 10 Nov 2013 22:13:53 +0530 Subject: [PATCH 169/277] Staging: zram: Fix memory leak by refcount mismatch As suggested by Minchan Kim and Jerome Marchand "The code in reset_store get the block device (bdget_disk()) but it does not put it (bdput()) when it's done using it. The usage count is therefore incremented but never decremented." This patch also puts bdput() for all error cases. Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: stable@vger.kernel.org Signed-off-by: Rashika Kheria Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1b672224d128ec2570eb37572ff803cfe452b4f7) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 206f59d9a7a8..689ebf105acd 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -652,21 +652,30 @@ static ssize_t reset_store(struct device *dev, return -ENOMEM; /* Do not reset an active device! */ - if (bdev->bd_holders) - return -EBUSY; + if (bdev->bd_holders) { + ret = -EBUSY; + goto out; + } ret = kstrtou16(buf, 10, &do_reset); if (ret) - return ret; + goto out; - if (!do_reset) - return -EINVAL; + if (!do_reset) { + ret = -EINVAL; + goto out; + } /* Make sure all pending I/O is finished */ fsync_bdev(bdev); + bdput(bdev); zram_reset_device(zram, true); return len; + +out: + bdput(bdev); + return ret; } static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) From 03dc2ac5b10ee9ab68090a486d54a7d53492c86d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:50 -0800 Subject: [PATCH 170/277] zsmalloc: move it under mm This patch moves zsmalloc under mm directory. Before that, description will explain why we have needed custom allocator. Zsmalloc is a new slab-based memory allocator for storing compressed pages. It is designed for low fragmentation and high allocation success rate on large object, but <= PAGE_SIZE allocations. zsmalloc differs from the kernel slab allocator in two primary ways to achieve these design goals. zsmalloc never requires high order page allocations to back slabs, or "size classes" in zsmalloc terms. Instead it allows multiple single-order pages to be stitched together into a "zspage" which backs the slab. This allows for higher allocation success rate under memory pressure. Also, zsmalloc allows objects to span page boundaries within the zspage. This allows for lower fragmentation than could be had with the kernel slab allocator for objects between PAGE_SIZE/2 and PAGE_SIZE. With the kernel slab allocator, if a page compresses to 60% of it original size, the memory savings gained through compression is lost in fragmentation because another object of the same size can't be stored in the leftover space. This ability to span pages results in zsmalloc allocations not being directly addressable by the user. The user is given an non-dereferencable handle in response to an allocation request. That handle must be mapped, using zs_map_object(), which returns a pointer to the mapped region that can be used. The mapping is necessary since the object data may reside in two different noncontigious pages. The zsmalloc fulfills the allocation needs for zram perfectly [sjenning@linux.vnet.ibm.com: borrow Seth's quote] Signed-off-by: Minchan Kim Acked-by: Nitin Gupta Reviewed-by: Konrad Rzeszutek Wilk Cc: Bob Liu Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Jens Axboe Cc: Luigi Semenzato Cc: Mel Gorman Cc: Pekka Enberg Cc: Rik van Riel Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit bcf1647d0899666f0fb90d176abf63bae22abb7c) Signed-off-by: Alex Shi Conflicts: drivers/staging/zsmalloc/Kconfig mm/Kconfig mm/Makefile Conflicts solutions: only move zsmalloc to mm/, skip unrelated cma/zbud/zswap --- drivers/staging/Kconfig | 2 -- drivers/staging/Makefile | 1 - drivers/staging/zram/zram_drv.h | 3 +-- drivers/staging/zsmalloc/Makefile | 3 --- .../zsmalloc => include/linux}/zsmalloc.h | 0 mm/Kconfig | 25 +++++++++++++++++++ mm/Makefile | 1 + .../zsmalloc/zsmalloc-main.c => mm/zsmalloc.c | 3 +-- 8 files changed, 28 insertions(+), 10 deletions(-) delete mode 100644 drivers/staging/zsmalloc/Makefile rename {drivers/staging/zsmalloc => include/linux}/zsmalloc.h (100%) rename drivers/staging/zsmalloc/zsmalloc-main.c => mm/zsmalloc.c (99%) diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index aefe820a8005..60585217481f 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -72,8 +72,6 @@ source "drivers/staging/sep/Kconfig" source "drivers/staging/iio/Kconfig" -source "drivers/staging/zsmalloc/Kconfig" - source "drivers/staging/zram/Kconfig" source "drivers/staging/wlags49_h2/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 415772ea306d..29aaeaa283eb 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -31,7 +31,6 @@ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_ZRAM) += zram/ -obj-$(CONFIG_ZSMALLOC) += zsmalloc/ obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/ obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/ obj-$(CONFIG_FB_SM7XX) += sm7xxfb/ diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index 97a3acf6ab76..d8f6596513c3 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -17,8 +17,7 @@ #include #include - -#include "../zsmalloc/zsmalloc.h" +#include /* * Some arbitrary value. This is just to catch diff --git a/drivers/staging/zsmalloc/Makefile b/drivers/staging/zsmalloc/Makefile deleted file mode 100644 index b134848a590d..000000000000 --- a/drivers/staging/zsmalloc/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -zsmalloc-y := zsmalloc-main.o - -obj-$(CONFIG_ZSMALLOC) += zsmalloc.o diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/include/linux/zsmalloc.h similarity index 100% rename from drivers/staging/zsmalloc/zsmalloc.h rename to include/linux/zsmalloc.h diff --git a/mm/Kconfig b/mm/Kconfig index e742d06285b7..86919079b64c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -477,3 +477,28 @@ config FRONTSWAP and swap data is stored as normal on the matching swap device. If unsure, say Y to enable frontswap. + +config ZSMALLOC + bool "Memory allocator for compressed pages" + depends on MMU + default n + help + zsmalloc is a slab-based memory allocator designed to store + compressed RAM pages. zsmalloc uses virtual memory mapping + in order to reduce fragmentation. However, this results in a + non-standard allocator interface where a handle, not a pointer, is + returned by an alloc(). This handle must be mapped in order to + access the allocated space. + +config PGTABLE_MAPPING + bool "Use page table mapping to access object in zsmalloc" + depends on ZSMALLOC + help + By default, zsmalloc uses a copy-based object mapping method to + access allocations that span two pages. However, if a particular + architecture (ex, ARM) performs VM mapping faster than copying, + then you should select this. This causes zsmalloc to use page table + mapping rather than copying for object mapping. + + You can check speed with zsmalloc benchmark[1]. + [1] https://github.com/spartacus06/zsmalloc diff --git a/mm/Makefile b/mm/Makefile index 72c5acb9345f..b5ae0b0cc26c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -58,3 +58,4 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o +obj-$(CONFIG_ZSMALLOC) += zsmalloc.o diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/mm/zsmalloc.c similarity index 99% rename from drivers/staging/zsmalloc/zsmalloc-main.c rename to mm/zsmalloc.c index 288f58252a18..6ad98bb06411 100644 --- a/drivers/staging/zsmalloc/zsmalloc-main.c +++ b/mm/zsmalloc.c @@ -78,8 +78,7 @@ #include #include #include - -#include "zsmalloc.h" +#include /* * This must be power of 2 and greater than of equal to sizeof(link_free). From 68955a0e9bb5f971229d3cabed259b31b39bda89 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:52 -0800 Subject: [PATCH 171/277] zram: promote zram from staging Zram has lived in staging for a LONG LONG time and have been fixed/improved by many contributors so code is clean and stable now. Of course, there are lots of product using zram in real practice. The major TV companys have used zram as swap since two years ago and recently our production team released android smart phone with zram which is used as swap, too and recently Android Kitkat start to use zram for small memory smart phone. And there was a report Google released their ChromeOS with zram, too and cyanogenmod have been used zram long time ago. And I heard some disto have used zram block device for tmpfs. In addition, I saw many report from many other peoples. For example, Lubuntu start to use it. The benefit of zram is very clear. With my experience, one of the benefit was to remove jitter of video application with backgroud memory pressure. It would be effect of efficient memory usage by compression but more issue is whether swap is there or not in the system. Recent mobile platforms have used JAVA so there are many anonymous pages. But embedded system normally are reluctant to use eMMC or SDCard as swap because there is wear-leveling and latency issues so if we do not use swap, it means we can't reclaim anoymous pages and at last, we could encounter OOM kill. :( Although we have real storage as swap, it was a problem, too. Because it sometime ends up making system very unresponsible caused by slow swap storage performance. Quote from Luigi on Google "Since Chrome OS was mentioned: the main reason why we don't use swap to a disk (rotating or SSD) is because it doesn't degrade gracefully and leads to a bad interactive experience. Generally we prefer to manage RAM at a higher level, by transparently killing and restarting processes. But we noticed that zram is fast enough to be competitive with the latter, and it lets us make more efficient use of the available RAM. " and he announced. http://www.spinics.net/lists/linux-mm/msg57717.html Other uses case is to use zram for block device. Zram is block device so anyone can format the block device and mount on it so some guys on the internet start zram as /var/tmp. http://forums.gentoo.org/viewtopic-t-838198-start-0.html Let's promote zram and enhance/maintain it instead of removing. Signed-off-by: Minchan Kim Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Nitin Gupta Acked-by: Pekka Enberg Cc: Bob Liu Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Jens Axboe Cc: Luigi Semenzato Cc: Mel Gorman Cc: Rik van Riel Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit cd67e10ac6997c6d1e1504e3c111b693bfdbc148) Signed-off-by: Alex Shi --- {drivers/staging/zram => Documentation/blockdev}/zram.txt | 0 drivers/block/Kconfig | 2 ++ drivers/block/Makefile | 1 + drivers/{staging => block}/zram/Kconfig | 0 drivers/{staging => block}/zram/Makefile | 0 drivers/{staging => block}/zram/zram_drv.c | 0 drivers/{staging => block}/zram/zram_drv.h | 0 drivers/staging/Kconfig | 2 -- drivers/staging/Makefile | 1 - 9 files changed, 3 insertions(+), 3 deletions(-) rename {drivers/staging/zram => Documentation/blockdev}/zram.txt (100%) rename drivers/{staging => block}/zram/Kconfig (100%) rename drivers/{staging => block}/zram/Makefile (100%) rename drivers/{staging => block}/zram/zram_drv.c (100%) rename drivers/{staging => block}/zram/zram_drv.h (100%) diff --git a/drivers/staging/zram/zram.txt b/Documentation/blockdev/zram.txt similarity index 100% rename from drivers/staging/zram/zram.txt rename to Documentation/blockdev/zram.txt diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index b81ddfea1da0..9da952c9af91 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -105,6 +105,8 @@ source "drivers/block/paride/Kconfig" source "drivers/block/mtip32xx/Kconfig" +source "drivers/block/zram/Kconfig" + config BLK_CPQ_DA tristate "Compaq SMART2 support" depends on PCI && VIRT_TO_BUS diff --git a/drivers/block/Makefile b/drivers/block/Makefile index ca07399a8d99..3675937ab651 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ +obj-$(CONFIG_ZRAM) += zram/ nvme-y := nvme-core.o nvme-scsi.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/staging/zram/Kconfig b/drivers/block/zram/Kconfig similarity index 100% rename from drivers/staging/zram/Kconfig rename to drivers/block/zram/Kconfig diff --git a/drivers/staging/zram/Makefile b/drivers/block/zram/Makefile similarity index 100% rename from drivers/staging/zram/Makefile rename to drivers/block/zram/Makefile diff --git a/drivers/staging/zram/zram_drv.c b/drivers/block/zram/zram_drv.c similarity index 100% rename from drivers/staging/zram/zram_drv.c rename to drivers/block/zram/zram_drv.c diff --git a/drivers/staging/zram/zram_drv.h b/drivers/block/zram/zram_drv.h similarity index 100% rename from drivers/staging/zram/zram_drv.h rename to drivers/block/zram/zram_drv.h diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 60585217481f..25c8bffdd248 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -72,8 +72,6 @@ source "drivers/staging/sep/Kconfig" source "drivers/staging/iio/Kconfig" -source "drivers/staging/zram/Kconfig" - source "drivers/staging/wlags49_h2/Kconfig" source "drivers/staging/wlags49_h25/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 29aaeaa283eb..f9d86a4b48e9 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_VT6656) += vt6656/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ -obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/ obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/ obj-$(CONFIG_FB_SM7XX) += sm7xxfb/ From 15db1d2f8df0721faf81c4e23119a716e537b6cc Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:54 -0800 Subject: [PATCH 172/277] zram: remove old private project comment Remove the old private compcache project address so upcoming patches should be sent to LKML because we Linux kernel community will take care. Signed-off-by: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 49061236a9c2e18b31617cef10d27ba136068bac) Signed-off-by: Alex Shi --- Documentation/blockdev/zram.txt | 6 ------ drivers/block/zram/Kconfig | 1 - drivers/block/zram/zram_drv.c | 1 - drivers/block/zram/zram_drv.h | 1 - 4 files changed, 9 deletions(-) diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 765d790ae831..2eccddffa6c8 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -1,8 +1,6 @@ zram: Compressed RAM based block devices ---------------------------------------- -Project home: http://compcache.googlecode.com/ - * Introduction The zram module creates RAM based block devices named /dev/zram @@ -69,9 +67,5 @@ Following shows a typical sequence of steps for using zram. resets the disksize to zero. You must set the disksize again before reusing the device. -Please report any problems at: - - Mailing list: linux-mm-cc at laptop dot org - - Issue tracker: http://code.google.com/p/compcache/issues/list - Nitin Gupta ngupta@vflare.org diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 983314c41349..3450be850399 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -14,7 +14,6 @@ config ZRAM disks and maybe many more. See zram.txt for more information. - Project home: config ZRAM_DEBUG bool "Compressed RAM block device debug support" diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 689ebf105acd..4c492eade671 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -9,7 +9,6 @@ * Released under the terms of 3-clause BSD License * Released under the terms of GNU General Public License Version 2.0 * - * Project home: http://compcache.googlecode.com */ #define KMSG_COMPONENT "zram" diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index d8f6596513c3..92f70e8f457c 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -9,7 +9,6 @@ * Released under the terms of 3-clause BSD License * Released under the terms of GNU General Public License Version 2.0 * - * Project home: http://compcache.googlecode.com */ #ifndef _ZRAM_DRV_H_ From 851a07391ee1cea24cc2a2b099e12f5ce55edca9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:55 -0800 Subject: [PATCH 173/277] zram: add copyright Add my copyright to the zram source code which I maintain. Signed-off-by: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 7bfb3de8a1b3bebc2dc68d381efe27448c0584c5) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 1 + drivers/block/zram/zram_drv.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4c492eade671..d66b404fc535 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2,6 +2,7 @@ * Compressed RAM block device * * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the licence that better fits your requirements. diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 92f70e8f457c..0e46953c08e9 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -2,6 +2,7 @@ * Compressed RAM block device * * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the licence that better fits your requirements. From 00cfab35e838986cf72222f981444dab704db687 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:55 -0800 Subject: [PATCH 174/277] zsmalloc: add copyright Add my copyright to the zsmalloc source code which I maintain. Signed-off-by: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 31fc00bb788ffde7d8d861d8b2bba798ab445992) Signed-off-by: Alex Shi --- include/linux/zsmalloc.h | 1 + mm/zsmalloc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 46dbd0558d86..d3f48686bceb 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -2,6 +2,7 @@ * zsmalloc memory allocator * * Copyright (C) 2011 Nitin Gupta + * Copyright (C) 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the license that better fits your requirements. diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 6ad98bb06411..7a8d161b4cd7 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2,6 +2,7 @@ * zsmalloc memory allocator * * Copyright (C) 2011 Nitin Gupta + * Copyright (C) 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the license that better fits your requirements. From fbd4d659587f3be893860c57e7b338fe3f45284d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:58 -0800 Subject: [PATCH 175/277] zram: fix race between reset and flushing pending work Dan and Sergey reported that there is a racy between reset and flushing of pending work so that it could make oops by freeing zram->meta in reset while zram_slot_free can access zram->meta if new request is adding during the race window. This patch moves flush after taking init_lock so it prevents new request so that it closes the race. Signed-off-by: Minchan Kim Reported-by: Dan Carpenter Cc: Nitin Gupta Cc: Jerome Marchand Tested-by: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit da4a04126baa3be03bc566d4a2ee0944c5e783d0) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d66b404fc535..1a377872729d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -552,14 +552,14 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) size_t index; struct zram_meta *meta; - flush_work(&zram->free_work); - down_write(&zram->init_lock); if (!zram->init_done) { up_write(&zram->init_lock); return; } + flush_work(&zram->free_work); + meta = zram->meta; zram->init_done = 0; From ee76411779c7fb286de437bd15367ff39ae88e6a Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:00 -0800 Subject: [PATCH 176/277] zram: delay pending free request in read path Sergey reported we don't need to handle pending free request every I/O so that this patch removes it in read path while we remain it in write path. Let's consider below example. Swap subsystem ask to zram "A" block free by swap_slot_free_notify but zram had been pended it without real freeing. Swap subsystem allocates "A" block for new data but request pended for a long time just handled and zram blindly free new data on the "A" block. :( That's why we couldn't remove handle pending free request right before zram-write. Signed-off-by: Minchan Kim Reported-by: Sergey Senozhatsky Tested-by: Sergey Senozhatsky Cc: Nitin Gupta Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9b353db16d18f87242337e3e61a948c023505a65) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1a377872729d..8b88a8d064af 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -534,7 +534,6 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (rw == READ) { down_read(&zram->lock); - handle_pending_slot_free(zram); ret = zram_bvec_read(zram, bvec, index, offset, bio); up_read(&zram->lock); } else { From 6eb4a8a4531c2411703a9c43428ad9de7e762a2f Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:01 -0800 Subject: [PATCH 177/277] zram: remove unnecessary free Commit a0c516cbfc74 ("zram: don't grab mutex in zram_slot_free_noity") introduced pending zram slot free in zram's write path in case of missing slot free by memory allocation failure in zram_slot_free_notify but it is not necessary because we have already freed the slot right before overwriting. Signed-off-by: Minchan Kim Cc: Nitin Gupta Cc: Jerome Marchand Tested-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 874e3cddc33f0c0f9cc08ad2b73fa0cbe7dfaa63) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8b88a8d064af..a9c236de4676 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -440,14 +440,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - /* - * zram_slot_free_notify could miss free so that let's - * double check. - */ - if (unlikely(meta->table[index].handle || - zram_test_flag(meta, index, ZRAM_ZERO))) - zram_free_page(zram, index); - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, meta->compress_workmem); From 671b5561a6c05a36da64a2b798b1b52dada37c2c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:02 -0800 Subject: [PATCH 178/277] zram: use atomic operation for stat Some of fields in zram->stats are protected by zram->lock which is rather coarse-grained so let's use atomic operation without explict locking. This patch is ready for removing dependency of zram->lock in read path which is very coarse-grained rw_semaphore. Of course, this patch adds new atomic operation so it might make slow but my 12CPU test couldn't spot any regression. All gain/lose is marginal within stddev. iozone -t -T -l 12 -u 12 -r 16K -s 60M -I +Z -V 0 ==Initial write ==Initial write records: 50 records: 50 avg: 412875.17 avg: 415638.23 std: 38543.12 (9.34%) std: 36601.11 (8.81%) max: 521262.03 max: 502976.72 min: 343263.13 min: 351389.12 ==Rewrite ==Rewrite records: 50 records: 50 avg: 416640.34 avg: 397914.33 std: 60798.92 (14.59%) std: 46150.42 (11.60%) max: 543057.07 max: 522669.17 min: 304071.67 min: 316588.77 ==Read ==Read records: 50 records: 50 avg: 4147338.63 avg: 4070736.51 std: 179333.25 (4.32%) std: 223499.89 (5.49%) max: 4459295.28 max: 4539514.44 min: 3753057.53 min: 3444686.31 ==Re-read ==Re-read records: 50 records: 50 avg: 4096706.71 avg: 4117218.57 std: 229735.04 (5.61%) std: 171676.25 (4.17%) max: 4430012.09 max: 4459263.94 min: 2987217.80 min: 3666904.28 ==Reverse Read ==Reverse Read records: 50 records: 50 avg: 4062763.83 avg: 4078508.32 std: 186208.46 (4.58%) std: 172684.34 (4.23%) max: 4401358.78 max: 4424757.22 min: 3381625.00 min: 3679359.94 ==Stride read ==Stride read records: 50 records: 50 avg: 4094933.49 avg: 4082170.22 std: 185710.52 (4.54%) std: 196346.68 (4.81%) max: 4478241.25 max: 4460060.97 min: 3732593.23 min: 3584125.78 ==Random read ==Random read records: 50 records: 50 avg: 4031070.04 avg: 4074847.49 std: 192065.51 (4.76%) std: 206911.33 (5.08%) max: 4356931.16 max: 4399442.56 min: 3481619.62 min: 3548372.44 ==Mixed workload ==Mixed workload records: 50 records: 50 avg: 149925.73 avg: 149675.54 std: 7701.26 (5.14%) std: 6902.09 (4.61%) max: 191301.56 max: 175162.05 min: 133566.28 min: 137762.87 ==Random write ==Random write records: 50 records: 50 avg: 404050.11 avg: 393021.47 std: 58887.57 (14.57%) std: 42813.70 (10.89%) max: 601798.09 max: 524533.43 min: 325176.99 min: 313255.34 ==Pwrite ==Pwrite records: 50 records: 50 avg: 411217.70 avg: 411237.96 std: 43114.99 (10.48%) std: 33136.29 (8.06%) max: 530766.79 max: 471899.76 min: 320786.84 min: 317906.94 ==Pread ==Pread records: 50 records: 50 avg: 4154908.65 avg: 4087121.92 std: 151272.08 (3.64%) std: 219505.04 (5.37%) max: 4459478.12 max: 4435857.38 min: 3730512.41 min: 3101101.67 Signed-off-by: Minchan Kim Cc: Nitin Gupta Tested-by: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit deb0bdeb2f3d6b81d37fc778316dae46b6daab56) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 20 ++++++++++---------- drivers/block/zram/zram_drv.h | 16 ++++++---------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a9c236de4676..0b53d1db7eaf 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -104,7 +104,7 @@ static ssize_t zero_pages_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", zram->stats.pages_zero); + return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero)); } static ssize_t orig_data_size_show(struct device *dev, @@ -113,7 +113,7 @@ static ssize_t orig_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); + (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } static ssize_t compr_data_size_show(struct device *dev, @@ -292,21 +292,21 @@ static void zram_free_page(struct zram *zram, size_t index) */ if (zram_test_flag(meta, index, ZRAM_ZERO)) { zram_clear_flag(meta, index, ZRAM_ZERO); - zram->stats.pages_zero--; + atomic_dec(&zram->stats.pages_zero); } return; } if (unlikely(size > max_zpage_size)) - zram->stats.bad_compress--; + atomic_dec(&zram->stats.bad_compress); zs_free(meta->mem_pool, handle); if (size <= PAGE_SIZE / 2) - zram->stats.good_compress--; + atomic_dec(&zram->stats.good_compress); atomic64_sub(meta->table[index].size, &zram->stats.compr_size); - zram->stats.pages_stored--; + atomic_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; meta->table[index].size = 0; @@ -434,7 +434,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, /* Free memory associated with this sector now. */ zram_free_page(zram, index); - zram->stats.pages_zero++; + atomic_inc(&zram->stats.pages_zero); zram_set_flag(meta, index, ZRAM_ZERO); ret = 0; goto out; @@ -455,7 +455,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (unlikely(clen > max_zpage_size)) { - zram->stats.bad_compress++; + atomic_inc(&zram->stats.bad_compress); clen = PAGE_SIZE; src = NULL; if (is_partial_io(bvec)) @@ -492,9 +492,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, /* Update stats */ atomic64_add(clen, &zram->stats.compr_size); - zram->stats.pages_stored++; + atomic_inc(&zram->stats.pages_stored); if (clen <= PAGE_SIZE / 2) - zram->stats.good_compress++; + atomic_inc(&zram->stats.good_compress); out: if (is_partial_io(bvec)) diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 0e46953c08e9..81b0170de369 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -68,10 +68,6 @@ struct table { u8 flags; } __aligned(4); -/* - * All 64bit fields should only be manipulated by 64bit atomic accessors. - * All modifications to 32bit counter should be protected by zram->lock. - */ struct zram_stats { atomic64_t compr_size; /* compressed size of pages stored */ atomic64_t num_reads; /* failed + successful */ @@ -80,10 +76,10 @@ struct zram_stats { atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ - u32 pages_zero; /* no. of zero filled pages */ - u32 pages_stored; /* no. of pages currently stored */ - u32 good_compress; /* % of pages with compression ratio<=50% */ - u32 bad_compress; /* % of pages with compression ratio>=75% */ + atomic_t pages_zero; /* no. of zero filled pages */ + atomic_t pages_stored; /* no. of pages currently stored */ + atomic_t good_compress; /* % of pages with compression ratio<=50% */ + atomic_t bad_compress; /* % of pages with compression ratio>=75% */ }; struct zram_meta { @@ -101,8 +97,8 @@ struct zram_slot_free { struct zram { struct zram_meta *meta; struct rw_semaphore lock; /* protect compression buffers, table, - * 32bit stat counters against concurrent - * notifications, reads and writes */ + * reads and writes + */ struct work_struct free_work; /* handle pending free request */ struct zram_slot_free *slot_free_rq; /* list head of free request */ From 76b3c1eb150766da298e3c68074dbd1401f22a7c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:03 -0800 Subject: [PATCH 179/277] zram: introduce zram->tb_lock Currently, the zram table is protected by zram->lock but it's rather coarse-grained lock and it makes hard for scalibility. Let's use own rwlock instead of depending on zram->lock. This patch adds new locking so obviously, it would make slow but this patch is just prepartion for removing coarse-grained rw_semaphore(ie, zram->lock) which is hurdle about zram scalability. Final patch in this patchset series will remove the lock from read-path and change rw_semaphore with mutex in write path. With bonus, we could drop pending slot free mess in next patch. Signed-off-by: Minchan Kim Cc: Nitin Gupta Tested-by: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 92967471b67163bb1654e9b7fe99449ab70a4aaa) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 26 +++++++++++++++++++++----- drivers/block/zram/zram_drv.h | 3 ++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0b53d1db7eaf..42358b61e360 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -140,6 +140,7 @@ static ssize_t mem_used_total_show(struct device *dev, return sprintf(buf, "%llu\n", val); } +/* flag operations needs meta->tb_lock */ static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { @@ -227,6 +228,7 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) goto free_table; } + rwlock_init(&meta->tb_lock); return meta; free_table: @@ -279,6 +281,7 @@ static void handle_zero_page(struct bio_vec *bvec) flush_dcache_page(page); } +/* NOTE: caller should hold meta->tb_lock with write-side */ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; @@ -318,20 +321,26 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) size_t clen = PAGE_SIZE; unsigned char *cmem; struct zram_meta *meta = zram->meta; - unsigned long handle = meta->table[index].handle; + unsigned long handle; + u16 size; + + read_lock(&meta->tb_lock); + handle = meta->table[index].handle; + size = meta->table[index].size; if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { + read_unlock(&meta->tb_lock); clear_page(mem); return 0; } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); - if (meta->table[index].size == PAGE_SIZE) + if (size == PAGE_SIZE) copy_page(mem, cmem); else - ret = lzo1x_decompress_safe(cmem, meta->table[index].size, - mem, &clen); + ret = lzo1x_decompress_safe(cmem, size, mem, &clen); zs_unmap_object(meta->mem_pool, handle); + read_unlock(&meta->tb_lock); /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret != LZO_E_OK)) { @@ -352,11 +361,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, struct zram_meta *meta = zram->meta; page = bvec->bv_page; + read_lock(&meta->tb_lock); if (unlikely(!meta->table[index].handle) || zram_test_flag(meta, index, ZRAM_ZERO)) { + read_unlock(&meta->tb_lock); handle_zero_page(bvec); return 0; } + read_unlock(&meta->tb_lock); if (is_partial_io(bvec)) /* Use a temporary buffer to decompress the page */ @@ -432,10 +444,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ + write_lock(&zram->meta->tb_lock); zram_free_page(zram, index); + zram_set_flag(meta, index, ZRAM_ZERO); + write_unlock(&zram->meta->tb_lock); atomic_inc(&zram->stats.pages_zero); - zram_set_flag(meta, index, ZRAM_ZERO); ret = 0; goto out; } @@ -485,10 +499,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, * Free memory associated with this sector * before overwriting unused sectors. */ + write_lock(&zram->meta->tb_lock); zram_free_page(zram, index); meta->table[index].handle = handle; meta->table[index].size = clen; + write_unlock(&zram->meta->tb_lock); /* Update stats */ atomic64_add(clen, &zram->stats.compr_size); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 81b0170de369..c3f453f04974 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -83,6 +83,7 @@ struct zram_stats { }; struct zram_meta { + rwlock_t tb_lock; /* protect table */ void *compress_workmem; void *compress_buffer; struct table *table; @@ -96,7 +97,7 @@ struct zram_slot_free { struct zram { struct zram_meta *meta; - struct rw_semaphore lock; /* protect compression buffers, table, + struct rw_semaphore lock; /* protect compression buffers, * reads and writes */ From fa5b73b76279cd7e88743b4b8ec2fa61478d6776 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:04 -0800 Subject: [PATCH 180/277] zram: remove workqueue for freeing removed pending slot Commit a0c516cbfc74 ("zram: don't grab mutex in zram_slot_free_noity") introduced free request pending code to avoid scheduling by mutex under spinlock and it was a mess which made code lenghty and increased overhead. Now, we don't need zram->lock any more to free slot so this patch reverts it and then, tb_lock should protect it. Signed-off-by: Minchan Kim Cc: Nitin Gupta Tested-by: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit f614a9f48dedd2b80d1dc8bae8094842fcdb39dd) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 54 ++++------------------------------- drivers/block/zram/zram_drv.h | 10 ------- 2 files changed, 6 insertions(+), 58 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 42358b61e360..3e797b844377 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -521,20 +521,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } -static void handle_pending_slot_free(struct zram *zram) -{ - struct zram_slot_free *free_rq; - - spin_lock(&zram->slot_free_lock); - while (zram->slot_free_rq) { - free_rq = zram->slot_free_rq; - zram->slot_free_rq = free_rq->next; - zram_free_page(zram, free_rq->index); - kfree(free_rq); - } - spin_unlock(&zram->slot_free_lock); -} - static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio, int rw) { @@ -546,7 +532,6 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, up_read(&zram->lock); } else { down_write(&zram->lock); - handle_pending_slot_free(zram); ret = zram_bvec_write(zram, bvec, index, offset); up_write(&zram->lock); } @@ -565,8 +550,6 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) return; } - flush_work(&zram->free_work); - meta = zram->meta; zram->init_done = 0; @@ -766,40 +749,19 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) bio_io_error(bio); } -static void zram_slot_free(struct work_struct *work) -{ - struct zram *zram; - - zram = container_of(work, struct zram, free_work); - down_write(&zram->lock); - handle_pending_slot_free(zram); - up_write(&zram->lock); -} - -static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) -{ - spin_lock(&zram->slot_free_lock); - free_rq->next = zram->slot_free_rq; - zram->slot_free_rq = free_rq; - spin_unlock(&zram->slot_free_lock); -} - static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { struct zram *zram; - struct zram_slot_free *free_rq; + struct zram_meta *meta; zram = bdev->bd_disk->private_data; + meta = zram->meta; + + write_lock(&meta->tb_lock); + zram_free_page(zram, index); + write_unlock(&meta->tb_lock); atomic64_inc(&zram->stats.notify_free); - - free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); - if (!free_rq) - return; - - free_rq->index = index; - add_slot_free(zram, free_rq); - schedule_work(&zram->free_work); } static const struct block_device_operations zram_devops = { @@ -846,10 +808,6 @@ static int create_device(struct zram *zram, int device_id) init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); - INIT_WORK(&zram->free_work, zram_slot_free); - spin_lock_init(&zram->slot_free_lock); - zram->slot_free_rq = NULL; - zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { pr_err("Error allocating disk queue for device %d\n", diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c3f453f04974..d876300da6c9 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -90,20 +90,11 @@ struct zram_meta { struct zs_pool *mem_pool; }; -struct zram_slot_free { - unsigned long index; - struct zram_slot_free *next; -}; - struct zram { struct zram_meta *meta; struct rw_semaphore lock; /* protect compression buffers, * reads and writes */ - - struct work_struct free_work; /* handle pending free request */ - struct zram_slot_free *slot_free_rq; /* list head of free request */ - struct request_queue *queue; struct gendisk *disk; int init_done; @@ -114,7 +105,6 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - spinlock_t slot_free_lock; struct zram_stats stats; }; From 3853d83925871721c6ee7b2bc40126183129a3ac Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:06 -0800 Subject: [PATCH 181/277] zram: remove zram->lock in read path and change it with mutex Finally, we separated zram->lock dependency from 32bit stat/ table handling so there is no reason to use rw_semaphore between read and write path so this patch removes the lock from read path totally and changes rw_semaphore with mutex. So, we could do old: read-read: OK read-write: NO write-write: NO Now: read-read: OK read-write: OK write-write: NO The below data proves mixed workload performs well 11 times and there is also enhance on write-write path because current rw-semaphore doesn't support SPIN_ON_OWNER. It's side effect but anyway good thing for us. Write-related tests perform better (from 61% to 1058%) but read path has good/bad(from -2.22% to 1.45%) but they are all marginal within stddev. CPU 12 iozone -t -T -l 12 -u 12 -r 16K -s 60M -I +Z -V 0 ==Initial write ==Initial write records: 10 records: 10 avg: 516189.16 avg: 839907.96 std: 22486.53 (4.36%) std: 47902.17 (5.70%) max: 546970.60 max: 909910.35 min: 481131.54 min: 751148.38 ==Rewrite ==Rewrite records: 10 records: 10 avg: 509527.98 avg: 1050156.37 std: 45799.94 (8.99%) std: 40695.44 (3.88%) max: 611574.27 max: 1111929.26 min: 443679.95 min: 980409.62 ==Read ==Read records: 10 records: 10 avg: 4408624.17 avg: 4472546.76 std: 281152.61 (6.38%) std: 163662.78 (3.66%) max: 4867888.66 max: 4727351.03 min: 4058347.69 min: 4126520.88 ==Re-read ==Re-read records: 10 records: 10 avg: 4462147.53 avg: 4363257.75 std: 283546.11 (6.35%) std: 247292.63 (5.67%) max: 4912894.44 max: 4677241.75 min: 4131386.50 min: 4035235.84 ==Reverse Read ==Reverse Read records: 10 records: 10 avg: 4565865.97 avg: 4485818.08 std: 313395.63 (6.86%) std: 248470.10 (5.54%) max: 5232749.16 max: 4789749.94 min: 4185809.62 min: 3963081.34 ==Stride read ==Stride read records: 10 records: 10 avg: 4515981.80 avg: 4418806.01 std: 211192.32 (4.68%) std: 212837.97 (4.82%) max: 4889287.28 max: 4686967.22 min: 4210362.00 min: 4083041.84 ==Random read ==Random read records: 10 records: 10 avg: 4410525.23 avg: 4387093.18 std: 236693.22 (5.37%) std: 235285.23 (5.36%) max: 4713698.47 max: 4669760.62 min: 4057163.62 min: 3952002.16 ==Mixed workload ==Mixed workload records: 10 records: 10 avg: 243234.25 avg: 2818677.27 std: 28505.07 (11.72%) std: 195569.70 (6.94%) max: 288905.23 max: 3126478.11 min: 212473.16 min: 2484150.69 ==Random write ==Random write records: 10 records: 10 avg: 555887.07 avg: 1053057.79 std: 70841.98 (12.74%) std: 35195.36 (3.34%) max: 683188.28 max: 1096125.73 min: 437299.57 min: 992481.93 ==Pwrite ==Pwrite records: 10 records: 10 avg: 501745.93 avg: 810363.09 std: 16373.54 (3.26%) std: 19245.01 (2.37%) max: 518724.52 max: 833359.70 min: 464208.73 min: 765501.87 ==Pread ==Pread records: 10 records: 10 avg: 4539894.60 avg: 4457680.58 std: 197094.66 (4.34%) std: 188965.60 (4.24%) max: 4877170.38 max: 4689905.53 min: 4226326.03 min: 4095739.72 Signed-off-by: Minchan Kim Cc: Nitin Gupta Tested-by: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e46e33152eb82b8e2db7ffb3790a2a2653c34513) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 17 ++++++++--------- drivers/block/zram/zram_drv.h | 4 +--- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3e797b844377..c450dd9390ab 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -229,6 +229,7 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) } rwlock_init(&meta->tb_lock); + mutex_init(&meta->buffer_lock); return meta; free_table: @@ -411,6 +412,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; struct zram_meta *meta = zram->meta; + bool locked = false; page = bvec->bv_page; src = meta->compress_buffer; @@ -430,6 +432,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } + mutex_lock(&meta->buffer_lock); + locked = true; user_mem = kmap_atomic(page); if (is_partial_io(bvec)) { @@ -456,7 +460,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, meta->compress_workmem); - if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); user_mem = NULL; @@ -513,6 +516,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, atomic_inc(&zram->stats.good_compress); out: + if (locked) + mutex_unlock(&meta->buffer_lock); if (is_partial_io(bvec)) kfree(uncmem); @@ -526,15 +531,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, { int ret; - if (rw == READ) { - down_read(&zram->lock); + if (rw == READ) ret = zram_bvec_read(zram, bvec, index, offset, bio); - up_read(&zram->lock); - } else { - down_write(&zram->lock); + else ret = zram_bvec_write(zram, bvec, index, offset); - up_write(&zram->lock); - } return ret; } @@ -805,7 +805,6 @@ static int create_device(struct zram *zram, int device_id) { int ret = -ENOMEM; - init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); zram->queue = blk_alloc_queue(GFP_KERNEL); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index d876300da6c9..ad8aa35bae00 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -88,13 +88,11 @@ struct zram_meta { void *compress_buffer; struct table *table; struct zs_pool *mem_pool; + struct mutex buffer_lock; /* protect compress buffers */ }; struct zram { struct zram_meta *meta; - struct rw_semaphore lock; /* protect compression buffers, - * reads and writes - */ struct request_queue *queue; struct gendisk *disk; int init_done; From b048aa137fe66b7f19d842a4b145296bbc26caff Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 3 Mar 2014 15:38:34 -0800 Subject: [PATCH 182/277] zram: avoid null access when fail to alloc meta zram_meta_alloc could fail so caller should check it. Otherwise, your system will hang. Signed-off-by: Minchan Kim Acked-by: Jerome Marchand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit db5d711e2db776f18219b033e5dc4fb7e4264dd7) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c450dd9390ab..cf77a8a1ae97 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -611,6 +611,8 @@ static ssize_t disksize_store(struct device *dev, disksize = PAGE_ALIGN(disksize); meta = zram_meta_alloc(disksize); + if (!meta) + return -ENOMEM; down_write(&zram->init_lock); if (zram->init_done) { up_write(&zram->init_lock); From 30e87174bf64cdce17005dc7fe031ea1aab98161 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 11 Mar 2014 02:04:14 +0530 Subject: [PATCH 183/277] CPU hotplug: Provide lockless versions of callback registration functions The following method of CPU hotplug callback registration is not safe due to the possibility of an ABBA deadlock involving the cpu_add_remove_lock and the cpu_hotplug.lock. get_online_cpus(); for_each_online_cpu(cpu) init_cpu(cpu); register_cpu_notifier(&foobar_cpu_notifier); put_online_cpus(); The deadlock is shown below: CPU 0 CPU 1 ----- ----- Acquire cpu_hotplug.lock [via get_online_cpus()] CPU online/offline operation takes cpu_add_remove_lock [via cpu_maps_update_begin()] Try to acquire cpu_add_remove_lock [via register_cpu_notifier()] CPU online/offline operation tries to acquire cpu_hotplug.lock [via cpu_hotplug_begin()] *** DEADLOCK! *** The problem here is that callback registration takes the locks in one order whereas the CPU hotplug operations take the same locks in the opposite order. To avoid this issue and to provide a race-free method to register CPU hotplug callbacks (along with initialization of already online CPUs), introduce new variants of the callback registration APIs that simply register the callbacks without holding the cpu_add_remove_lock during the registration. That way, we can avoid the ABBA scenario. However, we will need to hold the cpu_add_remove_lock throughout the entire critical section, to protect updates to the callback/notifier chain. This can be achieved by writing the callback registration code as follows: cpu_maps_update_begin(); [ or cpu_notifier_register_begin(); see below ] for_each_online_cpu(cpu) init_cpu(cpu); /* This doesn't take the cpu_add_remove_lock */ __register_cpu_notifier(&foobar_cpu_notifier); cpu_maps_update_done(); [ or cpu_notifier_register_done(); see below ] Note that we can't use get_online_cpus() here instead of cpu_maps_update_begin() because the cpu_hotplug.lock is dropped during the invocation of CPU_POST_DEAD notifiers, and hence get_online_cpus() cannot provide the necessary synchronization to protect the callback/notifier chains against concurrent reads and writes. On the other hand, since the cpu_add_remove_lock protects the entire hotplug operation (including CPU_POST_DEAD), we can use cpu_maps_update_begin/done() to guarantee proper synchronization. Also, since cpu_maps_update_begin/done() is like a super-set of get/put_online_cpus(), the former naturally protects the critical sections from concurrent hotplug operations. Since the names cpu_maps_update_begin/done() don't make much sense in CPU hotplug callback registration scenarios, we'll introduce new APIs named cpu_notifier_register_begin/done() and map them to cpu_maps_update_begin/done(). In summary, introduce the lockless variants of un/register_cpu_notifier() and also export the cpu_notifier_register_begin/done() APIs for use by modules. This way, we provide a race-free way to register hotplug callbacks as well as perform initialization for the CPUs that are already online. Cc: Thomas Gleixner Cc: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: Oleg Nesterov Acked-by: Toshi Kani Reviewed-by: Gautham R. Shenoy Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki (cherry picked from commit 93ae4f978ca7f26d17df915ac7afc919c1dd0353) Signed-off-by: Alex Shi --- include/linux/cpu.h | 47 +++++++++++++++++++++++++++++++++++++++++++++ kernel/cpu.c | 21 ++++++++++++++++++-- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 9f3c7e81270a..096af4570d69 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -119,26 +119,46 @@ enum { { .notifier_call = fn, .priority = pri }; \ register_cpu_notifier(&fn##_nb); \ } + +#define __cpu_notifier(fn, pri) { \ + static struct notifier_block fn##_nb = \ + { .notifier_call = fn, .priority = pri }; \ + __register_cpu_notifier(&fn##_nb); \ +} #else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ + #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); +extern int __register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); +extern void __unregister_cpu_notifier(struct notifier_block *nb); #else #ifndef MODULE extern int register_cpu_notifier(struct notifier_block *nb); +extern int __register_cpu_notifier(struct notifier_block *nb); #else static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } + +static inline int __register_cpu_notifier(struct notifier_block *nb) +{ + return 0; +} #endif static inline void unregister_cpu_notifier(struct notifier_block *nb) { } + +static inline void __unregister_cpu_notifier(struct notifier_block *nb) +{ +} #endif int cpu_up(unsigned int cpu); @@ -146,19 +166,32 @@ void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); +#define cpu_notifier_register_begin cpu_maps_update_begin +#define cpu_notifier_register_done cpu_maps_update_done + #else /* CONFIG_SMP */ #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } +static inline int __register_cpu_notifier(struct notifier_block *nb) +{ + return 0; +} + static inline void unregister_cpu_notifier(struct notifier_block *nb) { } +static inline void __unregister_cpu_notifier(struct notifier_block *nb) +{ +} + static inline void cpu_maps_update_begin(void) { } @@ -167,6 +200,14 @@ static inline void cpu_maps_update_done(void) { } +static inline void cpu_notifier_register_begin(void) +{ +} + +static inline void cpu_notifier_register_done(void) +{ +} + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -178,8 +219,11 @@ extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) +#define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) +#define __register_hotcpu_notifier(nb) __register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) +#define __unregister_hotcpu_notifier(nb) __unregister_cpu_notifier(nb) void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); @@ -203,9 +247,12 @@ static inline void cpu_hotplug_driver_unlock(void) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) +#define __register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) #define unregister_hotcpu_notifier(nb) ({ (void)(nb); }) +#define __unregister_hotcpu_notifier(nb) ({ (void)(nb); }) #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM_SLEEP_SMP diff --git a/kernel/cpu.c b/kernel/cpu.c index bc255e25d5dd..5fbcbdd31fb3 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -27,18 +27,23 @@ static DEFINE_MUTEX(cpu_add_remove_lock); /* - * The following two API's must be used when attempting - * to serialize the updates to cpu_online_mask, cpu_present_mask. + * The following two APIs (cpu_maps_update_begin/done) must be used when + * attempting to serialize the updates to cpu_online_mask & cpu_present_mask. + * The APIs cpu_notifier_register_begin/done() must be used to protect CPU + * hotplug callback (un)registration performed using __register_cpu_notifier() + * or __unregister_cpu_notifier(). */ void cpu_maps_update_begin(void) { mutex_lock(&cpu_add_remove_lock); } +EXPORT_SYMBOL(cpu_notifier_register_begin); void cpu_maps_update_done(void) { mutex_unlock(&cpu_add_remove_lock); } +EXPORT_SYMBOL(cpu_notifier_register_done); static RAW_NOTIFIER_HEAD(cpu_chain); @@ -169,6 +174,11 @@ int __ref register_cpu_notifier(struct notifier_block *nb) return ret; } +int __ref __register_cpu_notifier(struct notifier_block *nb) +{ + return raw_notifier_chain_register(&cpu_chain, nb); +} + static int __cpu_notify(unsigned long val, void *v, int nr_to_call, int *nr_calls) { @@ -192,6 +202,7 @@ static void cpu_notify_nofail(unsigned long val, void *v) BUG_ON(cpu_notify(val, v)); } EXPORT_SYMBOL(register_cpu_notifier); +EXPORT_SYMBOL(__register_cpu_notifier); void __ref unregister_cpu_notifier(struct notifier_block *nb) { @@ -201,6 +212,12 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_cpu_notifier); +void __ref __unregister_cpu_notifier(struct notifier_block *nb) +{ + raw_notifier_chain_unregister(&cpu_chain, nb); +} +EXPORT_SYMBOL(__unregister_cpu_notifier); + /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id From cda631acb964bb521add32cd60db08f43a23835c Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 11 Mar 2014 02:09:59 +0530 Subject: [PATCH 184/277] zsmalloc: Fix CPU hotplug callback registration Subsystems that want to register CPU hotplug callbacks, as well as perform initialization for the CPUs that are already online, often do it as shown below: get_online_cpus(); for_each_online_cpu(cpu) init_cpu(cpu); register_cpu_notifier(&foobar_cpu_notifier); put_online_cpus(); This is wrong, since it is prone to ABBA deadlocks involving the cpu_add_remove_lock and the cpu_hotplug.lock (when running concurrently with CPU hotplug operations). Instead, the correct and race-free way of performing the callback registration is: cpu_notifier_register_begin(); for_each_online_cpu(cpu) init_cpu(cpu); /* Note the use of the double underscored version of the API */ __register_cpu_notifier(&foobar_cpu_notifier); cpu_notifier_register_done(); Fix the zsmalloc code by using this latter form of callback registration. Cc: Nitin Gupta Cc: Ingo Molnar Signed-off-by: Srivatsa S. Bhat Acked-by: Minchan Kim Signed-off-by: Rafael J. Wysocki (cherry picked from commit f0e71fcd0fa6f3f5495cd9ad3f1e4acd94446a55) Signed-off-by: Alex Shi --- mm/zsmalloc.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 7a8d161b4cd7..b728f10e353b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -780,21 +780,32 @@ static void zs_exit(void) { int cpu; + cpu_notifier_register_begin(); + for_each_online_cpu(cpu) zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu); - unregister_cpu_notifier(&zs_cpu_nb); + __unregister_cpu_notifier(&zs_cpu_nb); + + cpu_notifier_register_done(); } static int zs_init(void) { int cpu, ret; - register_cpu_notifier(&zs_cpu_nb); + cpu_notifier_register_begin(); + + __register_cpu_notifier(&zs_cpu_nb); for_each_online_cpu(cpu) { ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu); - if (notifier_to_errno(ret)) + if (notifier_to_errno(ret)) { + cpu_notifier_register_done(); goto fail; + } } + + cpu_notifier_register_done(); + return 0; fail: zs_exit(); From fafe199bf14378d4a0a309d15bba16ce6742bd19 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:00 -0700 Subject: [PATCH 185/277] zram: drop `init_done' struct zram member Introduce init_done() helper function which allows us to drop `init_done' struct zram member. init_done() uses the fact that ->init_done == 1 equals to ->meta != NULL. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit be2d1d56c82d8cf20e6c77515eb499f8e86eb5be) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 21 +++++++++++---------- drivers/block/zram/zram_drv.h | 1 - 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index cf77a8a1ae97..e75ccdf194a6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -42,6 +42,11 @@ static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +static inline int init_done(struct zram *zram) +{ + return zram->meta != NULL; +} + static inline struct zram *dev_to_zram(struct device *dev) { return (struct zram *)dev_to_disk(dev)->private_data; @@ -60,7 +65,7 @@ static ssize_t initstate_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", zram->init_done); + return sprintf(buf, "%u\n", init_done(zram)); } static ssize_t num_reads_show(struct device *dev, @@ -133,7 +138,7 @@ static ssize_t mem_used_total_show(struct device *dev, struct zram_meta *meta = zram->meta; down_read(&zram->init_lock); - if (zram->init_done) + if (init_done(zram)) val = zs_get_total_size_bytes(meta->mem_pool); up_read(&zram->init_lock); @@ -545,14 +550,12 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) struct zram_meta *meta; down_write(&zram->init_lock); - if (!zram->init_done) { + if (!init_done(zram)) { up_write(&zram->init_lock); return; } meta = zram->meta; - zram->init_done = 0; - /* Free all pages that are still in this zram device */ for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { unsigned long handle = meta->table[index].handle; @@ -593,8 +596,6 @@ static void zram_init_device(struct zram *zram, struct zram_meta *meta) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); zram->meta = meta; - zram->init_done = 1; - pr_debug("Initialization done!\n"); } @@ -614,7 +615,7 @@ static ssize_t disksize_store(struct device *dev, if (!meta) return -ENOMEM; down_write(&zram->init_lock); - if (zram->init_done) { + if (init_done(zram)) { up_write(&zram->init_lock); zram_meta_free(meta); pr_info("Cannot change disksize for initialized device\n"); @@ -733,7 +734,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) struct zram *zram = queue->queuedata; down_read(&zram->init_lock); - if (unlikely(!zram->init_done)) + if (unlikely(!init_done(zram))) goto error; if (!valid_io_request(zram, bio)) { @@ -856,7 +857,7 @@ static int create_device(struct zram *zram, int device_id) goto out_free_disk; } - zram->init_done = 0; + zram->meta = NULL; return 0; out_free_disk: diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index ad8aa35bae00..e81e9cdf4147 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -95,7 +95,6 @@ struct zram { struct zram_meta *meta; struct request_queue *queue; struct gendisk *disk; - int init_done; /* Prevent concurrent execution of device init, reset and R/W request */ struct rw_semaphore init_lock; /* From 18bebabb7ecc3c03e3339d825141908814a4cf59 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:01 -0700 Subject: [PATCH 186/277] zram: do not pass rw argument to __zram_make_request() Do not pass rw argument down the __zram_make_request() -> zram_bvec_rw() chain, decode it in zram_bvec_rw() instead. Besides, this is the place where we distinguish READ and WRITE bio data directions, so account zram RW stats here, instead of __zram_make_request(). This also allows to account a real number of zram READ/WRITE operations, not just requests (single RW request may cause a number of zram RW ops with separate locking, compression/decompression, etc). Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit be257c61306750d11c20d2ac567bf63304c696a3) Signed-off-by: Alex Shi Conflicts: drivers/block/zram/zram_drv.c Conflicts solution: keep bio struct as old before commit 4f024f3797 'block: Abstract out bvec iterator' --- drivers/block/zram/zram_drv.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e75ccdf194a6..3f98cf21c7f0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -532,14 +532,18 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, struct bio *bio, int rw) + int offset, struct bio *bio) { int ret; + int rw = bio_data_dir(bio); - if (rw == READ) + if (rw == READ) { + atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); - else + } else { + atomic64_inc(&zram->stats.num_writes); ret = zram_bvec_write(zram, bvec, index, offset); + } return ret; } @@ -671,20 +675,12 @@ static ssize_t reset_store(struct device *dev, return ret; } -static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) +static void __zram_make_request(struct zram *zram, struct bio *bio) { int i, offset; u32 index; struct bio_vec *bvec; - switch (rw) { - case READ: - atomic64_inc(&zram->stats.num_reads); - break; - case WRITE: - atomic64_inc(&zram->stats.num_writes); - break; - } index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; @@ -703,16 +699,15 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) bv.bv_len = max_transfer_size; bv.bv_offset = bvec->bv_offset; - if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) + if (zram_bvec_rw(zram, &bv, index, offset, bio) < 0) goto out; bv.bv_len = bvec->bv_len - max_transfer_size; bv.bv_offset += max_transfer_size; - if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) + if (zram_bvec_rw(zram, &bv, index + 1, 0, bio) < 0) goto out; } else - if (zram_bvec_rw(zram, bvec, index, offset, bio, rw) - < 0) + if (zram_bvec_rw(zram, bvec, index, offset, bio) < 0) goto out; update_position(&index, &offset, bvec); @@ -742,7 +737,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) goto error; } - __zram_make_request(zram, bio, bio_data_dir(bio)); + __zram_make_request(zram, bio); up_read(&zram->init_lock); return; From e700d5f0d65058709b17623728a356e2c16a3a3a Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:02 -0700 Subject: [PATCH 187/277] zram: remove good and bad compress stats Remove `good' and `bad' compressed sub-requests stats. RW request may cause a number of RW sub-requests. zram used to account `good' compressed sub-queries (with compressed size less than 50% of original size), `bad' compressed sub-queries (with compressed size greater that 75% of original size), leaving sub-requests with compression size between 50% and 75% of original size not accounted and not reported. zram already accounts each sub-request's compression size so we can calculate real device compression ratio. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b7cccf8b4009bf74df61f3c9d86b95fabd807c11) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 11 ----------- drivers/block/zram/zram_drv.h | 2 -- 2 files changed, 13 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3f98cf21c7f0..a09618cc9e68 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -292,7 +292,6 @@ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; unsigned long handle = meta->table[index].handle; - u16 size = meta->table[index].size; if (unlikely(!handle)) { /* @@ -306,14 +305,8 @@ static void zram_free_page(struct zram *zram, size_t index) return; } - if (unlikely(size > max_zpage_size)) - atomic_dec(&zram->stats.bad_compress); - zs_free(meta->mem_pool, handle); - if (size <= PAGE_SIZE / 2) - atomic_dec(&zram->stats.good_compress); - atomic64_sub(meta->table[index].size, &zram->stats.compr_size); atomic_dec(&zram->stats.pages_stored); @@ -477,7 +470,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (unlikely(clen > max_zpage_size)) { - atomic_inc(&zram->stats.bad_compress); clen = PAGE_SIZE; src = NULL; if (is_partial_io(bvec)) @@ -517,9 +509,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, /* Update stats */ atomic64_add(clen, &zram->stats.compr_size); atomic_inc(&zram->stats.pages_stored); - if (clen <= PAGE_SIZE / 2) - atomic_inc(&zram->stats.good_compress); - out: if (locked) mutex_unlock(&meta->buffer_lock); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index e81e9cdf4147..2f173cb1fd0a 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -78,8 +78,6 @@ struct zram_stats { atomic64_t notify_free; /* no. of swap slot free notifications */ atomic_t pages_zero; /* no. of zero filled pages */ atomic_t pages_stored; /* no. of pages currently stored */ - atomic_t good_compress; /* % of pages with compression ratio<=50% */ - atomic_t bad_compress; /* % of pages with compression ratio>=75% */ }; struct zram_meta { From 2385e8be54460cae2cf803a482a0b8b76cf8d001 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:03 -0700 Subject: [PATCH 188/277] zram: use atomic64_t for all zram stats This is a preparation patch for stats code duplication removal. 1) use atomic64_t for `pages_zero' and `pages_stored' zram stats. 2) `compr_size' and `pages_zero' struct zram_stats members did not follow the existing device attr naming scheme: zram_stats.ATTR has ATTR_show() function. rename them: -- compr_size -> compr_data_size -- pages_zero -> zero_pages Minchan Kim's note: If we really have trouble with atomic stat operation, we could change it with percpu_counter so that it could solve atomic overhead and unnecessary memory space by introducing unsigned long instead of 64bit atomic_t. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 90a7806ea9b9f7cb4751859cc2506e2d80e36ef1) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 18 +++++++++--------- drivers/block/zram/zram_drv.h | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a09618cc9e68..4da4210b3454 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -109,7 +109,7 @@ static ssize_t zero_pages_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero)); + return sprintf(buf, "%llu\n", (u64)atomic64_read(&zram->stats.zero_pages)); } static ssize_t orig_data_size_show(struct device *dev, @@ -118,7 +118,7 @@ static ssize_t orig_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT); + (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } static ssize_t compr_data_size_show(struct device *dev, @@ -127,7 +127,7 @@ static ssize_t compr_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.compr_size)); + (u64)atomic64_read(&zram->stats.compr_data_size)); } static ssize_t mem_used_total_show(struct device *dev, @@ -300,15 +300,15 @@ static void zram_free_page(struct zram *zram, size_t index) */ if (zram_test_flag(meta, index, ZRAM_ZERO)) { zram_clear_flag(meta, index, ZRAM_ZERO); - atomic_dec(&zram->stats.pages_zero); + atomic64_dec(&zram->stats.zero_pages); } return; } zs_free(meta->mem_pool, handle); - atomic64_sub(meta->table[index].size, &zram->stats.compr_size); - atomic_dec(&zram->stats.pages_stored); + atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); + atomic64_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; meta->table[index].size = 0; @@ -451,7 +451,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, zram_set_flag(meta, index, ZRAM_ZERO); write_unlock(&zram->meta->tb_lock); - atomic_inc(&zram->stats.pages_zero); + atomic64_inc(&zram->stats.zero_pages); ret = 0; goto out; } @@ -507,8 +507,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, write_unlock(&zram->meta->tb_lock); /* Update stats */ - atomic64_add(clen, &zram->stats.compr_size); - atomic_inc(&zram->stats.pages_stored); + atomic64_add(clen, &zram->stats.compr_data_size); + atomic64_inc(&zram->stats.pages_stored); out: if (locked) mutex_unlock(&meta->buffer_lock); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 2f173cb1fd0a..58d4ac537f65 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -69,15 +69,15 @@ struct table { } __aligned(4); struct zram_stats { - atomic64_t compr_size; /* compressed size of pages stored */ + atomic64_t compr_data_size; /* compressed size of pages stored */ atomic64_t num_reads; /* failed + successful */ atomic64_t num_writes; /* --do-- */ atomic64_t failed_reads; /* should NEVER! happen */ atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ - atomic_t pages_zero; /* no. of zero filled pages */ - atomic_t pages_stored; /* no. of pages currently stored */ + atomic64_t zero_pages; /* no. of zero filled pages */ + atomic64_t pages_stored; /* no. of pages currently stored */ }; struct zram_meta { From e72baa0d78767e5eeaba4f3a3f82be4b584f57fd Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:04 -0700 Subject: [PATCH 189/277] zram: remove zram stats code duplication Introduce ZRAM_ATTR_RO macro that generates device_attribute and default ATTR show() function for existing atomic64_t zram stats. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit a68eb3b65e658406d386bebef02277f4007b2f45) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 82 ++++++++++------------------------- 1 file changed, 23 insertions(+), 59 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4da4210b3454..c4179fb54fde 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -42,6 +42,17 @@ static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +#define ZRAM_ATTR_RO(name) \ +static ssize_t zram_attr_##name##_show(struct device *d, \ + struct device_attribute *attr, char *b) \ +{ \ + struct zram *zram = dev_to_zram(d); \ + return sprintf(b, "%llu\n", \ + (u64)atomic64_read(&zram->stats.name)); \ +} \ +static struct device_attribute dev_attr_##name = \ + __ATTR(name, S_IRUGO, zram_attr_##name##_show, NULL); + static inline int init_done(struct zram *zram) { return zram->meta != NULL; @@ -63,53 +74,14 @@ static ssize_t disksize_show(struct device *dev, static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, char *buf) { + u32 val; struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", init_done(zram)); -} + down_read(&zram->init_lock); + val = init_done(zram); + up_read(&zram->init_lock); -static ssize_t num_reads_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_reads)); -} - -static ssize_t num_writes_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_writes)); -} - -static ssize_t invalid_io_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.invalid_io)); -} - -static ssize_t notify_free_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.notify_free)); -} - -static ssize_t zero_pages_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", (u64)atomic64_read(&zram->stats.zero_pages)); + return sprintf(buf, "%u\n", val); } static ssize_t orig_data_size_show(struct device *dev, @@ -121,15 +93,6 @@ static ssize_t orig_data_size_show(struct device *dev, (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } -static ssize_t compr_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.compr_data_size)); -} - static ssize_t mem_used_total_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -760,15 +723,16 @@ static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, disksize_show, disksize_store); static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); -static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); -static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); -static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); -static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); -static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); -static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); +ZRAM_ATTR_RO(num_reads); +ZRAM_ATTR_RO(num_writes); +ZRAM_ATTR_RO(invalid_io); +ZRAM_ATTR_RO(notify_free); +ZRAM_ATTR_RO(zero_pages); +ZRAM_ATTR_RO(compr_data_size); + static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, &dev_attr_initstate.attr, From 0a0d055ef99c20fe0e6b1cced9d358ab2e56eafd Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:05 -0700 Subject: [PATCH 190/277] zram: report failed read and write stats zram accounted but did not report numbers of failed read and write queries. make these stats available as failed_reads and failed_writes attrs. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 6444724939db5de7390c90f7b4a657159b3b4465) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c4179fb54fde..4043e783e50a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -728,6 +728,8 @@ static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); ZRAM_ATTR_RO(num_reads); ZRAM_ATTR_RO(num_writes); +ZRAM_ATTR_RO(failed_reads); +ZRAM_ATTR_RO(failed_writes); ZRAM_ATTR_RO(invalid_io); ZRAM_ATTR_RO(notify_free); ZRAM_ATTR_RO(zero_pages); @@ -739,6 +741,8 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_reset.attr, &dev_attr_num_reads.attr, &dev_attr_num_writes.attr, + &dev_attr_failed_reads.attr, + &dev_attr_failed_writes.attr, &dev_attr_invalid_io.attr, &dev_attr_notify_free.attr, &dev_attr_zero_pages.attr, From 7d9e350a92b55768e644d5f7b1aacab9acd032f6 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:06 -0700 Subject: [PATCH 191/277] zram: drop not used table `count' member struct table `count' member is not used. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 59fc86a4922f1a1c0f69eac758a7e2b2b138aab4) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 58d4ac537f65..1d5b1f5786a8 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -64,7 +64,6 @@ enum zram_pageflags { struct table { unsigned long handle; u16 size; /* object size (excluding header) */ - u8 count; /* object ref count (not yet used) */ u8 flags; } __aligned(4); From 65281bd78e42b4137f9cf87c4b734ff2325bf483 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:07 -0700 Subject: [PATCH 192/277] zram: move zram size warning to documentation Move zram warning about disksize and size of memory correlation to zram documentation. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e64cd51d2fa87733176246101df871a8ac5c7c20) Signed-off-by: Alex Shi --- Documentation/blockdev/zram.txt | 5 +++++ drivers/block/zram/zram_drv.c | 15 --------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 2eccddffa6c8..393541be1ec0 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -33,6 +33,11 @@ Following shows a typical sequence of steps for using zram. echo 512M > /sys/block/zram0/disksize echo 1G > /sys/block/zram0/disksize +Note: +There is little point creating a zram of greater than twice the size of memory +since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the +size of the disk when not in use so a huge zram is wasteful. + 3) Activate: mkswap /dev/zram0 swapon /dev/zram0 diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4043e783e50a..a083dffb699e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -534,23 +534,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) static void zram_init_device(struct zram *zram, struct zram_meta *meta) { - if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { - pr_info( - "There is little point creating a zram of greater than " - "twice the size of memory since we expect a 2:1 compression " - "ratio. Note that zram uses about 0.1%% of the size of " - "the disk when not in use so a huge zram is " - "wasteful.\n" - "\tMemory Size: %lu kB\n" - "\tSize you selected: %llu kB\n" - "Continuing anyway ...\n", - (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 - ); - } - /* zram devices sort of resembles non-rotational disks */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - zram->meta = meta; pr_debug("Initialization done!\n"); } From 6ab557fb5b5851a3848743ad6df597323b9d39ee Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:09 -0700 Subject: [PATCH 193/277] zram: delete zram_init_device() allocate new `zram_meta' in disksize_store() only for uninitialised zram device, saving a number of allocations and deallocations in case if disksize_store() was called on currently used device. at the same time zram_meta stack variable is not necessary, because we can set ->meta directly. there is also no need in setting QUEUE_FLAG_NONROT queue on every disksize_store(), set it once during device creation. [minchan@kernel.org: handle zram->meta alloc fail case] [minchan@kernel.org: prevent lockdep spew of init_lock] Signed-off-by: Sergey Senozhatsky Signed-off-by: Minchan Kim Acked-by: Jerome Marchand Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b67d1ec189ffb92cdad9b2bd29475fb1e0166983) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a083dffb699e..9c71757aa1c6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -532,14 +532,6 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) up_write(&zram->init_lock); } -static void zram_init_device(struct zram *zram, struct zram_meta *meta) -{ - /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - zram->meta = meta; - pr_debug("Initialization done!\n"); -} - static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -555,17 +547,18 @@ static ssize_t disksize_store(struct device *dev, meta = zram_meta_alloc(disksize); if (!meta) return -ENOMEM; + down_write(&zram->init_lock); if (init_done(zram)) { - up_write(&zram->init_lock); zram_meta_free(meta); + up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); return -EBUSY; } + zram->meta = meta; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - zram_init_device(zram, meta); up_write(&zram->init_lock); return len; @@ -774,7 +767,8 @@ static int create_device(struct zram *zram, int device_id) /* Actual capacity set using syfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); - + /* zram devices sort of resembles non-rotational disks */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. From 4c48f57e379f75106cde867e4561375ed37ef69f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:11 -0700 Subject: [PATCH 194/277] zram: introduce compressing backend abstraction ZRAM performs direct LZO compression algorithm calls, making it the one and only option. While LZO is generally performs well, LZ4 algorithm tends to have a faster decompression (see http://code.google.com/p/lz4/ for full report) Name Ratio C.speed D.speed MB/s MB/s LZ4 (r101) 2.084 422 1820 LZO 2.06 2.106 414 600 Thus, users who have mostly read (decompress) usage scenarious or mixed workflow (writes with relatively high read ops number) will benefit from using LZ4 compression backend. Introduce compressing backend abstraction zcomp in order to support multiple compression algorithms with the following set of operations: .create .destroy .compress .decompress Schematically zram write() usually contains the following steps: 0) preparation (decompression of partioal IO, etc.) 1) lock buffer_lock mutex (protects meta compress buffers) 2) compress (using meta compress buffers) 3) alloc and map zs_pool object 4) copy compressed data (from meta compress buffers) to object allocated by 3) 5) free previous pool page, assign a new one 6) unlock buffer_lock mutex As we can see, compressing buffers must remain untouched from 1) to 4), because, otherwise, concurrent write() can overwrite data. At the same time, zram_meta must be aware of a) specific compression algorithm memory requirements and b) necessary locking to protect compression buffers. To remove requirement a) new struct zcomp_strm introduced, which contains a compress/decompress `buffer' and compression algorithm `private' part. While struct zcomp implements zcomp_strm stream handling and locking and removes requirement b) from zram meta. zcomp ->create() and ->destroy(), respectively, allocate and deallocate algorithm specific zcomp_strm `private' part. Every zcomp has zcomp stream and mutex to protect its compression stream. Stream usage semantics remains the same -- only one write can hold stream lock and use its buffers. zcomp_strm_find() turns caller into exclusive user of a stream (holding stream mutex until zram release stream), and zcomp_strm_release() makes zcomp stream available (unlock the stream mutex). Hence no concurrent write (compression) operations possible at the moment. iozone -t 3 -R -r 16K -s 60M -I +Z test base patched -------------------------------------------------- Initial write 597992.91 591660.58 Rewrite 609674.34 616054.97 Read 2404771.75 2452909.12 Re-read 2459216.81 2470074.44 Reverse Read 1652769.66 1589128.66 Stride read 2202441.81 2202173.31 Random read 2236311.47 2276565.31 Mixed workload 1423760.41 1709760.06 Random write 579584.08 615933.86 Pwrite 597550.02 594933.70 Pread 1703672.53 1718126.72 Fwrite 1330497.06 1461054.00 Fread 3922851.00 3957242.62 Usage examples: comp = zcomp_create(NAME) /* NAME e.g. "lzo" */ which initialises compressing backend if requested algorithm is supported. Compress: zstrm = zcomp_strm_find(comp) zcomp_compress(comp, zstrm, src, &dst_len) [..] /* copy compressed data */ zcomp_strm_release(comp, zstrm) Decompress: zcomp_decompress(comp, src, src_len, dst); Free compessing backend and its zcomp stream: zcomp_destroy(comp) Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e7e1ef439d18f9a21521116ea9f2b976d7230e54) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 115 +++++++++++++++++++++++++++++++++ drivers/block/zram/zcomp.h | 58 +++++++++++++++++ drivers/block/zram/zcomp_lzo.c | 47 ++++++++++++++ drivers/block/zram/zcomp_lzo.h | 17 +++++ 4 files changed, 237 insertions(+) create mode 100644 drivers/block/zram/zcomp.c create mode 100644 drivers/block/zram/zcomp.h create mode 100644 drivers/block/zram/zcomp_lzo.c create mode 100644 drivers/block/zram/zcomp_lzo.h diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c new file mode 100644 index 000000000000..22f4ae235660 --- /dev/null +++ b/drivers/block/zram/zcomp.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#include "zcomp.h" +#include "zcomp_lzo.h" + +static struct zcomp_backend *find_backend(const char *compress) +{ + if (strncmp(compress, "lzo", 3) == 0) + return &zcomp_lzo; + return NULL; +} + +static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + if (zstrm->private) + comp->backend->destroy(zstrm->private); + free_pages((unsigned long)zstrm->buffer, 1); + kfree(zstrm); +} + +/* + * allocate new zcomp_strm structure with ->private initialized by + * backend, return NULL on error + */ +static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) +{ + struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); + if (!zstrm) + return NULL; + + zstrm->private = comp->backend->create(); + /* + * allocate 2 pages. 1 for compressed data, plus 1 extra for the + * case when compressed size is larger than the original one + */ + zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!zstrm->private || !zstrm->buffer) { + zcomp_strm_free(comp, zstrm); + zstrm = NULL; + } + return zstrm; +} + +struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) +{ + mutex_lock(&comp->strm_lock); + return comp->zstrm; +} + +void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + mutex_unlock(&comp->strm_lock); +} + +int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, + const unsigned char *src, size_t *dst_len) +{ + return comp->backend->compress(src, zstrm->buffer, dst_len, + zstrm->private); +} + +int zcomp_decompress(struct zcomp *comp, const unsigned char *src, + size_t src_len, unsigned char *dst) +{ + return comp->backend->decompress(src, src_len, dst); +} + +void zcomp_destroy(struct zcomp *comp) +{ + zcomp_strm_free(comp, comp->zstrm); + kfree(comp); +} + +/* + * search available compressors for requested algorithm. + * allocate new zcomp and initialize it. return NULL + * if requested algorithm is not supported or in case + * of init error + */ +struct zcomp *zcomp_create(const char *compress) +{ + struct zcomp *comp; + struct zcomp_backend *backend; + + backend = find_backend(compress); + if (!backend) + return NULL; + + comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); + if (!comp) + return NULL; + + comp->backend = backend; + mutex_init(&comp->strm_lock); + + comp->zstrm = zcomp_strm_alloc(comp); + if (!comp->zstrm) { + kfree(comp); + return NULL; + } + return comp; +} diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h new file mode 100644 index 000000000000..c9a98e1317fe --- /dev/null +++ b/drivers/block/zram/zcomp.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ZCOMP_H_ +#define _ZCOMP_H_ + +#include + +struct zcomp_strm { + /* compression/decompression buffer */ + void *buffer; + /* + * The private data of the compression stream, only compression + * stream backend can touch this (e.g. compression algorithm + * working memory) + */ + void *private; +}; + +/* static compression backend */ +struct zcomp_backend { + int (*compress)(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private); + + int (*decompress)(const unsigned char *src, size_t src_len, + unsigned char *dst); + + void *(*create)(void); + void (*destroy)(void *private); + + const char *name; +}; + +/* dynamic per-device compression frontend */ +struct zcomp { + struct mutex strm_lock; + struct zcomp_strm *zstrm; + struct zcomp_backend *backend; +}; + +struct zcomp *zcomp_create(const char *comp); +void zcomp_destroy(struct zcomp *comp); + +struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); +void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm); + +int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, + const unsigned char *src, size_t *dst_len); + +int zcomp_decompress(struct zcomp *comp, const unsigned char *src, + size_t src_len, unsigned char *dst); +#endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c new file mode 100644 index 000000000000..da1bc47d588e --- /dev/null +++ b/drivers/block/zram/zcomp_lzo.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "zcomp_lzo.h" + +static void *lzo_create(void) +{ + return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); +} + +static void lzo_destroy(void *private) +{ + kfree(private); +} + +static int lzo_compress(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private) +{ + int ret = lzo1x_1_compress(src, PAGE_SIZE, dst, dst_len, private); + return ret == LZO_E_OK ? 0 : ret; +} + +static int lzo_decompress(const unsigned char *src, size_t src_len, + unsigned char *dst) +{ + size_t dst_len = PAGE_SIZE; + int ret = lzo1x_decompress_safe(src, src_len, dst, &dst_len); + return ret == LZO_E_OK ? 0 : ret; +} + +struct zcomp_backend zcomp_lzo = { + .compress = lzo_compress, + .decompress = lzo_decompress, + .create = lzo_create, + .destroy = lzo_destroy, + .name = "lzo", +}; diff --git a/drivers/block/zram/zcomp_lzo.h b/drivers/block/zram/zcomp_lzo.h new file mode 100644 index 000000000000..128c5807fa14 --- /dev/null +++ b/drivers/block/zram/zcomp_lzo.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ZCOMP_LZO_H_ +#define _ZCOMP_LZO_H_ + +#include "zcomp.h" + +extern struct zcomp_backend zcomp_lzo; + +#endif /* _ZCOMP_LZO_H_ */ From 92e9d71d940158389d3553a382045c3c9a7a7e98 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:12 -0700 Subject: [PATCH 195/277] zram: use zcomp compressing backends Do not perform direct LZO compress/decompress calls, initialise and use zcomp LZO backend (single compression stream) instead. [akpm@linux-foundation.org: resolve conflicts with zram-delete-zram_init_device-fix.patch] Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b7ca232ee7e85ed3b18e39eb20a7f458ee1d6047) Signed-off-by: Alex Shi --- drivers/block/zram/Makefile | 2 +- drivers/block/zram/zram_drv.c | 69 ++++++++++++++++------------------- drivers/block/zram/zram_drv.h | 8 ++-- 3 files changed, 36 insertions(+), 43 deletions(-) diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index cb0f9ced6a93..757c6a5cadff 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,3 +1,3 @@ -zram-y := zram_drv.o +zram-y := zcomp_lzo.o zcomp.o zram_drv.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9c71757aa1c6..9f5d2c2f9ea7 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -38,6 +37,7 @@ /* Globals */ static int zram_major; static struct zram *zram_devices; +static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ static unsigned int num_devices = 1; @@ -159,8 +159,6 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio) static void zram_meta_free(struct zram_meta *meta) { zs_destroy_pool(meta->mem_pool); - kfree(meta->compress_workmem); - free_pages((unsigned long)meta->compress_buffer, 1); vfree(meta->table); kfree(meta); } @@ -172,22 +170,11 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) if (!meta) goto out; - meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!meta->compress_workmem) - goto free_meta; - - meta->compress_buffer = - (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); - if (!meta->compress_buffer) { - pr_err("Error allocating compressor buffer space\n"); - goto free_workmem; - } - num_pages = disksize >> PAGE_SHIFT; meta->table = vzalloc(num_pages * sizeof(*meta->table)); if (!meta->table) { pr_err("Error allocating zram address table\n"); - goto free_buffer; + goto free_meta; } meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); @@ -197,15 +184,10 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) } rwlock_init(&meta->tb_lock); - mutex_init(&meta->buffer_lock); return meta; free_table: vfree(meta->table); -free_buffer: - free_pages((unsigned long)meta->compress_buffer, 1); -free_workmem: - kfree(meta->compress_workmem); free_meta: kfree(meta); meta = NULL; @@ -279,8 +261,7 @@ static void zram_free_page(struct zram *zram, size_t index) static int zram_decompress_page(struct zram *zram, char *mem, u32 index) { - int ret = LZO_E_OK; - size_t clen = PAGE_SIZE; + int ret = 0; unsigned char *cmem; struct zram_meta *meta = zram->meta; unsigned long handle; @@ -300,12 +281,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) if (size == PAGE_SIZE) copy_page(mem, cmem); else - ret = lzo1x_decompress_safe(cmem, size, mem, &clen); + ret = zcomp_decompress(zram->comp, cmem, size, mem); zs_unmap_object(meta->mem_pool, handle); read_unlock(&meta->tb_lock); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); atomic64_inc(&zram->stats.failed_reads); return ret; @@ -348,7 +329,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, ret = zram_decompress_page(zram, uncmem, index); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) + if (unlikely(ret)) goto out_cleanup; if (is_partial_io(bvec)) @@ -373,11 +354,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; struct zram_meta *meta = zram->meta; + struct zcomp_strm *zstrm; bool locked = false; page = bvec->bv_page; - src = meta->compress_buffer; - if (is_partial_io(bvec)) { /* * This is a partial IO. We need to read the full page @@ -393,7 +373,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - mutex_lock(&meta->buffer_lock); + zstrm = zcomp_strm_find(zram->comp); locked = true; user_mem = kmap_atomic(page); @@ -419,22 +399,20 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, - meta->compress_workmem); + ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); user_mem = NULL; uncmem = NULL; } - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Compression failed! err=%d\n", ret); goto out; } - + src = zstrm->buffer; if (unlikely(clen > max_zpage_size)) { clen = PAGE_SIZE; - src = NULL; if (is_partial_io(bvec)) src = uncmem; } @@ -456,6 +434,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, memcpy(cmem, src, clen); } + zcomp_strm_release(zram->comp, zstrm); + locked = false; zs_unmap_object(meta->mem_pool, handle); /* @@ -474,10 +454,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, atomic64_inc(&zram->stats.pages_stored); out: if (locked) - mutex_unlock(&meta->buffer_lock); + zcomp_strm_release(zram->comp, zstrm); if (is_partial_io(bvec)) kfree(uncmem); - if (ret) atomic64_inc(&zram->stats.failed_writes); return ret; @@ -521,6 +500,7 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) zs_free(meta->mem_pool, handle); } + zcomp_destroy(zram->comp); zram_meta_free(zram->meta); zram->meta = NULL; /* Reset stats */ @@ -538,6 +518,7 @@ static ssize_t disksize_store(struct device *dev, u64 disksize; struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); + int err; disksize = memparse(buf, NULL); if (!disksize) @@ -550,10 +531,17 @@ static ssize_t disksize_store(struct device *dev, down_write(&zram->init_lock); if (init_done(zram)) { - zram_meta_free(meta); - up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); - return -EBUSY; + err = -EBUSY; + goto out_free_meta; + } + + zram->comp = zcomp_create(default_compressor); + if (!zram->comp) { + pr_info("Cannot initialise %s compressing backend\n", + default_compressor); + err = -EINVAL; + goto out_free_meta; } zram->meta = meta; @@ -562,6 +550,11 @@ static ssize_t disksize_store(struct device *dev, up_write(&zram->init_lock); return len; + +out_free_meta: + up_write(&zram->init_lock); + zram_meta_free(meta); + return err; } static ssize_t reset_store(struct device *dev, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 1d5b1f5786a8..45e04f7b713f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -16,9 +16,10 @@ #define _ZRAM_DRV_H_ #include -#include #include +#include "zcomp.h" + /* * Some arbitrary value. This is just to catch * invalid value for num_devices module parameter. @@ -81,17 +82,16 @@ struct zram_stats { struct zram_meta { rwlock_t tb_lock; /* protect table */ - void *compress_workmem; - void *compress_buffer; struct table *table; struct zs_pool *mem_pool; - struct mutex buffer_lock; /* protect compress buffers */ }; struct zram { struct zram_meta *meta; struct request_queue *queue; struct gendisk *disk; + struct zcomp *comp; + /* Prevent concurrent execution of device init, reset and R/W request */ struct rw_semaphore init_lock; /* From 9381faeb8bc1d1fdfb6931b69ee70a594d04c4fe Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:13 -0700 Subject: [PATCH 196/277] zram: factor out single stream compression This is preparation patch to add multi stream support to zcomp. Introduce struct zcomp_strm_single and a set of functions to manage zcomp_strm stream access. zcomp_strm_single implements single compession stream, same way as current zcomp implementation. This moves zcomp_strm stream control and locking from zcomp, so compressing backend zcomp is not aware of required locking. Single and multi streams require different locking schemes. Minchan Kim reported that spinlock-based locking scheme (which is used in multi stream implementation) has demonstrated a severe perfomance regression for single compression stream case, comparing to mutex-based. see https://lkml.org/lkml/2014/2/18/16 The following set of functions added: - zcomp_strm_single_find()/zcomp_strm_single_release() find and release a compression stream, implement required locking - zcomp_strm_single_create()/zcomp_strm_single_destroy() create and destroy zcomp_strm_single New ->strm_find() and ->strm_release() callbacks added to zcomp, which are set to zcomp_strm_single_find() and zcomp_strm_single_release() during initialisation. Instead of direct locking and zcomp_strm access from zcomp_strm_find() and zcomp_strm_release(), zcomp now calls ->strm_find() and ->strm_release() correspondingly. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9cc97529a180b369fcb7e5265771b6ba7e01f05b) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 62 +++++++++++++++++++++++++++++++++----- drivers/block/zram/zcomp.h | 7 +++-- 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 22f4ae235660..72e8071f9d73 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -16,6 +16,14 @@ #include "zcomp.h" #include "zcomp_lzo.h" +/* + * single zcomp_strm backend + */ +struct zcomp_strm_single { + struct mutex strm_lock; + struct zcomp_strm *zstrm; +}; + static struct zcomp_backend *find_backend(const char *compress) { if (strncmp(compress, "lzo", 3) == 0) @@ -54,15 +62,56 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) return zstrm; } +static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp) +{ + struct zcomp_strm_single *zs = comp->stream; + mutex_lock(&zs->strm_lock); + return zs->zstrm; +} + +static void zcomp_strm_single_release(struct zcomp *comp, + struct zcomp_strm *zstrm) +{ + struct zcomp_strm_single *zs = comp->stream; + mutex_unlock(&zs->strm_lock); +} + +static void zcomp_strm_single_destroy(struct zcomp *comp) +{ + struct zcomp_strm_single *zs = comp->stream; + zcomp_strm_free(comp, zs->zstrm); + kfree(zs); +} + +static int zcomp_strm_single_create(struct zcomp *comp) +{ + struct zcomp_strm_single *zs; + + comp->destroy = zcomp_strm_single_destroy; + comp->strm_find = zcomp_strm_single_find; + comp->strm_release = zcomp_strm_single_release; + zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL); + if (!zs) + return -ENOMEM; + + comp->stream = zs; + mutex_init(&zs->strm_lock); + zs->zstrm = zcomp_strm_alloc(comp); + if (!zs->zstrm) { + kfree(zs); + return -ENOMEM; + } + return 0; +} + struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) { - mutex_lock(&comp->strm_lock); - return comp->zstrm; + return comp->strm_find(comp); } void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm) { - mutex_unlock(&comp->strm_lock); + comp->strm_release(comp, zstrm); } int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, @@ -80,7 +129,7 @@ int zcomp_decompress(struct zcomp *comp, const unsigned char *src, void zcomp_destroy(struct zcomp *comp) { - zcomp_strm_free(comp, comp->zstrm); + comp->destroy(comp); kfree(comp); } @@ -104,10 +153,7 @@ struct zcomp *zcomp_create(const char *compress) return NULL; comp->backend = backend; - mutex_init(&comp->strm_lock); - - comp->zstrm = zcomp_strm_alloc(comp); - if (!comp->zstrm) { + if (zcomp_strm_single_create(comp) != 0) { kfree(comp); return NULL; } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index c9a98e1317fe..dc3500d842a3 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -39,9 +39,12 @@ struct zcomp_backend { /* dynamic per-device compression frontend */ struct zcomp { - struct mutex strm_lock; - struct zcomp_strm *zstrm; + void *stream; struct zcomp_backend *backend; + + struct zcomp_strm *(*strm_find)(struct zcomp *comp); + void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm); + void (*destroy)(struct zcomp *comp); }; struct zcomp *zcomp_create(const char *comp); From de980e9147bd30182894d0bb6778fbd0c8a7f293 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:14 -0700 Subject: [PATCH 197/277] zram: add multi stream functionality Existing zram (zcomp) implementation has only one compression stream (buffer and algorithm private part), so in order to prevent data corruption only one write (compress operation) can use this compression stream, forcing all concurrent write operations to wait for stream lock to be released. This patch changes zcomp to keep a compression streams list of user-defined size (via sysfs device attr). Each write operation still exclusively holds compression stream, the difference is that we can have N write operations (depending on size of streams list) executing in parallel. See TEST section later in commit message for performance data. Introduce struct zcomp_strm_multi and a set of functions to manage zcomp_strm stream access. zcomp_strm_multi has a list of idle zcomp_strm structs, spinlock to protect idle list and wait queue, making it possible to perform parallel compressions. The following set of functions added: - zcomp_strm_multi_find()/zcomp_strm_multi_release() find and release a compression stream, implement required locking - zcomp_strm_multi_create()/zcomp_strm_multi_destroy() create and destroy zcomp_strm_multi zcomp ->strm_find() and ->strm_release() callbacks are set during initialisation to zcomp_strm_multi_find()/zcomp_strm_multi_release() correspondingly. Each time zcomp issues a zcomp_strm_multi_find() call, the following set of operations performed: - spin lock strm_lock - if idle list is not empty, remove zcomp_strm from idle list, spin unlock and return zcomp stream pointer to caller - if idle list is empty, current adds itself to wait queue. it will be awaken by zcomp_strm_multi_release() caller. zcomp_strm_multi_release(): - spin lock strm_lock - add zcomp stream to idle list - spin unlock, wake up sleeper Minchan Kim reported that spinlock-based locking scheme has demonstrated a severe perfomance regression for single compression stream case, comparing to mutex-based (see https://lkml.org/lkml/2014/2/18/16) base spinlock mutex ==Initial write ==Initial write ==Initial write records: 5 records: 5 records: 5 avg: 1642424.35 avg: 699610.40 avg: 1655583.71 std: 39890.95(2.43%) std: 232014.19(33.16%) std: 52293.96 max: 1690170.94 max: 1163473.45 max: 1697164.75 min: 1568669.52 min: 573429.88 min: 1553410.23 ==Rewrite ==Rewrite ==Rewrite records: 5 records: 5 records: 5 avg: 1611775.39 avg: 501406.64 avg: 1684419.11 std: 17144.58(1.06%) std: 15354.41(3.06%) std: 18367.42 max: 1641800.95 max: 531356.78 max: 1706445.84 min: 1593515.27 min: 488817.78 min: 1655335.73 When only one compression stream available, mutex with spin on owner tends to perform much better than frequent wait_event()/wake_up(). This is why single stream implemented as a special case with mutex locking. Introduce and document zram device attribute max_comp_streams. This attr shows and stores current zcomp's max number of zcomp streams (max_strm). Extend zcomp's zcomp_create() with `max_strm' parameter. `max_strm' limits the number of zcomp_strm structs in compression backend's idle list (max_comp_streams). max_comp_streams used during initialisation as follows: -- passing to zcomp_create() max_strm equals to 1 will initialise zcomp using single compression stream zcomp_strm_single (mutex-based locking). -- passing to zcomp_create() max_strm greater than 1 will initialise zcomp using multi compression stream zcomp_strm_multi (spinlock-based locking). default max_comp_streams value is 1, meaning that zram with single stream will be initialised. Later patch will introduce configuration knob to change max_comp_streams on already initialised and used zcomp. TEST iozone -t 3 -R -r 16K -s 60M -I +Z test base 1 strm (mutex) 3 strm (spinlock) ----------------------------------------------------------------------- Initial write 589286.78 583518.39 718011.05 Rewrite 604837.97 596776.38 1515125.72 Random write 584120.11 595714.58 1388850.25 Pwrite 535731.17 541117.38 739295.27 Fwrite 1418083.88 1478612.72 1484927.06 Usage example: set max_comp_streams to 4 echo 4 > /sys/block/zram0/max_comp_streams show current max_comp_streams (default value is 1). cat /sys/block/zram0/max_comp_streams Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit beca3ec71fe5490ee9237dc42400f50402baf83e) Signed-off-by: Alex Shi --- Documentation/ABI/testing/sysfs-block-zram | 22 ++++ Documentation/blockdev/zram.txt | 31 +++++- drivers/block/zram/zcomp.c | 124 ++++++++++++++++++++- drivers/block/zram/zcomp.h | 4 +- drivers/block/zram/zram_drv.c | 42 ++++++- drivers/block/zram/zram_drv.h | 2 +- 6 files changed, 215 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index ec93fe33baa6..d67f0bb1c726 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -42,6 +42,28 @@ Description: The invalid_io file is read-only and specifies the number of non-page-size-aligned I/O requests issued to this device. +What: /sys/block/zram/failed_reads +Date: February 2014 +Contact: Sergey Senozhatsky +Description: + The failed_reads file is read-only and specifies the number of + failed reads happened on this device. + +What: /sys/block/zram/failed_writes +Date: February 2014 +Contact: Sergey Senozhatsky +Description: + The failed_writes file is read-only and specifies the number of + failed writes happened on this device. + +What: /sys/block/zram/max_comp_streams +Date: February 2014 +Contact: Sergey Senozhatsky +Description: + The max_comp_streams file is read-write and specifies the + number of backend's zcomp_strm compression streams (number of + concurrent compress operations). + What: /sys/block/zram/notify_free Date: August 2010 Contact: Nitin Gupta diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 393541be1ec0..e82c03f26f31 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -21,7 +21,28 @@ Following shows a typical sequence of steps for using zram. This creates 4 devices: /dev/zram{0,1,2,3} (num_devices parameter is optional. Default: 1) -2) Set Disksize +2) Set max number of compression streams + Compression backend may use up to max_comp_streams compression streams, + thus allowing up to max_comp_streams concurrent compression operations. + By default, compression backend uses single compression stream. + + Examples: + #show max compression streams number + cat /sys/block/zram0/max_comp_streams + + #set max compression streams number to 3 + echo 3 > /sys/block/zram0/max_comp_streams + +Note: +In order to enable compression backend's multi stream support max_comp_streams +must be initially set to desired concurrency level before ZRAM device +initialisation. Once the device initialised as a single stream compression +backend (max_comp_streams equals to 0) changing the value of max_comp_streams +will not take any effect, because single stream compression backend implemented +as a special case and does not support dynamic max_comp_streams. Only multi +stream backend supports dynamic max_comp_streams adjustment. + +3) Set Disksize Set disk size by writing the value to sysfs node 'disksize'. The value can be either in bytes or you can use mem suffixes. Examples: @@ -38,14 +59,14 @@ There is little point creating a zram of greater than twice the size of memory since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful. -3) Activate: +4) Activate: mkswap /dev/zram0 swapon /dev/zram0 mkfs.ext4 /dev/zram1 mount /dev/zram1 /tmp -4) Stats: +5) Stats: Per-device statistics are exported as various nodes under /sys/block/zram/ disksize @@ -59,11 +80,11 @@ size of the disk when not in use so a huge zram is wasteful. compr_data_size mem_used_total -5) Deactivate: +6) Deactivate: swapoff /dev/zram0 umount /dev/zram1 -6) Reset: +7) Reset: Write any positive value to 'reset' sysfs node echo 1 > /sys/block/zram0/reset echo 1 > /sys/block/zram1/reset diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 72e8071f9d73..c06f75f54718 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -24,6 +24,21 @@ struct zcomp_strm_single { struct zcomp_strm *zstrm; }; +/* + * multi zcomp_strm backend + */ +struct zcomp_strm_multi { + /* protect strm list */ + spinlock_t strm_lock; + /* max possible number of zstrm streams */ + int max_strm; + /* number of available zstrm streams */ + int avail_strm; + /* list of available strms */ + struct list_head idle_strm; + wait_queue_head_t strm_wait; +}; + static struct zcomp_backend *find_backend(const char *compress) { if (strncmp(compress, "lzo", 3) == 0) @@ -62,6 +77,107 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) return zstrm; } +/* + * get idle zcomp_strm or wait until other process release + * (zcomp_strm_release()) one for us + */ +static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + while (1) { + spin_lock(&zs->strm_lock); + if (!list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + spin_unlock(&zs->strm_lock); + return zstrm; + } + /* zstrm streams limit reached, wait for idle stream */ + if (zs->avail_strm >= zs->max_strm) { + spin_unlock(&zs->strm_lock); + wait_event(zs->strm_wait, !list_empty(&zs->idle_strm)); + continue; + } + /* allocate new zstrm stream */ + zs->avail_strm++; + spin_unlock(&zs->strm_lock); + + zstrm = zcomp_strm_alloc(comp); + if (!zstrm) { + spin_lock(&zs->strm_lock); + zs->avail_strm--; + spin_unlock(&zs->strm_lock); + wait_event(zs->strm_wait, !list_empty(&zs->idle_strm)); + continue; + } + break; + } + return zstrm; +} + +/* add stream back to idle list and wake up waiter or free the stream */ +static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + struct zcomp_strm_multi *zs = comp->stream; + + spin_lock(&zs->strm_lock); + if (zs->avail_strm <= zs->max_strm) { + list_add(&zstrm->list, &zs->idle_strm); + spin_unlock(&zs->strm_lock); + wake_up(&zs->strm_wait); + return; + } + + zs->avail_strm--; + spin_unlock(&zs->strm_lock); + zcomp_strm_free(comp, zstrm); +} + +static void zcomp_strm_multi_destroy(struct zcomp *comp) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + while (!list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + zcomp_strm_free(comp, zstrm); + } + kfree(zs); +} + +static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm) +{ + struct zcomp_strm *zstrm; + struct zcomp_strm_multi *zs; + + comp->destroy = zcomp_strm_multi_destroy; + comp->strm_find = zcomp_strm_multi_find; + comp->strm_release = zcomp_strm_multi_release; + zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL); + if (!zs) + return -ENOMEM; + + comp->stream = zs; + spin_lock_init(&zs->strm_lock); + INIT_LIST_HEAD(&zs->idle_strm); + init_waitqueue_head(&zs->strm_wait); + zs->max_strm = max_strm; + zs->avail_strm = 1; + + zstrm = zcomp_strm_alloc(comp); + if (!zstrm) { + kfree(zs); + return -ENOMEM; + } + list_add(&zstrm->list, &zs->idle_strm); + return 0; +} + static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp) { struct zcomp_strm_single *zs = comp->stream; @@ -139,7 +255,7 @@ void zcomp_destroy(struct zcomp *comp) * if requested algorithm is not supported or in case * of init error */ -struct zcomp *zcomp_create(const char *compress) +struct zcomp *zcomp_create(const char *compress, int max_strm) { struct zcomp *comp; struct zcomp_backend *backend; @@ -153,7 +269,11 @@ struct zcomp *zcomp_create(const char *compress) return NULL; comp->backend = backend; - if (zcomp_strm_single_create(comp) != 0) { + if (max_strm > 1) + zcomp_strm_multi_create(comp, max_strm); + else + zcomp_strm_single_create(comp); + if (!comp->stream) { kfree(comp); return NULL; } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index dc3500d842a3..2a3684446160 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -21,6 +21,8 @@ struct zcomp_strm { * working memory) */ void *private; + /* used in multi stream backend, protected by backend strm_lock */ + struct list_head list; }; /* static compression backend */ @@ -47,7 +49,7 @@ struct zcomp { void (*destroy)(struct zcomp *comp); }; -struct zcomp *zcomp_create(const char *comp); +struct zcomp *zcomp_create(const char *comp, int max_strm); void zcomp_destroy(struct zcomp *comp); struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9f5d2c2f9ea7..88ee317ce6d2 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -108,6 +108,40 @@ static ssize_t mem_used_total_show(struct device *dev, return sprintf(buf, "%llu\n", val); } +static ssize_t max_comp_streams_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->max_comp_streams; + up_read(&zram->init_lock); + + return sprintf(buf, "%d\n", val); +} + +static ssize_t max_comp_streams_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int num; + struct zram *zram = dev_to_zram(dev); + + if (kstrtoint(buf, 0, &num)) + return -EINVAL; + if (num < 1) + return -EINVAL; + down_write(&zram->init_lock); + if (init_done(zram)) { + up_write(&zram->init_lock); + pr_info("Can't set max_comp_streams for initialized device\n"); + return -EBUSY; + } + zram->max_comp_streams = num; + up_write(&zram->init_lock); + return len; +} + /* flag operations needs meta->tb_lock */ static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) @@ -501,6 +535,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) } zcomp_destroy(zram->comp); + zram->max_comp_streams = 1; + zram_meta_free(zram->meta); zram->meta = NULL; /* Reset stats */ @@ -536,7 +572,7 @@ static ssize_t disksize_store(struct device *dev, goto out_free_meta; } - zram->comp = zcomp_create(default_compressor); + zram->comp = zcomp_create(default_compressor, zram->max_comp_streams); if (!zram->comp) { pr_info("Cannot initialise %s compressing backend\n", default_compressor); @@ -696,6 +732,8 @@ static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); +static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, + max_comp_streams_show, max_comp_streams_store); ZRAM_ATTR_RO(num_reads); ZRAM_ATTR_RO(num_writes); @@ -720,6 +758,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_orig_data_size.attr, &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, + &dev_attr_max_comp_streams.attr, NULL, }; @@ -782,6 +821,7 @@ static int create_device(struct zram *zram, int device_id) } zram->meta = NULL; + zram->max_comp_streams = 1; return 0; out_free_disk: diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 45e04f7b713f..ccf36d11755a 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -99,7 +99,7 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - + int max_comp_streams; struct zram_stats stats; }; #endif From 9634c9a147869de6d78f53a49823980aaea7233c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:15 -0700 Subject: [PATCH 198/277] zram: add set_max_streams knob This patch allows to change max_comp_streams on initialised zcomp. Introduce zcomp set_max_streams() knob, zcomp_strm_multi_set_max_streams() and zcomp_strm_single_set_max_streams() callbacks to change streams limit for zcomp_strm_multi and zcomp_strm_single, accordingly. set_max_streams for single steam zcomp does nothing. If user has lowered the limit, then zcomp_strm_multi_set_max_streams() attempts to immediately free extra streams (as much as it can, depending on idle streams availability). Note, this patch does not allow to change stream 'policy' from single to multi stream (or vice versa) on already initialised compression backend. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit fe8eb122c82b2049c460fc6df6e8583a2f935cff) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 36 +++++++++++++++++++++++++++++++++++ drivers/block/zram/zcomp.h | 3 +++ drivers/block/zram/zram_drv.c | 5 ++--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index c06f75f54718..ac276f79f21c 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -136,6 +136,29 @@ static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstr zcomp_strm_free(comp, zstrm); } +/* change max_strm limit */ +static int zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + spin_lock(&zs->strm_lock); + zs->max_strm = num_strm; + /* + * if user has lowered the limit and there are idle streams, + * immediately free as much streams (and memory) as we can. + */ + while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + zcomp_strm_free(comp, zstrm); + zs->avail_strm--; + } + spin_unlock(&zs->strm_lock); + return 0; +} + static void zcomp_strm_multi_destroy(struct zcomp *comp) { struct zcomp_strm_multi *zs = comp->stream; @@ -158,6 +181,7 @@ static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm) comp->destroy = zcomp_strm_multi_destroy; comp->strm_find = zcomp_strm_multi_find; comp->strm_release = zcomp_strm_multi_release; + comp->set_max_streams = zcomp_strm_multi_set_max_streams; zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL); if (!zs) return -ENOMEM; @@ -192,6 +216,12 @@ static void zcomp_strm_single_release(struct zcomp *comp, mutex_unlock(&zs->strm_lock); } +static int zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm) +{ + /* zcomp_strm_single support only max_comp_streams == 1 */ + return -ENOTSUPP; +} + static void zcomp_strm_single_destroy(struct zcomp *comp) { struct zcomp_strm_single *zs = comp->stream; @@ -206,6 +236,7 @@ static int zcomp_strm_single_create(struct zcomp *comp) comp->destroy = zcomp_strm_single_destroy; comp->strm_find = zcomp_strm_single_find; comp->strm_release = zcomp_strm_single_release; + comp->set_max_streams = zcomp_strm_single_set_max_streams; zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL); if (!zs) return -ENOMEM; @@ -220,6 +251,11 @@ static int zcomp_strm_single_create(struct zcomp *comp) return 0; } +int zcomp_set_max_streams(struct zcomp *comp, int num_strm) +{ + return comp->set_max_streams(comp, num_strm); +} + struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) { return comp->strm_find(comp); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 2a3684446160..bd11d59c5dd1 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -46,6 +46,7 @@ struct zcomp { struct zcomp_strm *(*strm_find)(struct zcomp *comp); void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm); + int (*set_max_streams)(struct zcomp *comp, int num_strm); void (*destroy)(struct zcomp *comp); }; @@ -60,4 +61,6 @@ int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, int zcomp_decompress(struct zcomp *comp, const unsigned char *src, size_t src_len, unsigned char *dst); + +int zcomp_set_max_streams(struct zcomp *comp, int num_strm); #endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 88ee317ce6d2..c03d0053309b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -133,9 +133,8 @@ static ssize_t max_comp_streams_store(struct device *dev, return -EINVAL; down_write(&zram->init_lock); if (init_done(zram)) { - up_write(&zram->init_lock); - pr_info("Can't set max_comp_streams for initialized device\n"); - return -EBUSY; + if (zcomp_set_max_streams(zram->comp, num)) + pr_info("Cannot change max compression streams\n"); } zram->max_comp_streams = num; up_write(&zram->init_lock); From 0626b80d185372c3b9f28f5c6c5e11647f381472 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:17 -0700 Subject: [PATCH 199/277] zram: make compression algorithm selection possible Add and document `comp_algorithm' device attribute. This attribute allows to show supported compression and currently selected compression algorithms: cat /sys/block/zram0/comp_algorithm [lzo] lz4 and change selected compression algorithm: echo lzo > /sys/block/zram0/comp_algorithm Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e46b8a030d76d3c94156c545c3f4c3676d813435) Signed-off-by: Alex Shi --- Documentation/ABI/testing/sysfs-block-zram | 8 +++++ Documentation/blockdev/zram.txt | 24 +++++++++++--- drivers/block/zram/zcomp.c | 32 +++++++++++++++++-- drivers/block/zram/zcomp.h | 2 ++ drivers/block/zram/zram_drv.c | 37 ++++++++++++++++++++-- drivers/block/zram/zram_drv.h | 1 + 6 files changed, 93 insertions(+), 11 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index d67f0bb1c726..2775966c2d12 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -64,6 +64,14 @@ Description: number of backend's zcomp_strm compression streams (number of concurrent compress operations). +What: /sys/block/zram/comp_algorithm +Date: February 2014 +Contact: Sergey Senozhatsky +Description: + The comp_algorithm file is read-write and lets to show + available and selected compression algorithms, change + compression algorithm selection. + What: /sys/block/zram/notify_free Date: August 2010 Contact: Nitin Gupta diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index e82c03f26f31..4ab2ce98f63c 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -42,7 +42,21 @@ will not take any effect, because single stream compression backend implemented as a special case and does not support dynamic max_comp_streams. Only multi stream backend supports dynamic max_comp_streams adjustment. -3) Set Disksize +3) Select compression algorithm + Using comp_algorithm device attribute one can see available and + currently selected (shown in square brackets) compression algortithms, + change selected compression algorithm (once the device is initialised + there is no way to change compression algorithm). + + Examples: + #show supported compression algorithms + cat /sys/block/zram0/comp_algorithm + lzo [lz4] + + #select lzo compression algorithm + echo lzo > /sys/block/zram0/comp_algorithm + +4) Set Disksize Set disk size by writing the value to sysfs node 'disksize'. The value can be either in bytes or you can use mem suffixes. Examples: @@ -59,14 +73,14 @@ There is little point creating a zram of greater than twice the size of memory since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful. -4) Activate: +5) Activate: mkswap /dev/zram0 swapon /dev/zram0 mkfs.ext4 /dev/zram1 mount /dev/zram1 /tmp -5) Stats: +6) Stats: Per-device statistics are exported as various nodes under /sys/block/zram/ disksize @@ -80,11 +94,11 @@ size of the disk when not in use so a huge zram is wasteful. compr_data_size mem_used_total -6) Deactivate: +7) Deactivate: swapoff /dev/zram0 umount /dev/zram1 -7) Reset: +8) Reset: Write any positive value to 'reset' sysfs node echo 1 > /sys/block/zram0/reset echo 1 > /sys/block/zram1/reset diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index ac276f79f21c..aad533a8bc55 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -39,11 +39,20 @@ struct zcomp_strm_multi { wait_queue_head_t strm_wait; }; +static struct zcomp_backend *backends[] = { + &zcomp_lzo, + NULL +}; + static struct zcomp_backend *find_backend(const char *compress) { - if (strncmp(compress, "lzo", 3) == 0) - return &zcomp_lzo; - return NULL; + int i = 0; + while (backends[i]) { + if (sysfs_streq(compress, backends[i]->name)) + break; + i++; + } + return backends[i]; } static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) @@ -251,6 +260,23 @@ static int zcomp_strm_single_create(struct zcomp *comp) return 0; } +/* show available compressors */ +ssize_t zcomp_available_show(const char *comp, char *buf) +{ + ssize_t sz = 0; + int i = 0; + + while (backends[i]) { + if (sysfs_streq(comp, backends[i]->name)) + sz += sprintf(buf + sz, "[%s] ", backends[i]->name); + else + sz += sprintf(buf + sz, "%s ", backends[i]->name); + i++; + } + sz += sprintf(buf + sz, "\n"); + return sz; +} + int zcomp_set_max_streams(struct zcomp *comp, int num_strm) { return comp->set_max_streams(comp, num_strm); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index bd11d59c5dd1..8b8997f8613b 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -50,6 +50,8 @@ struct zcomp { void (*destroy)(struct zcomp *comp); }; +ssize_t zcomp_available_show(const char *comp, char *buf); + struct zcomp *zcomp_create(const char *comp, int max_strm); void zcomp_destroy(struct zcomp *comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c03d0053309b..f15564e65d87 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -141,6 +141,34 @@ static ssize_t max_comp_streams_store(struct device *dev, return len; } +static ssize_t comp_algorithm_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + size_t sz; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + sz = zcomp_available_show(zram->compressor, buf); + up_read(&zram->init_lock); + + return sz; +} + +static ssize_t comp_algorithm_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + down_write(&zram->init_lock); + if (init_done(zram)) { + up_write(&zram->init_lock); + pr_info("Can't change algorithm for initialized device\n"); + return -EBUSY; + } + strlcpy(zram->compressor, buf, sizeof(zram->compressor)); + up_write(&zram->init_lock); + return len; +} + /* flag operations needs meta->tb_lock */ static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) @@ -571,10 +599,10 @@ static ssize_t disksize_store(struct device *dev, goto out_free_meta; } - zram->comp = zcomp_create(default_compressor, zram->max_comp_streams); + zram->comp = zcomp_create(zram->compressor, zram->max_comp_streams); if (!zram->comp) { pr_info("Cannot initialise %s compressing backend\n", - default_compressor); + zram->compressor); err = -EINVAL; goto out_free_meta; } @@ -733,6 +761,8 @@ static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, max_comp_streams_show, max_comp_streams_store); +static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, + comp_algorithm_show, comp_algorithm_store); ZRAM_ATTR_RO(num_reads); ZRAM_ATTR_RO(num_writes); @@ -758,6 +788,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, &dev_attr_max_comp_streams.attr, + &dev_attr_comp_algorithm.attr, NULL, }; @@ -818,7 +849,7 @@ static int create_device(struct zram *zram, int device_id) pr_warn("Error creating sysfs group"); goto out_free_disk; } - + strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); zram->meta = NULL; zram->max_comp_streams = 1; return 0; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index ccf36d11755a..7f21c145e317 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -101,5 +101,6 @@ struct zram { u64 disksize; /* bytes */ int max_comp_streams; struct zram_stats stats; + char compressor[10]; }; #endif From f84e7a4599807591a02c524314c1a78cc01ba41d Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:18 -0700 Subject: [PATCH 200/277] zram: add lz4 algorithm backend Introduce LZ4 compression backend and make it available for selection. LZ4 support is optional and requires user to set ZRAM_LZ4_COMPRESS config option. The default compression backend is LZO. TEST (x86_64, core i5, 2 cores + 2 hyperthreading, zram disk size 1G, ext4 file system, 3 compression streams) iozone -t 3 -R -r 16K -s 60M -I +Z Test LZO LZ4 ---------------------------------------------- Initial write 1642744.62 1317005.09 Rewrite 2498980.88 1800645.16 Read 3957026.38 5877043.75 Re-read 3950997.38 5861847.00 Reverse Read 2937114.56 5047384.00 Stride read 2948163.19 4929587.38 Random read 3292692.69 4880793.62 Mixed workload 1545602.62 3502940.38 Random write 2448039.75 1758786.25 Pwrite 1670051.03 1338329.69 Pread 2530682.00 5097177.62 Fwrite 3232085.62 3275942.56 Fread 6306880.25 6645271.12 So on my system LZ4 is slower in write-only tests, while it performs better in read-only and mixed (reads + writes) tests. Official LZ4 benchmarks available here http://code.google.com/p/lz4/ (linux kernel uses revision r90). Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 6e76668e415adf799839f0ab205142ad7002d260) Signed-off-by: Alex Shi --- drivers/block/zram/Kconfig | 10 ++++++++ drivers/block/zram/Makefile | 2 ++ drivers/block/zram/zcomp.c | 6 +++++ drivers/block/zram/zcomp_lz4.c | 47 ++++++++++++++++++++++++++++++++++ drivers/block/zram/zcomp_lz4.h | 17 ++++++++++++ 5 files changed, 82 insertions(+) create mode 100644 drivers/block/zram/zcomp_lz4.c create mode 100644 drivers/block/zram/zcomp_lz4.h diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 3450be850399..6489c0fd0ea6 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -15,6 +15,16 @@ config ZRAM See zram.txt for more information. +config ZRAM_LZ4_COMPRESS + bool "Enable LZ4 algorithm support" + depends on ZRAM + select LZ4_COMPRESS + select LZ4_DECOMPRESS + default n + help + This option enables LZ4 compression algorithm support. Compression + algorithm can be changed using `comp_algorithm' device attribute. + config ZRAM_DEBUG bool "Compressed RAM block device debug support" depends on ZRAM diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index 757c6a5cadff..be0763ff57a2 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,3 +1,5 @@ zram-y := zcomp_lzo.o zcomp.o zram_drv.o +zram-$(CONFIG_ZRAM_LZ4_COMPRESS) += zcomp_lz4.o + obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index aad533a8bc55..d5919031ca8b 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -15,6 +15,9 @@ #include "zcomp.h" #include "zcomp_lzo.h" +#ifdef CONFIG_ZRAM_LZ4_COMPRESS +#include "zcomp_lz4.h" +#endif /* * single zcomp_strm backend @@ -41,6 +44,9 @@ struct zcomp_strm_multi { static struct zcomp_backend *backends[] = { &zcomp_lzo, +#ifdef CONFIG_ZRAM_LZ4_COMPRESS + &zcomp_lz4, +#endif NULL }; diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c new file mode 100644 index 000000000000..f2afb7e988c3 --- /dev/null +++ b/drivers/block/zram/zcomp_lz4.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "zcomp_lz4.h" + +static void *zcomp_lz4_create(void) +{ + return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL); +} + +static void zcomp_lz4_destroy(void *private) +{ + kfree(private); +} + +static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private) +{ + /* return : Success if return 0 */ + return lz4_compress(src, PAGE_SIZE, dst, dst_len, private); +} + +static int zcomp_lz4_decompress(const unsigned char *src, size_t src_len, + unsigned char *dst) +{ + size_t dst_len = PAGE_SIZE; + /* return : Success if return 0 */ + return lz4_decompress_unknownoutputsize(src, src_len, dst, &dst_len); +} + +struct zcomp_backend zcomp_lz4 = { + .compress = zcomp_lz4_compress, + .decompress = zcomp_lz4_decompress, + .create = zcomp_lz4_create, + .destroy = zcomp_lz4_destroy, + .name = "lz4", +}; diff --git a/drivers/block/zram/zcomp_lz4.h b/drivers/block/zram/zcomp_lz4.h new file mode 100644 index 000000000000..60613fb29dd8 --- /dev/null +++ b/drivers/block/zram/zcomp_lz4.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ZCOMP_LZ4_H_ +#define _ZCOMP_LZ4_H_ + +#include "zcomp.h" + +extern struct zcomp_backend zcomp_lz4; + +#endif /* _ZCOMP_LZ4_H_ */ From c7bb5623e03444502f37821e1a391ef61828d4c3 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:19 -0700 Subject: [PATCH 201/277] zram: move comp allocation out of init_lock While fixing lockdep spew of ->init_lock reported by Sasha Levin [1], Minchan Kim noted [2] that it's better to move compression backend allocation (using GPF_KERNEL) out of the ->init_lock lock, same way as with zram_meta_alloc(), in order to prevent the same lockdep spew. [1] https://lkml.org/lkml/2014/2/27/337 [2] https://lkml.org/lkml/2014/3/3/32 Signed-off-by: Sergey Senozhatsky Reported-by: Minchan Kim Acked-by: Minchan Kim Cc: Sasha Levin Acked-by: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit d61f98c70e8b0d324e8e83be2ed546d6295e63f3) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f15564e65d87..fe0daa9fe59e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -579,9 +579,10 @@ static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { u64 disksize; + struct zcomp *comp; struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); - int err; + int err = -EINVAL; disksize = memparse(buf, NULL); if (!disksize) @@ -592,30 +593,32 @@ static ssize_t disksize_store(struct device *dev, if (!meta) return -ENOMEM; - down_write(&zram->init_lock); - if (init_done(zram)) { - pr_info("Cannot change disksize for initialized device\n"); - err = -EBUSY; - goto out_free_meta; - } - - zram->comp = zcomp_create(zram->compressor, zram->max_comp_streams); - if (!zram->comp) { + comp = zcomp_create(zram->compressor, zram->max_comp_streams); + if (!comp) { pr_info("Cannot initialise %s compressing backend\n", zram->compressor); - err = -EINVAL; - goto out_free_meta; + goto out_cleanup; + } + + down_write(&zram->init_lock); + if (init_done(zram)) { + up_write(&zram->init_lock); + pr_info("Cannot change disksize for initialized device\n"); + err = -EBUSY; + goto out_cleanup; } zram->meta = meta; + zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); return len; -out_free_meta: - up_write(&zram->init_lock); +out_cleanup: + if (comp) + zcomp_destroy(comp); zram_meta_free(meta); return err; } From 6fef1131ae139299f9a45924ef866ede77c5c500 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:20 -0700 Subject: [PATCH 202/277] zram: return error-valued pointer from zcomp_create() Instead of returning just NULL, return ERR_PTR from zcomp_create() if compressing backend creation has failed. ERR_PTR(-EINVAL) for unsupported compression algorithm request, ERR_PTR(-ENOMEM) for allocation (zcomp or compression stream) error. Perform IS_ERR() check of returned from zcomp_create() value in disksize_store() and set return code to PTR_ERR(). Change suggested by Jerome Marchand. [akpm@linux-foundation.org: clean up error recovery flow] Signed-off-by: Sergey Senozhatsky Reported-by: Jerome Marchand Cc: Minchan Kim Cc: Nitin Gupta Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit fcfa8d95cacf5cbbe6dee6b8d229fe86142266e0) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 14 ++++++++------ drivers/block/zram/zram_drv.c | 19 ++++++++++--------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index d5919031ca8b..5647d8fe1dc1 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -319,9 +320,10 @@ void zcomp_destroy(struct zcomp *comp) /* * search available compressors for requested algorithm. - * allocate new zcomp and initialize it. return NULL - * if requested algorithm is not supported or in case - * of init error + * allocate new zcomp and initialize it. return compressing + * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) + * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in + * case of allocation error. */ struct zcomp *zcomp_create(const char *compress, int max_strm) { @@ -330,11 +332,11 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) backend = find_backend(compress); if (!backend) - return NULL; + return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); if (!comp) - return NULL; + return ERR_PTR(-ENOMEM); comp->backend = backend; if (max_strm > 1) @@ -343,7 +345,7 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) zcomp_strm_single_create(comp); if (!comp->stream) { kfree(comp); - return NULL; + return ERR_PTR(-ENOMEM); } return comp; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index fe0daa9fe59e..407f541b26a1 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "zram_drv.h" @@ -582,7 +583,7 @@ static ssize_t disksize_store(struct device *dev, struct zcomp *comp; struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); - int err = -EINVAL; + int err; disksize = memparse(buf, NULL); if (!disksize) @@ -594,18 +595,18 @@ static ssize_t disksize_store(struct device *dev, return -ENOMEM; comp = zcomp_create(zram->compressor, zram->max_comp_streams); - if (!comp) { + if (IS_ERR(comp)) { pr_info("Cannot initialise %s compressing backend\n", zram->compressor); - goto out_cleanup; + err = PTR_ERR(comp); + goto out_free_meta; } down_write(&zram->init_lock); if (init_done(zram)) { - up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); err = -EBUSY; - goto out_cleanup; + goto out_destroy_comp; } zram->meta = meta; @@ -613,12 +614,12 @@ static ssize_t disksize_store(struct device *dev, zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); - return len; -out_cleanup: - if (comp) - zcomp_destroy(comp); +out_destroy_comp: + up_write(&zram->init_lock); + zcomp_destroy(comp); +out_free_meta: zram_meta_free(meta); return err; } From 1b2dc1d89b1a5bff45782c9ddb6e0d384c7c6420 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 7 Apr 2014 15:38:21 -0700 Subject: [PATCH 203/277] zram: propagate error to user When we initialized zcomp with single, we couldn't change max_comp_streams without zram reset but current interface doesn't show any error to user and even it changes max_comp_streams's value without any effect so it would make user very confusing. This patch prevents max_comp_streams's change when zcomp was initialized as single zcomp and emit the error to user(ex, echo). [akpm@linux-foundation.org: don't return with the lock held, per Sergey] [fengguang.wu@intel.com: fix coccinelle warnings] Signed-off-by: Minchan Kim Cc: Nitin Gupta Cc: Jerome Marchand Acked-by: Sergey Senozhatsky Signed-off-by: Fengguang Wu Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 60a726e33375a1096e85399cfa1327081b4c38be) Signed-off-by: Alex Shi --- Documentation/blockdev/zram.txt | 9 +++++---- drivers/block/zram/zcomp.c | 10 +++++----- drivers/block/zram/zcomp.h | 4 ++-- drivers/block/zram/zram_drv.c | 17 +++++++++++++---- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 4ab2ce98f63c..2db1687a4b10 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -37,10 +37,11 @@ Note: In order to enable compression backend's multi stream support max_comp_streams must be initially set to desired concurrency level before ZRAM device initialisation. Once the device initialised as a single stream compression -backend (max_comp_streams equals to 0) changing the value of max_comp_streams -will not take any effect, because single stream compression backend implemented -as a special case and does not support dynamic max_comp_streams. Only multi -stream backend supports dynamic max_comp_streams adjustment. +backend (max_comp_streams equals to 1), you will see error if you try to change +the value of max_comp_streams because single stream compression backend +implemented as a special case by lock overhead issue and does not support +dynamic max_comp_streams. Only multi stream backend supports dynamic +max_comp_streams adjustment. 3) Select compression algorithm Using comp_algorithm device attribute one can see available and diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 5647d8fe1dc1..b0e7592c44d8 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -153,7 +153,7 @@ static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstr } /* change max_strm limit */ -static int zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) +static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) { struct zcomp_strm_multi *zs = comp->stream; struct zcomp_strm *zstrm; @@ -172,7 +172,7 @@ static int zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) zs->avail_strm--; } spin_unlock(&zs->strm_lock); - return 0; + return true; } static void zcomp_strm_multi_destroy(struct zcomp *comp) @@ -232,10 +232,10 @@ static void zcomp_strm_single_release(struct zcomp *comp, mutex_unlock(&zs->strm_lock); } -static int zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm) +static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm) { /* zcomp_strm_single support only max_comp_streams == 1 */ - return -ENOTSUPP; + return false; } static void zcomp_strm_single_destroy(struct zcomp *comp) @@ -284,7 +284,7 @@ ssize_t zcomp_available_show(const char *comp, char *buf) return sz; } -int zcomp_set_max_streams(struct zcomp *comp, int num_strm) +bool zcomp_set_max_streams(struct zcomp *comp, int num_strm) { return comp->set_max_streams(comp, num_strm); } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 8b8997f8613b..c59d1fca72c0 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -46,7 +46,7 @@ struct zcomp { struct zcomp_strm *(*strm_find)(struct zcomp *comp); void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm); - int (*set_max_streams)(struct zcomp *comp, int num_strm); + bool (*set_max_streams)(struct zcomp *comp, int num_strm); void (*destroy)(struct zcomp *comp); }; @@ -64,5 +64,5 @@ int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, int zcomp_decompress(struct zcomp *comp, const unsigned char *src, size_t src_len, unsigned char *dst); -int zcomp_set_max_streams(struct zcomp *comp, int num_strm); +bool zcomp_set_max_streams(struct zcomp *comp, int num_strm); #endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 407f541b26a1..27da5967b57b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -127,19 +127,28 @@ static ssize_t max_comp_streams_store(struct device *dev, { int num; struct zram *zram = dev_to_zram(dev); + int ret; - if (kstrtoint(buf, 0, &num)) - return -EINVAL; + ret = kstrtoint(buf, 0, &num); + if (ret < 0) + return ret; if (num < 1) return -EINVAL; + down_write(&zram->init_lock); if (init_done(zram)) { - if (zcomp_set_max_streams(zram->comp, num)) + if (!zcomp_set_max_streams(zram->comp, num)) { pr_info("Cannot change max compression streams\n"); + ret = -EINVAL; + goto out; + } } + zram->max_comp_streams = num; + ret = len; +out: up_write(&zram->init_lock); - return len; + return ret; } static ssize_t comp_algorithm_show(struct device *dev, From 575ef7bc322df6e0384ef661f510928bfa1dab81 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:22 -0700 Subject: [PATCH 204/277] zram: use scnprintf() in attrs show() methods sysfs.txt documentation lists the following requirements: - The buffer will always be PAGE_SIZE bytes in length. On i386, this is 4096. - show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). - show() should always use scnprintf(). Use scnprintf() in show() functions. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 56b4e8cb85827a2ccc4752a2a7148e56b62b7e96) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 8 +++++--- drivers/block/zram/zram_drv.c | 12 ++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index b0e7592c44d8..f1ff39a3d1c1 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -275,12 +275,14 @@ ssize_t zcomp_available_show(const char *comp, char *buf) while (backends[i]) { if (sysfs_streq(comp, backends[i]->name)) - sz += sprintf(buf + sz, "[%s] ", backends[i]->name); + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, + "[%s] ", backends[i]->name); else - sz += sprintf(buf + sz, "%s ", backends[i]->name); + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, + "%s ", backends[i]->name); i++; } - sz += sprintf(buf + sz, "\n"); + sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n"); return sz; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 27da5967b57b..031598bc14b4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -48,7 +48,7 @@ static ssize_t zram_attr_##name##_show(struct device *d, \ struct device_attribute *attr, char *b) \ { \ struct zram *zram = dev_to_zram(d); \ - return sprintf(b, "%llu\n", \ + return scnprintf(b, PAGE_SIZE, "%llu\n", \ (u64)atomic64_read(&zram->stats.name)); \ } \ static struct device_attribute dev_attr_##name = \ @@ -69,7 +69,7 @@ static ssize_t disksize_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%llu\n", zram->disksize); + return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); } static ssize_t initstate_show(struct device *dev, @@ -82,7 +82,7 @@ static ssize_t initstate_show(struct device *dev, val = init_done(zram); up_read(&zram->init_lock); - return sprintf(buf, "%u\n", val); + return scnprintf(buf, PAGE_SIZE, "%u\n", val); } static ssize_t orig_data_size_show(struct device *dev, @@ -90,7 +90,7 @@ static ssize_t orig_data_size_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%llu\n", + return scnprintf(buf, PAGE_SIZE, "%llu\n", (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } @@ -106,7 +106,7 @@ static ssize_t mem_used_total_show(struct device *dev, val = zs_get_total_size_bytes(meta->mem_pool); up_read(&zram->init_lock); - return sprintf(buf, "%llu\n", val); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); } static ssize_t max_comp_streams_show(struct device *dev, @@ -119,7 +119,7 @@ static ssize_t max_comp_streams_show(struct device *dev, val = zram->max_comp_streams; up_read(&zram->init_lock); - return sprintf(buf, "%d\n", val); + return scnprintf(buf, PAGE_SIZE, "%d\n", val); } static ssize_t max_comp_streams_store(struct device *dev, From ba6d7663fa3c9b310f33b9b8a18743af8b3727c9 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 7 Apr 2014 15:38:24 -0700 Subject: [PATCH 205/277] zram: support REQ_DISCARD zram is ram based block device and can be used by backend of filesystem. When filesystem deletes a file, it normally doesn't do anything on data block of that file. It just marks on metadata of that file. This behavior has no problem on disk based block device, but has problems on ram based block device, since we can't free memory used for data block. To overcome this disadvantage, there is REQ_DISCARD functionality. If block device support REQ_DISCARD and filesystem is mounted with discard option, filesystem sends REQ_DISCARD to block device whenever some data blocks are discarded. All we have to do is to handle this request. This patch implements to flag up QUEUE_FLAG_DISCARD and handle this REQ_DISCARD request. With it, we can free memory used by zram if it isn't used. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Joonsoo Kim Cc: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit f4659d8e620d08bd1a84a8aec5d2f5294a242764) Signed-off-by: Alex Shi Conflicts: drivers/block/zram/zram_drv.c Conflicts solution: keep use old bio struct, and bio_for_each_segment() --- drivers/block/zram/zram_drv.c | 62 +++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 031598bc14b4..19cf51ad48ef 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -550,6 +550,47 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } +/* + * zram_bio_discard - handler on discard request + * @index: physical block index in PAGE_SIZE units + * @offset: byte offset within physical block + */ +static void zram_bio_discard(struct zram *zram, u32 index, + int offset, struct bio *bio) +{ + size_t n = bio->bi_size; + + /* + * zram manages data in physical block size units. Because logical block + * size isn't identical with physical block size on some arch, we + * could get a discard request pointing to a specific offset within a + * certain physical block. Although we can handle this request by + * reading that physiclal block and decompressing and partially zeroing + * and re-compressing and then re-storing it, this isn't reasonable + * because our intent with a discard request is to save memory. So + * skipping this logical block is appropriate here. + */ + if (offset) { + if (n < offset) + return; + + n -= offset; + index++; + } + + while (n >= PAGE_SIZE) { + /* + * Discard request can be large so the lock hold times could be + * lengthy. So take the lock once per page. + */ + write_lock(&zram->meta->tb_lock); + zram_free_page(zram, index); + write_unlock(&zram->meta->tb_lock); + index++; + n -= PAGE_SIZE; + } +} + static void zram_reset_device(struct zram *zram, bool reset_capacity) { size_t index; @@ -684,6 +725,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; + if (unlikely(bio->bi_rw & REQ_DISCARD)) { + zram_bio_discard(zram, index, offset, bio); + bio_endio(bio, 0); + return; + } + bio_for_each_segment(bvec, bio, i) { int max_transfer_size = PAGE_SIZE - offset; @@ -853,6 +900,21 @@ static int create_device(struct zram *zram, int device_id) ZRAM_LOGICAL_BLOCK_SIZE); blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); + zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_discard_sectors = UINT_MAX; + /* + * zram_bio_discard() will clear all logical blocks if logical block + * size is identical with physical block size(PAGE_SIZE). But if it is + * different, we will skip discarding some parts of logical blocks in + * the part of the request range which isn't aligned to physical block + * size. So we can't ensure that all discarded logical blocks are + * zeroed. + */ + if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) + zram->disk->queue->limits.discard_zeroes_data = 1; + else + zram->disk->queue->limits.discard_zeroes_data = 0; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); add_disk(zram->disk); From c14a95869402ecb41d86ffac66723575bf15f21c Mon Sep 17 00:00:00 2001 From: Kyungsik Lee Date: Mon, 8 Jul 2013 16:01:45 -0700 Subject: [PATCH 206/277] decompressor: add LZ4 decompressor module Add support for LZ4 decompression in the Linux Kernel. LZ4 Decompression APIs for kernel are based on LZ4 implementation by Yann Collet. Benchmark Results(PATCH v3) Compiler: Linaro ARM gcc 4.6.2 1. ARMv7, 1.5GHz based board Kernel: linux 3.4 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.7MB 20.1MB/s, 25.2MB/s(UA) LZ4 7.3MB 29.1MB/s, 45.6MB/s(UA) 2. ARMv7, 1.7GHz based board Kernel: linux 3.7 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.0MB 34.1MB/s, 52.2MB/s(UA) LZ4 6.5MB 86.7MB/s - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch set is for adding support for LZ4-compressed Kernel. LZ4 is a very fast lossless compression algorithm and it also features an extremely fast decoder [1]. But we have five of decompressors already and one question which does arise, however, is that of where do we stop adding new ones? This issue had been discussed and came to the conclusion [2]. Russell King said that we should have: - one decompressor which is the fastest - one decompressor for the highest compression ratio - one popular decompressor (eg conventional gzip) If we have a replacement one for one of these, then it should do exactly that: replace it. The benchmark shows that an 8% increase in image size vs a 66% increase in decompression speed compared to LZO(which has been known as the fastest decompressor in the Kernel). Therefore the "fast but may not be small" compression title has clearly been taken by LZ4 [3]. [1] http://code.google.com/p/lz4/ [2] http://thread.gmane.org/gmane.linux.kbuild.devel/9157 [3] http://thread.gmane.org/gmane.linux.kbuild.devel/9347 LZ4 homepage: http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository: http://code.google.com/p/lz4/ Signed-off-by: Kyungsik Lee Signed-off-by: Yann Collet Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Russell King Cc: Borislav Petkov Cc: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit cffb78b0e0b3a30b059b27a1d97500cf6464efa9) Signed-off-by: Alex Shi --- include/linux/lz4.h | 51 ++++++ lib/lz4/lz4_decompress.c | 326 +++++++++++++++++++++++++++++++++++++++ lib/lz4/lz4defs.h | 94 +++++++++++ 3 files changed, 471 insertions(+) create mode 100644 include/linux/lz4.h create mode 100644 lib/lz4/lz4_decompress.c create mode 100644 lib/lz4/lz4defs.h diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 000000000000..7f6c75a093f8 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,51 @@ +#ifndef __LZ4_H__ +#define __LZ4_H__ +/* + * LZ4 Kernel Interface + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * lz4_compressbound() + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +static inline size_t lz4_compressbound(size_t isize) +{ + return isize + (isize / 255) + 16; +} + +/* + * lz4_decompress() + * src : source address of the compressed data + * src_len : is the input size, whcih is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + * slightly faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len); + +/* + * lz4_decompress_unknownoutputsize() + * src : source address of the compressed data + * src_len : is the input size, therefore the compressed size + * dest : output buffer address of the decompressed data + * dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after + * decompress done + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + */ +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len); +#endif diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c new file mode 100644 index 000000000000..dcc89753af65 --- /dev/null +++ b/lib/lz4/lz4_decompress.c @@ -0,0 +1,326 @@ +/* + * LZ4 Decompressor for Linux kernel + * + * Copyright (C) 2013 LG Electronics Co., Ltd. (http://www.lge.com/) + * + * Based on LZ4 implementation by Yann Collet. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#ifndef STATIC +#include +#include +#endif +#include + +#include + +#include "lz4defs.h" + +static int lz4_uncompress(const char *source, char *dest, int osize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *ref; + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + osize; + BYTE *cpy; + unsigned token; + size_t length; + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + while (1) { + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + size_t len; + + len = *ip++; + for (; len == 255; length += 255) + len = *ip++; + length += len; + } + + /* copy literals */ + cpy = op + length; + if (unlikely(cpy > oend - COPYLENGTH)) { + /* + * Error: not enough place for another match + * (min 4) + 5 literals + */ + if (cpy != oend) + goto _output_error; + + memcpy(op, ip, length); + ip += length; + break; /* EOF */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + + /* Error: offset create reference outside destination buffer */ + if (unlikely(ref < (BYTE *const) dest)) + goto _output_error; + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + for (; *ip == 255; length += 255) + ip++; + length += *ip++; + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op-ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > (oend - COPYLENGTH)) { + + /* Error: request to write beyond destination buffer */ + if (cpy > oend) + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)ip) - source); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, + int isize, size_t maxoutputsize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *ref; + + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + maxoutputsize; + BYTE *cpy; + + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + /* Main Loop */ + while (ip < iend) { + + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + + if (cpy > oend) + goto _output_error;/* writes beyond buffer */ + + if (ip + length != iend) + goto _output_error;/* + * Error: LZ4 format requires + * to consume all input + * at this stage + */ + memcpy(op, ip, length); + op += length; + break;/* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const) dest) + goto _output_error; + /* + * Error : offset creates reference + * outside of destination buffer + */ + + /* get matchlength */ + length = (token & ML_MASK); + if (length == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op - ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE-4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + goto _output_error; /* write outside of buf */ + + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *) op) - dest); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *) ip) - source)); +} + +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len) +{ + int ret = -1; + int input_len = 0; + + input_len = lz4_uncompress(src, dest, actual_dest_len); + if (input_len < 0) + goto exit_0; + *src_len = input_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress); +#endif + +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len) +{ + int ret = -1; + int out_len = 0; + + out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, + *dest_len); + if (out_len < 0) + goto exit_0; + *dest_len = out_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Decompressor"); +#endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h new file mode 100644 index 000000000000..43ac31d63f36 --- /dev/null +++ b/lib/lz4/lz4defs.h @@ -0,0 +1,94 @@ +/* + * lz4defs.h -- architecture specific defines + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Detects 64 bits mode + */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ + || defined(__ppc64__) || defined(__LP64__)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Architecture-specific macros + */ +#define BYTE u8 +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ + || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ + && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; + +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + +#define PUT4(s, d) (A32(d) = A32(s)) +#define PUT8(s, d) (A64(d) = A64(s)) +#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#define PUT4(s, d) \ + put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) +#define PUT8(s, d) \ + put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) +#endif + +#define COPYLENGTH 8 +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) + +#if LZ4_ARCH64/* 64-bit */ +#define STEPSIZE 8 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT8(s, d); \ + d += 8; \ + s += 8; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) + +#define LZ4_SECURECOPY(s, d, e) \ + do { \ + if (d < e) { \ + LZ4_WILDCOPY(s, d, e); \ + } \ + } while (0) + +#else /* 32-bit */ +#define STEPSIZE 4 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT4(s, d); \ + d += 4; \ + s += 4; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) \ + do { \ + LZ4_COPYSTEP(s, d); \ + LZ4_COPYSTEP(s, d); \ + } while (0) + +#define LZ4_SECURECOPY LZ4_WILDCOPY +#endif + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) + +#define LZ4_WILDCOPY(s, d, e) \ + do { \ + LZ4_COPYPACKET(s, d); \ + } while (d < e) From 2da344e89ce0d5827a3eb3f306d63a62f0e6b03e Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Mon, 8 Jul 2013 16:01:49 -0700 Subject: [PATCH 207/277] lib: add lz4 compressor module This patchset is for supporting LZ4 compression and the crypto API using it. As shown below, the size of data is a little bit bigger but compressing speed is faster under the enabled unaligned memory access. We can use lz4 de/compression through crypto API as well. Also, It will be useful for another potential user of lz4 compression. lz4 Compression Benchmark: Compiler: ARM gcc 4.6.4 ARMv7, 1 GHz based board Kernel: linux 3.4 Uncompressed data Size: 101 MB Compressed Size compression Speed LZO 72.1MB 32.1MB/s, 33.0MB/s(UA) LZ4 75.1MB 30.4MB/s, 35.9MB/s(UA) LZ4HC 59.8MB 2.4MB/s, 2.5MB/s(UA) - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch: Add support for LZ4 compression in the Linux Kernel. LZ4 Compression APIs for kernel are based on LZ4 implementation by Yann Collet and were changed for kernel coding style. LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository : http://code.google.com/p/lz4/ svn revision : r90 Two APIs are added: lz4_compress() support basic lz4 compression whereas lz4hc_compress() support high compression or CPU performance get lower but compression ratio get higher. Also, we require the pre-allocated working memory with the defined size and destination buffer must be allocated with the size of lz4_compressbound. [akpm@linux-foundation.org: make lz4_compresshcctx() static] Signed-off-by: Chanho Min Cc: "Darrick J. Wong" Cc: Bob Pearson Cc: Richard Weinberger Cc: Herbert Xu Cc: Yann Collet Cc: Kyungsik Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit c72ac7a1a926dbffb59daf0f275450e5eecce16f) Signed-off-by: Alex Shi --- include/linux/lz4.h | 36 +++ lib/Kconfig | 9 + lib/Makefile | 3 + lib/lz4/Makefile | 3 + lib/lz4/lz4_compress.c | 443 ++++++++++++++++++++++++++++++++ lib/lz4/lz4defs.h | 66 ++++- lib/lz4/lz4hc_compress.c | 539 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 1097 insertions(+), 2 deletions(-) create mode 100644 lib/lz4/Makefile create mode 100644 lib/lz4/lz4_compress.c create mode 100644 lib/lz4/lz4hc_compress.c diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 7f6c75a093f8..d21c13f10a64 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -9,6 +9,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *)) +#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *)) /* * lz4_compressbound() @@ -20,6 +22,40 @@ static inline size_t lz4_compressbound(size_t isize) return isize + (isize / 255) + 16; } +/* + * lz4_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + + /* + * lz4hc_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* * lz4_decompress() * src : source address of the compressed data diff --git a/lib/Kconfig b/lib/Kconfig index fe01d418b09a..06d94d885877 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -189,6 +189,15 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +config LZ4_COMPRESS + tristate + +config LZ4HC_COMPRESS + tristate + +config LZ4_DECOMPRESS + tristate + source "lib/xz/Kconfig" # diff --git a/lib/Makefile b/lib/Makefile index 9efe480b975e..b6d7848a526f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -76,6 +76,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_LZ4_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile new file mode 100644 index 000000000000..8085d04e9309 --- /dev/null +++ b/lib/lz4/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c new file mode 100644 index 000000000000..fd94058bd7f9 --- /dev/null +++ b/lib/lz4/lz4_compress.c @@ -0,0 +1,443 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +/* + * LZ4_compressCtx : + * ----------------- + * Compress 'isize' bytes from 'source' into an output buffer 'dest' of + * maximum size 'maxOutputSize'. * If it cannot achieve it, compression + * will stop, and result of the function will be zero. + * return : the number of bytes written in buffer 'dest', or 0 if the + * compression fails + */ +static inline int lz4_compressctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + HTYPE *hashtable = (HTYPE *)ctx; + const u8 *ip = (u8 *)source; +#if LZ4_ARCH64 + const BYTE * const base = ip; +#else + const int base = 0; +#endif + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; + forwardh = LZ4_HASH_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (unlikely(forwardip > mflimit)) + goto _last_literals; + + forwardh = LZ4_HASH_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = ip - base; + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + + if (length >= (int)RUN_MASK) { + int len; + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + /* Encode MatchLength */ + length = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) + return 0; + if (length >= (int)ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length -= 510) { + *op++ = 255; + *op++ = 255; + } + if (length > 254) { + length -= 255; + *op++ = 255; + } + *op++ = (u8)length; + } else + *token += length; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + + /* Test next position */ + ref = base + hashtable[LZ4_HASH_VALUE(ip)]; + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (((char *)op - dest) + lastrun + 1 + + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) + return 0; + + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + + /* End */ + return (int)(((char *)op) - dest); +} + +static inline int lz4_compress64kctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + u16 *hashtable = (u16 *)ctx; + const u8 *ip = (u8 *) source; + const u8 *anchor = ip; + const u8 *const base = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int len, length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (forwardip > mflimit) + goto _last_literals; + + forwardh = LZ4_HASH64K_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = (u16)(ip - base); + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) + && (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + + while (ip < MATCHLIMIT - (STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + + /* Encode MatchLength */ + len = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return 0; + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (u8)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + + /* Test next position */ + ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; + hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) + return 0; + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int)(((char *)op) - dest); +} + +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + if (src_len < LZ4_64KLIMIT) + out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + else + out_len = lz4_compressctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + + return 0; +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 43ac31d63f36..abcecdc2d0f2 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -22,23 +22,40 @@ * Architecture-specific macros */ #define BYTE u8 +typedef struct _U16_S { u16 v; } U16_S; +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) -typedef struct _U32_S { u32 v; } U32_S; -typedef struct _U64_S { u64 v; } U64_S; +#define A16(x) (((U16_S *)(x))->v) #define A32(x) (((U32_S *)(x))->v) #define A64(x) (((U64_S *)(x))->v) #define PUT4(s, d) (A32(d) = A32(s)) #define PUT8(s, d) (A64(d) = A64(s)) +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + A16(p) = v; \ + p += 2; \ + } while (0) #else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ +#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) +#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) +#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) + #define PUT4(s, d) \ put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) #define PUT8(s, d) \ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned(v, (u16 *)(p)); \ + p += 2; \ + } while (0) #endif #define COPYLENGTH 8 @@ -46,6 +63,29 @@ typedef struct _U64_S { u64 v; } U64_S; #define ML_MASK ((1U << ML_BITS) - 1) #define RUN_BITS (8 - ML_BITS) #define RUN_MASK ((1U << RUN_BITS) - 1) +#define MEMORY_USAGE 14 +#define MINMATCH 4 +#define SKIPSTRENGTH 6 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) +#define MAXD_LOG 16 +#define MAXD (1 << MAXD_LOG) +#define MAXD_MASK (u32)(MAXD - 1) +#define MAX_DISTANCE (MAXD - 1) +#define HASH_LOG (MAXD_LOG - 1) +#define HASHTABLESIZE (1 << HASH_LOG) +#define MAX_NB_ATTEMPTS 256 +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) +#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - (MEMORY_USAGE-2))) +#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASHLOG64K)) +#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASH_LOG)) #if LZ4_ARCH64/* 64-bit */ #define STEPSIZE 8 @@ -65,6 +105,13 @@ typedef struct _U64_S { u64 v; } U64_S; LZ4_WILDCOPY(s, d, e); \ } \ } while (0) +#define HTYPE u32 + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) +#endif #else /* 32-bit */ #define STEPSIZE 4 @@ -83,6 +130,14 @@ typedef struct _U64_S { u64 v; } U64_S; } while (0) #define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const u8* + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) +#endif + #endif #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ @@ -92,3 +147,10 @@ typedef struct _U64_S { u64 v; } U64_S; do { \ LZ4_COPYPACKET(s, d); \ } while (d < e) + +#define LZ4_BLINDCOPY(s, d, l) \ + do { \ + u8 *e = (d) + l; \ + LZ4_WILDCOPY(s, d, e); \ + d = e; \ + } while (0) diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c new file mode 100644 index 000000000000..eb1a74f5e368 --- /dev/null +++ b/lib/lz4/lz4hc_compress.c @@ -0,0 +1,539 @@ +/* + * LZ4 HC - High Compression Mode of LZ4 + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +struct lz4hc_data { + const u8 *base; + HTYPE hashtable[HASHTABLESIZE]; + u16 chaintable[MAXD]; + const u8 *nexttoupdate; +} __attribute__((__packed__)); + +static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) +{ + memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); + memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); + +#if LZ4_ARCH64 + hc4->nexttoupdate = base + 1; +#else + hc4->nexttoupdate = base; +#endif + hc4->base = base; + return 1; +} + +/* Update chains up to ip (excluded) */ +static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) +{ + u16 *chaintable = hc4->chaintable; + HTYPE *hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + + while (hc4->nexttoupdate < ip) { + const u8 *p = hc4->nexttoupdate; + size_t delta = p - (hashtable[HASH_VALUE(p)] + base); + if (delta > MAX_DISTANCE) + delta = MAX_DISTANCE; + chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; + hashtable[HASH_VALUE(p)] = (p) - base; + hc4->nexttoupdate++; + } +} + +static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, + const u8 *const matchlimit) +{ + const u8 *p1t = p1; + + while (p1t < matchlimit - (STEPSIZE - 1)) { +#if LZ4_ARCH64 + u64 diff = A64(p2) ^ A64(p1t); +#else + u32 diff = A32(p2) ^ A32(p1t); +#endif + if (!diff) { + p1t += STEPSIZE; + p2 += STEPSIZE; + continue; + } + p1t += LZ4_NBCOMMONBYTES(diff); + return p1t - p1; + } +#if LZ4_ARCH64 + if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { + p1t += 4; + p2 += 4; + } +#endif + + if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { + p1t += 2; + p2 += 2; + } + if ((p1t < matchlimit) && (*p2 == *p1t)) + p1t++; + return p1t - p1; +} + +static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; + const u8 *ref; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + int nbattempts = MAX_NB_ATTEMPTS; + size_t repl = 0, ml = 0; + u16 delta; + + /* HC4 match finder */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + /* potential repetition */ + if (ref >= ip-4) { + /* confirmed */ + if (A32(ref) == A32(ip)) { + delta = (u16)(ip-ref); + repl = ml = lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + *matchpos = ref; + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + while ((ref >= ip - MAX_DISTANCE) && nbattempts) { + nbattempts--; + if (*(ref + ml) == *(ip + ml)) { + if (A32(ref) == A32(ip)) { + size_t mlt = + lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { + ml = mlt; + *matchpos = ref; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + /* Complete table */ + if (repl) { + const BYTE *ptr = ip; + const BYTE *end; + end = ip + repl - (MINMATCH-1); + /* Pre-Load */ + while (ptr < end - delta) { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + ptr++; + } + do { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + /* Head of chain */ + hashtable[HASH_VALUE(ptr)] = (ptr) - base; + ptr++; + } while (ptr < end); + hc4->nexttoupdate = end; + } + + return (int)ml; +} + +static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, + const u8 **matchpos, const u8 **startpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + const u8 *ref; + int nbattempts = MAX_NB_ATTEMPTS; + int delta = (int)(ip - startlimit); + + /* First Match */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) + && (nbattempts)) { + nbattempts--; + if (*(startlimit + longest) == *(ref - delta + longest)) { + if (A32(ref) == A32(ip)) { + const u8 *reft = ref + MINMATCH; + const u8 *ipt = ip + MINMATCH; + const u8 *startt = ip; + + while (ipt < matchlimit-(STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(reft) ^ A64(ipt); + #else + u32 diff = A32(reft) ^ A32(ipt); + #endif + + if (!diff) { + ipt += STEPSIZE; + reft += STEPSIZE; + continue; + } + ipt += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ipt < (matchlimit - 3)) + && (A32(reft) == A32(ipt))) { + ipt += 4; + reft += 4; + } + ipt += 2; + #endif + if ((ipt < (matchlimit - 1)) + && (A16(reft) == A16(ipt))) { + reft += 2; + } + if ((ipt < matchlimit) && (*reft == *ipt)) + ipt++; +_endcount: + reft = ref; + + while ((startt > startlimit) + && (reft > hc4->base) + && (startt[-1] == reft[-1])) { + startt--; + reft--; + } + + if ((ipt - startt) > longest) { + longest = (int)(ipt - startt); + *matchpos = reft; + *startpos = startt; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + return longest; +} + +static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, + int ml, const u8 *ref) +{ + int length, len; + u8 *token; + + /* Encode Literal length */ + length = (int)(*ip - *anchor); + token = (*op)++; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *(*op)++ = 255; + *(*op)++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(*anchor, *op, length); + + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); + + /* Encode MatchLength */ + len = (int)(ml - MINMATCH); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *(*op)++ = 255; + *(*op)++ = 255; + } + if (len > 254) { + len -= 255; + *(*op)++ = 255; + } + *(*op)++ = (u8)len; + } else + *token += len; + + /* Prepare next loop */ + *ip += ml; + *anchor = *ip; + + return 0; +} + +static int lz4_compresshcctx(struct lz4hc_data *ctx, + const char *source, + char *dest, + int isize) +{ + const u8 *ip = (const u8 *)source; + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + const u8 *const matchlimit = (iend - LASTLITERALS); + + u8 *op = (u8 *)dest; + + int ml, ml2, ml3, ml0; + const u8 *ref = NULL; + const u8 *start2 = NULL; + const u8 *ref2 = NULL; + const u8 *start3 = NULL; + const u8 *ref3 = NULL; + const u8 *start0; + const u8 *ref0; + int lastrun; + + ip++; + + /* Main Loop */ + while (ip < mflimit) { + ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); + if (!ml) { + ip++; + continue; + } + + /* saved, in case we would skip too much */ + start0 = ip; + ref0 = ref; + ml0 = ml; +_search2: + if (ip+ml < mflimit) + ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, + ip + 1, matchlimit, ml, &ref2, &start2); + else + ml2 = ml; + /* No better match */ + if (ml2 == ml) { + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + continue; + } + + if (start0 < ip) { + /* empirical */ + if (start2 < ip + ml0) { + ip = start0; + ref = ref0; + ml = ml0; + } + } + /* + * Here, start0==ip + * First Match too small : removed + */ + if ((start2 - ip) < 3) { + ml = ml2; + ip = start2; + ref = ref2; + goto _search2; + } + +_search3: + /* + * Currently we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) + */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) + new_ml = OPTIMAL_ML; + if (ip + new_ml > start2 + ml2 - MINMATCH) + new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* + * Now, we have start2 = ip+new_ml, + * with new_ml=min(ml, OPTIMAL_ML=18) + */ + if (start2 + ml2 < mflimit) + ml3 = lz4hc_insertandgetwidermatch(ctx, + start2 + ml2 - 3, start2, matchlimit, + ml2, &ref3, &start3); + else + ml3 = ml2; + + /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) + ml = (int)(start2 - ip); + + /* Now, encode 2 sequences */ + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start2; + lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); + continue; + } + + /* Not enough space for match 2 : remove it */ + if (start3 < ip + ml + 3) { + /* + * can write Seq1 immediately ==> Seq2 is removed, + * so Seq3 becomes Seq1 + */ + if (start3 >= (ip + ml)) { + if (start2 < ip + ml) { + int correction = + (int)(ip + ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least + * the first one ip & ref are known; Now for ml + */ + if (start2 < ip + ml) { + if ((start2 - ip) < (int)ML_MASK) { + int correction; + if (ml > OPTIMAL_ML) + ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) + ml = (int)(start2 - ip) + ml2 + - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else + ml = (int)(start2 - ip); + } + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _search3; + } + + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8) lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int) (((char *)op) - dest); +} + +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; + lz4hc_init(hc4, (const u8 *)src); + out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, + (char *)dst, (int)src_len); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + return 0; + +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4hc_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC compressor"); From 6de155507ea389d7863f34fe53b41a86a59228bf Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 11 Sep 2013 14:26:32 -0700 Subject: [PATCH 208/277] lz4: fix compression/decompression signedness mismatch LZ4 compression and decompression functions require different in signedness input/output parameters: unsigned char for compression and signed char for decompression. Change decompression API to require "(const) unsigned char *". Signed-off-by: Sergey Senozhatsky Cc: Kyungsik Lee Cc: Geert Uytterhoeven Cc: Yann Collet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b34081f1cd59585451efaa69e1dff1b9507e6c89) Signed-off-by: Alex Shi --- include/linux/lz4.h | 8 ++++---- lib/lz4/lz4_decompress.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/lz4.h b/include/linux/lz4.h index d21c13f10a64..4356686b0a39 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len, * note : Destination buffer must be already allocated. * slightly faster than lz4_decompress_unknownoutputsize() */ -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len); +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); /* * lz4_decompress_unknownoutputsize() @@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, * Error if return (< 0) * note : Destination buffer must be already allocated. */ -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len); +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len); #endif diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index dcc89753af65..677d1ea4833d 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -283,8 +283,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, return (int) (-(((char *) ip) - source)); } -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len) +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len) { int ret = -1; int input_len = 0; @@ -302,8 +302,8 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, EXPORT_SYMBOL_GPL(lz4_decompress); #endif -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len) +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len) { int ret = -1; int out_len = 0; From ce58d51a0e9b366876270f03555c209917d3b80d Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 4 Jun 2014 16:11:06 -0700 Subject: [PATCH 209/277] zram: correct offset usage in zram_bio_discard We want to skip the physical block(PAGE_SIZE) which is partially covered by the discard bio, so we check the remaining size and subtract it if there is a need to goto the next physical block. The current offset usage in zram_bio_discard is incorrect, it will cause its upper filesystem breakdown. Consider the following scenario: On some architecture or config, PAGE_SIZE is 64K for example, filesystem is set up on zram disk without PAGE_SIZE aligned, a discard bio leads to a offset = 4K and size=72K, normally, it should not really discard any physical block as it partially cover two physical blocks. However, with the current offset usage, it will discard the second physical block and free its memory, which will cause filesystem breakdown. This patch corrects the offset usage in zram_bio_discard. Signed-off-by: Weijie Yang Cc: Minchan Kim Cc: Nitin Gupta Acked-by: Joonsoo Kim Cc: Sergey Senozhatsky Cc: Bob Liu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 38515c73398a4c58059ecf1087e844561b58ee0f) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 19cf51ad48ef..efb6ff2a3735 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -571,10 +571,10 @@ static void zram_bio_discard(struct zram *zram, u32 index, * skipping this logical block is appropriate here. */ if (offset) { - if (n < offset) + if (n <= (PAGE_SIZE - offset)) return; - n -= offset; + n -= (PAGE_SIZE - offset); index++; } From c398e6a3df7562a1a70e2ad487dcda2c3cb635e9 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 4 Jun 2014 16:11:08 -0700 Subject: [PATCH 210/277] zsmalloc: fixup trivial zs size classes value in comments According to calculation, ZS_SIZE_CLASSES value is 255 on systems with 4K page size, not 254. The old value may forget count the ZS_MIN_ALLOC_SIZE in. This patch fixes this trivial issue in the comments. Signed-off-by: Weijie Yang Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 7eb52512a977854eca51d9b692c2f3be8a0e5eeb) Signed-off-by: Alex Shi --- mm/zsmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b728f10e353b..cee5c54c500f 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -129,7 +129,7 @@ #define ZS_MAX_ALLOC_SIZE PAGE_SIZE /* - * On systems with 4K page size, this gives 254 size classes! There is a + * On systems with 4K page size, this gives 255 size classes! There is a * trader-off here: * - Large number of size classes is potentially wasteful as free page are * spread across these classes From 888210267e226f437abb7fd0604530032a98cafa Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 2 Jul 2014 15:22:36 -0700 Subject: [PATCH 211/277] zram: revalidate disk after capacity change Alexander reported mkswap on /dev/zram0 is failed if other process is opening the block device file. Step is as follows, 0. Reset the unused zram device. 1. Use a program that opens /dev/zram0 with O_RDWR and sleeps until killed. 2. While that program sleeps, echo the correct value to /sys/block/zram0/disksize. 3. Verify (e.g. in /proc/partitions) that the disk size is applied correctly. It is. 4. While that program still sleeps, attempt to mkswap /dev/zram0. This fails: mkswap: error: swap area needs to be at least 40 KiB When I investigated, the size get by ioctl(fd, BLKGETSIZE64, xxx) on mkswap to get a size of blockdev was zero although zram0 has right size by 2. The reason is zram didn't revalidate disk after changing capacity so that size of blockdev's inode is not uptodate until all of file is close. This patch should fix the BUG. Signed-off-by: Minchan Kim Reported-by: Alexander E. Patrakov Tested-by: Alexander E. Patrakov Reviewed-by: Sergey Senozhatsky Cc: Nitin Gupta Acked-by: Jerome Marchand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 2e32baea46ce542c561a519414c840295b229c8f) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index efb6ff2a3735..1e14825bd02b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -621,8 +621,10 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - if (reset_capacity) + if (reset_capacity) { set_capacity(zram->disk, 0); + revalidate_disk(zram->disk); + } up_write(&zram->init_lock); } @@ -663,6 +665,7 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + revalidate_disk(zram->disk); up_write(&zram->init_lock); return len; From d022fbe2a0a481cb0d0c41fcacc47d62c3a16581 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 23 Jul 2014 14:00:04 -0700 Subject: [PATCH 212/277] zram: avoid lockdep splat by revalidate_disk Sasha reported lockdep warning [1] introduced by [2]. It could be fixed by doing disk revalidation out of the init_lock. It's okay because disk capacity change is protected by init_lock so that revalidate_disk always sees up-to-date value so there is no race. [1] https://lkml.org/lkml/2014/7/3/735 [2] zram: revalidate disk after capacity change Fixes 2e32baea46ce ("zram: revalidate disk after capacity change"). Signed-off-by: Minchan Kim Reported-by: Sasha Levin Cc: "Alexander E. Patrakov" Cc: Nitin Gupta Cc: Jerome Marchand Cc: Sergey Senozhatsky CC: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b4c5c60920e3b0c4598f43e7317559f6aec51531) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1e14825bd02b..674b8517694d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -621,11 +621,18 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - if (reset_capacity) { + if (reset_capacity) set_capacity(zram->disk, 0); - revalidate_disk(zram->disk); - } + up_write(&zram->init_lock); + + /* + * Revalidate disk out of the init_lock to avoid lockdep splat. + * It's okay because disk's capacity is protected by init_lock + * so that revalidate_disk always sees up-to-date capacity. + */ + if (reset_capacity) + revalidate_disk(zram->disk); } static ssize_t disksize_store(struct device *dev, @@ -665,8 +672,15 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - revalidate_disk(zram->disk); up_write(&zram->init_lock); + + /* + * Revalidate disk out of the init_lock to avoid lockdep splat. + * It's okay because disk's capacity is protected by init_lock + * so that revalidate_disk always sees up-to-date capacity. + */ + revalidate_disk(zram->disk); + return len; out_destroy_comp: From e11cb6668f1a349250873337858a502e2ab73c8e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Aug 2014 16:08:25 -0700 Subject: [PATCH 213/277] zram: rename struct `table' to `zram_table_entry' Andrew Morton has recently noted that `struct table' actually represents table entry and, thus, should be renamed. Rename to `zram_table_entry'. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit cb8f2eec3c5c87e31219c5e58625b8e890004e48) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 7f21c145e317..8909f86caf0d 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -62,7 +62,7 @@ enum zram_pageflags { /*-- Data structures */ /* Allocated for each disk page */ -struct table { +struct zram_table_entry { unsigned long handle; u16 size; /* object size (excluding header) */ u8 flags; @@ -82,7 +82,7 @@ struct zram_stats { struct zram_meta { rwlock_t tb_lock; /* protect table */ - struct table *table; + struct zram_table_entry *table; struct zs_pool *mem_pool; }; From 61febc60cc77c7e06ec7d457f21fc9ce9885f7e8 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Aug 2014 16:08:27 -0700 Subject: [PATCH 214/277] zram: remove unused SECTOR_SIZE define Drop SECTOR_SIZE define, because it's not used. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit a830eff749eb2bf906783f6bf74a74dad3de3aea) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 8909f86caf0d..c8161bd8969c 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /*-- End of configurable params */ #define SECTOR_SHIFT 9 -#define SECTOR_SIZE (1 << SECTOR_SHIFT) #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) #define ZRAM_LOGICAL_BLOCK_SHIFT 12 From 244e41f94febd73d877a397a05e626563ebd2ceb Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 6 Aug 2014 16:08:29 -0700 Subject: [PATCH 215/277] zram: use size_t instead of u16 Some architectures (eg, hexagon and PowerPC) could use PAGE_SHIFT of 16 or more. In these cases u16 is not sufficiently large to represent a compressed page's size so use size_t. Signed-off-by: Minchan Kim Reported-by: Weijie Yang Acked-by: Sergey Senozhatsky Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 023b409f9dac4cdea3322009f2e592068558690c) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 674b8517694d..efd9e4f1605c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -336,7 +336,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned char *cmem; struct zram_meta *meta = zram->meta; unsigned long handle; - u16 size; + size_t size; read_lock(&meta->tb_lock); handle = meta->table[index].handle; From 3a4d2c69adaee7391fcdbc782913629df2e49726 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 6 Aug 2014 16:08:31 -0700 Subject: [PATCH 216/277] zram: replace global tb_lock with fine grain lock Currently, we use a rwlock tb_lock to protect concurrent access to the whole zram meta table. However, according to the actual access model, there is only a small chance for upper user to access the same table[index], so the current lock granularity is too big. The idea of optimization is to change the lock granularity from whole meta table to per table entry (table -> table[index]), so that we can protect concurrent access to the same table[index], meanwhile allow the maximum concurrency. With this in mind, several kinds of locks which could be used as a per-entry lock were tested and compared: Test environment: x86-64 Intel Core2 Q8400, system memory 4GB, Ubuntu 12.04, kernel v3.15.0-rc3 as base, zram with 4 max_comp_streams LZO. iozone test: iozone -t 4 -R -r 16K -s 200M -I +Z (1GB zram with ext4 filesystem, take the average of 10 tests, KB/s) Test base CAS spinlock rwlock bit_spinlock ------------------------------------------------------------------- Initial write 1381094 1425435 1422860 1423075 1421521 Rewrite 1529479 1641199 1668762 1672855 1654910 Read 8468009 11324979 11305569 11117273 10997202 Re-read 8467476 11260914 11248059 11145336 10906486 Reverse Read 6821393 8106334 8282174 8279195 8109186 Stride read 7191093 8994306 9153982 8961224 9004434 Random read 7156353 8957932 9167098 8980465 8940476 Mixed workload 4172747 5680814 5927825 5489578 5972253 Random write 1483044 1605588 1594329 1600453 1596010 Pwrite 1276644 1303108 1311612 1314228 1300960 Pread 4324337 4632869 4618386 4457870 4500166 To enhance the possibility of access the same table[index] concurrently, set zram a small disksize(10MB) and let threads run with large loop count. fio test: fio --bs=32k --randrepeat=1 --randseed=100 --refill_buffers --scramble_buffers=1 --direct=1 --loops=3000 --numjobs=4 --filename=/dev/zram0 --name=seq-write --rw=write --stonewall --name=seq-read --rw=read --stonewall --name=seq-readwrite --rw=rw --stonewall --name=rand-readwrite --rw=randrw --stonewall (10MB zram raw block device, take the average of 10 tests, KB/s) Test base CAS spinlock rwlock bit_spinlock ------------------------------------------------------------- seq-write 933789 999357 1003298 995961 1001958 seq-read 5634130 6577930 6380861 6243912 6230006 seq-rw 1405687 1638117 1640256 1633903 1634459 rand-rw 1386119 1614664 1617211 1609267 1612471 All the optimization methods show a higher performance than the base, however, it is hard to say which method is the most appropriate. On the other hand, zram is mostly used on small embedded system, so we don't want to increase any memory footprint. This patch pick the bit_spinlock method, pack object size and page_flag into an unsigned long table.value, so as to not increase any memory overhead on both 32-bit and 64-bit system. On the third hand, even though different kinds of locks have different performances, we can ignore this difference, because: if zram is used as zram swapfile, the swap subsystem can prevent concurrent access to the same swapslot; if zram is used as zram-blk for set up filesystem on it, the upper filesystem and the page cache also prevent concurrent access of the same block mostly. So we can ignore the different performances among locks. Acked-by: Sergey Senozhatsky Reviewed-by: Davidlohr Bueso Signed-off-by: Weijie Yang Signed-off-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit d2d5e762c8990c4031890e03565983a05febd64a) Signed-off-by: Alex Shi Conflicts: drivers/block/zram/zram_drv.c Conflicts solution: using old bio struct --- drivers/block/zram/zram_drv.c | 69 +++++++++++++++++++++-------------- drivers/block/zram/zram_drv.h | 24 +++++++++--- 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index efd9e4f1605c..a21f466c4e15 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev, static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - return meta->table[index].flags & BIT(flag); + return meta->table[index].value & BIT(flag); } static void zram_set_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - meta->table[index].flags |= BIT(flag); + meta->table[index].value |= BIT(flag); } static void zram_clear_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - meta->table[index].flags &= ~BIT(flag); + meta->table[index].value &= ~BIT(flag); +} + +static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) +{ + return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); +} + +static void zram_set_obj_size(struct zram_meta *meta, + u32 index, size_t size) +{ + unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; + + meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; } static inline int is_partial_io(struct bio_vec *bvec) @@ -254,7 +267,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) goto free_table; } - rwlock_init(&meta->tb_lock); return meta; free_table: @@ -303,7 +315,12 @@ static void handle_zero_page(struct bio_vec *bvec) flush_dcache_page(page); } -/* NOTE: caller should hold meta->tb_lock with write-side */ + +/* + * To protect concurrent access to the same index entry, + * caller should hold this table index entry's bit_spinlock to + * indicate this index entry is accessing. + */ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; @@ -323,11 +340,12 @@ static void zram_free_page(struct zram *zram, size_t index) zs_free(meta->mem_pool, handle); - atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); + atomic64_sub(zram_get_obj_size(meta, index), + &zram->stats.compr_data_size); atomic64_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; - meta->table[index].size = 0; + zram_set_obj_size(meta, index, 0); } static int zram_decompress_page(struct zram *zram, char *mem, u32 index) @@ -338,12 +356,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned long handle; size_t size; - read_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); handle = meta->table[index].handle; - size = meta->table[index].size; + size = zram_get_obj_size(meta, index); if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); clear_page(mem); return 0; } @@ -354,7 +372,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) else ret = zcomp_decompress(zram->comp, cmem, size, mem); zs_unmap_object(meta->mem_pool, handle); - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret)) { @@ -375,14 +393,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, struct zram_meta *meta = zram->meta; page = bvec->bv_page; - read_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); if (unlikely(!meta->table[index].handle) || zram_test_flag(meta, index, ZRAM_ZERO)) { - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); handle_zero_page(bvec); return 0; } - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); if (is_partial_io(bvec)) /* Use a temporary buffer to decompress the page */ @@ -460,10 +478,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); zram_set_flag(meta, index, ZRAM_ZERO); - write_unlock(&zram->meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); atomic64_inc(&zram->stats.zero_pages); ret = 0; @@ -513,12 +531,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, * Free memory associated with this sector * before overwriting unused sectors. */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); meta->table[index].handle = handle; - meta->table[index].size = clen; - write_unlock(&zram->meta->tb_lock); + zram_set_obj_size(meta, index, clen); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); /* Update stats */ atomic64_add(clen, &zram->stats.compr_data_size); @@ -559,6 +577,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, int offset, struct bio *bio) { size_t n = bio->bi_size; + struct zram_meta *meta = zram->meta; /* * zram manages data in physical block size units. Because logical block @@ -579,13 +598,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, } while (n >= PAGE_SIZE) { - /* - * Discard request can be large so the lock hold times could be - * lengthy. So take the lock once per page. - */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); - write_unlock(&zram->meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); index++; n -= PAGE_SIZE; } @@ -819,9 +834,9 @@ static void zram_slot_free_notify(struct block_device *bdev, zram = bdev->bd_disk->private_data; meta = zram->meta; - write_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); - write_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); atomic64_inc(&zram->stats.notify_free); } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c8161bd8969c..5b0afde729cd 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -50,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; #define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) -/* Flags for zram pages (table[page_no].flags) */ + +/* + * The lower ZRAM_FLAG_SHIFT bits of table.value is for + * object size (excluding header), the higher bits is for + * zram_pageflags. + * + * zram is mainly used for memory efficiency so we want to keep memory + * footprint small so we can squeeze size and flags into a field. + * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header), + * the higher bits is for zram_pageflags. + */ +#define ZRAM_FLAG_SHIFT 24 + +/* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { /* Page consists entirely of zeros */ - ZRAM_ZERO, + ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1, + ZRAM_ACCESS, /* page in now accessed */ __NR_ZRAM_PAGEFLAGS, }; @@ -63,9 +77,8 @@ enum zram_pageflags { /* Allocated for each disk page */ struct zram_table_entry { unsigned long handle; - u16 size; /* object size (excluding header) */ - u8 flags; -} __aligned(4); + unsigned long value; +}; struct zram_stats { atomic64_t compr_data_size; /* compressed size of pages stored */ @@ -80,7 +93,6 @@ struct zram_stats { }; struct zram_meta { - rwlock_t tb_lock; /* protect table */ struct zram_table_entry *table; struct zs_pool *mem_pool; }; From 301b07c9f41b4f4111ac53336647788cd89af76d Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 6 Aug 2014 16:08:36 -0700 Subject: [PATCH 217/277] mm/zpool: implement common zpool api to zbud/zsmalloc Add zpool api. zpool provides an interface for memory storage, typically of compressed memory. Users can select what backend to use; currently the only implementations are zbud, a low density implementation with up to two compressed pages per storage page, and zsmalloc, a higher density implementation with multiple compressed pages per storage page. Signed-off-by: Dan Streetman Tested-by: Seth Jennings Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit af8d417a04564bca0348e7e3c749ab12a3e837ad) Signed-off-by: Alex Shi Conflicts: mm/Kconfig mm/Makefile --- include/linux/zpool.h | 106 ++++++++++++ mm/Kconfig | 7 + mm/Makefile | 1 + mm/zpool.c | 364 ++++++++++++++++++++++++++++++++++++++++++ mm/zsmalloc.c | 1 - 5 files changed, 478 insertions(+), 1 deletion(-) create mode 100644 include/linux/zpool.h create mode 100644 mm/zpool.c diff --git a/include/linux/zpool.h b/include/linux/zpool.h new file mode 100644 index 000000000000..f14bd75f08b3 --- /dev/null +++ b/include/linux/zpool.h @@ -0,0 +1,106 @@ +/* + * zpool memory storage api + * + * Copyright (C) 2014 Dan Streetman + * + * This is a common frontend for the zbud and zsmalloc memory + * storage pool implementations. Typically, this is used to + * store compressed memory. + */ + +#ifndef _ZPOOL_H_ +#define _ZPOOL_H_ + +struct zpool; + +struct zpool_ops { + int (*evict)(struct zpool *pool, unsigned long handle); +}; + +/* + * Control how a handle is mapped. It will be ignored if the + * implementation does not support it. Its use is optional. + * Note that this does not refer to memory protection, it + * refers to how the memory will be copied in/out if copying + * is necessary during mapping; read-write is the safest as + * it copies the existing memory in on map, and copies the + * changed memory back out on unmap. Write-only does not copy + * in the memory and should only be used for initialization. + * If in doubt, use ZPOOL_MM_DEFAULT which is read-write. + */ +enum zpool_mapmode { + ZPOOL_MM_RW, /* normal read-write mapping */ + ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */ + ZPOOL_MM_WO, /* write-only (no copy-in at map time) */ + + ZPOOL_MM_DEFAULT = ZPOOL_MM_RW +}; + +struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops); + +char *zpool_get_type(struct zpool *pool); + +void zpool_destroy_pool(struct zpool *pool); + +int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, + unsigned long *handle); + +void zpool_free(struct zpool *pool, unsigned long handle); + +int zpool_shrink(struct zpool *pool, unsigned int pages, + unsigned int *reclaimed); + +void *zpool_map_handle(struct zpool *pool, unsigned long handle, + enum zpool_mapmode mm); + +void zpool_unmap_handle(struct zpool *pool, unsigned long handle); + +u64 zpool_get_total_size(struct zpool *pool); + + +/** + * struct zpool_driver - driver implementation for zpool + * @type: name of the driver. + * @list: entry in the list of zpool drivers. + * @create: create a new pool. + * @destroy: destroy a pool. + * @malloc: allocate mem from a pool. + * @free: free mem from a pool. + * @shrink: shrink the pool. + * @map: map a handle. + * @unmap: unmap a handle. + * @total_size: get total size of a pool. + * + * This is created by a zpool implementation and registered + * with zpool. + */ +struct zpool_driver { + char *type; + struct module *owner; + atomic_t refcount; + struct list_head list; + + void *(*create)(gfp_t gfp, struct zpool_ops *ops); + void (*destroy)(void *pool); + + int (*malloc)(void *pool, size_t size, gfp_t gfp, + unsigned long *handle); + void (*free)(void *pool, unsigned long handle); + + int (*shrink)(void *pool, unsigned int pages, + unsigned int *reclaimed); + + void *(*map)(void *pool, unsigned long handle, + enum zpool_mapmode mm); + void (*unmap)(void *pool, unsigned long handle); + + u64 (*total_size)(void *pool); +}; + +void zpool_register_driver(struct zpool_driver *driver); + +int zpool_unregister_driver(struct zpool_driver *driver); + +int zpool_evict(void *pool, unsigned long handle); + +#endif diff --git a/mm/Kconfig b/mm/Kconfig index 86919079b64c..ac85efdeab45 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -478,6 +478,13 @@ config FRONTSWAP If unsure, say Y to enable frontswap. +config ZPOOL + tristate "Common API for compressed memory storage" + default n + help + Compressed memory storage API. This allows using either zbud or + zsmalloc. + config ZSMALLOC bool "Memory allocator for compressed pages" depends on MMU diff --git a/mm/Makefile b/mm/Makefile index b5ae0b0cc26c..fa2e7df37b85 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -58,4 +58,5 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o +obj-$(CONFIG_ZPOOL) += zpool.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o diff --git a/mm/zpool.c b/mm/zpool.c new file mode 100644 index 000000000000..e40612a1df00 --- /dev/null +++ b/mm/zpool.c @@ -0,0 +1,364 @@ +/* + * zpool memory storage api + * + * Copyright (C) 2014 Dan Streetman + * + * This is a common frontend for memory storage pool implementations. + * Typically, this is used to store compressed memory. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +struct zpool { + char *type; + + struct zpool_driver *driver; + void *pool; + struct zpool_ops *ops; + + struct list_head list; +}; + +static LIST_HEAD(drivers_head); +static DEFINE_SPINLOCK(drivers_lock); + +static LIST_HEAD(pools_head); +static DEFINE_SPINLOCK(pools_lock); + +/** + * zpool_register_driver() - register a zpool implementation. + * @driver: driver to register + */ +void zpool_register_driver(struct zpool_driver *driver) +{ + spin_lock(&drivers_lock); + atomic_set(&driver->refcount, 0); + list_add(&driver->list, &drivers_head); + spin_unlock(&drivers_lock); +} +EXPORT_SYMBOL(zpool_register_driver); + +/** + * zpool_unregister_driver() - unregister a zpool implementation. + * @driver: driver to unregister. + * + * Module usage counting is used to prevent using a driver + * while/after unloading, so if this is called from module + * exit function, this should never fail; if called from + * other than the module exit function, and this returns + * failure, the driver is in use and must remain available. + */ +int zpool_unregister_driver(struct zpool_driver *driver) +{ + int ret = 0, refcount; + + spin_lock(&drivers_lock); + refcount = atomic_read(&driver->refcount); + WARN_ON(refcount < 0); + if (refcount > 0) + ret = -EBUSY; + else + list_del(&driver->list); + spin_unlock(&drivers_lock); + + return ret; +} +EXPORT_SYMBOL(zpool_unregister_driver); + +/** + * zpool_evict() - evict callback from a zpool implementation. + * @pool: pool to evict from. + * @handle: handle to evict. + * + * This can be used by zpool implementations to call the + * user's evict zpool_ops struct evict callback. + */ +int zpool_evict(void *pool, unsigned long handle) +{ + struct zpool *zpool; + + spin_lock(&pools_lock); + list_for_each_entry(zpool, &pools_head, list) { + if (zpool->pool == pool) { + spin_unlock(&pools_lock); + if (!zpool->ops || !zpool->ops->evict) + return -EINVAL; + return zpool->ops->evict(zpool, handle); + } + } + spin_unlock(&pools_lock); + + return -ENOENT; +} +EXPORT_SYMBOL(zpool_evict); + +static struct zpool_driver *zpool_get_driver(char *type) +{ + struct zpool_driver *driver; + + spin_lock(&drivers_lock); + list_for_each_entry(driver, &drivers_head, list) { + if (!strcmp(driver->type, type)) { + bool got = try_module_get(driver->owner); + + if (got) + atomic_inc(&driver->refcount); + spin_unlock(&drivers_lock); + return got ? driver : NULL; + } + } + + spin_unlock(&drivers_lock); + return NULL; +} + +static void zpool_put_driver(struct zpool_driver *driver) +{ + atomic_dec(&driver->refcount); + module_put(driver->owner); +} + +/** + * zpool_create_pool() - Create a new zpool + * @type The type of the zpool to create (e.g. zbud, zsmalloc) + * @gfp The GFP flags to use when allocating the pool. + * @ops The optional ops callback. + * + * This creates a new zpool of the specified type. The gfp flags will be + * used when allocating memory, if the implementation supports it. If the + * ops param is NULL, then the created zpool will not be shrinkable. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: New zpool on success, NULL on failure. + */ +struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops) +{ + struct zpool_driver *driver; + struct zpool *zpool; + + pr_info("creating pool type %s\n", type); + + driver = zpool_get_driver(type); + + if (!driver) { + request_module(type); + driver = zpool_get_driver(type); + } + + if (!driver) { + pr_err("no driver for type %s\n", type); + return NULL; + } + + zpool = kmalloc(sizeof(*zpool), gfp); + if (!zpool) { + pr_err("couldn't create zpool - out of memory\n"); + zpool_put_driver(driver); + return NULL; + } + + zpool->type = driver->type; + zpool->driver = driver; + zpool->pool = driver->create(gfp, ops); + zpool->ops = ops; + + if (!zpool->pool) { + pr_err("couldn't create %s pool\n", type); + zpool_put_driver(driver); + kfree(zpool); + return NULL; + } + + pr_info("created %s pool\n", type); + + spin_lock(&pools_lock); + list_add(&zpool->list, &pools_head); + spin_unlock(&pools_lock); + + return zpool; +} + +/** + * zpool_destroy_pool() - Destroy a zpool + * @pool The zpool to destroy. + * + * Implementations must guarantee this to be thread-safe, + * however only when destroying different pools. The same + * pool should only be destroyed once, and should not be used + * after it is destroyed. + * + * This destroys an existing zpool. The zpool should not be in use. + */ +void zpool_destroy_pool(struct zpool *zpool) +{ + pr_info("destroying pool type %s\n", zpool->type); + + spin_lock(&pools_lock); + list_del(&zpool->list); + spin_unlock(&pools_lock); + zpool->driver->destroy(zpool->pool); + zpool_put_driver(zpool->driver); + kfree(zpool); +} + +/** + * zpool_get_type() - Get the type of the zpool + * @pool The zpool to check + * + * This returns the type of the pool. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: The type of zpool. + */ +char *zpool_get_type(struct zpool *zpool) +{ + return zpool->type; +} + +/** + * zpool_malloc() - Allocate memory + * @pool The zpool to allocate from. + * @size The amount of memory to allocate. + * @gfp The GFP flags to use when allocating memory. + * @handle Pointer to the handle to set + * + * This allocates the requested amount of memory from the pool. + * The gfp flags will be used when allocating memory, if the + * implementation supports it. The provided @handle will be + * set to the allocated object handle. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: 0 on success, negative value on error. + */ +int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp, + unsigned long *handle) +{ + return zpool->driver->malloc(zpool->pool, size, gfp, handle); +} + +/** + * zpool_free() - Free previously allocated memory + * @pool The zpool that allocated the memory. + * @handle The handle to the memory to free. + * + * This frees previously allocated memory. This does not guarantee + * that the pool will actually free memory, only that the memory + * in the pool will become available for use by the pool. + * + * Implementations must guarantee this to be thread-safe, + * however only when freeing different handles. The same + * handle should only be freed once, and should not be used + * after freeing. + */ +void zpool_free(struct zpool *zpool, unsigned long handle) +{ + zpool->driver->free(zpool->pool, handle); +} + +/** + * zpool_shrink() - Shrink the pool size + * @pool The zpool to shrink. + * @pages The number of pages to shrink the pool. + * @reclaimed The number of pages successfully evicted. + * + * This attempts to shrink the actual memory size of the pool + * by evicting currently used handle(s). If the pool was + * created with no zpool_ops, or the evict call fails for any + * of the handles, this will fail. If non-NULL, the @reclaimed + * parameter will be set to the number of pages reclaimed, + * which may be more than the number of pages requested. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: 0 on success, negative value on error/failure. + */ +int zpool_shrink(struct zpool *zpool, unsigned int pages, + unsigned int *reclaimed) +{ + return zpool->driver->shrink(zpool->pool, pages, reclaimed); +} + +/** + * zpool_map_handle() - Map a previously allocated handle into memory + * @pool The zpool that the handle was allocated from + * @handle The handle to map + * @mm How the memory should be mapped + * + * This maps a previously allocated handle into memory. The @mm + * param indicates to the implementation how the memory will be + * used, i.e. read-only, write-only, read-write. If the + * implementation does not support it, the memory will be treated + * as read-write. + * + * This may hold locks, disable interrupts, and/or preemption, + * and the zpool_unmap_handle() must be called to undo those + * actions. The code that uses the mapped handle should complete + * its operatons on the mapped handle memory quickly and unmap + * as soon as possible. As the implementation may use per-cpu + * data, multiple handles should not be mapped concurrently on + * any cpu. + * + * Returns: A pointer to the handle's mapped memory area. + */ +void *zpool_map_handle(struct zpool *zpool, unsigned long handle, + enum zpool_mapmode mapmode) +{ + return zpool->driver->map(zpool->pool, handle, mapmode); +} + +/** + * zpool_unmap_handle() - Unmap a previously mapped handle + * @pool The zpool that the handle was allocated from + * @handle The handle to unmap + * + * This unmaps a previously mapped handle. Any locks or other + * actions that the implementation took in zpool_map_handle() + * will be undone here. The memory area returned from + * zpool_map_handle() should no longer be used after this. + */ +void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) +{ + zpool->driver->unmap(zpool->pool, handle); +} + +/** + * zpool_get_total_size() - The total size of the pool + * @pool The zpool to check + * + * This returns the total size in bytes of the pool. + * + * Returns: Total size of the zpool in bytes. + */ +u64 zpool_get_total_size(struct zpool *zpool) +{ + return zpool->driver->total_size(zpool->pool); +} + +static int __init init_zpool(void) +{ + pr_info("loaded\n"); + return 0; +} + +static void __exit exit_zpool(void) +{ + pr_info("unloaded\n"); +} + +module_init(init_zpool); +module_exit(exit_zpool); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Streetman "); +MODULE_DESCRIPTION("Common API for compressed memory storage"); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index cee5c54c500f..3078eca4737d 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -239,7 +239,6 @@ struct mapping_area { enum zs_mapmode vm_mm; /* mapping mode */ }; - /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); From 7b5c9b29fa4e0b9374511d6f76bd24ea5926ed29 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 6 Aug 2014 16:08:38 -0700 Subject: [PATCH 218/277] mm/zpool: zbud/zsmalloc implement zpool Update zbud and zsmalloc to implement the zpool api. [fengguang.wu@intel.com: make functions static] Signed-off-by: Dan Streetman Tested-by: Seth Jennings Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit c795779df29e180738568d2a5eb3a42f3b5e47f0) Signed-off-by: Alex Shi Conflicts: mm/zbud.c Conflicts solution: remove zbud --- mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 3078eca4737d..fc25b58a02f8 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -80,6 +80,7 @@ #include #include #include +#include /* * This must be power of 2 and greater than of equal to sizeof(link_free). @@ -239,6 +240,82 @@ struct mapping_area { enum zs_mapmode vm_mm; /* mapping mode */ }; +/* zpool driver */ + +#ifdef CONFIG_ZPOOL + +static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops) +{ + return zs_create_pool(gfp); +} + +static void zs_zpool_destroy(void *pool) +{ + zs_destroy_pool(pool); +} + +static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp, + unsigned long *handle) +{ + *handle = zs_malloc(pool, size); + return *handle ? 0 : -1; +} +static void zs_zpool_free(void *pool, unsigned long handle) +{ + zs_free(pool, handle); +} + +static int zs_zpool_shrink(void *pool, unsigned int pages, + unsigned int *reclaimed) +{ + return -EINVAL; +} + +static void *zs_zpool_map(void *pool, unsigned long handle, + enum zpool_mapmode mm) +{ + enum zs_mapmode zs_mm; + + switch (mm) { + case ZPOOL_MM_RO: + zs_mm = ZS_MM_RO; + break; + case ZPOOL_MM_WO: + zs_mm = ZS_MM_WO; + break; + case ZPOOL_MM_RW: /* fallthru */ + default: + zs_mm = ZS_MM_RW; + break; + } + + return zs_map_object(pool, handle, zs_mm); +} +static void zs_zpool_unmap(void *pool, unsigned long handle) +{ + zs_unmap_object(pool, handle); +} + +static u64 zs_zpool_total_size(void *pool) +{ + return zs_get_total_size_bytes(pool); +} + +static struct zpool_driver zs_zpool_driver = { + .type = "zsmalloc", + .owner = THIS_MODULE, + .create = zs_zpool_create, + .destroy = zs_zpool_destroy, + .malloc = zs_zpool_malloc, + .free = zs_zpool_free, + .shrink = zs_zpool_shrink, + .map = zs_zpool_map, + .unmap = zs_zpool_unmap, + .total_size = zs_zpool_total_size, +}; + +#endif /* CONFIG_ZPOOL */ + /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); @@ -779,6 +856,10 @@ static void zs_exit(void) { int cpu; +#ifdef CONFIG_ZPOOL + zpool_unregister_driver(&zs_zpool_driver); +#endif + cpu_notifier_register_begin(); for_each_online_cpu(cpu) @@ -805,6 +886,10 @@ static int zs_init(void) cpu_notifier_register_done(); +#ifdef CONFIG_ZPOOL + zpool_register_driver(&zs_zpool_driver); +#endif + return 0; fail: zs_exit(); From 548233701e2be93471a703b490b6702c000d60bc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Aug 2014 15:18:37 -0700 Subject: [PATCH 219/277] zram: fix incorrect stat with failed_reads Since we allocate a temporary buffer in zram_bvec_read to handle partial page operations in commit 924bd88d703e ("Staging: zram: allow partial page operations"), our ->failed_reads value may be incorrect as we do not increase its value when failing to allocate the temporary buffer. Let's fix this issue and correct the annotation of failed_reads. Signed-off-by: Chao Yu Acked-by: Minchan Kim Cc: Nitin Gupta Acked-by: Jerome Marchand Acked-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 0cf1e9d6c34d4c82ac3af8015594849814843d36) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 10 +++++++--- drivers/block/zram/zram_drv.h | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a21f466c4e15..600aeab83d2a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -377,7 +377,6 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); - atomic64_inc(&zram->stats.failed_reads); return ret; } @@ -546,8 +545,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, zcomp_strm_release(zram->comp, zstrm); if (is_partial_io(bvec)) kfree(uncmem); - if (ret) - atomic64_inc(&zram->stats.failed_writes); return ret; } @@ -565,6 +562,13 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, ret = zram_bvec_write(zram, bvec, index, offset); } + if (unlikely(ret)) { + if (rw == READ) + atomic64_inc(&zram->stats.failed_reads); + else + atomic64_inc(&zram->stats.failed_writes); + } + return ret; } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 5b0afde729cd..e0f725c87cc6 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -84,7 +84,7 @@ struct zram_stats { atomic64_t compr_data_size; /* compressed size of pages stored */ atomic64_t num_reads; /* failed + successful */ atomic64_t num_writes; /* --do-- */ - atomic64_t failed_reads; /* should NEVER! happen */ + atomic64_t failed_reads; /* can happen when memory is too low */ atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ From 60fafac3c592636f0bbbc75e20e2d7dcc2284c20 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 29 Aug 2014 15:18:40 -0700 Subject: [PATCH 220/277] mm/zpool: use prefixed module loading To avoid potential format string expansion via module parameters, do not use the zpool type directly in request_module() without a format string. Additionally, to avoid arbitrary modules being loaded via zpool API (e.g. via the zswap_zpool_type module parameter) add a "zpool-" prefix to the requested module, as well as module aliases for the existing zpool types (zbud and zsmalloc). Signed-off-by: Kees Cook Cc: Seth Jennings Cc: Minchan Kim Cc: Nitin Gupta Acked-by: Dan Streetman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 137f8cff505ace6251dc442c7aa973d60c801a79) Signed-off-by: Alex Shi Conflicts: mm/zbud.c Conflicts solution: remove zbud --- mm/zpool.c | 2 +- mm/zsmalloc.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/zpool.c b/mm/zpool.c index e40612a1df00..739cdf0d183a 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -150,7 +150,7 @@ struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops) driver = zpool_get_driver(type); if (!driver) { - request_module(type); + request_module("zpool-%s", type); driver = zpool_get_driver(type); } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index fc25b58a02f8..f40ff48433e2 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -314,6 +314,7 @@ static struct zpool_driver zs_zpool_driver = { .total_size = zs_zpool_total_size, }; +MODULE_ALIAS("zpool-zsmalloc"); #endif /* CONFIG_ZPOOL */ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ From 8ccca7290fdce49995c32fc514ff8281d476e417 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:48 -0700 Subject: [PATCH 221/277] zsmalloc: move pages_allocated to zs_pool Currently, zram has no feature to limit memory so theoretically zram can deplete system memory. Users have asked for a limit several times as even without exhaustion zram makes it hard to control memory usage of the platform. This patchset adds the feature. Patch 1 makes zs_get_total_size_bytes faster because it would be used frequently in later patches for the new feature. Patch 2 changes zs_get_total_size_bytes's return unit from bytes to page so that zsmalloc doesn't need unnecessary operation(ie, << PAGE_SHIFT). Patch 3 adds new feature. I added the feature into zram layer, not zsmalloc because limiation is zram's requirement, not zsmalloc so any other user using zsmalloc(ie, zpool) shouldn't affected by unnecessary branch of zsmalloc. In future, if every users of zsmalloc want the feature, then, we could move the feature from client side to zsmalloc easily but vice versa would be painful. Patch 4 adds news facility to report maximum memory usage of zram so that this avoids user polling frequently via /sys/block/zram0/ mem_used_total and ensures transient max are not missed. This patch (of 4): pages_allocated has counted in size_class structure and when user of zsmalloc want to see total_size_bytes, it should gather all of count from each size_class to report the sum. It's not bad if user don't see the value often but if user start to see the value frequently, it would be not a good deal for performance pov. This patch moves the count from size_class to zs_pool so it could reduce memory footprint (from [255 * 8byte] to [sizeof(atomic_long_t)]). Signed-off-by: Minchan Kim Reviewed-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Reviewed-by: David Horner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 13de8933c96b4557f667c337676f05274e017f83) Signed-off-by: Alex Shi --- mm/zsmalloc.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index f40ff48433e2..261be4654848 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -187,9 +187,6 @@ struct size_class { spinlock_t lock; - /* stats */ - u64 pages_allocated; - struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; }; @@ -208,6 +205,7 @@ struct zs_pool { struct size_class size_class[ZS_SIZE_CLASSES]; gfp_t flags; /* allocation flags used when growing pool */ + atomic_long_t pages_allocated; }; /* @@ -995,8 +993,9 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) return 0; set_zspage_mapping(first_page, class->index, ZS_EMPTY); + atomic_long_add(class->pages_per_zspage, + &pool->pages_allocated); spin_lock(&class->lock); - class->pages_allocated += class->pages_per_zspage; } obj = (unsigned long)first_page->freelist; @@ -1049,14 +1048,13 @@ void zs_free(struct zs_pool *pool, unsigned long obj) first_page->inuse--; fullness = fix_fullness_group(pool, first_page); - - if (fullness == ZS_EMPTY) - class->pages_allocated -= class->pages_per_zspage; - spin_unlock(&class->lock); - if (fullness == ZS_EMPTY) + if (fullness == ZS_EMPTY) { + atomic_long_sub(class->pages_per_zspage, + &pool->pages_allocated); free_zspage(first_page); + } } EXPORT_SYMBOL_GPL(zs_free); @@ -1152,12 +1150,7 @@ EXPORT_SYMBOL_GPL(zs_unmap_object); u64 zs_get_total_size_bytes(struct zs_pool *pool) { - int i; - u64 npages = 0; - - for (i = 0; i < ZS_SIZE_CLASSES; i++) - npages += pool->size_class[i].pages_allocated; - + u64 npages = atomic_long_read(&pool->pages_allocated); return npages << PAGE_SHIFT; } EXPORT_SYMBOL_GPL(zs_get_total_size_bytes); From 70a5d237c239ddde3c055784980635e39bd93885 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:50 -0700 Subject: [PATCH 222/277] zsmalloc: change return value unit of zs_get_total_size_bytes zs_get_total_size_bytes returns a amount of memory zsmalloc consumed with *byte unit* but zsmalloc operates *page unit* rather than byte unit so let's change the API so benefit we could get is that reduce unnecessary overhead (ie, change page unit with byte unit) in zsmalloc. Since return type is pages, "zs_get_total_pages" is better than "zs_get_total_size_bytes". Signed-off-by: Minchan Kim Reviewed-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Cc: David Horner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 722cdc17232f0f684011407f7cf3c40d39457971) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 4 ++-- include/linux/zsmalloc.h | 2 +- mm/zsmalloc.c | 9 ++++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 600aeab83d2a..d0717743e2df 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -103,10 +103,10 @@ static ssize_t mem_used_total_show(struct device *dev, down_read(&zram->init_lock); if (init_done(zram)) - val = zs_get_total_size_bytes(meta->mem_pool); + val = zs_get_total_pages(meta->mem_pool); up_read(&zram->init_lock); - return scnprintf(buf, PAGE_SIZE, "%llu\n", val); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); } static ssize_t max_comp_streams_show(struct device *dev, diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index d3f48686bceb..ebb2841f752e 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -39,6 +39,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, enum zs_mapmode mm); void zs_unmap_object(struct zs_pool *pool, unsigned long handle); -u64 zs_get_total_size_bytes(struct zs_pool *pool); +unsigned long zs_get_total_pages(struct zs_pool *pool); #endif diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 261be4654848..a255c6e87cab 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -296,7 +296,7 @@ static void zs_zpool_unmap(void *pool, unsigned long handle) static u64 zs_zpool_total_size(void *pool) { - return zs_get_total_size_bytes(pool); + return zs_get_total_pages(pool) << PAGE_SHIFT; } static struct zpool_driver zs_zpool_driver = { @@ -1148,12 +1148,11 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) } EXPORT_SYMBOL_GPL(zs_unmap_object); -u64 zs_get_total_size_bytes(struct zs_pool *pool) +unsigned long zs_get_total_pages(struct zs_pool *pool) { - u64 npages = atomic_long_read(&pool->pages_allocated); - return npages << PAGE_SHIFT; + return atomic_long_read(&pool->pages_allocated); } -EXPORT_SYMBOL_GPL(zs_get_total_size_bytes); +EXPORT_SYMBOL_GPL(zs_get_total_pages); module_init(zs_init); module_exit(zs_exit); From 3c854b64635888527504dfc898687ab10dad6191 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:53 -0700 Subject: [PATCH 223/277] zram: zram memory size limitation Since zram has no control feature to limit memory usage, it makes hard to manage system memrory. This patch adds new knob "mem_limit" via sysfs to set up the a limit so that zram could fail allocation once it reaches the limit. In addition, user could change the limit in runtime so that he could manage the memory more dynamically. Initial state is no limit so it doesn't break old behavior. [akpm@linux-foundation.org: fix typo, per Sergey] Signed-off-by: Minchan Kim Cc: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Cc: David Horner Cc: Joonsoo Kim Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9ada9da9573f3460b156b7755c093e30b258eacb) Signed-off-by: Alex Shi --- Documentation/ABI/testing/sysfs-block-zram | 9 +++++ Documentation/blockdev/zram.txt | 24 ++++++++++-- drivers/block/zram/zram_drv.c | 45 ++++++++++++++++++++++ drivers/block/zram/zram_drv.h | 5 +++ 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index 2775966c2d12..f861cbfab6b1 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -127,3 +127,12 @@ Description: efficiency can be calculated using compr_data_size and this statistic. Unit: bytes + +What: /sys/block/zram/mem_limit +Date: August 2014 +Contact: Minchan Kim +Description: + The mem_limit file is read/write and specifies the maximum + amount of memory ZRAM can use to store the compressed data. The + limit could be changed in run time and "0" means disable the + limit. No limit is the initial state. Unit: bytes diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 2db1687a4b10..4331ebf94bf0 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -74,14 +74,30 @@ There is little point creating a zram of greater than twice the size of memory since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful. -5) Activate: +5) Set memory limit: Optional + Set memory limit by writing the value to sysfs node 'mem_limit'. + The value can be either in bytes or you can use mem suffixes. + In addition, you could change the value in runtime. + Examples: + # limit /dev/zram0 with 50MB memory + echo $((50*1024*1024)) > /sys/block/zram0/mem_limit + + # Using mem suffixes + echo 256K > /sys/block/zram0/mem_limit + echo 512M > /sys/block/zram0/mem_limit + echo 1G > /sys/block/zram0/mem_limit + + # To disable memory limit + echo 0 > /sys/block/zram0/mem_limit + +6) Activate: mkswap /dev/zram0 swapon /dev/zram0 mkfs.ext4 /dev/zram1 mount /dev/zram1 /tmp -6) Stats: +7) Stats: Per-device statistics are exported as various nodes under /sys/block/zram/ disksize @@ -95,11 +111,11 @@ size of the disk when not in use so a huge zram is wasteful. compr_data_size mem_used_total -7) Deactivate: +8) Deactivate: swapoff /dev/zram0 umount /dev/zram1 -8) Reset: +9) Reset: Write any positive value to 'reset' sysfs node echo 1 > /sys/block/zram0/reset echo 1 > /sys/block/zram1/reset diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d0717743e2df..3f4da06c89c0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -122,6 +122,37 @@ static ssize_t max_comp_streams_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%d\n", val); } +static ssize_t mem_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->limit_pages; + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); +} + +static ssize_t mem_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + u64 limit; + char *tmp; + struct zram *zram = dev_to_zram(dev); + + limit = memparse(buf, &tmp); + if (buf == tmp) /* no chars parsed, invalid input */ + return -EINVAL; + + down_write(&zram->init_lock); + zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; + up_write(&zram->init_lock); + + return len; +} + static ssize_t max_comp_streams_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -512,6 +543,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, ret = -ENOMEM; goto out; } + + if (zram->limit_pages && + zs_get_total_pages(meta->mem_pool) > zram->limit_pages) { + zs_free(meta->mem_pool, handle); + ret = -ENOMEM; + goto out; + } + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { @@ -616,6 +655,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) struct zram_meta *meta; down_write(&zram->init_lock); + + zram->limit_pages = 0; + if (!init_done(zram)) { up_write(&zram->init_lock); return; @@ -855,6 +897,8 @@ static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); +static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show, + mem_limit_store); static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, max_comp_streams_show, max_comp_streams_store); static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, @@ -883,6 +927,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_orig_data_size.attr, &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, + &dev_attr_mem_limit.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, NULL, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index e0f725c87cc6..b7aa9c21553f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -112,6 +112,11 @@ struct zram { u64 disksize; /* bytes */ int max_comp_streams; struct zram_stats stats; + /* + * the number of pages zram can consume for storing compressed data + */ + unsigned long limit_pages; + char compressor[10]; }; #endif From cda6b06454ce34ec784cc45c427eb4b0131581e1 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:55 -0700 Subject: [PATCH 224/277] zram: report maximum used memory Normally, zram user could get maximum memory usage zram consumed via polling mem_used_total with sysfs in userspace. But it has a critical problem because user can miss peak memory usage during update inverval of polling. For avoiding that, user should poll it with shorter interval(ie, 0.0000000001s) with mlocking to avoid page fault delay when memory pressure is heavy. It would be troublesome. This patch adds new knob "mem_used_max" so user could see the maximum memory usage easily via reading the knob and reset it via "echo 0 > /sys/block/zram0/mem_used_max". Signed-off-by: Minchan Kim Reviewed-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Reviewed-by: David Horner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 461a8eee6af3b55745be64bea403ed0b743563cf) Signed-off-by: Alex Shi --- Documentation/ABI/testing/sysfs-block-zram | 10 ++++ Documentation/blockdev/zram.txt | 1 + drivers/block/zram/zram_drv.c | 60 +++++++++++++++++++++- drivers/block/zram/zram_drv.h | 1 + 4 files changed, 70 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index f861cbfab6b1..31db44f01936 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -128,6 +128,16 @@ Description: statistic. Unit: bytes +What: /sys/block/zram/mem_used_max +Date: August 2014 +Contact: Minchan Kim +Description: + The mem_used_max file is read/write and specifies the amount + of maximum memory zram have consumed to store compressed data. + For resetting the value, you should write "0". Otherwise, + you could see -EINVAL. + Unit: bytes + What: /sys/block/zram/mem_limit Date: August 2014 Contact: Minchan Kim diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 4331ebf94bf0..5cd0bd903f54 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -110,6 +110,7 @@ size of the disk when not in use so a huge zram is wasteful. orig_data_size compr_data_size mem_used_total + mem_used_max 8) Deactivate: swapoff /dev/zram0 diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3f4da06c89c0..204b7fa9d78e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -153,6 +153,41 @@ static ssize_t mem_limit_store(struct device *dev, return len; } +static ssize_t mem_used_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val = 0; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + if (init_done(zram)) + val = atomic_long_read(&zram->stats.max_used_pages); + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); +} + +static ssize_t mem_used_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int err; + unsigned long val; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta = zram->meta; + + err = kstrtoul(buf, 10, &val); + if (err || val != 0) + return -EINVAL; + + down_read(&zram->init_lock); + if (init_done(zram)) + atomic_long_set(&zram->stats.max_used_pages, + zs_get_total_pages(meta->mem_pool)); + up_read(&zram->init_lock); + + return len; +} + static ssize_t max_comp_streams_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -464,6 +499,21 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, return ret; } +static inline void update_used_max(struct zram *zram, + const unsigned long pages) +{ + int old_max, cur_max; + + old_max = atomic_long_read(&zram->stats.max_used_pages); + + do { + cur_max = old_max; + if (pages > cur_max) + old_max = atomic_long_cmpxchg( + &zram->stats.max_used_pages, cur_max, pages); + } while (old_max != cur_max); +} + static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset) { @@ -475,6 +525,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct zram_meta *meta = zram->meta; struct zcomp_strm *zstrm; bool locked = false; + unsigned long alloced_pages; page = bvec->bv_page; if (is_partial_io(bvec)) { @@ -544,13 +595,15 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - if (zram->limit_pages && - zs_get_total_pages(meta->mem_pool) > zram->limit_pages) { + alloced_pages = zs_get_total_pages(meta->mem_pool); + if (zram->limit_pages && alloced_pages > zram->limit_pages) { zs_free(meta->mem_pool, handle); ret = -ENOMEM; goto out; } + update_used_max(zram, alloced_pages); + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { @@ -899,6 +952,8 @@ static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show, mem_limit_store); +static DEVICE_ATTR(mem_used_max, S_IRUGO | S_IWUSR, mem_used_max_show, + mem_used_max_store); static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, max_comp_streams_show, max_comp_streams_store); static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, @@ -928,6 +983,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, &dev_attr_mem_limit.attr, + &dev_attr_mem_used_max.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, NULL, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index b7aa9c21553f..c6ee271317f5 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -90,6 +90,7 @@ struct zram_stats { atomic64_t notify_free; /* no. of swap slot free notifications */ atomic64_t zero_pages; /* no. of zero filled pages */ atomic64_t pages_stored; /* no. of pages currently stored */ + atomic_long_t max_used_pages; /* no. of maximum pages stored */ }; struct zram_meta { From c85f1d31057bdde75716b19e6456a91c5c2273da Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 9 Oct 2014 15:29:57 -0700 Subject: [PATCH 225/277] zram: use notify_free to account all free notifications `notify_free' device attribute accounts the number of slot free notifications and internally represents the number of zram_free_page() calls. Slot free notifications are sent only when device is used as a swap device, hence `notify_free' is used only for swap devices. Since f4659d8e620d08 (zram: support REQ_DISCARD) ZRAM handles yet another one free notification (also via zram_free_page() call) -- REQ_DISCARD requests, which are sent by a filesystem, whenever some data blocks are discarded. However, there is no way to know the number of notifications in the latter case. Use `notify_free' to account the number of pages freed by zram_bio_discard() and zram_slot_free_notify(). Depending on usage scenario `notify_free' represents: a) the number of pages freed because of slot free notifications, which is equal to the number of swap_slot_free_notify() calls, so there is no behaviour change b) the number of pages freed because of REQ_DISCARD notifications Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Cc: Chao Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 015254daf1753003c19c46b90ee85a963260d270) Signed-off-by: Alex Shi Conflicts: Documentation/ABI/testing/sysfs-block-zram --- Documentation/ABI/testing/sysfs-block-zram | 13 ++++++++----- drivers/block/zram/zram_drv.c | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index 31db44f01936..0c7f4f91c6b5 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -76,11 +76,14 @@ What: /sys/block/zram/notify_free Date: August 2010 Contact: Nitin Gupta Description: - The notify_free file is read-only and specifies the number of - swap slot free notifications received by this device. These - notifications are send to a swap block device when a swap slot - is freed. This statistic is applicable only when this disk is - being used as a swap disk. + The notify_free file is read-only. Depending on device usage + scenario it may account a) the number of pages freed because + of swap slot free notifications or b) the number of pages freed + because of REQ_DISCARD requests sent by bio. The former ones + are sent to a swap block device when a swap slot is freed, which + implies that this disk is being used as a swap disk. The latter + ones are sent by filesystem mounted with discard option, + whenever some data blocks are getting discarded. What: /sys/block/zram/discard Date: August 2010 diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 204b7fa9d78e..3503019a9672 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -697,6 +697,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + atomic64_inc(&zram->stats.notify_free); index++; n -= PAGE_SIZE; } From b4f773eb2609e4b4fdf1e4ba11fd5dbc4eb2e655 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Thu, 9 Oct 2014 15:29:59 -0700 Subject: [PATCH 226/277] mm/zsmalloc.c: correct comment for fullness group computation The letter 'f' in "n <= N/f" stands for fullness_threshold_frac, not 1/fullness_threshold_frac. Signed-off-by: Wang Sheng-Hui Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 6dd9737e31504f9377a8a19810ea4922e88516c1) Signed-off-by: Alex Shi --- mm/zsmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index a255c6e87cab..a4556ec316e4 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -163,7 +163,7 @@ enum fullness_group { * n <= N / f, where * n = number of allocated objects * N = total number of objects zspage can store - * f = 1/fullness_threshold_frac + * f = fullness_threshold_frac * * Similarly, we assign zspage to: * ZS_ALMOST_FULL when n > N / f From b900447eb093095e133ffacb6a7de29660e09eae Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 9 Oct 2014 15:30:01 -0700 Subject: [PATCH 227/277] zsmalloc: simplify init_zspage free obj linking Change zsmalloc init_zspage() logic to iterate through each object on each of its pages, checking the offset to verify the object is on the current page before linking it into the zspage. The current zsmalloc init_zspage free object linking code has logic that relies on there only being one page per zspage when PAGE_SIZE is a multiple of class->size. It calculates the number of objects for the current page, and iterates through all of them plus one, to account for the assumed partial object at the end of the page. While this currently works, the logic can be simplified to just link the object at each successive offset until the offset is larger than PAGE_SIZE, which does not rely on PAGE_SIZE being a multiple of class->size. Signed-off-by: Dan Streetman Acked-by: Minchan Kim Cc: Sergey Senozhatsky Cc: Nitin Gupta Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 5538c562377580947916b3366898f1eb5f53768e) Signed-off-by: Alex Shi --- mm/zsmalloc.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index a4556ec316e4..00c28039b06a 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -594,7 +594,7 @@ static void init_zspage(struct page *first_page, struct size_class *class) while (page) { struct page *next_page; struct link_free *link; - unsigned int i, objs_on_page; + unsigned int i = 1; /* * page->index stores offset of first object starting @@ -607,14 +607,10 @@ static void init_zspage(struct page *first_page, struct size_class *class) link = (struct link_free *)kmap_atomic(page) + off / sizeof(*link); - objs_on_page = (PAGE_SIZE - off) / class->size; - for (i = 1; i <= objs_on_page; i++) { - off += class->size; - if (off < PAGE_SIZE) { - link->next = obj_location_to_handle(page, i); - link += class->size / sizeof(*link); - } + while ((off += class->size) < PAGE_SIZE) { + link->next = obj_location_to_handle(page, i++); + link += class->size / sizeof(*link); } /* @@ -626,7 +622,7 @@ static void init_zspage(struct page *first_page, struct size_class *class) link->next = obj_location_to_handle(next_page, 0); kunmap_atomic(link); page = next_page; - off = (off + class->size) % PAGE_SIZE; + off %= PAGE_SIZE; } } From ea56241654a2c7ef9a1122a7ad5c751527ed9b17 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 29 Oct 2014 14:50:57 -0700 Subject: [PATCH 228/277] zram: avoid NULL pointer access in concurrent situation There is a rare NULL pointer bug in mem_used_total_show() and mem_used_max_store() in concurrent situation, like this: zram is not initialized, process A is a mem_used_total reader which runs periodically, while process B try to init zram. process A process B access meta, get a NULL value init zram, done init_done() is true access meta->mem_pool, get a NULL pointer BUG This patch fixes this issue. Signed-off-by: Weijie Yang Acked-by: Minchan Kim Acked-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 5a99e95b8d1cd47f6feddcdca6c71d22060df8a2) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3503019a9672..bf9fea268db4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -99,11 +99,12 @@ static ssize_t mem_used_total_show(struct device *dev, { u64 val = 0; struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta = zram->meta; down_read(&zram->init_lock); - if (init_done(zram)) + if (init_done(zram)) { + struct zram_meta *meta = zram->meta; val = zs_get_total_pages(meta->mem_pool); + } up_read(&zram->init_lock); return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); @@ -173,16 +174,17 @@ static ssize_t mem_used_max_store(struct device *dev, int err; unsigned long val; struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta = zram->meta; err = kstrtoul(buf, 10, &val); if (err || val != 0) return -EINVAL; down_read(&zram->init_lock); - if (init_done(zram)) + if (init_done(zram)) { + struct zram_meta *meta = zram->meta; atomic_long_set(&zram->stats.max_used_pages, zs_get_total_pages(meta->mem_pool)); + } up_read(&zram->init_lock); return len; From 7aecdc886e2e9bacfaf2377067072fa63d649b21 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Thu, 13 Nov 2014 15:19:05 -0800 Subject: [PATCH 229/277] zram: avoid kunmap_atomic() of a NULL pointer zram could kunmap_atomic() a NULL pointer in a rare situation: a zram page becomes a full-zeroed page after a partial write io. The current code doesn't handle this case and performs kunmap_atomic() on a NULL pointer, which panics the kernel. This patch fixes this issue. Signed-off-by: Weijie Yang Cc: Sergey Senozhatsky Cc: Dan Streetman Cc: Nitin Gupta Cc: Weijie Yang Acked-by: Jerome Marchand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit c406515239376fc93a30d5d03192182160cbd3fb) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index bf9fea268db4..8a1266ce8bbe 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -559,7 +559,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (page_zero_filled(uncmem)) { - kunmap_atomic(user_mem); + if (user_mem) + kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); From 48eaa2e2b66fffbfa6eefa46c34cfbf47ccc1fce Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 12 Dec 2014 16:56:44 -0800 Subject: [PATCH 230/277] zsmalloc: merge size_class to reduce fragmentation zsmalloc has many size_classes to reduce fragmentation and they are in 16 bytes unit, for example, 16, 32, 48, etc., if PAGE_SIZE is 4096. And, zsmalloc has constraint that each zspage has 4 pages at maximum. In this situation, we can see interesting aspect. Let's think about size_class for 1488, 1472, ..., 1376. To prevent external fragmentation, they uses 4 pages per zspage and so all they can contain 11 objects at maximum. 16384 (4096 * 4) = 1488 * 11 + remains 16384 (4096 * 4) = 1472 * 11 + remains 16384 (4096 * 4) = ... 16384 (4096 * 4) = 1376 * 11 + remains It means that they have same characteristics and classification between them isn't needed. If we use one size_class for them, we can reduce fragementation and save some memory since both the 1488 and 1472 sized classes can only fit 11 objects into 4 pages, and an object that's 1472 bytes can fit into an object that's 1488 bytes, merging these classes to always use objects that are 1488 bytes will reduce the total number of size classes. And reducing the total number of size classes reduces overall fragmentation, because a wider range of compressed pages can fit into a single size class, leaving less unused objects in each size class. For this purpose, this patch implement size_class merging. If there is size_class that have same pages_per_zspage and same number of objects per zspage with previous size_class, we don't create new size_class. Instead, we use previous, same characteristic size_class. With this way, above example sizes (1488, 1472, ..., 1376) use just one size_class so we can get much more memory utilization. Below is result of my simple test. TEST ENV: EXT4 on zram, mount with discard option WORKLOAD: untar kernel source code, remove directory in descending order in size. (drivers arch fs sound include net Documentation firmware kernel tools) Each line represents orig_data_size, compr_data_size, mem_used_total, fragmentation overhead (mem_used - compr_data_size) and overhead ratio (overhead to compr_data_size), respectively, after untar and remove operation is executed. * untar-nomerge.out orig_size compr_size used_size overhead overhead_ratio 525.88MB 199.16MB 210.23MB 11.08MB 5.56% 288.32MB 97.43MB 105.63MB 8.20MB 8.41% 177.32MB 61.12MB 69.40MB 8.28MB 13.55% 146.47MB 47.32MB 56.10MB 8.78MB 18.55% 124.16MB 38.85MB 48.41MB 9.55MB 24.58% 103.93MB 31.68MB 40.93MB 9.25MB 29.21% 84.34MB 22.86MB 32.72MB 9.86MB 43.13% 66.87MB 14.83MB 23.83MB 9.00MB 60.70% 60.67MB 11.11MB 18.60MB 7.49MB 67.48% 55.86MB 8.83MB 16.61MB 7.77MB 88.03% 53.32MB 8.01MB 15.32MB 7.31MB 91.24% * untar-merge.out orig_size compr_size used_size overhead overhead_ratio 526.23MB 199.18MB 209.81MB 10.64MB 5.34% 288.68MB 97.45MB 104.08MB 6.63MB 6.80% 177.68MB 61.14MB 66.93MB 5.79MB 9.47% 146.83MB 47.34MB 52.79MB 5.45MB 11.51% 124.52MB 38.87MB 44.30MB 5.43MB 13.96% 104.29MB 31.70MB 36.83MB 5.13MB 16.19% 84.70MB 22.88MB 27.92MB 5.04MB 22.04% 67.11MB 14.83MB 19.26MB 4.43MB 29.86% 60.82MB 11.10MB 14.90MB 3.79MB 34.17% 55.90MB 8.82MB 12.61MB 3.79MB 42.97% 53.32MB 8.01MB 11.73MB 3.73MB 46.53% As you can see above result, merged one has better utilization (overhead ratio, 5th column) and uses less memory (mem_used_total, 3rd column). Signed-off-by: Joonsoo Kim Cc: Minchan Kim Cc: Nitin Gupta Cc: Jerome Marchand Cc: Sergey Senozhatsky Reviewed-by: Dan Streetman Cc: Luigi Semenzato Cc: Cc: "seungho1.park" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9eec4cd53f9865b733dc78cf5f6465871beed014) Signed-off-by: Alex Shi --- mm/zsmalloc.c | 80 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 00c28039b06a..91eac3cf17e8 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -202,7 +202,7 @@ struct link_free { }; struct zs_pool { - struct size_class size_class[ZS_SIZE_CLASSES]; + struct size_class *size_class[ZS_SIZE_CLASSES]; gfp_t flags; /* allocation flags used when growing pool */ atomic_long_t pages_allocated; @@ -434,7 +434,7 @@ static enum fullness_group fix_fullness_group(struct zs_pool *pool, if (newfg == currfg) goto out; - class = &pool->size_class[class_idx]; + class = pool->size_class[class_idx]; remove_zspage(page, class, currfg); insert_zspage(page, class, newfg); set_zspage_mapping(page, class_idx, newfg); @@ -891,6 +891,23 @@ static int zs_init(void) return notifier_to_errno(ret); } +static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage) +{ + return pages_per_zspage * PAGE_SIZE / size; +} + +static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) +{ + if (prev->pages_per_zspage != pages_per_zspage) + return false; + + if (get_maxobj_per_zspage(prev->size, prev->pages_per_zspage) + != get_maxobj_per_zspage(size, pages_per_zspage)) + return false; + + return true; +} + /** * zs_create_pool - Creates an allocation pool to work from. * @flags: allocation flags used to allocate pool metadata @@ -911,25 +928,56 @@ struct zs_pool *zs_create_pool(gfp_t flags) if (!pool) return NULL; - for (i = 0; i < ZS_SIZE_CLASSES; i++) { + /* + * Iterate reversly, because, size of size_class that we want to use + * for merging should be larger or equal to current size. + */ + for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) { int size; + int pages_per_zspage; struct size_class *class; + struct size_class *prev_class; size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA; if (size > ZS_MAX_ALLOC_SIZE) size = ZS_MAX_ALLOC_SIZE; + pages_per_zspage = get_pages_per_zspage(size); + + /* + * size_class is used for normal zsmalloc operation such + * as alloc/free for that size. Although it is natural that we + * have one size_class for each size, there is a chance that we + * can get more memory utilization if we use one size_class for + * many different sizes whose size_class have same + * characteristics. So, we makes size_class point to + * previous size_class if possible. + */ + if (i < ZS_SIZE_CLASSES - 1) { + prev_class = pool->size_class[i + 1]; + if (can_merge(prev_class, size, pages_per_zspage)) { + pool->size_class[i] = prev_class; + continue; + } + } + + class = kzalloc(sizeof(struct size_class), GFP_KERNEL); + if (!class) + goto err; - class = &pool->size_class[i]; class->size = size; class->index = i; + class->pages_per_zspage = pages_per_zspage; spin_lock_init(&class->lock); - class->pages_per_zspage = get_pages_per_zspage(size); - + pool->size_class[i] = class; } pool->flags = flags; return pool; + +err: + zs_destroy_pool(pool); + return NULL; } EXPORT_SYMBOL_GPL(zs_create_pool); @@ -939,7 +987,13 @@ void zs_destroy_pool(struct zs_pool *pool) for (i = 0; i < ZS_SIZE_CLASSES; i++) { int fg; - struct size_class *class = &pool->size_class[i]; + struct size_class *class = pool->size_class[i]; + + if (!class) + continue; + + if (class->index != i) + continue; for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) { if (class->fullness_list[fg]) { @@ -948,6 +1002,7 @@ void zs_destroy_pool(struct zs_pool *pool) class->size, fg); } } + kfree(class); } kfree(pool); } @@ -966,7 +1021,6 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) { unsigned long obj; struct link_free *link; - int class_idx; struct size_class *class; struct page *first_page, *m_page; @@ -975,9 +1029,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE)) return 0; - class_idx = get_size_class_index(size); - class = &pool->size_class[class_idx]; - BUG_ON(class_idx != class->index); + class = pool->size_class[get_size_class_index(size)]; spin_lock(&class->lock); first_page = find_get_zspage(class); @@ -1030,7 +1082,7 @@ void zs_free(struct zs_pool *pool, unsigned long obj) first_page = get_first_page(f_page); get_zspage_mapping(first_page, &class_idx, &fullness); - class = &pool->size_class[class_idx]; + class = pool->size_class[class_idx]; f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); spin_lock(&class->lock); @@ -1091,7 +1143,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, obj_handle_to_location(handle, &page, &obj_idx); get_zspage_mapping(get_first_page(page), &class_idx, &fg); - class = &pool->size_class[class_idx]; + class = pool->size_class[class_idx]; off = obj_idx_to_offset(page, obj_idx, class->size); area = &get_cpu_var(zs_map_area); @@ -1125,7 +1177,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) obj_handle_to_location(handle, &page, &obj_idx); get_zspage_mapping(get_first_page(page), &class_idx, &fg); - class = &pool->size_class[class_idx]; + class = pool->size_class[class_idx]; off = obj_idx_to_offset(page, obj_idx, class->size); area = &__get_cpu_var(zs_map_area); From 66de4b5dc66d8a3f0990ad99ab868db4ad267240 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Mon, 11 May 2015 14:34:54 +0100 Subject: [PATCH 231/277] gator: Enable multiple source copies to exist in Android build environments An Android build environment may contain multiple copies of the gator source code, e.g. if it's been copied into a kernel tree as well as having a standalone copy, or if there are two kernel trees with copies. As Android builds tend to include all Android.mk it finds, this can lead to build errors because there is more that one makefile trying to build the daemon. To allow this situation to be catered for we update Android.mk so that if the variable GATOR_DAEMON_PATH is defined, and the makefile doesn't live under that path, then the makefile contents are ignored. An Android build environment can then set GATOR_DAEMON_PATH to specify the copy it wants to use. Signed-off-by: Jon Medhurst --- tools/gator/daemon/Android.mk | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/gator/daemon/Android.mk b/tools/gator/daemon/Android.mk index 68f4a8397379..c5c8bdc1ff29 100644 --- a/tools/gator/daemon/Android.mk +++ b/tools/gator/daemon/Android.mk @@ -1,4 +1,8 @@ LOCAL_PATH := $(call my-dir) + +# Don't use this file if GATOR_DAEMON_PATH is set and we're not under that path +ifneq ($(and $(GATOR_DAEMON_PATH),$(filter $(patsubst %/,%,$(GATOR_DAEMON_PATH))/%,$(LOCAL_PATH)/)),) + include $(CLEAR_VARS) XML_H := $(shell cd $(LOCAL_PATH) && make events_xml.h defaults_xml.h SrcMd5.cpp) @@ -74,3 +78,5 @@ LOCAL_MODULE := gatord LOCAL_MODULE_TAGS := optional include $(BUILD_EXECUTABLE) + +endif From 211d013d9e8abbd172c43ace2b6d5ed928a4dd6a Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 12 Dec 2014 16:56:56 -0800 Subject: [PATCH 232/277] zsmalloc: fix zs_init cpu notifier error handling Mahendran Ganesh reported that zpool-enabled zsmalloc should not call zpool_unregister_driver() from zs_init() if cpu notifier registration has failed, because error handling is performed before we register the driver via zpool_register_driver() call. Factor out cpu notifier registration and unregistration code and fix zs_init() error handling. link: http://lkml.iu.edu//hypermail/linux/kernel/1411.1/04156.html [akpm@linux-foundation.org: squash bogus gcc warning] [akpm@linux-foundation.org: use __init and __exit] Signed-off-by: Sergey Senozhatsky Reported-by: Mahendran Ganesh Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b1b00a5b8a6cf32e3973507decf1216709b55072) Signed-off-by: Alex Shi --- mm/zsmalloc.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 91eac3cf17e8..f3d9a14a23f6 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -847,14 +847,10 @@ static struct notifier_block zs_cpu_nb = { .notifier_call = zs_cpu_notifier }; -static void zs_exit(void) +static void zs_unregister_cpu_notifier(void) { int cpu; -#ifdef CONFIG_ZPOOL - zpool_unregister_driver(&zs_zpool_driver); -#endif - cpu_notifier_register_begin(); for_each_online_cpu(cpu) @@ -864,31 +860,44 @@ static void zs_exit(void) cpu_notifier_register_done(); } -static int zs_init(void) +static int zs_register_cpu_notifier(void) { - int cpu, ret; + int cpu, uninitialized_var(ret); cpu_notifier_register_begin(); __register_cpu_notifier(&zs_cpu_nb); for_each_online_cpu(cpu) { ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu); - if (notifier_to_errno(ret)) { - cpu_notifier_register_done(); - goto fail; - } + if (notifier_to_errno(ret)) + break; } cpu_notifier_register_done(); + return notifier_to_errno(ret); +} + +static void __exit zs_exit(void) +{ +#ifdef CONFIG_ZPOOL + zpool_unregister_driver(&zs_zpool_driver); +#endif + zs_unregister_cpu_notifier(); +} + +static int __init zs_init(void) +{ + int ret = zs_register_cpu_notifier(); + + if (ret) { + zs_unregister_cpu_notifier(); + return ret; + } #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); #endif - return 0; -fail: - zs_exit(); - return notifier_to_errno(ret); } static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage) From 5b0af867b453597531c989254f75be634b8e9197 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 12 Dec 2014 16:56:58 -0800 Subject: [PATCH 233/277] zsmalloc: correct fragile [kmap|kunmap]_atomic use The kunmap_atomic should use virtual address getting by kmap_atomic. However, some pieces of code in zsmalloc uses modified address, not the one got by kmap_atomic for kunmap_atomic. It's okay for working because zsmalloc modifies the address inner PAGE_SIZE bounday so it works with current kmap_atomic's implementation. But it's still fragile with potential changing of kmap_atomic so let's correct it. I got a subtle bug when I implemented a new feature of zsmalloc (compaction) due to a link's mishandling (the link was over page boundary). Although it was totally my mistake, it took a while to find the cause because an unpredictable kmapped address was unmapped causing an almost random crash. Signed-off-by: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Cc: Dan Streetman Cc: Seth Jennings Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit af4ee5e977acb150371c28bd85cb7e34cac48b13) Signed-off-by: Alex Shi --- mm/zsmalloc.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index f3d9a14a23f6..7031e12fcf2b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -595,6 +595,7 @@ static void init_zspage(struct page *first_page, struct size_class *class) struct page *next_page; struct link_free *link; unsigned int i = 1; + void *vaddr; /* * page->index stores offset of first object starting @@ -605,8 +606,8 @@ static void init_zspage(struct page *first_page, struct size_class *class) if (page != first_page) page->index = off; - link = (struct link_free *)kmap_atomic(page) + - off / sizeof(*link); + vaddr = kmap_atomic(page); + link = (struct link_free *)vaddr + off / sizeof(*link); while ((off += class->size) < PAGE_SIZE) { link->next = obj_location_to_handle(page, i++); @@ -620,7 +621,7 @@ static void init_zspage(struct page *first_page, struct size_class *class) */ next_page = get_next_page(page); link->next = obj_location_to_handle(next_page, 0); - kunmap_atomic(link); + kunmap_atomic(vaddr); page = next_page; off %= PAGE_SIZE; } @@ -1031,6 +1032,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) unsigned long obj; struct link_free *link; struct size_class *class; + void *vaddr; struct page *first_page, *m_page; unsigned long m_objidx, m_offset; @@ -1059,11 +1061,11 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) obj_handle_to_location(obj, &m_page, &m_objidx); m_offset = obj_idx_to_offset(m_page, m_objidx, class->size); - link = (struct link_free *)kmap_atomic(m_page) + - m_offset / sizeof(*link); + vaddr = kmap_atomic(m_page); + link = (struct link_free *)vaddr + m_offset / sizeof(*link); first_page->freelist = link->next; memset(link, POISON_INUSE, sizeof(*link)); - kunmap_atomic(link); + kunmap_atomic(vaddr); first_page->inuse++; /* Now move the zspage to another fullness group, if required */ @@ -1079,6 +1081,7 @@ void zs_free(struct zs_pool *pool, unsigned long obj) struct link_free *link; struct page *first_page, *f_page; unsigned long f_objidx, f_offset; + void *vaddr; int class_idx; struct size_class *class; @@ -1097,10 +1100,10 @@ void zs_free(struct zs_pool *pool, unsigned long obj) spin_lock(&class->lock); /* Insert this object in containing zspage's freelist */ - link = (struct link_free *)((unsigned char *)kmap_atomic(f_page) - + f_offset); + vaddr = kmap_atomic(f_page); + link = (struct link_free *)(vaddr + f_offset); link->next = first_page->freelist; - kunmap_atomic(link); + kunmap_atomic(vaddr); first_page->freelist = (void *)obj; first_page->inuse--; From 60acd81baccac53be98e07c140a0579a3607be9d Mon Sep 17 00:00:00 2001 From: Mahendran Ganesh Date: Fri, 12 Dec 2014 16:57:04 -0800 Subject: [PATCH 234/277] mm/zram: correct ZRAM_ZERO flag bit position In struct zram_table_entry, the element *value* contains obj size and obj zram flags. Bit 0 to bit (ZRAM_FLAG_SHIFT - 1) represent obj size, and bit ZRAM_FLAG_SHIFT to the highest bit of unsigned long represent obj zram_flags. So the first zram flag(ZRAM_ZERO) should be from ZRAM_FLAG_SHIFT instead of (ZRAM_FLAG_SHIFT + 1). This patch fixes this cosmetic issue. Also fix a typo, "page in now accessed" -> "page is now accessed" Signed-off-by: Mahendran Ganesh Acked-by: Minchan Kim Acked-by: Weijie Yang Acked-by: Sergey Senozhatsky Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit d49b1c254c997195872a9e8913660a788298921e) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c6ee271317f5..b05a816b09ac 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -66,8 +66,8 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { /* Page consists entirely of zeros */ - ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1, - ZRAM_ACCESS, /* page in now accessed */ + ZRAM_ZERO = ZRAM_FLAG_SHIFT, + ZRAM_ACCESS, /* page is now accessed */ __NR_ZRAM_PAGEFLAGS, }; From 837e91c8966924792bf5ba5ef9a62eaa55489ff7 Mon Sep 17 00:00:00 2001 From: Ganesh Mahendran Date: Thu, 12 Feb 2015 15:00:33 -0800 Subject: [PATCH 235/277] zram: free meta table in zram_meta_free zram_meta_alloc() and zram_meta_free() are a pair. In zram_meta_alloc(), meta table is allocated. So it it better to free it in zram_meta_free(). Signed-off-by: Ganesh Mahendran Acked-by: Minchan Kim Acked-by: Sergey Senozhatsky Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 1fec117281d9f5349c35279c9521f4096fa33357) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8a1266ce8bbe..3e075d65a158 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -308,8 +308,21 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio) return 1; } -static void zram_meta_free(struct zram_meta *meta) +static void zram_meta_free(struct zram_meta *meta, u64 disksize) { + size_t num_pages = disksize >> PAGE_SHIFT; + size_t index; + + /* Free all pages that are still in this zram device */ + for (index = 0; index < num_pages; index++) { + unsigned long handle = meta->table[index].handle; + + if (!handle) + continue; + + zs_free(meta->mem_pool, handle); + } + zs_destroy_pool(meta->mem_pool); vfree(meta->table); kfree(meta); @@ -708,9 +721,6 @@ static void zram_bio_discard(struct zram *zram, u32 index, static void zram_reset_device(struct zram *zram, bool reset_capacity) { - size_t index; - struct zram_meta *meta; - down_write(&zram->init_lock); zram->limit_pages = 0; @@ -720,20 +730,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) return; } - meta = zram->meta; - /* Free all pages that are still in this zram device */ - for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { - unsigned long handle = meta->table[index].handle; - if (!handle) - continue; - - zs_free(meta->mem_pool, handle); - } - zcomp_destroy(zram->comp); zram->max_comp_streams = 1; - - zram_meta_free(zram->meta); + zram_meta_free(zram->meta, zram->disksize); zram->meta = NULL; /* Reset stats */ memset(&zram->stats, 0, sizeof(zram->stats)); @@ -805,7 +804,7 @@ static ssize_t disksize_store(struct device *dev, up_write(&zram->init_lock); zcomp_destroy(comp); out_free_meta: - zram_meta_free(meta); + zram_meta_free(meta, disksize); return err; } From 043787104c5b1386a96c317f2e82ecff257dd9c5 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 12 Feb 2015 15:00:36 -0800 Subject: [PATCH 236/277] zram: fix umount-reset_store-mount race condition Ganesh Mahendran was the first one who proposed to use bdev->bd_mutex to avoid ->bd_holders race condition: CPU0 CPU1 umount /* zram->init_done is true */ reset_store() bdev->bd_holders == 0 mount ... zram_make_request() zram_reset_device() However, his solution required some considerable amount of code movement, which we can avoid. Apart from using bdev->bd_mutex in reset_store(), this patch also simplifies zram_reset_device(). zram_reset_device() has a bool parameter reset_capacity which tells it whether disk capacity and itself disk should be reset. There are two zram_reset_device() callers: -- zram_exit() passes reset_capacity=false -- reset_store() passes reset_capacity=true So we can move reset_capacity-sensitive work out of zram_reset_device() and perform it unconditionally in reset_store(). This also lets us drop reset_capacity parameter from zram_reset_device() and pass zram pointer only. Signed-off-by: Sergey Senozhatsky Reported-by: Ganesh Mahendran Cc: Minchan Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit ba6b17d68c8e3aa8d55d0474299cb931965c5ea5) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3e075d65a158..45e2e85815ab 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -719,7 +719,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, } } -static void zram_reset_device(struct zram *zram, bool reset_capacity) +static void zram_reset_device(struct zram *zram) { down_write(&zram->init_lock); @@ -738,18 +738,7 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - if (reset_capacity) - set_capacity(zram->disk, 0); - up_write(&zram->init_lock); - - /* - * Revalidate disk out of the init_lock to avoid lockdep splat. - * It's okay because disk's capacity is protected by init_lock - * so that revalidate_disk always sees up-to-date capacity. - */ - if (reset_capacity) - revalidate_disk(zram->disk); } static ssize_t disksize_store(struct device *dev, @@ -822,6 +811,7 @@ static ssize_t reset_store(struct device *dev, if (!bdev) return -ENOMEM; + mutex_lock(&bdev->bd_mutex); /* Do not reset an active device! */ if (bdev->bd_holders) { ret = -EBUSY; @@ -839,12 +829,17 @@ static ssize_t reset_store(struct device *dev, /* Make sure all pending I/O is finished */ fsync_bdev(bdev); + zram_reset_device(zram); + set_capacity(zram->disk, 0); + + mutex_unlock(&bdev->bd_mutex); + revalidate_disk(zram->disk); bdput(bdev); - zram_reset_device(zram, true); return len; out: + mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; } @@ -1147,7 +1142,7 @@ static void __exit zram_exit(void) * Shouldn't access zram->disk after destroy_device * because destroy_device already released zram->disk. */ - zram_reset_device(zram, false); + zram_reset_device(zram); } unregister_blkdev(zram_major, "zram"); From cf30cd6bbc0c82854e0b5551e57ee02d001e61db Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 1 May 2015 22:02:47 -0400 Subject: [PATCH 237/277] ipv4: Missing sk_nulls_node_init() in ping_unhash(). [ Upstream commit a134f083e79fb4c3d0a925691e732c56911b4326 ] If we don't do that, then the poison value is left in the ->pprev backlink. This can cause crashes if we do a disconnect, followed by a connect(). Tested-by: Linus Torvalds Reported-by: Wen Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 698f3a2ac5ae..459b957104a8 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -139,6 +139,7 @@ static void ping_v4_unhash(struct sock *sk) if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); + sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; From c19e77907dc450c69028c03c4058e5a5688ab2a9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 14:50:39 +0200 Subject: [PATCH 238/277] ALSA: emux: Fix mutex deadlock at unloading commit 07b0e5d49d227e3950cb13a3e8caf248ef2a310e upstream. The emux-synth driver has a possible AB/BA mutex deadlock at unloading the emu10k1 driver: snd_emux_free() -> snd_emux_detach_seq(): mutex_lock(&emu->register_mutex) -> snd_seq_delete_kernel_client() -> snd_seq_free_client(): mutex_lock(®ister_mutex) snd_seq_release() -> snd_seq_free_client(): mutex_lock(®ister_mutex) -> snd_seq_delete_all_ports() -> snd_emux_unuse(): mutex_lock(&emu->register_mutex) Basically snd_emux_detach_seq() doesn't need a protection of emu->register_mutex as it's already being unregistered. So, we can get rid of this for avoiding the deadlock. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/synth/emux/emux_seq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/synth/emux/emux_seq.c b/sound/synth/emux/emux_seq.c index 7778b8e19782..188fda0effb0 100644 --- a/sound/synth/emux/emux_seq.c +++ b/sound/synth/emux/emux_seq.c @@ -124,12 +124,10 @@ snd_emux_detach_seq(struct snd_emux *emu) if (emu->voices) snd_emux_terminate_all(emu); - mutex_lock(&emu->register_mutex); if (emu->client >= 0) { snd_seq_delete_kernel_client(emu->client); emu->client = -1; } - mutex_unlock(&emu->register_mutex); } From c542e91572c54d2127ab3da723a922893fef1fb6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Apr 2015 17:11:44 +0200 Subject: [PATCH 239/277] ALSA: emux: Fix mutex deadlock in OSS emulation commit 1c94e65c668f44d2c69ae7e7fc268ab3268fba3e upstream. The OSS emulation in synth-emux helper has a potential AB/BA deadlock at the simultaneous closing and opening: close -> snd_seq_release() -> sne_seq_free_client() -> snd_seq_delete_all_ports(): takes client->ports_mutex -> port_delete() -> snd_emux_unuse(): takes emux->register_mutex open -> snd_seq_oss_open() -> snd_emux_open_seq_oss(): takes emux->register_mutex -> snd_seq_event_port_attach() -> snd_seq_create_port(): takes client->ports_mutex This patch addresses the deadlock by reducing the rance taking emux->register_mutex in snd_emux_open_seq_oss(). The lock is needed for the refcount handling, so move it locally. The calls in emux_seq.c are already with the mutex, thus they are replaced with the version without mutex lock/unlock. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/synth/emux/emux_oss.c | 11 +---------- sound/synth/emux/emux_seq.c | 27 +++++++++++++++++++++------ 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/sound/synth/emux/emux_oss.c b/sound/synth/emux/emux_oss.c index 319754cf6208..daf61abc3670 100644 --- a/sound/synth/emux/emux_oss.c +++ b/sound/synth/emux/emux_oss.c @@ -118,12 +118,8 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) if (snd_BUG_ON(!arg || !emu)) return -ENXIO; - mutex_lock(&emu->register_mutex); - - if (!snd_emux_inc_count(emu)) { - mutex_unlock(&emu->register_mutex); + if (!snd_emux_inc_count(emu)) return -EFAULT; - } memset(&callback, 0, sizeof(callback)); callback.owner = THIS_MODULE; @@ -135,7 +131,6 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) if (p == NULL) { snd_printk(KERN_ERR "can't create port\n"); snd_emux_dec_count(emu); - mutex_unlock(&emu->register_mutex); return -ENOMEM; } @@ -148,8 +143,6 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) reset_port_mode(p, arg->seq_mode); snd_emux_reset_port(p); - - mutex_unlock(&emu->register_mutex); return 0; } @@ -195,13 +188,11 @@ snd_emux_close_seq_oss(struct snd_seq_oss_arg *arg) if (snd_BUG_ON(!emu)) return -ENXIO; - mutex_lock(&emu->register_mutex); snd_emux_sounds_off_all(p); snd_soundfont_close_check(emu->sflist, SF_CLIENT_NO(p->chset.port)); snd_seq_event_port_detach(p->chset.client, p->chset.port); snd_emux_dec_count(emu); - mutex_unlock(&emu->register_mutex); return 0; } diff --git a/sound/synth/emux/emux_seq.c b/sound/synth/emux/emux_seq.c index 188fda0effb0..a0209204ae48 100644 --- a/sound/synth/emux/emux_seq.c +++ b/sound/synth/emux/emux_seq.c @@ -267,8 +267,8 @@ snd_emux_event_input(struct snd_seq_event *ev, int direct, void *private_data, /* * increment usage count */ -int -snd_emux_inc_count(struct snd_emux *emu) +static int +__snd_emux_inc_count(struct snd_emux *emu) { emu->used++; if (!try_module_get(emu->ops.owner)) @@ -282,12 +282,21 @@ snd_emux_inc_count(struct snd_emux *emu) return 1; } +int snd_emux_inc_count(struct snd_emux *emu) +{ + int ret; + + mutex_lock(&emu->register_mutex); + ret = __snd_emux_inc_count(emu); + mutex_unlock(&emu->register_mutex); + return ret; +} /* * decrease usage count */ -void -snd_emux_dec_count(struct snd_emux *emu) +static void +__snd_emux_dec_count(struct snd_emux *emu) { module_put(emu->card->module); emu->used--; @@ -296,6 +305,12 @@ snd_emux_dec_count(struct snd_emux *emu) module_put(emu->ops.owner); } +void snd_emux_dec_count(struct snd_emux *emu) +{ + mutex_lock(&emu->register_mutex); + __snd_emux_dec_count(emu); + mutex_unlock(&emu->register_mutex); +} /* * Routine that is called upon a first use of a particular port @@ -315,7 +330,7 @@ snd_emux_use(void *private_data, struct snd_seq_port_subscribe *info) mutex_lock(&emu->register_mutex); snd_emux_init_port(p); - snd_emux_inc_count(emu); + __snd_emux_inc_count(emu); mutex_unlock(&emu->register_mutex); return 0; } @@ -338,7 +353,7 @@ snd_emux_unuse(void *private_data, struct snd_seq_port_subscribe *info) mutex_lock(&emu->register_mutex); snd_emux_sounds_off_all(p); - snd_emux_dec_count(emu); + __snd_emux_dec_count(emu); mutex_unlock(&emu->register_mutex); return 0; } From 0da12e28a60087aaaa3fb4a11c12795b228bdad1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 13:00:09 +0200 Subject: [PATCH 240/277] ALSA: emu10k1: Fix card shortname string buffer overflow commit d02260824e2cad626fb2a9d62e27006d34b6dedc upstream. Some models provide too long string for the shortname that has 32bytes including the terminator, and it results in a non-terminated string exposed to the user-space. This isn't too critical, though, as the string is stopped at the succeeding longname string. This patch fixes such entries by dropping "SB" prefix (it's enough to fit within 32 bytes, so far). Meanwhile, it also changes strcpy() with strlcpy() to make sure that this kind of problem won't happen in future, too. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/emu10k1/emu10k1.c | 6 ++++-- sound/pci/emu10k1/emu10k1_main.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 8c5010f7889c..70e6fe186d34 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -181,8 +181,10 @@ static int snd_card_emu10k1_probe(struct pci_dev *pci, } #endif - strcpy(card->driver, emu->card_capabilities->driver); - strcpy(card->shortname, emu->card_capabilities->name); + strlcpy(card->driver, emu->card_capabilities->driver, + sizeof(card->driver)); + strlcpy(card->shortname, emu->card_capabilities->name, + sizeof(card->shortname)); snprintf(card->longname, sizeof(card->longname), "%s (rev.%d, serial:0x%x) at 0x%lx, irq %i", card->shortname, emu->revision, emu->serial, emu->port, emu->irq); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index bdd888ec9a84..134b7cf95ad4 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -1411,7 +1411,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { * */ {.vendor = 0x1102, .device = 0x0008, .subsystem = 0x20011102, - .driver = "Audigy2", .name = "SB Audigy 2 ZS Notebook [SB0530]", + .driver = "Audigy2", .name = "Audigy 2 ZS Notebook [SB0530]", .id = "Audigy2", .emu10k2_chip = 1, .ca0108_chip = 1, @@ -1561,7 +1561,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .adc_1361t = 1, /* 24 bit capture instead of 16bit */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .subsystem = 0x10051102, - .driver = "Audigy2", .name = "SB Audigy 2 Platinum EX [SB0280]", + .driver = "Audigy2", .name = "Audigy 2 Platinum EX [SB0280]", .id = "Audigy2", .emu10k2_chip = 1, .ca0102_chip = 1, From b68ed131b3e4f3cdb740ea70fe216ca8789419d7 Mon Sep 17 00:00:00 2001 From: Peter Zubaj Date: Tue, 28 Apr 2015 21:57:29 +0200 Subject: [PATCH 241/277] ALSA: emu10k1: Emu10k2 32 bit DMA mode commit 7241ea558c6715501e777396b5fc312c372e11d9 upstream. Looks like audigy emu10k2 (probably emu10k1 - sb live too) support two modes for DMA. Second mode is useful for 64 bit os with more then 2 GB of ram (fixes problems with big soundfont loading) 1) 32MB from 2 GB address space using 8192 pages (used now as default) 2) 16MB from 4 GB address space using 4096 pages Mode is set using HCFG_EXPANDED_MEM flag in HCFG register. Also format of emu10k2 page table is then different. Signed-off-by: Peter Zubaj Tested-by: Takashi Iwai Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/sound/emu10k1.h | 14 +++++++++----- sound/pci/emu10k1/emu10k1_callback.c | 4 ++-- sound/pci/emu10k1/emu10k1_main.c | 17 ++++++++++++----- sound/pci/emu10k1/emupcm.c | 2 +- sound/pci/emu10k1/memory.c | 11 ++++++----- 5 files changed, 30 insertions(+), 18 deletions(-) diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index dfb42ca6d043..8898cdeb42a4 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -41,7 +41,8 @@ #define EMUPAGESIZE 4096 #define MAXREQVOICES 8 -#define MAXPAGES 8192 +#define MAXPAGES0 4096 /* 32 bit mode */ +#define MAXPAGES1 8192 /* 31 bit mode */ #define RESERVED 0 #define NUM_MIDI 16 #define NUM_G 64 /* use all channels */ @@ -50,8 +51,7 @@ /* FIXME? - according to the OSS driver the EMU10K1 needs a 29 bit DMA mask */ #define EMU10K1_DMA_MASK 0x7fffffffUL /* 31bit */ -#define AUDIGY_DMA_MASK 0x7fffffffUL /* 31bit FIXME - 32 should work? */ - /* See ALSA bug #1276 - rlrevell */ +#define AUDIGY_DMA_MASK 0xffffffffUL /* 32bit mode */ #define TMEMSIZE 256*1024 #define TMEMSIZEREG 4 @@ -468,8 +468,11 @@ #define MAPB 0x0d /* Cache map B */ -#define MAP_PTE_MASK 0xffffe000 /* The 19 MSBs of the PTE indexed by the PTI */ -#define MAP_PTI_MASK 0x00001fff /* The 13 bit index to one of the 8192 PTE dwords */ +#define MAP_PTE_MASK0 0xfffff000 /* The 20 MSBs of the PTE indexed by the PTI */ +#define MAP_PTI_MASK0 0x00000fff /* The 12 bit index to one of the 4096 PTE dwords */ + +#define MAP_PTE_MASK1 0xffffe000 /* The 19 MSBs of the PTE indexed by the PTI */ +#define MAP_PTI_MASK1 0x00001fff /* The 13 bit index to one of the 8192 PTE dwords */ /* 0x0e, 0x0f: Not used */ @@ -1706,6 +1709,7 @@ struct snd_emu10k1 { unsigned short model; /* subsystem id */ unsigned int card_type; /* EMU10K1_CARD_* */ unsigned int ecard_ctrl; /* ecard control bits */ + unsigned int address_mode; /* address mode */ unsigned long dma_mask; /* PCI DMA mask */ unsigned int delay_pcm_irq; /* in samples */ int max_cache_pages; /* max memory size / PAGE_SIZE */ diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c index 0a34b5f1c475..f8a6549f00e5 100644 --- a/sound/pci/emu10k1/emu10k1_callback.c +++ b/sound/pci/emu10k1/emu10k1_callback.c @@ -415,7 +415,7 @@ start_voice(struct snd_emux_voice *vp) snd_emu10k1_ptr_write(hw, Z2, ch, 0); /* invalidate maps */ - temp = (hw->silent_page.addr << 1) | MAP_PTI_MASK; + temp = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(hw, MAPA, ch, temp); snd_emu10k1_ptr_write(hw, MAPB, ch, temp); #if 0 @@ -436,7 +436,7 @@ start_voice(struct snd_emux_voice *vp) snd_emu10k1_ptr_write(hw, CDF, ch, sample); /* invalidate maps */ - temp = ((unsigned int)hw->silent_page.addr << 1) | MAP_PTI_MASK; + temp = ((unsigned int)hw->silent_page.addr << hw_address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(hw, MAPA, ch, temp); snd_emu10k1_ptr_write(hw, MAPB, ch, temp); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 134b7cf95ad4..a131092572e6 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -282,7 +282,7 @@ static int snd_emu10k1_init(struct snd_emu10k1 *emu, int enable_ir, int resume) snd_emu10k1_ptr_write(emu, TCB, 0, 0); /* taken from original driver */ snd_emu10k1_ptr_write(emu, TCBS, 0, 4); /* taken from original driver */ - silent_page = (emu->silent_page.addr << 1) | MAP_PTI_MASK; + silent_page = (emu->silent_page.addr << emu->address_mode) | (emu->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); for (ch = 0; ch < NUM_G; ch++) { snd_emu10k1_ptr_write(emu, MAPA, ch, silent_page); snd_emu10k1_ptr_write(emu, MAPB, ch, silent_page); @@ -348,6 +348,11 @@ static int snd_emu10k1_init(struct snd_emu10k1 *emu, int enable_ir, int resume) outl(reg | A_IOCFG_GPOUT0, emu->port + A_IOCFG); } + if (emu->address_mode == 0) { + /* use 16M in 4G */ + outl(inl(emu->port + HCFG) | HCFG_EXPANDED_MEM, emu->port + HCFG); + } + return 0; } @@ -1865,8 +1870,10 @@ int snd_emu10k1_create(struct snd_card *card, is_audigy = emu->audigy = c->emu10k2_chip; + /* set addressing mode */ + emu->address_mode = is_audigy ? 0 : 1; /* set the DMA transfer mask */ - emu->dma_mask = is_audigy ? AUDIGY_DMA_MASK : EMU10K1_DMA_MASK; + emu->dma_mask = emu->address_mode ? EMU10K1_DMA_MASK : AUDIGY_DMA_MASK; if (pci_set_dma_mask(pci, emu->dma_mask) < 0 || pci_set_consistent_dma_mask(pci, emu->dma_mask) < 0) { snd_printk(KERN_ERR "architecture does not support PCI busmaster DMA with mask 0x%lx\n", emu->dma_mask); @@ -1889,7 +1896,7 @@ int snd_emu10k1_create(struct snd_card *card, emu->max_cache_pages = max_cache_bytes >> PAGE_SHIFT; if (snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(pci), - 32 * 1024, &emu->ptb_pages) < 0) { + (emu->address_mode ? 32 : 16) * 1024, &emu->ptb_pages) < 0) { err = -ENOMEM; goto error; } @@ -1988,8 +1995,8 @@ int snd_emu10k1_create(struct snd_card *card, /* Clear silent pages and set up pointers */ memset(emu->silent_page.area, 0, PAGE_SIZE); - silent_page = emu->silent_page.addr << 1; - for (idx = 0; idx < MAXPAGES; idx++) + silent_page = emu->silent_page.addr << emu->address_mode; + for (idx = 0; idx < (emu->address_mode ? MAXPAGES1 : MAXPAGES0); idx++) ((u32 *)emu->ptb_pages.area)[idx] = cpu_to_le32(silent_page | idx); /* set up voice indices */ diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 5ae1d045bdcb..7581019d7c84 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -379,7 +379,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, snd_emu10k1_ptr_write(emu, Z1, voice, 0); snd_emu10k1_ptr_write(emu, Z2, voice, 0); /* invalidate maps */ - silent_page = ((unsigned int)emu->silent_page.addr << 1) | MAP_PTI_MASK; + silent_page = ((unsigned int)emu->silent_page.addr << emu->address_mode) | (emu->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(emu, MAPA, voice, silent_page); snd_emu10k1_ptr_write(emu, MAPB, voice, silent_page); /* modulation envelope */ diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index ae709c1ab3a8..d514458efe3d 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -34,10 +34,11 @@ * aligned pages in others */ #define __set_ptb_entry(emu,page,addr) \ - (((u32 *)(emu)->ptb_pages.area)[page] = cpu_to_le32(((addr) << 1) | (page))) + (((u32 *)(emu)->ptb_pages.area)[page] = cpu_to_le32(((addr) << (emu->address_mode)) | (page))) #define UNIT_PAGES (PAGE_SIZE / EMUPAGESIZE) -#define MAX_ALIGN_PAGES (MAXPAGES / UNIT_PAGES) +#define MAX_ALIGN_PAGES0 (MAXPAGES0 / UNIT_PAGES) +#define MAX_ALIGN_PAGES1 (MAXPAGES1 / UNIT_PAGES) /* get aligned page from offset address */ #define get_aligned_page(offset) ((offset) >> PAGE_SHIFT) /* get offset address from aligned page */ @@ -124,7 +125,7 @@ static int search_empty_map_area(struct snd_emu10k1 *emu, int npages, struct lis } page = blk->mapped_page + blk->pages; } - size = MAX_ALIGN_PAGES - page; + size = (emu->address_mode ? MAX_ALIGN_PAGES1 : MAX_ALIGN_PAGES0) - page; if (size >= max_size) { *nextp = pos; return page; @@ -181,7 +182,7 @@ static int unmap_memblk(struct snd_emu10k1 *emu, struct snd_emu10k1_memblk *blk) q = get_emu10k1_memblk(p, mapped_link); end_page = q->mapped_page; } else - end_page = MAX_ALIGN_PAGES; + end_page = (emu->address_mode ? MAX_ALIGN_PAGES1 : MAX_ALIGN_PAGES0); /* remove links */ list_del(&blk->mapped_link); @@ -305,7 +306,7 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst if (snd_BUG_ON(!emu)) return NULL; if (snd_BUG_ON(runtime->dma_bytes <= 0 || - runtime->dma_bytes >= MAXPAGES * EMUPAGESIZE)) + runtime->dma_bytes >= (emu->address_mode ? MAXPAGES1 : MAXPAGES0) * EMUPAGESIZE)) return NULL; hdr = emu->memhdr; if (snd_BUG_ON(!hdr)) From 4453c82b204f8ba4d3c107f04bd4f83c3132bde6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 10:36:11 +0200 Subject: [PATCH 242/277] ALSA: hda - Fix mute-LED fixed mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ee52e56e7b12834476cd0031c5986254ba1b6317 upstream. The mute-LED mode control has the fixed on/off states that are supposed to remain on/off regardless of the master switch. However, this doesn't work actually because the vmaster hook is called in the vmaster code itself. This patch fixes it by calling the hook indirectly after checking the mute LED mode. Reported-and-tested-by: Pali Rohár Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 83a0f9b4452b..68261a778ee5 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2078,6 +2078,16 @@ int snd_hda_codec_amp_init_stereo(struct hda_codec *codec, hda_nid_t nid, } EXPORT_SYMBOL_HDA(snd_hda_codec_amp_init_stereo); +/* meta hook to call each driver's vmaster hook */ +static void vmaster_hook(void *private_data, int enabled) +{ + struct hda_vmaster_mute_hook *hook = private_data; + + if (hook->mute_mode != HDA_VMUTE_FOLLOW_MASTER) + enabled = hook->mute_mode; + hook->hook(hook->codec, enabled); +} + /** * snd_hda_codec_resume_amp - Resume all AMP commands from the cache * @codec: HD-audio codec @@ -2772,9 +2782,9 @@ int snd_hda_add_vmaster_hook(struct hda_codec *codec, if (!hook->hook || !hook->sw_kctl) return 0; - snd_ctl_add_vmaster_hook(hook->sw_kctl, hook->hook, codec); hook->codec = codec; hook->mute_mode = HDA_VMUTE_FOLLOW_MASTER; + snd_ctl_add_vmaster_hook(hook->sw_kctl, vmaster_hook, hook); if (!expose_enum_ctl) return 0; kctl = snd_ctl_new1(&vmaster_mute_mode, hook); @@ -2797,14 +2807,7 @@ void snd_hda_sync_vmaster_hook(struct hda_vmaster_mute_hook *hook) */ if (hook->codec->bus->shutdown) return; - switch (hook->mute_mode) { - case HDA_VMUTE_FOLLOW_MASTER: - snd_ctl_sync_vmaster_hook(hook->sw_kctl); - break; - default: - hook->hook(hook->codec, hook->mute_mode); - break; - } + snd_ctl_sync_vmaster_hook(hook->sw_kctl); } EXPORT_SYMBOL_HDA(snd_hda_sync_vmaster_hook); From 31608b7e75bf65e0ca6a50e1d27c5583a223e283 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 14 Apr 2015 12:03:09 +0200 Subject: [PATCH 243/277] serial: of-serial: Remove device_type = "serial" registration commit 6befa9d883385c580369a2cc9e53fbf329771f6d upstream. Do not probe all serial drivers by of_serial.c which are using device_type = "serial"; property. Only drivers which have valid compatible strings listed in the driver should be probed. When PORT_UNKNOWN is setup probe will fail anyway. Arnd quotation about driver historical background: "when I wrote that driver initially, the idea was that it would get used as a stub to hook up all other serial drivers but after that, the common code learned to create platform devices from DT" This patch fix the problem with on the system with xilinx_uartps and 16550a where of_serial failed to register for xilinx_uartps and because of irq_dispose_mapping() removed irq_desc. Then when xilinx_uartps was asking for irq with request_irq() EINVAL is returned. Signed-off-by: Michal Simek Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/of_serial.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index 39c7ea4cb14f..2225f83f4c04 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -262,7 +262,6 @@ static struct of_device_id of_platform_serial_table[] = { { .compatible = "ibm,qpace-nwp-serial", .data = (void *)PORT_NWPSERIAL, }, #endif - { .type = "serial", .data = (void *)PORT_UNKNOWN, }, { /* end of list */ }, }; From be13cfbdf094e1303333e4adf41bd7cd749db4a0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 25 Apr 2015 15:56:15 +0300 Subject: [PATCH 244/277] rbd: end I/O the entire obj_request on error commit 082a75dad84d79d1c15ea9e50f31cb4bb4fa7fd6 upstream. When we end I/O struct request with error, we need to pass obj_request->length as @nr_bytes so that the entire obj_request worth of bytes is completed. Otherwise block layer ends up confused and we trip on rbd_assert(more ^ (which == img_request->obj_request_count)); in rbd_img_obj_callback() due to more being true no matter what. We already do it in most cases but we are missing some, in particular those where we don't even get a chance to submit any obj_requests, due to an early -ENOMEM for example. A number of obj_request->xferred assignments seem to be redundant but I haven't touched any of obj_request->xferred stuff to keep this small and isolated. Cc: Alex Elder Reported-by: Shawn Edwards Reviewed-by: Sage Weil Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7e3f45105f11..dd297099c99d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2115,6 +2115,11 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) result, xferred); if (!img_request->result) img_request->result = result; + /* + * Need to end I/O on the entire obj_request worth of + * bytes in case of error. + */ + xferred = obj_request->length; } /* Image object requests don't own their page array */ From 5a7d1e16a425786d8f6f5d4f707bb4dc879499d1 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sat, 2 May 2015 21:36:55 -0400 Subject: [PATCH 245/277] ext4: fix data corruption caused by unwritten and delayed extents commit d2dc317d564a46dfc683978a2e5a4f91434e9711 upstream. Currently it is possible to lose whole file system block worth of data when we hit the specific interaction with unwritten and delayed extents in status extent tree. The problem is that when we insert delayed extent into extent status tree the only way to get rid of it is when we write out delayed buffer. However there is a limitation in the extent status tree implementation so that when inserting unwritten extent should there be even a single delayed block the whole unwritten extent would be marked as delayed. At this point, there is no way to get rid of the delayed extents, because there are no delayed buffers to write out. So when a we write into said unwritten extent we will convert it to written, but it still remains delayed. When we try to write into that block later ext4_da_map_blocks() will set the buffer new and delayed and map it to invalid block which causes the rest of the block to be zeroed loosing already written data. For now we can fix this by simply not allowing to set delayed status on written extent in the extent status tree. Also add WARN_ON() to make sure that we notice if this happens in the future. This problem can be easily reproduced by running the following xfs_io. xfs_io -f -c "pwrite -S 0xaa 4096 2048" \ -c "falloc 0 131072" \ -c "pwrite -S 0xbb 65536 2048" \ -c "fsync" /mnt/test/fff echo 3 > /proc/sys/vm/drop_caches xfs_io -c "pwrite -S 0xdd 67584 2048" /mnt/test/fff This can be theoretically also reproduced by at random by running fsx, but it's not very reliable, though on machines with bigger page size (like ppc) this can be seen more often (especially xfstest generic/127) Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents_status.c | 8 ++++++++ fs/ext4/inode.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e6941e622d31..1fefeb7d14db 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -655,6 +655,14 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, BUG_ON(end < lblk); + if ((status & EXTENT_STATUS_DELAYED) && + (status & EXTENT_STATUS_WRITTEN)) { + ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as " + " delayed and written which can potentially " + " cause data loss.\n", lblk, len); + WARN_ON(1); + } + newes.es_lblk = lblk; newes.es_len = len; ext4_es_store_pblock(&newes, pblk); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e48bd5a1814b..9e3d8dd6c40a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -626,6 +626,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && + !(status & EXTENT_STATUS_WRITTEN) && ext4_find_delalloc_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1)) status |= EXTENT_STATUS_DELAYED; @@ -736,6 +737,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && + !(status & EXTENT_STATUS_WRITTEN) && ext4_find_delalloc_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1)) status |= EXTENT_STATUS_DELAYED; From 1dff8de4ceeab5ea88372ec10966faa8e5817d51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Apr 2015 09:48:50 +0200 Subject: [PATCH 246/277] 3w-xxxx: fix command completion race commit 9cd9554615cba14f0877cc9972a6537ad2bdde61 upstream. The 3w-xxxx driver needs to tear down the dma mappings before returning the command to the midlayer, as there is no guarantee the sglist and count are valid after that point. Also remove the dma mapping helpers which have another inherent race due to the request_id index. Signed-off-by: Christoph Hellwig Acked-by: Adam Radford Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-xxxx.c | 42 ++++++------------------------------------ drivers/scsi/3w-xxxx.h | 5 ----- 2 files changed, 6 insertions(+), 41 deletions(-) diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index 430ee3774c3b..8843ad783b41 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -1283,32 +1283,6 @@ static int tw_initialize_device_extension(TW_Device_Extension *tw_dev) return 0; } /* End tw_initialize_device_extension() */ -static int tw_map_scsi_sg_data(struct pci_dev *pdev, struct scsi_cmnd *cmd) -{ - int use_sg; - - dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data()\n"); - - use_sg = scsi_dma_map(cmd); - if (use_sg < 0) { - printk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data(): pci_map_sg() failed.\n"); - return 0; - } - - cmd->SCp.phase = TW_PHASE_SGLIST; - cmd->SCp.have_data_in = use_sg; - - return use_sg; -} /* End tw_map_scsi_sg_data() */ - -static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd) -{ - dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n"); - - if (cmd->SCp.phase == TW_PHASE_SGLIST) - scsi_dma_unmap(cmd); -} /* End tw_unmap_scsi_data() */ - /* This function will reset a device extension */ static int tw_reset_device_extension(TW_Device_Extension *tw_dev) { @@ -1331,8 +1305,8 @@ static int tw_reset_device_extension(TW_Device_Extension *tw_dev) srb = tw_dev->srb[i]; if (srb != NULL) { srb->result = (DID_RESET << 16); - tw_dev->srb[i]->scsi_done(tw_dev->srb[i]); - tw_unmap_scsi_data(tw_dev->tw_pci_dev, tw_dev->srb[i]); + scsi_dma_unmap(srb); + srb->scsi_done(srb); } } } @@ -1779,8 +1753,8 @@ static int tw_scsiop_read_write(TW_Device_Extension *tw_dev, int request_id) command_packet->byte8.io.lba = lba; command_packet->byte6.block_count = num_sectors; - use_sg = tw_map_scsi_sg_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]); - if (!use_sg) + use_sg = scsi_dma_map(srb); + if (use_sg <= 0) return 1; scsi_for_each_sg(tw_dev->srb[request_id], sg, use_sg, i) { @@ -1967,9 +1941,6 @@ static int tw_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_c /* Save the scsi command for use by the ISR */ tw_dev->srb[request_id] = SCpnt; - /* Initialize phase to zero */ - SCpnt->SCp.phase = TW_PHASE_INITIAL; - switch (*command) { case READ_10: case READ_6: @@ -2196,12 +2167,11 @@ static irqreturn_t tw_interrupt(int irq, void *dev_instance) /* Now complete the io */ if ((error != TW_ISR_DONT_COMPLETE)) { + scsi_dma_unmap(tw_dev->srb[request_id]); + tw_dev->srb[request_id]->scsi_done(tw_dev->srb[request_id]); tw_dev->state[request_id] = TW_S_COMPLETED; tw_state_request_finish(tw_dev, request_id); tw_dev->posted_request_count--; - tw_dev->srb[request_id]->scsi_done(tw_dev->srb[request_id]); - - tw_unmap_scsi_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]); } } diff --git a/drivers/scsi/3w-xxxx.h b/drivers/scsi/3w-xxxx.h index 49dcf03c631a..1d31858766ce 100644 --- a/drivers/scsi/3w-xxxx.h +++ b/drivers/scsi/3w-xxxx.h @@ -195,11 +195,6 @@ static unsigned char tw_sense_table[][4] = #define TW_AEN_SMART_FAIL 0x000F #define TW_AEN_SBUF_FAIL 0x0024 -/* Phase defines */ -#define TW_PHASE_INITIAL 0 -#define TW_PHASE_SINGLE 1 -#define TW_PHASE_SGLIST 2 - /* Misc defines */ #define TW_ALIGNMENT_6000 64 /* 64 bytes */ #define TW_ALIGNMENT_7000 4 /* 4 bytes */ From ea5f56ae530677ac4a671c6aff9f0c270b1b12ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Apr 2015 09:48:51 +0200 Subject: [PATCH 247/277] 3w-9xxx: fix command completion race commit 118c855b5623f3e2e6204f02623d88c09e0c34de upstream. The 3w-9xxx driver needs to tear down the dma mappings before returning the command to the midlayer, as there is no guarantee the sglist and count are valid after that point. Also remove the dma mapping helpers which have another inherent race due to the request_id index. Signed-off-by: Christoph Hellwig Acked-by: Adam Radford Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-9xxx.c | 57 ++++++++++-------------------------------- drivers/scsi/3w-9xxx.h | 5 ---- 2 files changed, 13 insertions(+), 49 deletions(-) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 0a7325361d29..5f57e3d35e26 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -149,7 +149,6 @@ static int twa_reset_sequence(TW_Device_Extension *tw_dev, int soft_reset); static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg); static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id); static char *twa_string_lookup(twa_message_type *table, unsigned int aen_code); -static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id); /* Functions */ @@ -1352,11 +1351,11 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance) } /* Now complete the io */ + scsi_dma_unmap(cmd); + cmd->scsi_done(cmd); tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); tw_dev->posted_request_count--; - tw_dev->srb[request_id]->scsi_done(tw_dev->srb[request_id]); - twa_unmap_scsi_data(tw_dev, request_id); } /* Check for valid status after each drain */ @@ -1414,26 +1413,6 @@ static void twa_load_sgl(TW_Device_Extension *tw_dev, TW_Command_Full *full_comm } } /* End twa_load_sgl() */ -/* This function will perform a pci-dma mapping for a scatter gather list */ -static int twa_map_scsi_sg_data(TW_Device_Extension *tw_dev, int request_id) -{ - int use_sg; - struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - - use_sg = scsi_dma_map(cmd); - if (!use_sg) - return 0; - else if (use_sg < 0) { - TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1c, "Failed to map scatter gather list"); - return 0; - } - - cmd->SCp.phase = TW_PHASE_SGLIST; - cmd->SCp.have_data_in = use_sg; - - return use_sg; -} /* End twa_map_scsi_sg_data() */ - /* This function will poll for a response interrupt of a request */ static int twa_poll_response(TW_Device_Extension *tw_dev, int request_id, int seconds) { @@ -1612,9 +1591,11 @@ static int twa_reset_device_extension(TW_Device_Extension *tw_dev) (tw_dev->state[i] != TW_S_INITIAL) && (tw_dev->state[i] != TW_S_COMPLETED)) { if (tw_dev->srb[i]) { - tw_dev->srb[i]->result = (DID_RESET << 16); - tw_dev->srb[i]->scsi_done(tw_dev->srb[i]); - twa_unmap_scsi_data(tw_dev, i); + struct scsi_cmnd *cmd = tw_dev->srb[i]; + + cmd->result = (DID_RESET << 16); + scsi_dma_unmap(cmd); + cmd->scsi_done(cmd); } } } @@ -1793,21 +1774,18 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_ /* Save the scsi command for use by the ISR */ tw_dev->srb[request_id] = SCpnt; - /* Initialize phase to zero */ - SCpnt->SCp.phase = TW_PHASE_INITIAL; - retval = twa_scsiop_execute_scsi(tw_dev, request_id, NULL, 0, NULL); switch (retval) { case SCSI_MLQUEUE_HOST_BUSY: + scsi_dma_unmap(SCpnt); twa_free_request_id(tw_dev, request_id); - twa_unmap_scsi_data(tw_dev, request_id); break; case 1: + SCpnt->result = (DID_ERROR << 16); + scsi_dma_unmap(SCpnt); + done(SCpnt); tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); - twa_unmap_scsi_data(tw_dev, request_id); - SCpnt->result = (DID_ERROR << 16); - done(SCpnt); retval = 0; } out: @@ -1875,8 +1853,8 @@ static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, command_packet->sg_list[0].address = TW_CPU_TO_SGL(tw_dev->generic_buffer_phys[request_id]); command_packet->sg_list[0].length = cpu_to_le32(TW_MIN_SGL_LENGTH); } else { - sg_count = twa_map_scsi_sg_data(tw_dev, request_id); - if (sg_count == 0) + sg_count = scsi_dma_map(srb); + if (sg_count < 0) goto out; scsi_for_each_sg(srb, sg, sg_count, i) { @@ -1991,15 +1969,6 @@ static char *twa_string_lookup(twa_message_type *table, unsigned int code) return(table[index].text); } /* End twa_string_lookup() */ -/* This function will perform a pci-dma unmap */ -static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id) -{ - struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - - if (cmd->SCp.phase == TW_PHASE_SGLIST) - scsi_dma_unmap(cmd); -} /* End twa_unmap_scsi_data() */ - /* This function gets called when a disk is coming on-line */ static int twa_slave_configure(struct scsi_device *sdev) { diff --git a/drivers/scsi/3w-9xxx.h b/drivers/scsi/3w-9xxx.h index 040f7214e5b7..0fdc83cfa0e1 100644 --- a/drivers/scsi/3w-9xxx.h +++ b/drivers/scsi/3w-9xxx.h @@ -324,11 +324,6 @@ static twa_message_type twa_error_table[] = { #define TW_CURRENT_DRIVER_BUILD 0 #define TW_CURRENT_DRIVER_BRANCH 0 -/* Phase defines */ -#define TW_PHASE_INITIAL 0 -#define TW_PHASE_SINGLE 1 -#define TW_PHASE_SGLIST 2 - /* Misc defines */ #define TW_9550SX_DRAIN_COMPLETED 0xFFFF #define TW_SECTOR_SIZE 512 From 7332f3217887bf622efd06ef39e59dbae60d63e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Apr 2015 09:48:49 +0200 Subject: [PATCH 248/277] 3w-sas: fix command completion race commit 579d69bc1fd56d5af5761969aa529d1d1c188300 upstream. The 3w-sas driver needs to tear down the dma mappings before returning the command to the midlayer, as there is no guarantee the sglist and count are valid after that point. Also remove the dma mapping helpers which have another inherent race due to the request_id index. Signed-off-by: Christoph Hellwig Reported-by: Torsten Luettgert Tested-by: Bernd Kardatzki Acked-by: Adam Radford Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-sas.c | 50 +++++++++---------------------------------- drivers/scsi/3w-sas.h | 4 ---- 2 files changed, 10 insertions(+), 44 deletions(-) diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index 4de346017e9f..61702ac00d42 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -303,26 +303,6 @@ static int twl_post_command_packet(TW_Device_Extension *tw_dev, int request_id) return 0; } /* End twl_post_command_packet() */ -/* This function will perform a pci-dma mapping for a scatter gather list */ -static int twl_map_scsi_sg_data(TW_Device_Extension *tw_dev, int request_id) -{ - int use_sg; - struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - - use_sg = scsi_dma_map(cmd); - if (!use_sg) - return 0; - else if (use_sg < 0) { - TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1, "Failed to map scatter gather list"); - return 0; - } - - cmd->SCp.phase = TW_PHASE_SGLIST; - cmd->SCp.have_data_in = use_sg; - - return use_sg; -} /* End twl_map_scsi_sg_data() */ - /* This function hands scsi cdb's to the firmware */ static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry_ISO *sglistarg) { @@ -370,8 +350,8 @@ static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, if (!sglistarg) { /* Map sglist from scsi layer to cmd packet */ if (scsi_sg_count(srb)) { - sg_count = twl_map_scsi_sg_data(tw_dev, request_id); - if (sg_count == 0) + sg_count = scsi_dma_map(srb); + if (sg_count <= 0) goto out; scsi_for_each_sg(srb, sg, sg_count, i) { @@ -1116,15 +1096,6 @@ static int twl_initialize_device_extension(TW_Device_Extension *tw_dev) return retval; } /* End twl_initialize_device_extension() */ -/* This function will perform a pci-dma unmap */ -static void twl_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id) -{ - struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - - if (cmd->SCp.phase == TW_PHASE_SGLIST) - scsi_dma_unmap(cmd); -} /* End twl_unmap_scsi_data() */ - /* This function will handle attention interrupts */ static int twl_handle_attention_interrupt(TW_Device_Extension *tw_dev) { @@ -1265,11 +1236,11 @@ static irqreturn_t twl_interrupt(int irq, void *dev_instance) } /* Now complete the io */ + scsi_dma_unmap(cmd); + cmd->scsi_done(cmd); tw_dev->state[request_id] = TW_S_COMPLETED; twl_free_request_id(tw_dev, request_id); tw_dev->posted_request_count--; - tw_dev->srb[request_id]->scsi_done(tw_dev->srb[request_id]); - twl_unmap_scsi_data(tw_dev, request_id); } /* Check for another response interrupt */ @@ -1414,10 +1385,12 @@ static int twl_reset_device_extension(TW_Device_Extension *tw_dev, int ioctl_res if ((tw_dev->state[i] != TW_S_FINISHED) && (tw_dev->state[i] != TW_S_INITIAL) && (tw_dev->state[i] != TW_S_COMPLETED)) { - if (tw_dev->srb[i]) { - tw_dev->srb[i]->result = (DID_RESET << 16); - tw_dev->srb[i]->scsi_done(tw_dev->srb[i]); - twl_unmap_scsi_data(tw_dev, i); + struct scsi_cmnd *cmd = tw_dev->srb[i]; + + if (cmd) { + cmd->result = (DID_RESET << 16); + scsi_dma_unmap(cmd); + cmd->scsi_done(cmd); } } } @@ -1521,9 +1494,6 @@ static int twl_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_ /* Save the scsi command for use by the ISR */ tw_dev->srb[request_id] = SCpnt; - /* Initialize phase to zero */ - SCpnt->SCp.phase = TW_PHASE_INITIAL; - retval = twl_scsiop_execute_scsi(tw_dev, request_id, NULL, 0, NULL); if (retval) { tw_dev->state[request_id] = TW_S_COMPLETED; diff --git a/drivers/scsi/3w-sas.h b/drivers/scsi/3w-sas.h index d474892701d4..fec6449c7595 100644 --- a/drivers/scsi/3w-sas.h +++ b/drivers/scsi/3w-sas.h @@ -103,10 +103,6 @@ static char *twl_aen_severity_table[] = #define TW_CURRENT_DRIVER_BUILD 0 #define TW_CURRENT_DRIVER_BRANCH 0 -/* Phase defines */ -#define TW_PHASE_INITIAL 0 -#define TW_PHASE_SGLIST 2 - /* Misc defines */ #define TW_SECTOR_SIZE 512 #define TW_MAX_UNITS 32 From 228dae652188ab1e5ca96a6bb1adeb6acc2c43fd Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 13 Feb 2015 14:57:54 -0600 Subject: [PATCH 249/277] usb: host: oxu210hp: use new USB_RESUME_TIMEOUT commit 84c0d178eb9f3a3ae4d63dc97a440266cf17f7f5 upstream. Make sure we're using the new macro, so our resume signaling will always pass certification. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/oxu210hp-hcd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c index 0f401dbfaf07..b5c4f4d81a38 100644 --- a/drivers/usb/host/oxu210hp-hcd.c +++ b/drivers/usb/host/oxu210hp-hcd.c @@ -2497,11 +2497,12 @@ static irqreturn_t oxu210_hcd_irq(struct usb_hcd *hcd) || oxu->reset_done[i] != 0) continue; - /* start 20 msec resume signaling from this port, - * and make khubd collect PORT_STAT_C_SUSPEND to + /* start USB_RESUME_TIMEOUT resume signaling from this + * port, and make hub_wq collect PORT_STAT_C_SUSPEND to * stop that signaling. */ - oxu->reset_done[i] = jiffies + msecs_to_jiffies(20); + oxu->reset_done[i] = jiffies + + msecs_to_jiffies(USB_RESUME_TIMEOUT); oxu_dbg(oxu, "port %d remote wakeup\n", i + 1); mod_timer(&hcd->rh_timer, oxu->reset_done[i]); } From db8349883cf762fe8862b514c1b510d04cb39928 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Tue, 3 Mar 2015 10:52:05 +0100 Subject: [PATCH 250/277] usb: gadget: printer: enqueue printer's response for setup request commit eb132ccbdec5df46e29c9814adf76075ce83576b upstream. Function-specific setup requests should be handled in such a way, that apart from filling in the data buffer, the requests are also actually enqueued: if function-specific setup is called from composte_setup(), the "usb_ep_queue()" block of code in composite_setup() is skipped. The printer function lacks this part and it results in e.g. get device id requests failing: the host expects some response, the device prepares it but does not equeue it for sending to the host, so the host finally asserts timeout. This patch adds enqueueing the prepared responses. Fixes: 2e87edf49227: "usb: gadget: make g_printer use composite" Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Felipe Balbi [ported to stable 3.10 and 3.14] Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/printer.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c index bf7a56b6d48a..a0dfdbddbf08 100644 --- a/drivers/usb/gadget/printer.c +++ b/drivers/usb/gadget/printer.c @@ -975,6 +975,15 @@ static int printer_func_setup(struct usb_function *f, break; } /* host either stalls (value < 0) or reports success */ + if (value >= 0) { + req->length = value; + req->zero = value < wLength; + value = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC); + if (value < 0) { + ERROR(dev, "%s:%d Error!\n", __func__, __LINE__); + req->status = 0; + } + } return value; } From b68cec9dc947f92bf8cdbe9cadcbb7583f5b3d14 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 24 Mar 2015 16:29:32 +0530 Subject: [PATCH 251/277] staging: panel: fix lcd type commit 2c20d92dad5db6440cfa88d811b69fd605240ce4 upstream. the lcd type as defined in the Kconfig is not matching in the code. as a result the rs, rw and en pins were getting interchanged. Kconfig defines the value of PANEL_LCD to be 1 if we select custom configuration but in the code LCD_TYPE_CUSTOM is defined as 5. my hardware is LCD_TYPE_CUSTOM, but the pins were assigned to it as pins of LCD_TYPE_OLD, and it was not working. Now values are corrected with referenece to the values defined in Kconfig and it is working. checked on JHD204A lcd with LCD_TYPE_CUSTOM configuration. Signed-off-by: Sudip Mukherjee Acked-by: Willy Tarreau [wt: backport to 3.10 and 3.14] Signed-off-by: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/staging/panel/panel.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c index c54df3948e20..4d0a87b93b96 100644 --- a/drivers/staging/panel/panel.c +++ b/drivers/staging/panel/panel.c @@ -275,11 +275,11 @@ static unsigned char lcd_bits[LCD_PORTS][LCD_BITS][BIT_STATES]; * LCD types */ #define LCD_TYPE_NONE 0 -#define LCD_TYPE_OLD 1 -#define LCD_TYPE_KS0074 2 -#define LCD_TYPE_HANTRONIX 3 -#define LCD_TYPE_NEXCOM 4 -#define LCD_TYPE_CUSTOM 5 +#define LCD_TYPE_CUSTOM 1 +#define LCD_TYPE_OLD 2 +#define LCD_TYPE_KS0074 3 +#define LCD_TYPE_HANTRONIX 4 +#define LCD_TYPE_NEXCOM 5 /* * keypad types @@ -457,8 +457,7 @@ MODULE_PARM_DESC(keypad_enabled, "Deprecated option, use keypad_type instead"); static int lcd_type = -1; module_param(lcd_type, int, 0000); MODULE_PARM_DESC(lcd_type, - "LCD type: 0=none, 1=old //, 2=serial ks0074, " - "3=hantronix //, 4=nexcom //, 5=compiled-in"); + "LCD type: 0=none, 1=compiled-in, 2=old, 3=serial ks0074, 4=hantronix, 5=nexcom"); static int lcd_proto = -1; module_param(lcd_proto, int, 0000); From 0fe8926482939a49ea62ec4b8803f1b67eb26f6c Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Tue, 17 Feb 2015 19:52:04 +0100 Subject: [PATCH 252/277] ARM: dts: dove: Fix uart[23] reg property commit a74cd13b807029397f7232449df929bac11fb228 upstream. Fix Dove's register addresses of uart2 and uart3 nodes that seem to be broken since ages due to a copy-and-paste error. Signed-off-by: Sebastian Hesselbarth Acked-by: Gregory CLEMENT Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dove.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi index 6cab46849cdb..d1e47092d21a 100644 --- a/arch/arm/boot/dts/dove.dtsi +++ b/arch/arm/boot/dts/dove.dtsi @@ -75,7 +75,7 @@ uart1: serial@12100 { uart2: serial@12200 { compatible = "ns16550a"; - reg = <0x12000 0x100>; + reg = <0x12200 0x100>; reg-shift = <2>; interrupts = <9>; clocks = <&core_clk 0>; @@ -84,7 +84,7 @@ uart2: serial@12200 { uart3: serial@12300 { compatible = "ns16550a"; - reg = <0x12100 0x100>; + reg = <0x12300 0x100>; reg-shift = <2>; interrupts = <10>; clocks = <&core_clk 0>; From b5837189c92212ed5b14a0cc7aca4e477d230bd8 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Mar 2015 08:11:34 -0700 Subject: [PATCH 253/277] Drivers: hv: vmbus: Don't wait after requesting offers commit 73cffdb65e679b98893f484063462c045adcf212 upstream. Don't wait after sending request for offers to the host. This wait is unnecessary and simply adds 5 seconds to the boot time. Signed-off-by: K. Y. Srinivasan Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 21ef68934a20..edf8995cb3b3 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -648,7 +648,7 @@ int vmbus_request_offers(void) { struct vmbus_channel_message_header *msg; struct vmbus_channel_msginfo *msginfo; - int ret, t; + int ret; msginfo = kmalloc(sizeof(*msginfo) + sizeof(struct vmbus_channel_message_header), @@ -656,8 +656,6 @@ int vmbus_request_offers(void) if (!msginfo) return -ENOMEM; - init_completion(&msginfo->waitevent); - msg = (struct vmbus_channel_message_header *)msginfo->msg; msg->msgtype = CHANNELMSG_REQUESTOFFERS; @@ -671,14 +669,6 @@ int vmbus_request_offers(void) goto cleanup; } - t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ); - if (t == 0) { - ret = -ETIMEDOUT; - goto cleanup; - } - - - cleanup: kfree(msginfo); From ad233850c2fa3057c0ba86e674ed9c26cc96ddd5 Mon Sep 17 00:00:00 2001 From: hujianyang Date: Tue, 30 Dec 2014 11:56:09 +0800 Subject: [PATCH 254/277] UBI: fix soft lockup in ubi_check_volume() commit 9aa272b492e7551a9ee0e2c83c720ea013698485 upstream. Running mtd-utils/tests/ubi-tests/io_basic.c could cause soft lockup or watchdog reset. It is because *updatevol* will perform ubi_check_volume() after updating finish and this function will full scan the updated lebs if the volume is initialized as STATIC_VOLUME. This patch adds *cond_resched()* in the loop of lebs scan to avoid soft lockup. Helped by Richard Weinberger [ 2158.067096] INFO: rcu_sched self-detected stall on CPU { 1} (t=2101 jiffies g=1606 c=1605 q=56) [ 2158.172867] CPU: 1 PID: 2073 Comm: io_basic Tainted: G O 3.10.53 #21 [ 2158.172898] [] (unwind_backtrace+0x0/0x120) from [] (show_stack+0x10/0x14) [ 2158.172918] [] (show_stack+0x10/0x14) from [] (rcu_check_callbacks+0x1c0/0x660) [ 2158.172936] [] (rcu_check_callbacks+0x1c0/0x660) from [] (update_process_times+0x38/0x64) [ 2158.172953] [] (update_process_times+0x38/0x64) from [] (tick_sched_handle+0x54/0x60) [ 2158.172966] [] (tick_sched_handle+0x54/0x60) from [] (tick_sched_timer+0x44/0x74) [ 2158.172978] [] (tick_sched_timer+0x44/0x74) from [] (__run_hrtimer+0xc8/0x1b8) [ 2158.172992] [] (__run_hrtimer+0xc8/0x1b8) from [] (hrtimer_interrupt+0x128/0x2a4) [ 2158.173007] [] (hrtimer_interrupt+0x128/0x2a4) from [] (arch_timer_handler_virt+0x28/0x30) [ 2158.173022] [] (arch_timer_handler_virt+0x28/0x30) from [] (handle_percpu_devid_irq+0x9c/0x124) [ 2158.173036] [] (handle_percpu_devid_irq+0x9c/0x124) from [] (generic_handle_irq+0x20/0x30) [ 2158.173049] [] (generic_handle_irq+0x20/0x30) from [] (handle_IRQ+0x64/0x8c) [ 2158.173060] [] (handle_IRQ+0x64/0x8c) from [] (gic_handle_irq+0x3c/0x60) [ 2158.173074] [] (gic_handle_irq+0x3c/0x60) from [] (__irq_svc+0x40/0x50) [ 2158.173083] Exception stack(0xc4043c98 to 0xc4043ce0) [ 2158.173092] 3c80: c4043ce4 00000019 [ 2158.173102] 3ca0: 1f8a865f c050ad10 1f8a864c 00000031 c04b5970 0003ebce 00000000 f3550000 [ 2158.173113] 3cc0: bf00bc68 00000800 0003ebce c4043ce0 c0186d14 c0186cb8 80000013 ffffffff [ 2158.173130] [] (__irq_svc+0x40/0x50) from [] (read_current_timer+0x4/0x38) [ 2158.173145] [] (read_current_timer+0x4/0x38) from [<1f8a865f>] (0x1f8a865f) [ 2183.927097] BUG: soft lockup - CPU#1 stuck for 22s! [io_basic:2073] [ 2184.002229] Modules linked in: nandflash(O) [last unloaded: nandflash] Signed-off-by: Wang Kai Signed-off-by: hujianyang Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/misc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c index f913d701a5b3..c4b1af07a121 100644 --- a/drivers/mtd/ubi/misc.c +++ b/drivers/mtd/ubi/misc.c @@ -74,6 +74,8 @@ int ubi_check_volume(struct ubi_device *ubi, int vol_id) for (i = 0; i < vol->used_ebs; i++) { int size; + cond_resched(); + if (i == vol->used_ebs - 1) size = vol->last_eb_bytes; else From eb7b2163c2c1bbac738be514e696d1b1a385c826 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 26 Mar 2015 11:14:41 +0530 Subject: [PATCH 255/277] ARC: signal handling robustify commit e4140819dadc3624accac8294881bca8a3cba4ed upstream. A malicious signal handler / restorer can DOS the system by fudging the user regs saved on stack, causing weird things such as sigreturn returning to user mode PC but cpu state still being kernel mode.... Ensure that in sigreturn path status32 always has U bit; any other bogosity (gargbage PC etc) will be taken care of by normal user mode exceptions mechanisms. Reproducer signal handler: void handle_sig(int signo, siginfo_t *info, void *context) { ucontext_t *uc = context; struct user_regs_struct *regs = &(uc->uc_mcontext.regs); regs->scratch.status32 = 0; } Before the fix, kernel would go off to weeds like below: --------->8----------- [ARCLinux]$ ./signal-test Path: /signal-test CPU: 0 PID: 61 Comm: signal-test Not tainted 4.0.0-rc5+ #65 task: 8f177880 ti: 5ffe6000 task.ti: 8f15c000 [ECR ]: 0x00220200 => Invalid Write @ 0x00000010 by insn @ 0x00010698 [EFA ]: 0x00000010 [BLINK ]: 0x2007c1ee [ERET ]: 0x10698 [STAT32]: 0x00000000 : <-------- BTA: 0x00010680 SP: 0x5ffe7e48 FP: 0x00000000 LPS: 0x20003c6c LPE: 0x20003c70 LPC: 0x00000000 ... --------->8----------- Reported-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/signal.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 7e95e1a86510..6763654239a2 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -131,6 +131,15 @@ SYSCALL_DEFINE0(rt_sigreturn) /* Don't restart from sigreturn */ syscall_wont_restart(regs); + /* + * Ensure that sigreturn always returns to user mode (in case the + * regs saved on user stack got fudged between save and sigreturn) + * Otherwise it is easy to panic the kernel with a custom + * signal handler and/or restorer which clobberes the status32/ret + * to return to a bogus location in kernel mode. + */ + regs->status32 |= STATUS_U_MASK; + return regs->r0; badframe: @@ -234,8 +243,11 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, /* * handler returns using sigreturn stub provided already by userpsace + * If not, nuke the process right away */ - BUG_ON(!(ka->sa.sa_flags & SA_RESTORER)); + if(!(ka->sa.sa_flags & SA_RESTORER)) + return 1; + regs->blink = (unsigned long)ka->sa.sa_restorer; /* User Stack for signal handler will be above the frame just carved */ @@ -302,12 +314,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, struct pt_regs *regs) { sigset_t *oldset = sigmask_to_save(); - int ret; + int failed; /* Set up the stack frame */ - ret = setup_rt_frame(sig, ka, info, oldset, regs); + failed = setup_rt_frame(sig, ka, info, oldset, regs); - if (ret) + if (failed) force_sigsegv(sig, current); else signal_delivered(sig, info, ka, regs, 0); From c9cc129b6976b178041deaa3f68dc6c3784ef16d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 May 2015 05:15:52 -0700 Subject: [PATCH 256/277] Linux 3.10.78 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 923ad8a64e3b..cf99a9b53c6f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 77 +SUBLEVEL = 78 EXTRAVERSION = NAME = TOSSUG Baby Fish From 93705a1b1b18be69065682726c1f775853b6691c Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Tue, 5 May 2015 16:24:02 -0700 Subject: [PATCH 257/277] ocfs2: dlm: fix race between purge and get lock resource commit b1432a2a35565f538586774a03bf277c27fc267d upstream. There is a race window in dlm_get_lock_resource(), which may return a lock resource which has been purged. This will cause the process to hang forever in dlmlock() as the ast msg can't be handled due to its lock resource not existing. dlm_get_lock_resource { ... spin_lock(&dlm->spinlock); tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); if (tmpres) { spin_unlock(&dlm->spinlock); >>>>>>>> race window, dlm_run_purge_list() may run and purge the lock resource spin_lock(&tmpres->spinlock); ... spin_unlock(&tmpres->spinlock); } } Signed-off-by: Junxiao Bi Cc: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmmaster.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 2b941113e423..2c119d5d04c9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -729,6 +729,19 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, if (tmpres) { spin_unlock(&dlm->spinlock); spin_lock(&tmpres->spinlock); + + /* + * Right after dlm spinlock was released, dlm_thread could have + * purged the lockres. Check if lockres got unhashed. If so + * start over. + */ + if (hlist_unhashed(&tmpres->hash_node)) { + spin_unlock(&tmpres->spinlock); + dlm_lockres_put(tmpres); + tmpres = NULL; + goto lookup; + } + /* Wait on the thread that is mastering the resource */ if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { __dlm_wait_on_lockres(tmpres); From c043edcc42acda62fc86eb8a2d03122000421d74 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 May 2015 16:24:00 -0700 Subject: [PATCH 258/277] nilfs2: fix sanity check of btree level in nilfs_btree_root_broken() commit d8fd150fe3935e1692bf57c66691e17409ebb9c1 upstream. The range check for b-tree level parameter in nilfs_btree_root_broken() is wrong; it accepts the case of "level == NILFS_BTREE_LEVEL_MAX" even though the level is limited to values in the range of 0 to (NILFS_BTREE_LEVEL_MAX - 1). Since the level parameter is read from storage device and used to index nilfs_btree_path array whose element count is NILFS_BTREE_LEVEL_MAX, it can cause memory overrun during btree operations if the boundary value is set to the level parameter on device. This fixes the broken sanity check and adds a comment to clarify that the upper bound NILFS_BTREE_LEVEL_MAX is exclusive. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/btree.c | 2 +- include/linux/nilfs2_fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index ecdbae19a766..090d8ce25bd1 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || - level > NILFS_BTREE_LEVEL_MAX || + level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 98755767c7b0..1108acaacfc6 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -458,7 +458,7 @@ struct nilfs_btree_node { /* level */ #define NILFS_BTREE_LEVEL_DATA 0 #define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) -#define NILFS_BTREE_LEVEL_MAX 14 +#define NILFS_BTREE_LEVEL_MAX 14 /* Max level (exclusive) */ /** * struct nilfs_palloc_group_desc - block group descriptor From 0073e613da6fb1f39ab75f0373a63ee30d51d635 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 5 May 2015 16:23:35 -0700 Subject: [PATCH 259/277] mm/memory-failure: call shake_page() when error hits thp tail page commit 09789e5de18e4e442870b2d700831f5cb802eb05 upstream. Currently memory_failure() calls shake_page() to sweep pages out from pcplists only when the victim page is 4kB LRU page or thp head page. But we should do this for a thp tail page too. Consider that a memory error hits a thp tail page whose head page is on a pcplist when memory_failure() runs. Then, the current kernel skips shake_pages() part, so hwpoison_user_mappings() returns without calling split_huge_page() nor try_to_unmap() because PageLRU of the thp head is still cleared due to the skip of shake_page(). As a result, me_huge_page() runs for the thp, which is broken behavior. One effect is a leak of the thp. And another is to fail to isolate the memory error, so later access to the error address causes another MCE, which kills the processes which used the thp. This patch fixes this problem by calling shake_page() for thp tail case. Fixes: 385de35722c9 ("thp: allow a hwpoisoned head page to be put back to LRU") Signed-off-by: Naoya Horiguchi Reviewed-by: Andi Kleen Acked-by: Dean Nelson Cc: Andrea Arcangeli Cc: Hidetoshi Seto Cc: Jin Dongming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 603f1fa1b7a3..ca96f411b034 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1117,10 +1117,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * The check (unnecessarily) ignores LRU pages being isolated and * walked by the page reclaim code, however that's not a big loss. */ - if (!PageHuge(p) && !PageTransTail(p)) { - if (!PageLRU(p)) - shake_page(p, 0); - if (!PageLRU(p)) { + if (!PageHuge(p)) { + if (!PageLRU(hpage)) + shake_page(hpage, 0); + if (!PageLRU(hpage)) { /* * shake_page could have turned it free. */ From 677c040f35923da4c9fadb2e645854044a5c6a8f Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 29 Apr 2015 17:10:14 -0400 Subject: [PATCH 260/277] xen/console: Update console event channel on resume commit b9d934f27c91b878c4b2e64299d6e419a4022f8d upstream. After a resume the hypervisor/tools may change console event channel number. We should re-query it. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_xen.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 4fc32c8091e9..ff92155dbc88 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -299,11 +299,27 @@ static int xen_initial_domain_console_init(void) return 0; } +static void xen_console_update_evtchn(struct xencons_info *info) +{ + if (xen_hvm_domain()) { + uint64_t v; + int err; + + err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); + if (!err && v) + info->evtchn = v; + } else + info->evtchn = xen_start_info->console.domU.evtchn; +} + void xen_console_resume(void) { struct xencons_info *info = vtermno_to_xencons(HVC_COOKIE); - if (info != NULL && info->irq) + if (info != NULL && info->irq) { + if (!xen_initial_domain()) + xen_console_update_evtchn(info); rebind_evtchn_irq(info->evtchn, info->irq); + } } static void xencons_disconnect_backend(struct xencons_info *info) From 864c329b21a1914105a9e591b49def3f30184b65 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 12 Jan 2015 17:12:29 +0100 Subject: [PATCH 261/277] gpio: unregister gpiochip device before removing it commit 01cca93a9491ed95992523ff7e79dd9bfcdea8e0 upstream. Unregister gpiochip device (used to export information through sysfs) before removing it internally. This way removal will reverse addition. Signed-off-by: Johan Hovold Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index ca1cb2d756c2..df4780810e83 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1265,6 +1265,8 @@ int gpiochip_remove(struct gpio_chip *chip) int status = 0; unsigned id; + gpiochip_unexport(chip); + spin_lock_irqsave(&gpio_lock, flags); gpiochip_remove_pin_ranges(chip); @@ -1285,9 +1287,6 @@ int gpiochip_remove(struct gpio_chip *chip) spin_unlock_irqrestore(&gpio_lock, flags); - if (status == 0) - gpiochip_unexport(chip); - return status; } EXPORT_SYMBOL_GPL(gpiochip_remove); From a1f3ecb1f721e5d393f28898350fe3c75fde4c93 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Apr 2015 17:42:09 +0200 Subject: [PATCH 262/277] gpio: sysfs: fix memory leaks and device hotplug commit 483d821108791092798f5d230686868112927044 upstream. Unregister GPIOs requested through sysfs at chip remove to avoid leaking the associated memory and sysfs entries. The stale sysfs entries prevented the gpio numbers from being exported when the gpio range was later reused (e.g. at device reconnect). This also fixes the related module-reference leak. Note that kernfs makes sure that any on-going sysfs operations finish before the class devices are unregistered and that further accesses fail. The chip exported flag is used to prevent gpiod exports during removal. This also makes it harder to trigger, but does not fix, the related race between gpiochip_remove and export_store, which is really a race with gpiod_request that needs to be addressed separately. Also note that this would prevent the crashes (e.g. NULL-dereferences) at reconnect that affects pre-3.18 kernels, as well as use-after-free on operations on open attribute files on pre-3.14 kernels (prior to kernfs). Fixes: d8f388d8dc8d ("gpio: sysfs interface") Signed-off-by: Johan Hovold Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index df4780810e83..c3768fafff45 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -752,6 +752,7 @@ static struct class gpio_class = { */ static int gpiod_export(struct gpio_desc *desc, bool direction_may_change) { + struct gpio_chip *chip; unsigned long flags; int status; const char *ioname = NULL; @@ -769,8 +770,16 @@ static int gpiod_export(struct gpio_desc *desc, bool direction_may_change) return -EINVAL; } + chip = desc->chip; + mutex_lock(&sysfs_lock); + /* check if chip is being removed */ + if (!chip || !chip->exported) { + status = -ENODEV; + goto fail_unlock; + } + spin_lock_irqsave(&gpio_lock, flags); if (!test_bit(FLAG_REQUESTED, &desc->flags) || test_bit(FLAG_EXPORT, &desc->flags)) { @@ -1040,6 +1049,8 @@ static void gpiochip_unexport(struct gpio_chip *chip) { int status; struct device *dev; + struct gpio_desc *desc; + unsigned int i; mutex_lock(&sysfs_lock); dev = class_find_device(&gpio_class, NULL, chip, match_export); @@ -1047,6 +1058,7 @@ static void gpiochip_unexport(struct gpio_chip *chip) sysfs_remove_group(&dev->kobj, &gpiochip_attr_group); put_device(dev); device_unregister(dev); + /* prevent further gpiod exports */ chip->exported = 0; status = 0; } else @@ -1056,6 +1068,13 @@ static void gpiochip_unexport(struct gpio_chip *chip) if (status) pr_debug("%s: chip %s status %d\n", __func__, chip->label, status); + + /* unregister gpiod class devices owned by sysfs */ + for (i = 0; i < chip->ngpio; i++) { + desc = &chip->desc[i]; + if (test_and_clear_bit(FLAG_SYSFS, &desc->flags)) + gpiod_free(desc); + } } static int __init gpiolib_sysfs_init(void) From d4a07af667dea82aff61a90eef52aeddb06cae73 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 24 Apr 2015 09:27:33 +0200 Subject: [PATCH 263/277] ARM: dts: imx25: Add #pwm-cells to pwm4 commit f90d3f0d0a11fa77918fd5497cb616dd2faa8431 upstream. The property '#pwm-cells' is currently missing. It is not possible to use pwm4 without this property. Signed-off-by: Markus Pargmann Fixes: 5658a68fb578 ("ARM i.MX25: Add devicetree") Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx25.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index 97d1a550eb98..2a571bcacaf4 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -393,6 +393,7 @@ slcdc@53fc0000 { pwm4: pwm@53fc8000 { compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; + #pwm-cells = <2>; reg = <0x53fc8000 0x4000>; clocks = <&clks 108>, <&clks 52>; clock-names = "ipg", "per"; From 84164aba196cab1060e595e795d0ad3b97c8e95b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 24 Apr 2015 13:29:47 +0200 Subject: [PATCH 264/277] ARM: dts: imx28: Fix AUART4 TX-DMA interrupt name commit 4ada77e37a773168fea484899201e272ab44ba8b upstream. Fix a typo in the TX DMA interrupt name for AUART4. This patch makes AUART4 operational again. Signed-off-by: Marek Vasut Fixes: f30fb03d4d3a ("ARM: dts: add generic DMA device tree binding for mxs-dma") Acked-by: Stefan Wahren Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt | 2 +- arch/arm/boot/dts/imx28.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt b/Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt index a4873e5e3e36..e30e184f50c7 100644 --- a/Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt +++ b/Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt @@ -38,7 +38,7 @@ dma_apbx: dma-apbx@80024000 { 80 81 68 69 70 71 72 73 74 75 76 77>; - interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty", + interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty", "saif0", "saif1", "i2c0", "i2c1", "auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx", "auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx"; diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 4c10a1968c0e..2e76b84c6bad 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -691,7 +691,7 @@ dma_apbx: dma-apbx@80024000 { 80 81 68 69 70 71 72 73 74 75 76 77>; - interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty", + interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty", "saif0", "saif1", "i2c0", "i2c1", "auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx", "auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx"; From 059b5e34788834b14e022d7f4a6bd4595b35a0e6 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 14 Apr 2015 20:37:26 +0000 Subject: [PATCH 265/277] ARM: dts: imx23-olinuxino: Fix dr_mode of usb0 commit 0fdebe1a2f4d3a8fc03754022fabf8ba95e131a3 upstream. The dr_mode of usb0 on imx233-olinuxino is left to default "otg". Since the green LED (GPIO2_1) on imx233-olinuxino is connected to the same pin as USB_OTG_ID it's possible to disable USB host by LED toggling: echo 0 > /sys/class/leds/green/brightness [ 1068.890000] ci_hdrc ci_hdrc.0: remove, state 1 [ 1068.890000] usb usb1: USB disconnect, device number 1 [ 1068.920000] usb 1-1: USB disconnect, device number 2 [ 1068.920000] usb 1-1.1: USB disconnect, device number 3 [ 1069.070000] usb 1-1.2: USB disconnect, device number 4 [ 1069.450000] ci_hdrc ci_hdrc.0: USB bus 1 deregistered [ 1074.460000] ci_hdrc ci_hdrc.0: timeout waiting for 00000800 in 11 This patch fixes the issue by setting dr_mode to "host" in the dts file. Reported-by: Harald Geyer Signed-off-by: Stefan Wahren Reviewed-by: Fabio Estevam Reviewed-by: Marek Vasut Acked-by: Peter Chen Fixes: b49312948285 ("ARM: dts: imx23-olinuxino: Add USB host support") Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx23-olinuxino.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx23-olinuxino.dts b/arch/arm/boot/dts/imx23-olinuxino.dts index d107c4af321f..6fef54416cbf 100644 --- a/arch/arm/boot/dts/imx23-olinuxino.dts +++ b/arch/arm/boot/dts/imx23-olinuxino.dts @@ -89,6 +89,7 @@ usbphy0: usbphy@8007c000 { ahb@80080000 { usb0: usb@80080000 { + dr_mode = "host"; vbus-supply = <®_usb0_vbus>; status = "okay"; }; From df7f3363fb5c1fd106b245b122c40f2f3d586565 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 14 Apr 2015 11:50:13 +0200 Subject: [PATCH 266/277] ARM: mvebu: armada-xp-openblocks-ax3-4: Disable internal RTC commit 750e30d4076ae5e02ad13a376e96c95a2627742c upstream. There is no crystal connected to the internal RTC on the Open Block AX3. So let's disable it in order to prevent the kernel probing the driver uselessly. Eventually this patches removes the following warning message from the boot log: "rtc-mv d0010300.rtc: internal RTC not ticking" Acked-by: Andrew Lunn Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index 9746d0e7fcb4..5dfb3d354470 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -32,6 +32,10 @@ soc { 0xf0000000 0 0xf0000000 0x8000000 /* Device Bus, NOR 128MiB */>; internal-regs { + rtc@10300 { + /* No crystal connected to the internal RTC */ + status = "disabled"; + }; serial@12000 { clock-frequency = <250000000>; status = "okay"; From 15e767666ce5a2e4e8ce31f8fe7e6576f392054e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 4 May 2015 15:06:49 +0200 Subject: [PATCH 267/277] drm/i915: Add missing MacBook Pro models with dual channel LVDS commit 3916e3fd81021fb795bfbdb17f375b6b3685bced upstream. Single channel LVDS maxes out at 112 MHz. The 15" pre-retina models shipped with 1440x900 (106 MHz) by default or 1680x1050 (119 MHz) as a BTO option, both versions used dual channel LVDS even though the smaller one would have fit into a single channel. Notes: Bug report showing that the MacBookPro8,2 with 1440x900 uses dual channel LVDS (this lead to it being hardcoded in intel_lvds.c by Daniel Vetter with commit 618563e3945b9d0864154bab3c607865b557cecc): https://bugzilla.kernel.org/show_bug.cgi?id=42842 If i915.lvds_channel_mode=2 is missing even though the machine needs it, every other vertical line is white and consequently, only the left half of the screen is visible (verified by myself on a MacBookPro9,1). Forum posting concerning a MacBookPro6,2 with 1440x900, author is using i915.lvds_channel_mode=2 on the kernel command line, proving that the machine uses dual channels: https://bbs.archlinux.org/viewtopic.php?id=185770 Chi Mei N154C6-L04 with 1440x900 is a replacement panel for all MacBook Pro "A1286" models, and that model number encompasses the MacBookPro6,2 / 8,2 / 9,1. Page 17 of the panel's datasheet shows it's driven with dual channel LVDS: http://www.ebay.com/itm/-/400690878560 http://www.everymac.com/ultimate-mac-lookup/?search_keywords=A1286 http://www.taopanel.com/chimei/datasheet/N154C6-L04.pdf Those three 15" models, MacBookPro6,2 / 8,2 / 9,1, are the only ones with i915 graphics and dual channel LVDS, so that list should be complete. And the 8,2 is already in intel_lvds.c. Possible motivation to use dual channel LVDS even on the 1440x900 models: Reduce the number of different parts, i.e. use identical logic boards and display cabling on both versions and the only differing component is the panel. Signed-off-by: Lukas Wunner Acked-by: Jani Nikula [Jani: included notes in the commit message for posterity] Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lvds.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index f5d1dc5b5563..54ebfbe370c6 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -1007,12 +1007,28 @@ static int intel_dual_link_lvds_callback(const struct dmi_system_id *id) static const struct dmi_system_id intel_dual_link_lvds[] = { { .callback = intel_dual_link_lvds_callback, - .ident = "Apple MacBook Pro (Core i5/i7 Series)", + .ident = "Apple MacBook Pro 15\" (2010)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro6,2"), + }, + }, + { + .callback = intel_dual_link_lvds_callback, + .ident = "Apple MacBook Pro 15\" (2011)", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro8,2"), }, }, + { + .callback = intel_dual_link_lvds_callback, + .ident = "Apple MacBook Pro 15\" (2012)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro9,1"), + }, + }, { } /* terminating entry */ }; From a854bc61460f987e5ed716897e8bbfa846b29ce0 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 1 May 2015 09:01:27 -0700 Subject: [PATCH 268/277] pinctrl: Don't just pretend to protect pinctrl_maps, do it for real commit c5272a28566b00cce79127ad382406e0a8650690 upstream. Way back, when the world was a simpler place and there was no war, no evil, and no kernel bugs, there was just a single pinctrl lock. That was how the world was when (57291ce pinctrl: core device tree mapping table parsing support) was written. In that case, there were instances where the pinctrl mutex was already held when pinctrl_register_map() was called, hence a "locked" parameter was passed to the function to indicate that the mutex was already locked (so we shouldn't lock it again). A few years ago in (42fed7b pinctrl: move subsystem mutex to pinctrl_dev struct), we switched to a separate pinctrl_maps_mutex. ...but (oops) we forgot to re-think about the whole "locked" parameter for pinctrl_register_map(). Basically the "locked" parameter appears to still refer to whether the bigger pinctrl_dev mutex is locked, but we're using it to skip locks of our (now separate) pinctrl_maps_mutex. That's kind of a bad thing(TM). Probably nobody noticed because most of the calls to pinctrl_register_map happen at boot time and we've got synchronous device probing. ...and even cases where we're asynchronous don't end up actually hitting the race too often. ...but after banging my head against the wall for a bug that reproduced 1 out of 1000 reboots and lots of looking through kgdb, I finally noticed this. Anyway, we can now safely remove the "locked" parameter and go back to a war-free, evil-free, and kernel-bug-free world. Fixes: 42fed7ba44e4 ("pinctrl: move subsystem mutex to pinctrl_dev struct") Signed-off-by: Doug Anderson Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/core.c | 10 ++++------ drivers/pinctrl/core.h | 2 +- drivers/pinctrl/devicetree.c | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 9c9fc69a01b3..ea40c5139766 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1077,7 +1077,7 @@ void devm_pinctrl_put(struct pinctrl *p) EXPORT_SYMBOL_GPL(devm_pinctrl_put); int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps, - bool dup, bool locked) + bool dup) { int i, ret; struct pinctrl_maps *maps_node; @@ -1145,11 +1145,9 @@ int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps, maps_node->maps = maps; } - if (!locked) - mutex_lock(&pinctrl_maps_mutex); + mutex_lock(&pinctrl_maps_mutex); list_add_tail(&maps_node->node, &pinctrl_maps); - if (!locked) - mutex_unlock(&pinctrl_maps_mutex); + mutex_unlock(&pinctrl_maps_mutex); return 0; } @@ -1164,7 +1162,7 @@ int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps, int pinctrl_register_mappings(struct pinctrl_map const *maps, unsigned num_maps) { - return pinctrl_register_map(maps, num_maps, true, false); + return pinctrl_register_map(maps, num_maps, true); } void pinctrl_unregister_map(struct pinctrl_map const *map) diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h index 75476b3d87da..b24ea846c867 100644 --- a/drivers/pinctrl/core.h +++ b/drivers/pinctrl/core.h @@ -183,7 +183,7 @@ static inline struct pin_desc *pin_desc_get(struct pinctrl_dev *pctldev, } int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps, - bool dup, bool locked); + bool dup); void pinctrl_unregister_map(struct pinctrl_map const *map); extern int pinctrl_force_sleep(struct pinctrl_dev *pctldev); diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index 340fb4e6c600..fd91c4c31f6b 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -92,7 +92,7 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, dt_map->num_maps = num_maps; list_add_tail(&dt_map->node, &p->dt_maps); - return pinctrl_register_map(map, num_maps, false, true); + return pinctrl_register_map(map, num_maps, false); } struct pinctrl_dev *of_pinctrl_get(struct device_node *np) From 6186ada9e4801a27068373fae01d089e41054823 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Tue, 12 Aug 2014 12:01:30 +0800 Subject: [PATCH 269/277] mmc: card: Don't access RPMB partitions for normal read/write commit 4e93b9a6abc0d028daf3c8a00cb77b679d8a4df4 upstream. During kernel boot, it will try to read some logical sectors of each block device node for the possible partition table. But since RPMB partition is special and can not be accessed by normal eMMC read / write CMDs, it will cause below error messages during kernel boot: ... mmc0: Got data interrupt 0x00000002 even though no data operation was in progress. mmcblk0rpmb: error -110 transferring data, sector 0, nr 32, cmd response 0x900, card status 0xb00 mmcblk0rpmb: retrying using single block read mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 end_request: I/O error, dev mmcblk0rpmb, sector 0 Buffer I/O error on device mmcblk0rpmb, logical block 0 end_request: I/O error, dev mmcblk0rpmb, sector 8 Buffer I/O error on device mmcblk0rpmb, logical block 1 end_request: I/O error, dev mmcblk0rpmb, sector 16 Buffer I/O error on device mmcblk0rpmb, logical block 2 end_request: I/O error, dev mmcblk0rpmb, sector 24 Buffer I/O error on device mmcblk0rpmb, logical block 3 ... This patch will discard the access request in eMMC queue if it is RPMB partition access request. By this way, it avoids trigger above error messages. Fixes: 090d25fe224c ("mmc: core: Expose access to RPMB partition") Signed-off-by: Yunpeng Gao Signed-off-by: Chuanxiao Dong Tested-by: Michael Shigorin Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/card/block.c | 12 ++++++++++++ drivers/mmc/card/queue.c | 2 +- drivers/mmc/card/queue.h | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 7ad66823d022..56998eca1a8d 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -908,6 +908,18 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) md->reset_done &= ~type; } +int mmc_access_rpmb(struct mmc_queue *mq) +{ + struct mmc_blk_data *md = mq->data; + /* + * If this is a RPMB partition access, return ture + */ + if (md && md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB) + return true; + + return false; +} + static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) { struct mmc_blk_data *md = mq->data; diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 9447a0e970d1..645519fe3acd 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -37,7 +37,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) return BLKPREP_KILL; } - if (mq && mmc_card_removed(mq->card)) + if (mq && (mmc_card_removed(mq->card) || mmc_access_rpmb(mq))) return BLKPREP_KILL; req->cmd_flags |= REQ_DONTPREP; diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h index 5752d50049a3..99e6521e6169 100644 --- a/drivers/mmc/card/queue.h +++ b/drivers/mmc/card/queue.h @@ -73,4 +73,6 @@ extern void mmc_queue_bounce_post(struct mmc_queue_req *); extern int mmc_packed_init(struct mmc_queue *, struct mmc_card *); extern void mmc_packed_clean(struct mmc_queue *); +extern int mmc_access_rpmb(struct mmc_queue *); + #endif From cccea7f5c40ef96866d02be01e0712985940d102 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 18 Apr 2015 02:53:25 +0300 Subject: [PATCH 270/277] sound/oss: fix deadlock in sequencer_ioctl(SNDCTL_SEQ_OUTOFBAND) commit bc26d4d06e337ade069f33d3f4377593b24e6e36 upstream. A deadlock can be initiated by userspace via ioctl(SNDCTL_SEQ_OUTOFBAND) on /dev/sequencer with TMR_ECHO midi event. In this case the control flow is: sound_ioctl() -> case SND_DEV_SEQ: case SND_DEV_SEQ2: sequencer_ioctl() -> case SNDCTL_SEQ_OUTOFBAND: spin_lock_irqsave(&lock,flags); play_event(); -> case EV_TIMING: seq_timing_event() -> case TMR_ECHO: seq_copy_to_input() -> spin_lock_irqsave(&lock,flags); It seems that spin_lock_irqsave() around play_event() is not necessary, because the only other call location in seq_startplay() makes the call without acquiring spinlock. So, the patch just removes spinlocks around play_event(). By the way, it removes unreachable code in seq_timing_event(), since (seq_mode == SEQ_2) case is handled in the beginning. Compile tested only. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Takashi Iwai Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- sound/oss/sequencer.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/sound/oss/sequencer.c b/sound/oss/sequencer.c index 4ff60a6427d9..2e67dd590be5 100644 --- a/sound/oss/sequencer.c +++ b/sound/oss/sequencer.c @@ -683,13 +683,8 @@ static int seq_timing_event(unsigned char *event_rec) break; case TMR_ECHO: - if (seq_mode == SEQ_2) - seq_copy_to_input(event_rec, 8); - else - { - parm = (parm << 8 | SEQ_ECHO); - seq_copy_to_input((unsigned char *) &parm, 4); - } + parm = (parm << 8 | SEQ_ECHO); + seq_copy_to_input((unsigned char *) &parm, 4); break; default:; @@ -1332,7 +1327,6 @@ int sequencer_ioctl(int dev, struct file *file, unsigned int cmd, void __user *a int mode = translate_mode(file); struct synth_info inf; struct seq_event_rec event_rec; - unsigned long flags; int __user *p = arg; orig_dev = dev = dev >> 4; @@ -1487,9 +1481,7 @@ int sequencer_ioctl(int dev, struct file *file, unsigned int cmd, void __user *a case SNDCTL_SEQ_OUTOFBAND: if (copy_from_user(&event_rec, arg, sizeof(event_rec))) return -EFAULT; - spin_lock_irqsave(&lock,flags); play_event(event_rec.arr); - spin_unlock_irqrestore(&lock,flags); return 0; case SNDCTL_MIDI_INFO: From 5a4d93f39c3b3bce899891d39013bfc4ef2ab85a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:06 -0800 Subject: [PATCH 271/277] revert "softirq: Add support for triggering softirq work on softirqs" commit fc21c0cff2f425891b28ff6fb6b03b325c977428 upstream. This commit was incomplete in that code to remove items from the per-cpu lists was missing and never acquired a user in the 5 years it has been in the tree. We're going to implement what it seems to try to archive in a simpler way, and this code is in the way of doing so. Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Pan Xinhui Signed-off-by: Greg Kroah-Hartman --- include/linux/interrupt.h | 22 ------- kernel/softirq.c | 131 -------------------------------------- 2 files changed, 153 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6de0f2c14ec0..f05efb1160fb 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include #include @@ -488,15 +486,6 @@ extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); -/* This is the worklist that queues up per-cpu softirq work. - * - * send_remote_sendirq() adds work to these lists, and - * the softirq handler itself dequeues from them. The queues - * are protected by disabling local cpu interrupts and they must - * only be accessed by the local cpu that they are for. - */ -DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); - DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) @@ -504,17 +493,6 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) return this_cpu_read(ksoftirqd); } -/* Try to send a softirq to a remote cpu. If this cannot be done, the - * work will be queued to the local cpu. - */ -extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq); - -/* Like send_remote_softirq(), but the caller must disable local cpu interrupts - * and compute the current cpu, passed in as 'this_cpu'. - */ -extern void __send_remote_softirq(struct call_single_data *cp, int cpu, - int this_cpu, int softirq); - /* Tasklets --- multithreaded analogue of BHs. Main feature differing them of generic softirqs: tasklet diff --git a/kernel/softirq.c b/kernel/softirq.c index 21956f00cb51..b538df367de3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -6,8 +6,6 @@ * Distribute under GPLv2. * * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) - * - * Remote softirq infrastructure is by Jens Axboe. */ #include @@ -620,146 +618,17 @@ void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, } EXPORT_SYMBOL_GPL(tasklet_hrtimer_init); -/* - * Remote softirq bits - */ - -DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); -EXPORT_PER_CPU_SYMBOL(softirq_work_list); - -static void __local_trigger(struct call_single_data *cp, int softirq) -{ - struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]); - - list_add_tail(&cp->list, head); - - /* Trigger the softirq only if the list was previously empty. */ - if (head->next == &cp->list) - raise_softirq_irqoff(softirq); -} - -#ifdef CONFIG_USE_GENERIC_SMP_HELPERS -static void remote_softirq_receive(void *data) -{ - struct call_single_data *cp = data; - unsigned long flags; - int softirq; - - softirq = *(int *)cp->info; - local_irq_save(flags); - __local_trigger(cp, softirq); - local_irq_restore(flags); -} - -static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - if (cpu_online(cpu)) { - cp->func = remote_softirq_receive; - cp->info = &softirq; - cp->flags = 0; - - __smp_call_function_single(cpu, cp, 0); - return 0; - } - return 1; -} -#else /* CONFIG_USE_GENERIC_SMP_HELPERS */ -static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - return 1; -} -#endif - -/** - * __send_remote_softirq - try to schedule softirq work on a remote cpu - * @cp: private SMP call function data area - * @cpu: the remote cpu - * @this_cpu: the currently executing cpu - * @softirq: the softirq for the work - * - * Attempt to schedule softirq work on a remote cpu. If this cannot be - * done, the work is instead queued up on the local cpu. - * - * Interrupts must be disabled. - */ -void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq) -{ - if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq)) - __local_trigger(cp, softirq); -} -EXPORT_SYMBOL(__send_remote_softirq); - -/** - * send_remote_softirq - try to schedule softirq work on a remote cpu - * @cp: private SMP call function data area - * @cpu: the remote cpu - * @softirq: the softirq for the work - * - * Like __send_remote_softirq except that disabling interrupts and - * computing the current cpu is done for the caller. - */ -void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - unsigned long flags; - int this_cpu; - - local_irq_save(flags); - this_cpu = smp_processor_id(); - __send_remote_softirq(cp, cpu, this_cpu, softirq); - local_irq_restore(flags); -} -EXPORT_SYMBOL(send_remote_softirq); - -static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq - */ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - int cpu = (unsigned long) hcpu; - int i; - - local_irq_disable(); - for (i = 0; i < NR_SOFTIRQS; i++) { - struct list_head *head = &per_cpu(softirq_work_list[i], cpu); - struct list_head *local_head; - - if (list_empty(head)) - continue; - - local_head = &__get_cpu_var(softirq_work_list[i]); - list_splice_init(head, local_head); - raise_softirq_irqoff(i); - } - local_irq_enable(); - } - - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = { - .notifier_call = remote_softirq_cpu_notify, -}; - void __init softirq_init(void) { int cpu; for_each_possible_cpu(cpu) { - int i; - per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; - for (i = 0; i < NR_SOFTIRQS; i++) - INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu)); } - register_hotcpu_notifier(&remote_softirq_cpu_notifier); - open_softirq(TASKLET_SOFTIRQ, tasklet_action); open_softirq(HI_SOFTIRQ, tasklet_hi_action); } From 85e014b4304626972bff42249556748ad3fcf812 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 13 Apr 2015 11:48:18 +0800 Subject: [PATCH 272/277] ACPICA: Tables: Change acpi_find_root_pointer() to use acpi_physical_address. commit f254e3c57b9d952e987502aefa0804c177dd2503 upstream. ACPICA commit 7d9fd64397d7c38899d3dc497525f6e6b044e0e3 OSPMs like Linux expect an acpi_physical_address returning value from acpi_find_root_pointer(). This triggers warnings if sizeof (acpi_size) doesn't equal to sizeof (acpi_physical_address): drivers/acpi/osl.c:275:3: warning: passing argument 1 of 'acpi_find_root_pointer' from incompatible pointer type [enabled by default] In file included from include/acpi/acpi.h:64:0, from include/linux/acpi.h:36, from drivers/acpi/osl.c:41: include/acpi/acpixf.h:433:1: note: expected 'acpi_size *' but argument is of type 'acpi_physical_address *' This patch corrects acpi_find_root_pointer(). Link: https://github.com/acpica/acpica/commit/7d9fd643 Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Dirk Behme Signed-off-by: George G. Davis Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/tbxfroot.c | 7 ++++--- include/acpi/acpixf.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c index 7c2ecfb7c2c3..e12486031d97 100644 --- a/drivers/acpi/acpica/tbxfroot.c +++ b/drivers/acpi/acpica/tbxfroot.c @@ -118,7 +118,7 @@ static acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp) * ******************************************************************************/ -acpi_status acpi_find_root_pointer(acpi_size *table_address) +acpi_status acpi_find_root_pointer(acpi_physical_address * table_address) { u8 *table_ptr; u8 *mem_rover; @@ -176,7 +176,8 @@ acpi_status acpi_find_root_pointer(acpi_size *table_address) physical_address += (u32) ACPI_PTR_DIFF(mem_rover, table_ptr); - *table_address = physical_address; + *table_address = + (acpi_physical_address) physical_address; return_ACPI_STATUS(AE_OK); } } @@ -209,7 +210,7 @@ acpi_status acpi_find_root_pointer(acpi_size *table_address) (ACPI_HI_RSDP_WINDOW_BASE + ACPI_PTR_DIFF(mem_rover, table_ptr)); - *table_address = physical_address; + *table_address = (acpi_physical_address) physical_address; return_ACPI_STATUS(AE_OK); } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 454881e6450a..fcabb1597d5b 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -177,7 +177,7 @@ acpi_status acpi_load_tables(void); */ acpi_status acpi_reallocate_root_table(void); -acpi_status acpi_find_root_pointer(acpi_size *rsdp_address); +acpi_status acpi_find_root_pointer(acpi_physical_address *rsdp_address); acpi_status acpi_unload_table_id(acpi_owner_id id); From 626d4bdfef3c1ff402a79462d98babcffb66559b Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 13 Apr 2015 11:48:37 +0800 Subject: [PATCH 273/277] ACPICA: Utilities: Cleanup to enforce ACPI_PHYSADDR_TO_PTR()/ACPI_PTR_TO_PHYSADDR(). commit 6d3fd3cc33d50e4c0d0c0bd172de02caaec3127c upstream. ACPICA commit 154f6d074dd38d6ebc0467ad454454e6c5c9ecdf There are code pieces converting pointers using "(acpi_physical_address) x" or "ACPI_CAST_PTR (t, x)" formats, this patch cleans up them. Known issues: 1. Cleanup of "(ACPI_PHYSICAL_ADDRRESS) x" for a table field For the conversions around the table fields, it is better to fix it with alignment also fixed. So this patch doesn't modify such code. There should be no functional problem by leaving them unchanged. Link: https://github.com/acpica/acpica/commit/154f6d07 Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Dirk Behme Signed-off-by: George G. Davis Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/dsopcode.c | 3 +-- drivers/acpi/acpica/tbinstal.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c index ee6367b8eaf7..e9b13b92ba1e 100644 --- a/drivers/acpi/acpica/dsopcode.c +++ b/drivers/acpi/acpica/dsopcode.c @@ -539,8 +539,7 @@ acpi_ds_eval_table_region_operands(struct acpi_walk_state *walk_state, return_ACPI_STATUS(AE_NOT_EXIST); } - obj_desc->region.address = - (acpi_physical_address) ACPI_TO_INTEGER(table); + obj_desc->region.address = ACPI_PTR_TO_PHYSADDR(table); obj_desc->region.length = table->length; ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "RgnObj %p Addr %8.8X%8.8X Len %X\n", diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index e57cd38004e3..0d2351596a3c 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -301,8 +301,7 @@ struct acpi_table_header *acpi_tb_table_override(struct acpi_table_header ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY, "%4.4s %p Attempted physical table override failed", table_header->signature, - ACPI_CAST_PTR(void, - table_desc->address))); + ACPI_PHYSADDR_TO_PTR(table_desc->address))); return (NULL); } @@ -318,7 +317,7 @@ struct acpi_table_header *acpi_tb_table_override(struct acpi_table_header ACPI_INFO((AE_INFO, "%4.4s %p %s table override, new table: %p", table_header->signature, - ACPI_CAST_PTR(void, table_desc->address), + ACPI_PHYSADDR_TO_PTR(table_desc->address), override_type, new_table)); /* We can now unmap/delete the original table (if fully mapped) */ From b5bac1f597ae5669dee0d2ae927b8ded0b8f6b34 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 17 May 2015 09:51:39 -0700 Subject: [PATCH 274/277] Linux 3.10.79 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cf99a9b53c6f..e26cb1e56266 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 78 +SUBLEVEL = 79 EXTRAVERSION = NAME = TOSSUG Baby Fish From 96dade5633c3ebb68fc191fd2cc03f89fc4f57f9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 2 Sep 2013 16:33:54 +0100 Subject: [PATCH 275/277] arm64: Remove unused cpu_name ascii in arch/arm64/mm/proc.S This string has been moved to arch/arm64/kernel/cputable.c. Signed-off-by: Catalin Marinas (cherry picked from commit f3a1d7d53dccf51959aec16b574617cc6bfeca09) Signed-off-by: Kevin Hilman --- arch/arm64/mm/proc.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a82ae8868077..f84fcf71f129 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -95,10 +95,6 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) -cpu_name: - .ascii "AArch64 Processor" - .align - .section ".text.init", #alloc, #execinstr /* From a0121477d2c5e7b62436edca6dac4cd8058c15a5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 23 Mar 2015 19:07:02 +0000 Subject: [PATCH 276/277] arm64: errata: add workaround for cortex-a53 erratum #845719 When running a compat (AArch32) userspace on Cortex-A53, a load at EL0 from a virtual address that matches the bottom 32 bits of the virtual address used by a recent load at (AArch64) EL1 might return incorrect data. This patch works around the issue by writing to the contextidr_el1 register on the exception return path when returning to a 32-bit task. Reviewed-by: Marc Zyngier Tested-by: Mark Rutland Signed-off-by: Will Deacon (cherry picked from commit 905e8c5dcaa147163672b06fe9dcb5abaacbc711) [khilman: modified to remove dependency on alternatives framwork. Feature is now only compile-time selectable, and defaults to off. ] Signed-off-by: Kevin Hilman --- arch/arm64/Kconfig | 26 ++++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 11 +++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..50d090002bc8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -122,6 +122,32 @@ endmenu menu "Kernel Features" +menu "ARM errata workarounds" + +config ARM64_ERRATUM_845719 + bool "Cortex-A53: 845719: a load might read incorrect data" + depends on COMPAT + default n + help + This option adds an alternative code sequence to work around ARM + erratum 845719 on Cortex-A53 parts up to r0p4. + + When running a compat (AArch32) userspace on an affected Cortex-A53 + part, a load at EL0 from a virtual address that matches the bottom 32 + bits of the virtual address used by a recent load at (AArch64) EL1 + might return incorrect data. + + The workaround is to write the contextidr_el1 register on exception + return to a 32-bit task. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + +endmenu + + config ARM64_64K_PAGES bool "Enable 64KB pages support" help diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1d1314280a03..bb64eedfbefd 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -89,6 +89,17 @@ ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ldr x23, [sp, #S_SP] // load return stack pointer +#ifdef CONFIG_ARM64_ERRATUM_845719 + tbz x22, #4, 1f +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrs x29, contextidr_el1 + msr contextidr_el1, x29 +1: +#else + msr contextidr_el1, xzr +1: +#endif +#endif .endif .if \ret ldr x1, [sp, #S_X1] // preserve x0 (syscall return) From 16b1fe2ce62d6238acb13fbe59b4fc13b51b1445 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 28 Aug 2013 14:30:47 -0400 Subject: [PATCH 277/277] ext4: convert write_begin methods to stable_page_writes semantics Use wait_for_stable_page() instead of wait_on_page_writeback() Huawei engineer Jianfeng report that without this patch, the consequence write may cause seconds to finish. The patch helps because most of storage today doesn't require that the page isn't changed while IO is in flight. That is required only for data checksumming or copy-on-write semantics but ext4 does neither of those. So we don't have to wait for IO completion in ext4_write_begin() unless underlying storage requires it. --Honza Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara (cherry picked from commit 7afe5aa59ed3da7b6161617e7f157c7c680dc41e) Signed-off-by: Alex Shi --- fs/ext4/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6382b89ecbd..e1892bb499cd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1029,7 +1029,8 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, ext4_journal_stop(handle); goto retry_grab; } - wait_on_page_writeback(page); + /* In case writeback began while the page was unlocked */ + wait_for_stable_page(page); if (ext4_should_dioread_nolock(inode)) ret = __block_write_begin(page, pos, len, ext4_get_block_write); @@ -2720,7 +2721,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, goto retry_grab; } /* In case writeback began while the page was unlocked */ - wait_on_page_writeback(page); + wait_for_stable_page(page); ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); if (ret < 0) {