From d0d1f54d367f959bc7484422fb51f61c00977a64 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 26 Jan 2015 12:02:45 +0100 Subject: [PATCH 001/204] gpio: sysfs: fix memory leak in gpiod_export_link commit 0f303db08df0df9bd0966443ad6001e63960af16 upstream. Fix memory leak in the gpio sysfs interface due to failure to drop reference to device returned by class_find_device when creating a link. Fixes: a4177ee7f1a8 ("gpiolib: allow exported GPIO nodes to be named using sysfs links") Signed-off-by: Johan Hovold Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 1d74a80e031e..6dd7df80ae20 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -874,6 +874,7 @@ static int gpiod_export_link(struct device *dev, const char *name, if (tdev != NULL) { status = sysfs_create_link(&dev->kobj, &tdev->kobj, name); + put_device(tdev); } else { status = -ENODEV; } From 4cd925d7c1d5d777f2f09db4aee4675d0978a95b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 26 Jan 2015 12:02:46 +0100 Subject: [PATCH 002/204] gpio: sysfs: fix memory leak in gpiod_sysfs_set_active_low commit 49d2ca84e433dab854c7a866bc6add09cfab682d upstream. Fix memory leak in the gpio sysfs interface due to failure to drop reference to device returned by class_find_device when setting the gpio-line polarity. 
Fixes: 0769746183ca ("gpiolib: add support for changing value polarity in sysfs") Signed-off-by: Johan Hovold Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6dd7df80ae20..ca1cb2d756c2 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -928,7 +928,7 @@ static int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value) } status = sysfs_set_active_low(desc, dev, value); - + put_device(dev); unlock: mutex_unlock(&sysfs_lock); From 9a1acfe2a3897827283b25fa24400935bc4ee2d2 Mon Sep 17 00:00:00 2001 From: Charlotte Richardson Date: Mon, 2 Feb 2015 09:36:23 -0600 Subject: [PATCH 003/204] PCI: Add NEC variants to Stratus ftServer PCIe DMI check commit 51ac3d2f0c505ca36ffc9715ffd518d756589ef8 upstream. NEC OEMs the same platforms as Stratus does, which have multiple devices on some PCIe buses under downstream ports. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=51331 Fixes: 1278998f8ff6 ("PCI: Work around Stratus ftServer broken PCIe hierarchy (fix DMI check)") Signed-off-by: Charlotte Richardson Signed-off-by: Bjorn Helgaas CC: Myron Stowe Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/common.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 981c2dbd72cc..88f143d9754e 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -448,6 +448,22 @@ static const struct dmi_system_id pciprobe_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"), }, }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R32"), + }, + }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R31"), + }, + }, {} }; From 290deda94058c4aaeec0b0f9da43564c363b1fec Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 15 Jan 2015 19:05:28 +0100 Subject: [PATCH 004/204] MIPS: IRQ: Fix disable_irq on CPU IRQs commit a3e6c1eff54878506b2dddcc202df9cc8180facb upstream. If the irq_chip does not define .irq_disable, any call to disable_irq will defer disabling the IRQ until it fires while marked as disabled. This assumes that the handler function checks for this condition, which handle_percpu_irq does not. In this case, calling disable_irq leads to an IRQ storm, if the interrupt fires while disabled. This optimization is only useful when disabling the IRQ is slow, which is not true for the MIPS CPU IRQ. 
Disable this optimization by implementing .irq_disable and .irq_enable Signed-off-by: Felix Fietkau Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8949/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/irq_cpu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/kernel/irq_cpu.c b/arch/mips/kernel/irq_cpu.c index 72ef2d25cbf2..ab941a366012 100644 --- a/arch/mips/kernel/irq_cpu.c +++ b/arch/mips/kernel/irq_cpu.c @@ -56,6 +56,8 @@ static struct irq_chip mips_cpu_irq_controller = { .irq_mask_ack = mask_mips_irq, .irq_unmask = unmask_mips_irq, .irq_eoi = unmask_mips_irq, + .irq_disable = mask_mips_irq, + .irq_enable = unmask_mips_irq, }; /* @@ -92,6 +94,8 @@ static struct irq_chip mips_mt_cpu_irq_controller = { .irq_mask_ack = mips_mt_cpu_irq_ack, .irq_unmask = unmask_mips_irq, .irq_eoi = unmask_mips_irq, + .irq_disable = mask_mips_irq, + .irq_enable = unmask_mips_irq, }; void __init mips_cpu_irq_init(void) From 2ded944c7e92114ff28532d80c06e2cb9c8dd2fd Mon Sep 17 00:00:00 2001 From: Hemmo Nieminen Date: Thu, 15 Jan 2015 23:01:59 +0200 Subject: [PATCH 005/204] MIPS: Fix kernel lockup or crash after CPU offline/online commit c7754e75100ed5e3068ac5085747f2bfc386c8d6 upstream. As printk() invocation can cause e.g. a TLB miss, printk() cannot be called before the exception handlers have been properly initialized. This can happen e.g. when netconsole has been loaded as a kernel module and the TLB table has been cleared when a CPU was offline. Call cpu_report() in start_secondary() only after the exception handlers have been initialized to fix this. Without the patch the kernel will randomly either lockup or crash after a CPU is onlined and the console driver is a module. 
Signed-off-by: Hemmo Nieminen Signed-off-by: Aaro Koskinen Cc: David Daney Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/8953/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 6e7862ab46cc..caeec21df1c4 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -109,10 +109,10 @@ asmlinkage __cpuinit void start_secondary(void) else #endif /* CONFIG_MIPS_MT_SMTC */ cpu_probe(); - cpu_report(); per_cpu_trap_init(false); mips_clockevent_init(); mp_ops->init_secondary(); + cpu_report(); /* * XXX parity protection should be folded in here when it's converted From 48f5cffe366348c089d4e21b526abb17df4372e9 Mon Sep 17 00:00:00 2001 From: Shiraz Hashim Date: Thu, 5 Feb 2015 12:25:06 -0800 Subject: [PATCH 006/204] mm: pagewalk: call pte_hole() for VM_PFNMAP during walk_page_range commit 23aaed6659df9adfabe9c583e67a36b54e21df46 upstream. walk_page_range() silently skips vma having VM_PFNMAP set, which leads to undesirable behaviour at client end (who called walk_page_range). Userspace applications get the wrong data, so the effect is like just confusing users (if the applications just display the data) or sometimes killing the processes (if the applications do something with misunderstanding virtual addresses due to the wrong data.) For example for pagemap_read, when no callbacks are called against VM_PFNMAP vma, pagemap_read may prepare pagemap data for next virtual address range at wrong index. Eventually userspace may get wrong pagemap data for a task. Corresponding to a VM_PFNMAP marked vma region, kernel may report mappings from subsequent vma regions. User space in turn may account more pages (than really are) to the task. 
In my case I was using procmem, procrack (Android utility) which uses pagemap interface to account RSS pages of a task. Due to this bug it was giving a wrong picture for vmas (with VM_PFNMAP set). Fixes: a9ff785e4437 ("mm/pagewalk.c: walk_page_range should avoid VM_PFNMAP areas") Signed-off-by: Shiraz Hashim Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/pagewalk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 2beeabf502c5..9056d22d2880 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -199,7 +199,10 @@ int walk_page_range(unsigned long addr, unsigned long end, */ if ((vma->vm_start <= addr) && (vma->vm_flags & VM_PFNMAP)) { - next = vma->vm_end; + if (walk->pte_hole) + err = walk->pte_hole(addr, next, walk); + if (err) + break; pgd = pgd_offset(walk->mm, next); continue; } From 229d02538b35a1fded99cdd71ceb583c7b3d56bc Mon Sep 17 00:00:00 2001 From: karl beldan Date: Wed, 28 Jan 2015 10:58:11 +0100 Subject: [PATCH 007/204] lib/checksum.c: fix carry in csum_tcpudp_nofold commit 150ae0e94634714b23919f0c333fee28a5b199d5 upstream. The carry from the 64->32bits folding was dropped, e.g with: saddr=0xFFFFFFFF daddr=0xFF0000FF len=0xFFFF proto=0 sum=1, csum_tcpudp_nofold returned 0 instead of 1. Signed-off-by: Karl Beldan Cc: Al Viro Cc: Eric Dumazet Cc: Arnd Bergmann Cc: Mike Frysinger Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- lib/checksum.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/checksum.c b/lib/checksum.c index 129775eb6de6..fcf38943132c 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -47,6 +47,15 @@ static inline unsigned short from32to16(unsigned int x) return x; } +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + static unsigned int do_csum(const unsigned char *buff, int len) { int odd; @@ -195,8 +204,7 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, #else s += (proto + len) << 8; #endif - s += (s >> 32); - return (__force __wsum)s; + return (__force __wsum)from64to32(s); } EXPORT_SYMBOL(csum_tcpudp_nofold); #endif From ec7cae16b37ab478d6d7e33e8563b24ca189e6cf Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 5 Feb 2015 12:25:20 -0800 Subject: [PATCH 008/204] nilfs2: fix deadlock of segment constructor over I_SYNC flag commit 7ef3ff2fea8bf5e4a21cef47ad87710a3d0fdb52 upstream. Nilfs2 eventually hangs in a stress test with fsstress program. This issue was caused by the following deadlock over I_SYNC flag between nilfs_segctor_thread() and writeback_sb_inodes(): nilfs_segctor_thread() nilfs_segctor_thread_construct() nilfs_segctor_unlock() nilfs_dispose_list() iput() iput_final() evict() inode_wait_for_writeback() * wait for I_SYNC flag writeback_sb_inodes() * set I_SYNC flag on inode->i_state __writeback_single_inode() do_writepages() nilfs_writepages() nilfs_construct_dsync_segment() nilfs_segctor_sync() * wait for completion of segment constructor inode_sync_complete() * clear I_SYNC flag after __writeback_single_inode() completed writeback_sb_inodes() calls do_writepages() for dirty inodes after setting I_SYNC flag on inode->i_state. 
do_writepages() in turn calls nilfs_writepages(), which can run segment constructor and wait for its completion. On the other hand, segment constructor calls iput(), which can call evict() and wait for the I_SYNC flag on inode_wait_for_writeback(). Since segment constructor doesn't know when I_SYNC will be set, it cannot know whether iput() will block or not unless inode->i_nlink has a non-zero count. We can prevent evict() from being called in iput() by implementing sop->drop_inode(), but it's not preferable to leave inodes with i_nlink == 0 for long periods because it even defers file truncation and inode deallocation. So, this instead resolves the deadlock by calling iput() asynchronously with a workqueue for inodes with i_nlink == 0. Signed-off-by: Ryusuke Konishi Cc: Al Viro Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/nilfs.h | 2 -- fs/nilfs2/segment.c | 44 +++++++++++++++++++++++++++++++++++++++----- fs/nilfs2/segment.h | 5 +++++ 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 9bc72dec3fa6..b02c202223a6 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -141,7 +141,6 @@ enum { * @ti_save: Backup of journal_info field of task_struct * @ti_flags: Flags * @ti_count: Nest level - * @ti_garbage: List of inode to be put when releasing semaphore */ struct nilfs_transaction_info { u32 ti_magic; @@ -150,7 +149,6 @@ struct nilfs_transaction_info { one of other filesystems has a bug. 
*/ unsigned short ti_flags; unsigned short ti_count; - struct list_head ti_garbage; }; /* ti_magic */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 958a5b57ed4a..b3c95c1a4700 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -305,7 +305,6 @@ static void nilfs_transaction_lock(struct super_block *sb, ti->ti_count = 0; ti->ti_save = cur_ti; ti->ti_magic = NILFS_TI_MAGIC; - INIT_LIST_HEAD(&ti->ti_garbage); current->journal_info = ti; for (;;) { @@ -332,8 +331,6 @@ static void nilfs_transaction_unlock(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; - if (!list_empty(&ti->ti_garbage)) - nilfs_dispose_list(nilfs, &ti->ti_garbage, 0); } static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, @@ -746,6 +743,15 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs, } } +static void nilfs_iput_work_func(struct work_struct *work) +{ + struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info, + sc_iput_work); + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; + + nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0); +} + static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, struct nilfs_root *root) { @@ -1899,8 +1905,8 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_inode_info *ii, *n; + int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { @@ -1911,9 +1917,24 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; - list_move_tail(&ii->i_dirty, &ti->ti_garbage); + list_del_init(&ii->i_dirty); + if (!ii->vfs_inode.i_nlink) { + /* + * Defer calling iput() to avoid a deadlock + * over I_SYNC flag for inodes with 
i_nlink == 0 + */ + list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); + defer_iput = true; + } else { + spin_unlock(&nilfs->ns_inode_lock); + iput(&ii->vfs_inode); + spin_lock(&nilfs->ns_inode_lock); + } } spin_unlock(&nilfs->ns_inode_lock); + + if (defer_iput) + schedule_work(&sci->sc_iput_work); } /* @@ -2580,6 +2601,8 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); + INIT_LIST_HEAD(&sci->sc_iput_queue); + INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); init_timer(&sci->sc_timer); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; @@ -2606,6 +2629,8 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) ret = nilfs_segctor_construct(sci, SC_LSEG_SR); nilfs_transaction_unlock(sci->sc_super); + flush_work(&sci->sc_iput_work); + } while (ret && retrycount-- > 0); } @@ -2630,6 +2655,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + if (flush_work(&sci->sc_iput_work)) + flag = true; + if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); @@ -2639,6 +2667,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); } + if (!list_empty(&sci->sc_iput_queue)) { + nilfs_warning(sci->sc_super, __func__, + "iput queue is not empty\n"); + nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1); + } + WARN_ON(!list_empty(&sci->sc_segbufs)); WARN_ON(!list_empty(&sci->sc_write_logs)); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 38a1d0013314..a48d6de1e02c 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "nilfs.h" @@ -92,6 +93,8 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: 
List of GC inodes having blocks to be written + * @sc_iput_queue: list of inodes for which iput should be done + * @sc_iput_work: work struct to defer iput call * @sc_freesegs: array of segment numbers to be freed * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -135,6 +138,8 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; + struct list_head sc_iput_queue; + struct work_struct sc_iput_work; __u64 *sc_freesegs; size_t sc_nfreesegs; From 72684eae7b0acf2d085e1e878caa44b5e0219b24 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 24 Oct 2014 14:56:40 +0100 Subject: [PATCH 009/204] arm64: Fix up /proc/cpuinfo commit 44b82b7700d05a52cd983799d3ecde1a976b3bed upstream. Commit d7a49086f263164a (arm64: cpuinfo: print info for all CPUs) attempted to clean up /proc/cpuinfo, but due to concerns regarding further changes was reverted in commit 5e39977edf6500fd (Revert "arm64: cpuinfo: print info for all CPUs"). There are two major issues with the arm64 /proc/cpuinfo format currently: * The "Features" line describes (only) the 64-bit hwcaps, which is problematic for some 32-bit applications which attempt to parse it. As the same names are used for analogous ISA features (e.g. aes) despite these generally being architecturally unrelated, it is not possible to simply append the 64-bit and 32-bit hwcaps in a manner that might not be misleading to some applications. Various potential solutions have appeared in vendor kernels. Typically the format of the Features line varies depending on whether the task is 32-bit. * Information is only printed regarding a single CPU. This does not match the ARM format, and does not provide sufficient information in big.LITTLE systems where CPUs are heterogeneous. The CPU information printed is queried from the current CPU's registers, which is racy w.r.t. cross-cpu migration. This patch attempts to solve these issues. 
The following changes are made: * When a task with a LINUX32 personality attempts to read /proc/cpuinfo, the "Features" line contains the decoded 32-bit hwcaps, as with the arm port. Otherwise, the decoded 64-bit hwcaps are shown. This aligns with the behaviour of COMPAT_UTS_MACHINE and COMPAT_ELF_PLATFORM. In the absence of compat support, the Features line is empty. The set of hwcaps injected into a task's auxval is unaffected. * Properties are printed per-cpu, as with the ARM port. The per-cpu information is queried from pre-recorded cpu information (as used by the sanity checks). * As with the previous attempt at fixing up /proc/cpuinfo, the hardware field is removed. The only users so far are 32-bit applications tied to particular boards, so no portable applications should be affected, and this should prevent future tying to particular boards. The following differences remain: * No model_name is printed, as this cannot be queried from the hardware and cannot be provided in a stable fashion. Use of the CPU {implementor,variant,part,revision} fields is sufficient to identify a CPU and is portable across arm and arm64. * The following system-wide properties are not provided, as they are not possible to provide generally. Programs relying on these are already tied to particular (32-bit only) boards: - Hardware - Revision - Serial No software has yet been identified for which these remaining differences are problematic. 
Cc: Greg Hackmann Cc: Ian Campbell Cc: Serban Constantinescu Cc: Will Deacon Cc: cross-distro@lists.linaro.org Cc: linux-api@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: Catalin Marinas [Mark: backport to v3.10.x] Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cputype.h | 2 + arch/arm64/kernel/setup.c | 100 +++++++++++++++++++++++-------- arch/arm64/kernel/smp.c | 5 ++ 3 files changed, 82 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf2749488cd4..be9b5ca9a6c0 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -74,6 +74,8 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) return read_cpuid(ID_CTR_EL0); } +void cpuinfo_store_cpu(void); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index add6ea616843..7cc551d1b0e1 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -97,6 +98,19 @@ void __init early_print(const char *str, ...) 
printk("%s", buf); } +struct cpuinfo_arm64 { + struct cpu cpu; + u32 reg_midr; +}; + +static DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); + +void cpuinfo_store_cpu(void) +{ + struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); + info->reg_midr = read_cpuid_id(); +} + static void __init setup_processor(void) { struct cpu_info *cpu_info; @@ -127,6 +141,8 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) struct boot_param_header *devtree; unsigned long dt_root; + cpuinfo_store_cpu(); + /* Check we have a non-NULL DT pointer */ if (!dt_phys) { early_print("\n" @@ -290,14 +306,12 @@ static int __init arm64_device_init(void) } arch_initcall(arm64_device_init); -static DEFINE_PER_CPU(struct cpu, cpu_data); - static int __init topology_init(void) { int i; for_each_possible_cpu(i) { - struct cpu *cpu = &per_cpu(cpu_data, i); + struct cpu *cpu = &per_cpu(cpu_data.cpu, i); cpu->hotpluggable = 1; register_cpu(cpu, i); } @@ -312,14 +326,41 @@ static const char *hwcap_str[] = { NULL }; +#ifdef CONFIG_COMPAT +static const char *compat_hwcap_str[] = { + "swp", + "half", + "thumb", + "26bit", + "fastmult", + "fpa", + "vfp", + "edsp", + "java", + "iwmmxt", + "crunch", + "thumbee", + "neon", + "vfpv3", + "vfpv3d16", + "tls", + "vfpv4", + "idiva", + "idivt", + "vfpd32", + "lpae", + "evtstrm" +}; +#endif /* CONFIG_COMPAT */ + static int c_show(struct seq_file *m, void *v) { - int i; - - seq_printf(m, "Processor\t: %s rev %d (%s)\n", - cpu_name, read_cpuid_id() & 15, ELF_PLATFORM); + int i, j; for_each_online_cpu(i) { + struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); + u32 midr = cpuinfo->reg_midr; + /* * glibc reads /proc/cpuinfo to determine the number of * online processors, looking for lines beginning with @@ -328,28 +369,37 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", loops_per_jiffy / (500000UL/HZ), 
loops_per_jiffy / (5000UL/HZ) % 100); + + /* + * Dump out the common processor features in a single line. + * Userspace should read the hwcaps with getauxval(AT_HWCAP) + * rather than attempting to parse this, but there's a body of + * software which does already (at least for 32-bit). + */ + seq_puts(m, "Features\t:"); + if (personality(current->personality) == PER_LINUX32) { +#ifdef CONFIG_COMPAT + for (j = 0; compat_hwcap_str[j]; j++) + if (COMPAT_ELF_HWCAP & (1 << j)) + seq_printf(m, " %s", compat_hwcap_str[j]); +#endif /* CONFIG_COMPAT */ + } else { + for (j = 0; hwcap_str[j]; j++) + if (elf_hwcap & (1 << j)) + seq_printf(m, " %s", hwcap_str[j]); + } + seq_puts(m, "\n"); + + seq_printf(m, "CPU implementer\t: 0x%02x\n", (midr >> 24)); + seq_printf(m, "CPU architecture: 8\n"); + seq_printf(m, "CPU variant\t: 0x%x\n", ((midr >> 20) & 0xf)); + seq_printf(m, "CPU part\t: 0x%03x\n", ((midr >> 4) & 0xfff)); + seq_printf(m, "CPU revision\t: %d\n\n", (midr & 0xf)); } - /* dump out the processor features */ - seq_puts(m, "Features\t: "); - - for (i = 0; hwcap_str[i]; i++) - if (elf_hwcap & (1 << i)) - seq_printf(m, "%s ", hwcap_str[i]); - - seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: AArch64\n"); - seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); - seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); - seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); - - seq_puts(m, "\n"); - - seq_printf(m, "Hardware\t: %s\n", machine_name); - return 0; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 9c93e126328c..b0a8703a25ec 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -199,6 +199,11 @@ asmlinkage void __cpuinit secondary_start_kernel(void) raw_spin_lock(&boot_lock); raw_spin_unlock(&boot_lock); + /* + * Log the CPU info before it is marked online and might get read. 
+ */ + cpuinfo_store_cpu(); + /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online From 30d8c8352812e924bbee639ca3a25e35daf74db3 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 30 Oct 2014 10:53:16 -0400 Subject: [PATCH 010/204] ext4: prevent bugon on race between write/fcntl commit a41537e69b4aa43f0fea02498c2595a81267383b upstream. O_DIRECT flags can be toggeled via fcntl(F_SETFL). But this value checked twice inside ext4_file_write_iter() and __generic_file_write() which result in BUG_ON inside ext4_direct_IO. Let's initialize iocb->private unconditionally. TESTCASE: xfstest:generic/036 https://patchwork.ozlabs.org/patch/402445/ #TYPICAL STACK TRACE: kernel BUG at fs/ext4/inode.c:2960! invalid opcode: 0000 [#1] SMP Modules linked in: brd iTCO_wdt lpc_ich mfd_core igb ptp dm_mirror dm_region_hash dm_log dm_mod CPU: 6 PID: 5505 Comm: aio-dio-fcntl-r Not tainted 3.17.0-rc2-00176-gff5c017 #161 Hardware name: Intel Corporation W2600CR/W2600CR, BIOS SE5C600.86B.99.99.x028.061320111235 06/13/2011 task: ffff88080e95a7c0 ti: ffff88080f908000 task.ti: ffff88080f908000 RIP: 0010:[] [] ext4_direct_IO+0x162/0x3d0 RSP: 0018:ffff88080f90bb58 EFLAGS: 00010246 RAX: 0000000000000400 RBX: ffff88080fdb2a28 RCX: 00000000a802c818 RDX: 0000040000080000 RSI: ffff88080d8aeb80 RDI: 0000000000000001 RBP: ffff88080f90bbc8 R08: 0000000000000000 R09: 0000000000001581 R10: 0000000000000000 R11: 0000000000000000 R12: ffff88080d8aeb80 R13: ffff88080f90bbf8 R14: ffff88080fdb28c8 R15: ffff88080fdb2a28 FS: 00007f23b2055700(0000) GS:ffff880818400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f23b2045000 CR3: 000000080cedf000 CR4: 00000000000407e0 Stack: ffff88080f90bb98 0000000000000000 7ffffffffffffffe ffff88080fdb2c30 0000000000000200 0000000000000200 0000000000000001 0000000000000200 ffff88080f90bbc8 ffff88080fdb2c30 ffff88080f90be08 0000000000000200 Call Trace: [] 
generic_file_direct_write+0xed/0x180 [] __generic_file_write_iter+0x222/0x370 [] ext4_file_write_iter+0x34b/0x400 [] ? aio_run_iocb+0x239/0x410 [] ? aio_run_iocb+0x239/0x410 [] ? local_clock+0x25/0x30 [] ? __lock_acquire+0x274/0x700 [] ? ext4_unwritten_wait+0xb0/0xb0 [] aio_run_iocb+0x286/0x410 [] ? local_clock+0x25/0x30 [] ? lock_release_holdtime+0x29/0x190 [] ? lookup_ioctx+0x4b/0xf0 [] do_io_submit+0x55b/0x740 [] ? do_io_submit+0x3ca/0x740 [] SyS_io_submit+0x10/0x20 [] system_call_fastpath+0x16/0x1b Code: 01 48 8b 80 f0 01 00 00 48 8b 18 49 8b 45 10 0f 85 f1 01 00 00 48 03 45 c8 48 3b 43 48 0f 8f e3 01 00 00 49 83 7c 24 18 00 75 04 <0f> 0b eb fe f0 ff 83 ec 01 00 00 49 8b 44 24 18 8b 00 85 c0 89 RIP [] ext4_direct_IO+0x162/0x3d0 RSP Reported-by: Sasha Levin Signed-off-by: Theodore Ts'o Signed-off-by: Dmitry Monakhov [hujianyang: Backported to 3.10 - Move initialization of iocb->private to ext4_file_write() as we don't have ext4_file_write_iter(), which is introduced by commit 9b884164. 
- Adjust context to make 'overwrite' changes apply to ext4_file_dio_write() as ext4_file_dio_write() is not move into ext4_file_write()] Signed-off-by: hujianyang Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4635788e14bf..ec9770f42538 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -100,7 +100,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, struct blk_plug plug; int unaligned_aio = 0; ssize_t ret; - int overwrite = 0; + int *overwrite = iocb->private; size_t length = iov_length(iov, nr_segs); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && @@ -118,8 +118,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, mutex_lock(&inode->i_mutex); blk_start_plug(&plug); - iocb->private = &overwrite; - /* check whether we do a DIO overwrite or not */ if (ext4_should_dioread_nolock(inode) && !unaligned_aio && !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { @@ -143,7 +141,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, * So we should check these two conditions. 
*/ if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) - overwrite = 1; + *overwrite = 1; } ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); @@ -170,6 +168,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; + int overwrite = 0; /* * If we have encountered a bitmap-format file, the size limit @@ -190,6 +189,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, } } + iocb->private = &overwrite; if (unlikely(iocb->ki_filp->f_flags & O_DIRECT)) ret = ext4_file_dio_write(iocb, iov, nr_segs, pos); else From 1c3f3138ea06ce56ae257b8d39e3968e3d38cbd8 Mon Sep 17 00:00:00 2001 From: karl beldan Date: Thu, 29 Jan 2015 11:10:22 +0100 Subject: [PATCH 011/204] lib/checksum.c: fix build for generic csum_tcpudp_nofold commit 9ce357795ef208faa0d59894d9d119a7434e37f3 upstream. Fixed commit added from64to32 under _#ifndef do_csum_ but used it under _#ifndef csum_tcpudp_nofold_, breaking some builds (Fengguang's robot reported TILEGX's). Move from64to32 under the latter. Fixes: 150ae0e94634 ("lib/checksum.c: fix carry in csum_tcpudp_nofold") Reported-by: kbuild test robot Signed-off-by: Karl Beldan Cc: Eric Dumazet Cc: David S. Miller Signed-off-by: David S. Miller Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- lib/checksum.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/checksum.c b/lib/checksum.c index fcf38943132c..8b39e86dbab5 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -47,15 +47,6 @@ static inline unsigned short from32to16(unsigned int x) return x; } -static inline u32 from64to32(u64 x) -{ - /* add up 32-bit and 32-bit for 32+c bit */ - x = (x & 0xffffffff) + (x >> 32); - /* add up carry.. 
*/ - x = (x & 0xffffffff) + (x >> 32); - return (u32)x; -} - static unsigned int do_csum(const unsigned char *buff, int len) { int odd; @@ -190,6 +181,15 @@ csum_partial_copy(const void *src, void *dst, int len, __wsum sum) EXPORT_SYMBOL(csum_partial_copy); #ifndef csum_tcpudp_nofold +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, From d9c3bfc0e830c37c12961a26311f603eaea24046 Mon Sep 17 00:00:00 2001 From: Bo Shen Date: Tue, 20 Jan 2015 15:43:16 +0800 Subject: [PATCH 012/204] ASoC: atmel_ssc_dai: fix start event for I2S mode commit a43bd7e125143b875caae6d4f9938855b440faaf upstream. According to the I2S specification information as following: - WS = 0, channel 1 (left) - WS = 1, channel 2 (right) So, the start event should be TF/RF falling edge. Reported-by: Songjun Wu Signed-off-by: Bo Shen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/atmel/atmel_ssc_dai.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index f3fdfa07fcb9..c5f79591e68b 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -331,7 +331,6 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, struct atmel_pcm_dma_params *dma_params; int dir, channels, bits; u32 tfmr, rfmr, tcmr, rcmr; - int start_event; int ret; /* @@ -450,19 +449,10 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, * The SSC transmit clock is obtained from the BCLK signal on * on the TK line, and the SSC receive clock is * generated from the transmit clock. - * - * For single channel data, one sample is transferred - * on the falling edge of the LRC clock. 
- * For two channel data, one sample is - * transferred on both edges of the LRC clock. */ - start_event = ((channels == 1) - ? SSC_START_FALLING_RF - : SSC_START_EDGE_RF); - rcmr = SSC_BF(RCMR_PERIOD, 0) | SSC_BF(RCMR_STTDLY, START_DELAY) - | SSC_BF(RCMR_START, start_event) + | SSC_BF(RCMR_START, SSC_START_FALLING_RF) | SSC_BF(RCMR_CKI, SSC_CKI_RISING) | SSC_BF(RCMR_CKO, SSC_CKO_NONE) | SSC_BF(RCMR_CKS, SSC_CKS_CLOCK); @@ -470,14 +460,14 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, rfmr = SSC_BF(RFMR_FSEDGE, SSC_FSEDGE_POSITIVE) | SSC_BF(RFMR_FSOS, SSC_FSOS_NONE) | SSC_BF(RFMR_FSLEN, 0) - | SSC_BF(RFMR_DATNB, 0) + | SSC_BF(RFMR_DATNB, (channels - 1)) | SSC_BIT(RFMR_MSBF) | SSC_BF(RFMR_LOOP, 0) | SSC_BF(RFMR_DATLEN, (bits - 1)); tcmr = SSC_BF(TCMR_PERIOD, 0) | SSC_BF(TCMR_STTDLY, START_DELAY) - | SSC_BF(TCMR_START, start_event) + | SSC_BF(TCMR_START, SSC_START_FALLING_RF) | SSC_BF(TCMR_CKI, SSC_CKI_FALLING) | SSC_BF(TCMR_CKO, SSC_CKO_NONE) | SSC_BF(TCMR_CKS, SSC_CKS_PIN); @@ -486,7 +476,7 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, | SSC_BF(TFMR_FSDEN, 0) | SSC_BF(TFMR_FSOS, SSC_FSOS_NONE) | SSC_BF(TFMR_FSLEN, 0) - | SSC_BF(TFMR_DATNB, 0) + | SSC_BF(TFMR_DATNB, (channels - 1)) | SSC_BIT(TFMR_MSBF) | SSC_BF(TFMR_DATDEF, 0) | SSC_BF(TFMR_DATLEN, (bits - 1)); From 48cc051f29814e8c8d4498a4fb99204fa31e30e2 Mon Sep 17 00:00:00 2001 From: Eric Nelson Date: Fri, 30 Jan 2015 14:07:55 -0700 Subject: [PATCH 013/204] ASoC: sgtl5000: add delay before first I2C access commit 58cc9c9a175885bbf6bae3acf18233d0a8229a84 upstream. To quote from section 1.3.1 of the data sheet: The SGTL5000 has an internal reset that is deasserted 8 SYS_MCLK cycles after all power rails have been brought up. After this time, communication can start ... 1.0us represents 8 SYS_MCLK cycles at the minimum 8.0 MHz SYS_MCLK. 
Signed-off-by: Eric Nelson Reviewed-by: Fabio Estevam Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/sgtl5000.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 23670737116e..3b5f565a23a5 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1242,6 +1242,9 @@ static int sgtl5000_enable_regulators(struct snd_soc_codec *codec) /* wait for all power rails bring up */ udelay(10); + /* Need 8 clocks before I2C accesses */ + udelay(1); + /* read chip information */ reg = snd_soc_read(codec, SGTL5000_CHIP_ID); if (((reg & SGTL5000_PARTID_MASK) >> SGTL5000_PARTID_SHIFT) != From 15a9c9addacfc62780b7908e15e4fc87b508791f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Jan 2015 10:53:20 +0100 Subject: [PATCH 014/204] ALSA: ak411x: Fix stall in work callback commit 4161b4505f1690358ac0a9ee59845a7887336b21 upstream. When ak4114 work calls its callback and the callback invokes ak4114_reinit(), it stalls due to flush_delayed_work(). For avoiding this, control the reentrance by introducing a refcount. Also flush_delayed_work() is replaced with cancel_delayed_work_sync(). The exactly same bug is present in ak4113.c and fixed as well. 
Reported-by: Pavel Hofman Acked-by: Jaroslav Kysela Tested-by: Pavel Hofman Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/sound/ak4113.h | 2 +- include/sound/ak4114.h | 2 +- sound/i2c/other/ak4113.c | 17 ++++++++--------- sound/i2c/other/ak4114.c | 18 ++++++++---------- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/sound/ak4113.h b/include/sound/ak4113.h index 2609048c1d44..3a34f6edc2d1 100644 --- a/include/sound/ak4113.h +++ b/include/sound/ak4113.h @@ -286,7 +286,7 @@ struct ak4113 { ak4113_write_t *write; ak4113_read_t *read; void *private_data; - unsigned int init:1; + atomic_t wq_processing; spinlock_t lock; unsigned char regmap[AK4113_WRITABLE_REGS]; struct snd_kcontrol *kctls[AK4113_CONTROLS]; diff --git a/include/sound/ak4114.h b/include/sound/ak4114.h index 3ce69fd92523..69441161009c 100644 --- a/include/sound/ak4114.h +++ b/include/sound/ak4114.h @@ -168,7 +168,7 @@ struct ak4114 { ak4114_write_t * write; ak4114_read_t * read; void * private_data; - unsigned int init: 1; + atomic_t wq_processing; spinlock_t lock; unsigned char regmap[7]; unsigned char txcsb[5]; diff --git a/sound/i2c/other/ak4113.c b/sound/i2c/other/ak4113.c index e04e750a77ed..7a9149bb2a38 100644 --- a/sound/i2c/other/ak4113.c +++ b/sound/i2c/other/ak4113.c @@ -56,8 +56,7 @@ static inline unsigned char reg_read(struct ak4113 *ak4113, unsigned char reg) static void snd_ak4113_free(struct ak4113 *chip) { - chip->init = 1; /* don't schedule new work */ - mb(); + atomic_inc(&chip->wq_processing); /* don't schedule new work */ cancel_delayed_work_sync(&chip->work); kfree(chip); } @@ -89,6 +88,7 @@ int snd_ak4113_create(struct snd_card *card, ak4113_read_t *read, chip->write = write; chip->private_data = private_data; INIT_DELAYED_WORK(&chip->work, ak4113_stats); + atomic_set(&chip->wq_processing, 0); for (reg = 0; reg < AK4113_WRITABLE_REGS ; reg++) chip->regmap[reg] = pgm[reg]; @@ -139,13 +139,11 @@ static void ak4113_init_regs(struct 
ak4113 *chip) void snd_ak4113_reinit(struct ak4113 *chip) { - chip->init = 1; - mb(); - flush_delayed_work(&chip->work); + if (atomic_inc_return(&chip->wq_processing) == 1) + cancel_delayed_work_sync(&chip->work); ak4113_init_regs(chip); /* bring up statistics / event queing */ - chip->init = 0; - if (chip->kctls[0]) + if (atomic_dec_and_test(&chip->wq_processing)) schedule_delayed_work(&chip->work, HZ / 10); } EXPORT_SYMBOL_GPL(snd_ak4113_reinit); @@ -632,8 +630,9 @@ static void ak4113_stats(struct work_struct *work) { struct ak4113 *chip = container_of(work, struct ak4113, work.work); - if (!chip->init) + if (atomic_inc_return(&chip->wq_processing) == 1) snd_ak4113_check_rate_and_errors(chip, chip->check_flags); - schedule_delayed_work(&chip->work, HZ / 10); + if (atomic_dec_and_test(&chip->wq_processing)) + schedule_delayed_work(&chip->work, HZ / 10); } diff --git a/sound/i2c/other/ak4114.c b/sound/i2c/other/ak4114.c index 5bf4fca19e48..84a1ee7af552 100644 --- a/sound/i2c/other/ak4114.c +++ b/sound/i2c/other/ak4114.c @@ -66,8 +66,7 @@ static void reg_dump(struct ak4114 *ak4114) static void snd_ak4114_free(struct ak4114 *chip) { - chip->init = 1; /* don't schedule new work */ - mb(); + atomic_inc(&chip->wq_processing); /* don't schedule new work */ cancel_delayed_work_sync(&chip->work); kfree(chip); } @@ -100,6 +99,7 @@ int snd_ak4114_create(struct snd_card *card, chip->write = write; chip->private_data = private_data; INIT_DELAYED_WORK(&chip->work, ak4114_stats); + atomic_set(&chip->wq_processing, 0); for (reg = 0; reg < 7; reg++) chip->regmap[reg] = pgm[reg]; @@ -152,13 +152,11 @@ static void ak4114_init_regs(struct ak4114 *chip) void snd_ak4114_reinit(struct ak4114 *chip) { - chip->init = 1; - mb(); - flush_delayed_work(&chip->work); + if (atomic_inc_return(&chip->wq_processing) == 1) + cancel_delayed_work_sync(&chip->work); ak4114_init_regs(chip); /* bring up statistics / event queing */ - chip->init = 0; - if (chip->kctls[0]) + if 
(atomic_dec_and_test(&chip->wq_processing)) schedule_delayed_work(&chip->work, HZ / 10); } @@ -612,10 +610,10 @@ static void ak4114_stats(struct work_struct *work) { struct ak4114 *chip = container_of(work, struct ak4114, work.work); - if (!chip->init) + if (atomic_inc_return(&chip->wq_processing) == 1) snd_ak4114_check_rate_and_errors(chip, chip->check_flags); - - schedule_delayed_work(&chip->work, HZ / 10); + if (atomic_dec_and_test(&chip->wq_processing)) + schedule_delayed_work(&chip->work, HZ / 10); } EXPORT_SYMBOL(snd_ak4114_create); From 677616e3ecaa43e77a81e36aa162d32bdba01476 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 31 Jul 2014 11:30:17 +0800 Subject: [PATCH 015/204] smpboot: Add missing get_online_cpus() in smpboot_register_percpu_thread() commit 4bee96860a65c3a62d332edac331b3cf936ba3ad upstream. The following race exists in the smpboot percpu threads management: CPU0 CPU1 cpu_up(2) get_online_cpus(); smpboot_create_threads(2); smpboot_register_percpu_thread(); for_each_online_cpu(); __smpboot_create_thread(); __cpu_up(2); This results in a missing per cpu thread for the newly onlined cpu2 and in a NULL pointer dereference on a consecutive offline of that cpu. Protect smpboot_register_percpu_thread() with get_online_cpus() to prevent that. [ tglx: Massaged changelog and removed the change in smpboot_unregister_percpu_thread() because that's an optimization and therefore not stable material. ] Signed-off-by: Lai Jiangshan Cc: Thomas Gleixner Cc: Rusty Russell Cc: Peter Zijlstra Cc: Srivatsa S.
Bhat Cc: David Rientjes Link: http://lkml.kernel.org/r/1406777421-12830-1-git-send-email-laijs@cn.fujitsu.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/smpboot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 02fc5c933673..7020eecb398b 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -279,6 +279,7 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) unsigned int cpu; int ret = 0; + get_online_cpus(); mutex_lock(&smpboot_threads_lock); for_each_online_cpu(cpu) { ret = __smpboot_create_thread(plug_thread, cpu); @@ -291,6 +292,7 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) list_add(&plug_thread->list, &hotplug_threads); out: mutex_unlock(&smpboot_threads_lock); + put_online_cpus(); return ret; } EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread); From f9e5b0ded4be49805adcceff46c1f44137773b24 Mon Sep 17 00:00:00 2001 From: Petr Matousek Date: Tue, 23 Sep 2014 20:22:30 +0200 Subject: [PATCH 016/204] kvm: vmx: handle invvpid vm exit gracefully commit a642fc305053cc1c6e47e4f4df327895747ab485 upstream. On systems with invvpid instruction support (corresponding bit in IA32_VMX_EPT_VPID_CAP MSR is set) guest invocation of invvpid causes vm exit, which is currently not handled and results in propagation of unknown exit to userspace. Fix this by installing an invvpid vm exit handler. This is CVE-2014-3646. 
Cc: stable@vger.kernel.org Signed-off-by: Petr Matousek Signed-off-by: Paolo Bonzini [wangkai: Backport to 3.10: adjust context] Signed-off-by: Wang Kai Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/vmx.h | 2 ++ arch/x86/kvm/vmx.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 7a34e8fe54bd..dccea7f29051 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 #define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 @@ -112,6 +113,7 @@ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ + { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_INVPCID, "INVPCID" }, \ { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7112be5f1eaf..b8a0ae75619f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6248,6 +6248,12 @@ static int handle_invept(struct kvm_vcpu *vcpu) return 1; } +static int handle_invvpid(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs @@ -6293,6 +6299,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_INVEPT] = handle_invept, + [EXIT_REASON_INVVPID] = handle_invvpid, }; static const int kvm_vmx_max_exit_handlers = @@ -6519,7 +6526,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: - case EXIT_REASON_INVEPT: + case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: /* * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting! From 747a43ad696d6112d99d68c8462e4ddab0b7bab2 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 8 Oct 2014 09:02:13 -0700 Subject: [PATCH 017/204] x86,kvm,vmx: Preserve CR4 across VM entry commit d974baa398f34393db76be45f7d4d04fbdbb4a0a upstream. CR4 isn't constant; at least the TSD and PCE bits can vary. TBH, treating CR0 and CR3 as constant scares me a bit, too, but it looks like it's correct. This adds a branch and a read from cr4 to each vm entry. Because it is extremely likely that consecutive entries into the same vcpu will have the same host cr4 value, this fixes up the vmcs instead of restoring cr4 after the fact. A subsequent patch will add a kernel-wide cr4 shadow, reducing the overhead in the common case to just two memory reads and a branch. 
Signed-off-by: Andy Lutomirski Acked-by: Paolo Bonzini Cc: Petr Matousek Cc: Gleb Natapov Signed-off-by: Linus Torvalds [wangkai: Backport to 3.10: adjust context] Signed-off-by: Wang Kai Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b8a0ae75619f..04cc2fa7744f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -438,6 +438,7 @@ struct vcpu_vmx { #endif int gs_ldt_reload_needed; int fs_reload_needed; + unsigned long vmcs_host_cr4; /* May not match real cr4 */ } host_state; struct { int vm86_active; @@ -4076,11 +4077,16 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) u32 low32, high32; unsigned long tmpl; struct desc_ptr dt; + unsigned long cr4; vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ - vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ + /* Save the most likely value for this task's CR4 in the VMCS. */ + cr4 = read_cr4(); + vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ + vmx->host_state.vmcs_host_cr4 = cr4; + vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ #ifdef CONFIG_X86_64 /* @@ -6971,7 +6977,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long debugctlmsr; + unsigned long debugctlmsr, cr4; /* Record the guest's net vcpu time for enforced NMI injections. 
*/ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) @@ -6992,6 +6998,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + cr4 = read_cr4(); + if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { + vmcs_writel(HOST_CR4, cr4); + vmx->host_state.vmcs_host_cr4 = cr4; + } + /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug From 967d2ebb7c8d8a3903fc7da303390178a63bb2af Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Tue, 10 Feb 2015 01:14:07 +0100 Subject: [PATCH 018/204] crypto: crc32c - add missing crypto module alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The backport of commit 5d26a105b5a7 ("crypto: prefix module autoloading with "crypto-"") lost the MODULE_ALIAS_CRYPTO() annotation of crc32c.c. Add it to fix the reported filesystem related regressions. 
Signed-off-by: Mathias Krause Reported-by: Philip Müller Cc: Kees Cook Cc: Rob McCathie Cc: Luis Henriques Cc: Kamal Mostafa Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- crypto/crc32c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/crc32c.c b/crypto/crc32c.c index 06f7018c9d95..238f0e627ef3 100644 --- a/crypto/crc32c.c +++ b/crypto/crc32c.c @@ -170,3 +170,4 @@ module_exit(crc32c_mod_fini); MODULE_AUTHOR("Clay Haapala "); MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("crc32c"); From 5cfc71ce138e79ceb6250f78137dd05ba52e9d34 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Feb 2015 14:48:30 +0800 Subject: [PATCH 019/204] Linux 3.10.69 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dd67be657716..81ede20061cf 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 68 +SUBLEVEL = 69 EXTRAVERSION = NAME = TOSSUG Baby Fish From 1d480edb0cee748ef61b4b8f7b21ab2d1c3ff0a2 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 15 Jan 2015 13:18:40 -0500 Subject: [PATCH 020/204] ip: zero sockaddr returned on error queue [ Upstream commit f812116b174e59a350acc8e4856213a166a91222 ] The sockaddr is returned in IP(V6)_RECVERR as part of errhdr. That structure is defined and allocated on the stack as struct { struct sock_extended_err ee; struct sockaddr_in(6) offender; } errhdr; The second part is only initialized for certain SO_EE_ORIGIN values. Always initialize it completely. An MTU exceeded error on a SOCK_RAW/IPPROTO_RAW is one example that would return uninitialized bytes. Signed-off-by: Willem de Bruijn ---- Also verified that there is no padding between errhdr.ee and errhdr.offender that could leak additional kernel data. Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 8 ++------ net/ipv6/datagram.c | 10 +++------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 23e6ab0a2dc0..f6603142cb33 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -410,15 +410,11 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; - sin->sin_family = AF_UNSPEC; + memset(sin, 0, sizeof(*sin)); if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { - struct inet_sock *inet = inet_sk(sk); - sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - sin->sin_port = 0; - memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - if (inet->cmsg_flags) + if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8997340e3742..ce17d3da9b2b 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -374,11 +374,10 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; - sin->sin6_family = AF_UNSPEC; + memset(sin, 0, sizeof(*sin)); + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; - sin->sin6_flowinfo = 0; - sin->sin6_port = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -387,12 +386,9 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) ipv6_iface_scope_id(&sin->sin6_addr, IP6CB(skb)->iif); } else { - struct inet_sock *inet = inet_sk(sk); - ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin->sin6_addr); - sin->sin6_scope_id = 0; - if (inet->cmsg_flags) + if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } } From 06b5ff9f351205b2900c9629addf74a4c875b12c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: 
Thu, 15 Jan 2015 17:04:22 -0800 Subject: [PATCH 021/204] net: rps: fix cpu unplug [ Upstream commit ac64da0b83d82abe62f78b3d0e21cca31aea24fa ] softnet_data.input_pkt_queue is protected by a spinlock that we must hold when transferring packets from victim queue to an active one. This is because other cpus could still be trying to enqueue packets into victim queue. A second problem is that when we transfer the NAPI poll_list from victim to current cpu, we absolutely need to special case the percpu backlog, because we do not want to add complex locking to protect process_queue : Only owner cpu is allowed to manipulate it, unless cpu is offline. Based on initial patch from Prasad Sodagudi & Subash Abhinov Kasiviswanathan. This version is better because we do not slow down packet processing, only make migration safer. Reported-by: Prasad Sodagudi Reported-by: Subash Abhinov Kasiviswanathan Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index cca7ae0ba915..c310440309bb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6015,10 +6015,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } - /* Append NAPI poll list from offline CPU. */ - if (!list_empty(&oldsd->poll_list)) { - list_splice_init(&oldsd->poll_list, &sd->poll_list); - raise_softirq_irqoff(NET_RX_SOFTIRQ); + /* Append NAPI poll list from offline CPU, with one exception : + * process_backlog() must be called by cpu owning percpu backlog. + * We properly handle process_queue & input_pkt_queue later.
+ */ + while (!list_empty(&oldsd->poll_list)) { + struct napi_struct *napi = list_first_entry(&oldsd->poll_list, + struct napi_struct, + poll_list); + + list_del_init(&napi->poll_list); + if (napi->poll == process_backlog) + napi->state = 0; + else + ____napi_schedule(sd, napi); } raise_softirq_irqoff(NET_TX_SOFTIRQ); @@ -6029,7 +6039,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, netif_rx(skb); input_queue_head_incr(oldsd); } - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { + while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); input_queue_head_incr(oldsd); } From fa3f55df7d5ae2d978024dda5b236c645a7c7819 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Thu, 15 Jan 2015 22:34:25 +0100 Subject: [PATCH 022/204] ipv6: stop sending PTB packets for MTU < 1280 [ Upstream commit 9d289715eb5c252ae15bd547cb252ca547a3c4f2 ] Reduce the attack vector and stop generating IPv6 Fragment Header for paths with an MTU smaller than the minimum required IPv6 MTU size (1280 byte) - called atomic fragments. See IETF I-D "Deprecating the Generation of IPv6 Atomic Fragments" [1] for more information and how this "feature" can be misused. [1] https://tools.ietf.org/html/draft-ietf-6man-deprecate-atomfrag-generation-00 Signed-off-by: Fernando Gont Signed-off-by: Hagen Paul Pfeifer Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b2614b22622b..92274796eb71 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1141,12 +1141,9 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct net *net = dev_net(dst->dev); rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) { - u32 features = dst_metric(dst, RTAX_FEATURES); + if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(dst, RTAX_FEATURES, features); - } + dst_metric_set(dst, RTAX_MTU, mtu); rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } From e98d2751bec14b3279edaaa0e5f6584254e13126 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Jan 2015 07:56:18 -0800 Subject: [PATCH 023/204] netxen: fix netxen_nic_poll() logic [ Upstream commit 6088beef3f7517717bd21d90b379714dd0837079 ] NAPI poll logic now enforces that a poller returns exactly the budget when it wants to be called again. If a driver limits TX completion, it has to return budget as well when the limit is hit, not the number of received packets. Reported-and-tested-by: Mike Galbraith Signed-off-by: Eric Dumazet Fixes: d75b1ade567f ("net: less interrupt masking in NAPI") Cc: Manish Chopra Acked-by: Manish Chopra Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index af951f343ff6..50104a7e963f 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -2315,7 +2315,10 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget) work_done = netxen_process_rcv_ring(sds_ring, budget); - if ((work_done < budget) && tx_complete) { + if (!tx_complete) + work_done = budget; + + if (work_done < budget) { napi_complete(&sds_ring->napi); if (test_bit(__NX_DEV_UP, &adapter->state)) netxen_nic_enable_int(sds_ring); From 727ab4c06af65c1b2313c95dfcd8827318d5a438 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Jan 2015 18:26:54 +0100 Subject: [PATCH 024/204] net: sctp: fix slab corruption from use after free on INIT collisions [ Upstream commit 600ddd6825543962fb807884169e57b580dba208 ] When hitting an INIT collision case during the 4WHS with AUTH enabled, as already described in detail in commit 1be9a950c646 ("net: sctp: inherit auth_capable on INIT collisions"), it can happen that we occasionally still remotely trigger the following panic on server side which seems to have been uncovered after the fix from commit 1be9a950c646 ... [ 533.876389] BUG: unable to handle kernel paging request at 00000000ffffffff [ 533.913657] IP: [] __kmalloc+0x95/0x230 [ 533.940559] PGD 5030f2067 PUD 0 [ 533.957104] Oops: 0000 [#1] SMP [ 533.974283] Modules linked in: sctp mlx4_en [...] [ 534.939704] Call Trace: [ 534.951833] [] ? crypto_init_shash_ops+0x60/0xf0 [ 534.984213] [] crypto_init_shash_ops+0x60/0xf0 [ 535.015025] [] __crypto_alloc_tfm+0x6d/0x170 [ 535.045661] [] crypto_alloc_base+0x4c/0xb0 [ 535.074593] [] ? 
_raw_spin_lock_bh+0x12/0x50 [ 535.105239] [] sctp_inet_listen+0x161/0x1e0 [sctp] [ 535.138606] [] SyS_listen+0x9d/0xb0 [ 535.166848] [] system_call_fastpath+0x16/0x1b ... or depending on the application, for example this one: [ 1370.026490] BUG: unable to handle kernel paging request at 00000000ffffffff [ 1370.026506] IP: [] kmem_cache_alloc+0x75/0x1d0 [ 1370.054568] PGD 633c94067 PUD 0 [ 1370.070446] Oops: 0000 [#1] SMP [ 1370.085010] Modules linked in: sctp kvm_amd kvm [...] [ 1370.963431] Call Trace: [ 1370.974632] [] ? SyS_epoll_ctl+0x53f/0x960 [ 1371.000863] [] SyS_epoll_ctl+0x53f/0x960 [ 1371.027154] [] ? anon_inode_getfile+0xd3/0x170 [ 1371.054679] [] ? __alloc_fd+0xa7/0x130 [ 1371.080183] [] system_call_fastpath+0x16/0x1b With slab debugging enabled, we can see that the poison has been overwritten: [ 669.826368] BUG kmalloc-128 (Tainted: G W ): Poison overwritten [ 669.826385] INFO: 0xffff880228b32e50-0xffff880228b32e50. First byte 0x6a instead of 0x6b [ 669.826414] INFO: Allocated in sctp_auth_create_key+0x23/0x50 [sctp] age=3 cpu=0 pid=18494 [ 669.826424] __slab_alloc+0x4bf/0x566 [ 669.826433] __kmalloc+0x280/0x310 [ 669.826453] sctp_auth_create_key+0x23/0x50 [sctp] [ 669.826471] sctp_auth_asoc_create_secret+0xcb/0x1e0 [sctp] [ 669.826488] sctp_auth_asoc_init_active_key+0x68/0xa0 [sctp] [ 669.826505] sctp_do_sm+0x29d/0x17c0 [sctp] [...]
[ 669.826629] INFO: Freed in kzfree+0x31/0x40 age=1 cpu=0 pid=18494 [ 669.826635] __slab_free+0x39/0x2a8 [ 669.826643] kfree+0x1d6/0x230 [ 669.826650] kzfree+0x31/0x40 [ 669.826666] sctp_auth_key_put+0x19/0x20 [sctp] [ 669.826681] sctp_assoc_update+0x1ee/0x2d0 [sctp] [ 669.826695] sctp_do_sm+0x674/0x17c0 [sctp] Since this only triggers in some collision-cases with AUTH, the problem at heart is that sctp_auth_key_put() on asoc->asoc_shared_key is called twice when having refcnt 1, once directly in sctp_assoc_update() and yet again from within sctp_auth_asoc_init_active_key() via sctp_assoc_update() on the already kzfree'd memory, which is also consistent with the observation of the poison decrease from 0x6b to 0x6a (note: the overwrite is detected at a later point in time when poison is checked on new allocation). Reference counting of auth keys revisited: Shared keys for AUTH chunks are being stored in endpoints and associations in endpoint_shared_keys list. On endpoint creation, a null key is being added; on association creation, all endpoint shared keys are being cached and thus cloned over to the association. struct sctp_shared_key only holds a pointer to the actual key bytes, that is, struct sctp_auth_bytes which keeps track of users internally through refcounting. Naturally, on assoc or endpoint destruction, sctp_shared_key are being destroyed directly and the reference on sctp_auth_bytes dropped. User space can add keys to either list via setsockopt(2) through struct sctp_authkey and by passing that to sctp_auth_set_key() which replaces or adds a new auth key. There, sctp_auth_create_key() creates a new sctp_auth_bytes with refcount 1 and in case of replacement drops the reference on the old sctp_auth_bytes. A key can be set active from user space through setsockopt() on the id via sctp_auth_set_active_key(), which iterates through either endpoint_shared_keys and in case of an assoc, invokes (one of various places) sctp_auth_asoc_init_active_key().
sctp_auth_asoc_init_active_key() computes the actual secret from local's and peer's random, hmac and shared key parameters and returns a new key directly as sctp_auth_bytes, that is asoc->asoc_shared_key, plus drops the reference if there was a previous one. The secret, on which we eventually double drop the ref, comes from sctp_auth_asoc_set_secret() with initial refcount of 1, which also stays unchanged eventually in sctp_assoc_update(). This key is later being used for crypto layer to set the key for the hash in crypto_hash_setkey() from sctp_auth_calculate_hmac(). To close the loop: asoc->asoc_shared_key is freshly allocated secret material and independent of the sctp_shared_key management keeping track of only shared keys in endpoints and assocs. Hence, also commit 4184b2a79a76 ("net: sctp: fix memory leak in auth key management") is independent of this bug here since it concerns a different layer (though same structures being used eventually). asoc->asoc_shared_key is reference dropped correctly on assoc destruction in sctp_association_free() and when active keys are being replaced in sctp_auth_asoc_init_active_key(), it always has a refcount of 1. Hence, it's freed prematurely in sctp_assoc_update(). Simple fix is to remove that sctp_auth_key_put() from there which fixes these panics. Fixes: 730fc3d05cd4 ("[SCTP]: Implete SCTP-AUTH parameter processing") Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/associola.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ca4a1a1b8e69..6360a14edeab 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1297,7 +1297,6 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.peer_hmacs = new->peer.peer_hmacs; new->peer.peer_hmacs = NULL; - sctp_auth_key_put(asoc->asoc_shared_key); sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); } From 8c6dafeba6f8d1435f05e39142b50bc605f7a91c Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 23 Jan 2015 12:01:26 +0100 Subject: [PATCH 025/204] ipv4: try to cache dst_entries which would cause a redirect [ Upstream commit df4d92549f23e1c037e83323aff58a21b3de7fe0 ] Not caching dst_entries which cause redirects could be exploited by hosts on the same subnet, causing a severe DoS attack. This effect aggravated since commit f88649721268999 ("ipv4: fix dst race in sk_dst_get()"). Lookups causing redirects will be allocated with DST_NOCACHE set which will force dst_release to free them via RCU. Unfortunately waiting for RCU grace period just takes too long, we can end up with >1M dst_entries waiting to be released and the system will run OOM. rcuos threads cannot catch up under high softirq load. Attaching the flag to emit a redirect later on to the specific skb allows us to cache those dst_entries thus reducing the pressure on allocation and deallocation. This issue was discovered by Marcelo Leitner. Cc: Julian Anastasov Signed-off-by: Marcelo Leitner Signed-off-by: Florian Westphal Signed-off-by: Hannes Frederic Sowa Signed-off-by: Julian Anastasov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 11 ++++++----- net/ipv4/ip_forward.c | 3 ++- net/ipv4/route.c | 9 +++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 8695359982d1..e47ad4c01608 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -37,11 +37,12 @@ struct inet_skb_parm { struct ip_options opt; /* Compiled IP options */ unsigned char flags; -#define IPSKB_FORWARDED 1 -#define IPSKB_XFRM_TUNNEL_SIZE 2 -#define IPSKB_XFRM_TRANSFORMED 4 -#define IPSKB_FRAG_COMPLETE 8 -#define IPSKB_REROUTED 16 +#define IPSKB_FORWARDED BIT(0) +#define IPSKB_XFRM_TUNNEL_SIZE BIT(1) +#define IPSKB_XFRM_TRANSFORMED BIT(2) +#define IPSKB_FRAG_COMPLETE BIT(3) +#define IPSKB_REROUTED BIT(4) +#define IPSKB_DOREDIRECT BIT(5) u16 frag_max_size; }; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index bd1c5baf69be..31ee5c6033df 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -175,7 +175,8 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) + if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr && + !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d4d162eac4df..e23c5f64286b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1514,11 +1514,10 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && + skb->protocol == htons(ETH_P_IP) && (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { - flags |= RTCF_DOREDIRECT; - do_cache = false; - } + inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) + IPCB(skb)->flags |= IPSKB_DOREDIRECT; if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). 
Do not create route, if it is @@ -2255,6 +2254,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; + if (IPCB(skb)->flags & IPSKB_DOREDIRECT) + r->rtm_flags |= RTCF_DOREDIRECT; if (nla_put_be32(skb, RTA_DST, dst)) goto nla_put_failure; From bd1f50c627afe99e374da23b76f0d0ee83244223 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 24 Jan 2015 08:02:40 +1100 Subject: [PATCH 026/204] udp_diag: Fix socket skipping within chain [ Upstream commit 86f3cddbc3037882414c7308973530167906b7e9 ] While working on rhashtable walking I noticed that the UDP diag dumping code is buggy. In particular, the socket skipping within a chain never happens, even though we record the number of sockets that should be skipped. As this code was supposedly copied from TCP, this patch does what TCP does and resets num before we walk a chain. Signed-off-by: Herbert Xu Acked-by: Pavel Emelyanov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp_diag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 7927db0a9279..4a000f1dd757 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -99,11 +99,13 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin s_slot = cb->args[0]; num = s_num = cb->args[1]; - for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { + for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { struct sock *sk; struct hlist_nulls_node *node; struct udp_hslot *hslot = &table->hash[slot]; + num = 0; + if (hlist_nulls_empty(&hslot->head)) continue; From 688ba993d1b7e3c84b1a87f39cd14b01b4716306 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Fri, 23 Jan 2015 22:26:02 +0000 Subject: [PATCH 027/204] ping: Fix race in free in receive path [ Upstream commit fc752f1f43c1c038a2c6ae58cc739ebb5953ccb0 ] An exception is seen in ICMP ping receive path where the skb destructor sock_rfree() tries to access a freed socket. This happens because ping_rcv() releases socket reference with sock_put() and this internally frees up the socket. Later icmp_rcv() will try to free the skb and as part of this, skb destructor is called and which leads to a kernel panic as the socket is freed already in ping_rcv(). -->|exception -007|sk_mem_uncharge -007|sock_rfree -008|skb_release_head_state -009|skb_release_all -009|__kfree_skb -010|kfree_skb -011|icmp_rcv -012|ip_local_deliver_finish Fix this incorrect free by cloning this skb and processing this cloned skb instead. This patch was suggested by Eric Dumazet Signed-off-by: Subash Abhinov Kasiviswanathan Cc: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ping.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index aa857a4a06a8..698f3a2ac5ae 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -720,8 +720,11 @@ void ping_rcv(struct sk_buff *skb) sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), skb->dev->ifindex); if (sk != NULL) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + pr_debug("rcv on socket %p\n", sk); - ping_queue_rcv_skb(sk, skb_get(skb)); + if (skb2) + ping_queue_rcv_skb(sk, skb2); sock_put(sk); return; } From 650a7901c0fe0c3b0e5d9c1c945f140058585a6f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 26 Jan 2015 15:11:17 +0100 Subject: [PATCH 028/204] ipv6: replacing a rt6_info needs to purge possible propagated rt6_infos too [ Upstream commit 6e9e16e6143b725662e47026a1d0f270721cdd24 ] Lubomir Rintel reported that during replacing a route the interface reference counter isn't correctly decremented. To quote bug : | [root@rhel7-5 lkundrak]# sh -x lal | + ip link add dev0 type dummy | + ip link set dev0 up | + ip link add dev1 type dummy | + ip link set dev1 up | + ip addr add 2001:db8:8086::2/64 dev dev0 | + ip route add 2001:db8:8086::/48 dev dev0 proto static metric 20 | + ip route add 2001:db8:8088::/48 dev dev1 proto static metric 10 | + ip route replace 2001:db8:8086::/48 dev dev1 proto static metric 20 | + ip link del dev0 type dummy | Message from syslogd@rhel7-5 at Jan 23 10:54:41 ... | kernel:unregister_netdevice: waiting for dev0 to become free. Usage count = 2 | | Message from syslogd@rhel7-5 at Jan 23 10:54:51 ... | kernel:unregister_netdevice: waiting for dev0 to become free. Usage count = 2 During replacement of a rt6_info we must walk all parent nodes and check if the to be replaced rt6_info got propagated. If so, replace it with an alive one. 
Fixes: 4a287eba2de3957 ("IPv6 routing, NLM_F_* flag support: REPLACE and EXCL flags support, warn about missing CREATE flag") Reported-by: Lubomir Rintel Signed-off-by: Hannes Frederic Sowa Tested-by: Lubomir Rintel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 009c9620f442..ceeb9458bb60 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -638,6 +638,29 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } +static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, + struct net *net) +{ + if (atomic_read(&rt->rt6i_ref) != 1) { + /* This route is used as dummy address holder in some split + * nodes. It is not leaked, but it still holds other resources, + * which must be released in time. So, scan ascendant nodes + * and replace dummy references to this route with references + * to still alive ones. + */ + while (fn) { + if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { + fn->leaf = fib6_find_prefix(net, fn); + atomic_inc(&fn->leaf->rt6i_ref); + rt6_release(rt); + } + fn = fn->parent; + } + /* No more references are possible at this point. */ + BUG_ON(atomic_read(&rt->rt6i_ref) != 1); + } +} + /* * Insert routing information in a node. 
*/ @@ -775,11 +798,12 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info); - rt6_release(iter); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } + fib6_purge_rt(iter, fn, info->nl_net); + rt6_release(iter); } return 0; @@ -1284,24 +1308,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fn = fib6_repair_tree(net, fn); } - if (atomic_read(&rt->rt6i_ref) != 1) { - /* This route is used as dummy address holder in some split - * nodes. It is not leaked, but it still holds other resources, - * which must be released in time. So, scan ascendant nodes - * and replace dummy references to this route with references - * to still alive ones. - */ - while (fn) { - if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { - fn->leaf = fib6_find_prefix(net, fn); - atomic_inc(&fn->leaf->rt6i_ref); - rt6_release(rt); - } - fn = fn->parent; - } - /* No more references are possible at this point. */ - BUG_ON(atomic_read(&rt->rt6i_ref) != 1); - } + fib6_purge_rt(rt, fn, net); inet6_rt_notify(RTM_DELROUTE, rt, info); rt6_release(rt); From b4faf21b76b2a0cf00e5a75e73efb22a09868b18 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 28 Jan 2015 16:23:11 -0800 Subject: [PATCH 029/204] bridge: dont send notification when skb->len == 0 in rtnl_bridge_notify [ Upstream commit 59ccaaaa49b5b096cdc1f16706a9f931416b2332 ] Reported in: https://bugzilla.kernel.org/show_bug.cgi?id=92081 This patch avoids calling rtnl_notify if the device ndo_bridge_getlink handler does not return any bytes in the skb. Alternately, the skb->len check can be moved inside rtnl_notify. For the bridge vlan case described in 92081, there is also a fix needed in bridge driver to generate a proper notification. Will fix that in subsequent patch. 
v2: rebase patch on net tree Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 25c4dd563a79..279b5dcf09ae 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2477,12 +2477,16 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) goto errout; } + if (!skb->len) + goto errout; + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; errout: WARN_ON(err == -EMSGSIZE); kfree_skb(skb); - rtnl_set_sk_err(net, RTNLGRP_LINK, err); + if (err) + rtnl_set_sk_err(net, RTNLGRP_LINK, err); return err; } From 23990c29a7bc207bfcb3026e286e804c7cdee933 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Jan 2015 05:47:11 -0800 Subject: [PATCH 030/204] tcp: ipv4: initialize unicast_sock sk_pacing_rate [ Upstream commit 811230cd853d62f09ed0addd0ce9a1b9b0e13fb5 ] When I added sk_pacing_rate field, I forgot to initialize its value in the per cpu unicast_sock used in ip_send_unicast_reply() This means that for sch_fq users, RST packets, or ACK packets sent on behalf of TIME_WAIT sockets might be sent too slowly or even dropped once we reach the per flow limit. Signed-off-by: Eric Dumazet Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing") Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 22fa05e041ea..ee104cbcefd2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1465,6 +1465,7 @@ static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { .sk_wmem_alloc = ATOMIC_INIT(1), .sk_allocation = GFP_ATOMIC, .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), + .sk_pacing_rate = ~0U, }, .pmtudisc = IP_PMTUDISC_WANT, .uc_ttl = -1, From 6bed3166d097a20ffcf2d440825c611500b0ff97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Jan 2015 21:35:05 -0800 Subject: [PATCH 031/204] ipv4: tcp: get rid of ugly unicast_sock [ Upstream commit bdbbb8527b6f6a358dbcb70dac247034d665b8e4 ] In commit be9f4a44e7d41 ("ipv4: tcp: remove per net tcp_sock") I tried to address contention on a socket lock, but the solution I chose was horrible : commit 3a7c384ffd57e ("ipv4: tcp: unicast_sock should not land outside of TCP stack") addressed a selinux regression. commit 0980e56e506b ("ipv4: tcp: set unicast_sock uc_ttl to -1") took care of another regression. commit b5ec8eeac46 ("ipv4: fix ip_send_skb()") fixed another regression. commit 811230cd85 ("tcp: ipv4: initialize unicast_sock sk_pacing_rate") was another shot in the dark. Really, just use a proper socket per cpu, and remove the skb_orphan() call, to re-enable flow control. This solves a serious problem with FQ packet scheduler when used in hostile environments, as we do not want to allocate a flow structure for every RST packet sent in response to a spoofed packet. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 2 +- include/net/netns/ipv4.h | 1 + net/ipv4/ip_output.c | 30 +++------------------------- net/ipv4/tcp_ipv4.c | 43 ++++++++++++++++++++++++++++++++-------- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index e47ad4c01608..0a62365149e2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -163,7 +163,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2ba9de89e8ec..03e6378d5353 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -43,6 +43,7 @@ struct netns_ipv4 { struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; unsigned int tcp_metrics_hash_log; + struct sock * __percpu *tcp_sk; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ee104cbcefd2..def18547748e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1454,24 +1454,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, /* * Generic function to send a packet as reply to another packet. * Used to send some TCP resets/acks so far. - * - * Use a fake percpu inet socket to avoid false sharing and contention. 
*/ -static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { - .sk = { - .__sk_common = { - .skc_refcnt = ATOMIC_INIT(1), - }, - .sk_wmem_alloc = ATOMIC_INIT(1), - .sk_allocation = GFP_ATOMIC, - .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), - .sk_pacing_rate = ~0U, - }, - .pmtudisc = IP_PMTUDISC_WANT, - .uc_ttl = -1, -}; - -void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len) { @@ -1479,9 +1463,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, struct ipcm_cookie ipc; struct flowi4 fl4; struct rtable *rt = skb_rtable(skb); + struct net *net = sock_net(sk); struct sk_buff *nskb; - struct sock *sk; - struct inet_sock *inet; int err; if (ip_options_echo(&replyopts.opt.opt, skb)) @@ -1509,15 +1492,11 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, if (IS_ERR(rt)) return; - inet = &get_cpu_var(unicast_sock); + inet_sk(sk)->tos = arg->tos; - inet->tos = arg->tos; - sk = &inet->sk; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; - sock_net_set(sk, net); - __skb_queue_head_init(&sk->sk_write_queue); sk->sk_sndbuf = sysctl_wmem_default; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); @@ -1533,13 +1512,10 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - skb_orphan(nskb); skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } out: - put_cpu_var(unicast_sock); - ip_rt_put(rt); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e025c1c788a1..cce35e5a7ee6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -707,7 +707,8 @@ static void 
tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); arg.tos = ip_hdr(skb)->tos; - ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); @@ -790,7 +791,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; - ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); @@ -2898,14 +2900,39 @@ struct proto tcp_prot = { }; EXPORT_SYMBOL(tcp_prot); -static int __net_init tcp_sk_init(struct net *net) -{ - net->ipv4.sysctl_tcp_ecn = 2; - return 0; -} - static void __net_exit tcp_sk_exit(struct net *net) { + int cpu; + + for_each_possible_cpu(cpu) + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); + free_percpu(net->ipv4.tcp_sk); +} + +static int __net_init tcp_sk_init(struct net *net) +{ + int res, cpu; + + net->ipv4.tcp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.tcp_sk) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct sock *sk; + + res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, + IPPROTO_TCP, net); + if (res) + goto fail; + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; + } + net->ipv4.sysctl_tcp_ecn = 2; + return 0; + +fail: + tcp_sk_exit(net); + + return res; } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) From a7df378ab94e59b29128b6d6b95da9fd67b40337 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 28 Jan 2015 10:56:04 +0100 Subject: [PATCH 032/204] ppp: deflate: never return len larger than output buffer [ Upstream commit e2a4800e75780ccf4e6c2487f82b688ba736eb18 ] When we've run out of space in the output buffer to store more data, we will call zlib_deflate with a NULL 
output buffer until we've consumed remaining input. When this happens, olen contains the size the output buffer would have consumed iff we'd have had enough room. This can later cause skb_over_panic when ppp_generic skb_put()s the returned length. Reported-by: Iain Douglas Signed-off-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_deflate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c index 602c625d95d5..b5edc7f96a39 100644 --- a/drivers/net/ppp/ppp_deflate.c +++ b/drivers/net/ppp/ppp_deflate.c @@ -246,7 +246,7 @@ static int z_compress(void *arg, unsigned char *rptr, unsigned char *obuf, /* * See if we managed to reduce the size of the packet. */ - if (olen < isize) { + if (olen < isize && olen <= osize) { state->stats.comp_bytes += olen; state->stats.comp_packets++; } else { From 572d332c02bb349d6fe428bf17e3068631064976 Mon Sep 17 00:00:00 2001 From: Saran Maruti Ramanara Date: Thu, 29 Jan 2015 11:05:58 +0100 Subject: [PATCH 033/204] net: sctp: fix passing wrong parameter header to param_type2af in sctp_process_param [ Upstream commit cfbf654efc6d78dc9812e030673b86f235bf677d ] When making use of RFC5061, section 4.2.4. for setting the primary IP address, we're passing a wrong parameter header to param_type2af(), resulting always in NULL being returned. At this point, param.p points to a sctp_addip_param struct, containing a sctp_paramhdr (type = 0xc004, length = var), and crr_id as a correlation id. Followed by that, as also presented in RFC5061 section 4.2.4., comes the actual sctp_addr_param, which also contains a sctp_paramhdr, but this time with the correct type SCTP_PARAM_IPV{4,6}_ADDRESS that param_type2af() can make use of. Since we already hold a pointer to addr_param from previous line, just reuse it for param_type2af(). 
Fixes: d6de3097592b ("[SCTP]: Add the handling of "Set Primary IP Address" parameter to INIT") Signed-off-by: Saran Maruti Ramanara Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 29fc16f3633f..beedadf62f6c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2595,7 +2595,7 @@ static int sctp_process_param(struct sctp_association *asoc, addr_param = param.v + sizeof(sctp_addip_param_t); - af = sctp_get_af_specific(param_type2af(param.p->type)); + af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (af == NULL) break; From b2b501af4181db36a3301cef3e4ccab16571ad27 Mon Sep 17 00:00:00 2001 From: Austin Lund Date: Thu, 24 Jul 2014 07:40:20 -0300 Subject: [PATCH 034/204] media/rc: Send sync space information on the lirc device commit a8f29e89f2b54fbf2c52be341f149bc195b63a8b upstream. Userspace expects to see a long space before the first pulse is sent on the lirc device. Currently, if a long time has passed and a new packet is started, the lirc codec just returns and doesn't send anything. This makes lircd ignore many perfectly valid signals unless they are sent in quick succession. When a reset event is delivered, we cannot know anything about the duration of the space. But it should be safe to assume it has been a long time and we just set the duration to maximum. 
Signed-off-by: Austin Lund Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/ir-lirc-codec.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/media/rc/ir-lirc-codec.c b/drivers/media/rc/ir-lirc-codec.c index e4561264e124..a895ed02da86 100644 --- a/drivers/media/rc/ir-lirc-codec.c +++ b/drivers/media/rc/ir-lirc-codec.c @@ -42,11 +42,17 @@ static int ir_lirc_decode(struct rc_dev *dev, struct ir_raw_event ev) return -EINVAL; /* Packet start */ - if (ev.reset) - return 0; + if (ev.reset) { + /* Userspace expects a long space event before the start of + * the signal to use as a sync. This may be done with repeat + * packets and normal samples. But if a reset has been sent + * then we assume that a long time has passed, so we send a + * space with the maximum time value. */ + sample = LIRC_SPACE(LIRC_VALUE_MASK); + IR_dprintk(2, "delivering reset sync space to lirc_dev\n"); /* Carrier reports */ - if (ev.carrier_report) { + } else if (ev.carrier_report) { sample = LIRC_FREQUENCY(ev.carrier); IR_dprintk(2, "carrier report (freq: %d)\n", sample); From c30748a365edbbc94084742b49d55c336a356f0b Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 25 Mar 2014 15:36:02 +0200 Subject: [PATCH 035/204] rbd: drop an unsafe assertion commit 638c323c4d1f8eaf25224946e21ce8818f1bcee1 upstream. Olivier Bonvalet reported having repeated crashes due to a failed assertion he was hitting in rbd_img_obj_callback(): Assertion failure in rbd_img_obj_callback() at line 2165: rbd_assert(which >= img_request->next_completion); With a lot of help from Olivier with reproducing the problem we were able to determine the object and image requests had already been completed (and often freed) at the point the assertion failed. There was a great deal of discussion on the ceph-devel mailing list about this. 
The problem only arose when there were two (or more) object requests in an image request, and the problem was always seen when the second request was being completed. The problem is due to a race in the window between setting the "done" flag on an object request and checking the image request's next completion value. When the first object request completes, it checks to see if its successor request is marked "done", and if so, that request is also completed. In the process, the image request's next_completion value is updated to reflect that both the first and second requests are completed. By the time the second request is able to check the next_completion value, it has been set to a value *greater* than its own "which" value, which caused an assertion to fail. Fix this problem by skipping over any completion processing unless the completing object request is the next one expected. Test only for inequality (not >=), and eliminate the bad assertion. Tested-by: Olivier Bonvalet Signed-off-by: Alex Elder Reviewed-by: Sage Weil Reviewed-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9951e66b8502..7e3f45105f11 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2149,7 +2149,6 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) rbd_assert(img_request->obj_request_count > 0); rbd_assert(which != BAD_WHICH); rbd_assert(which < img_request->obj_request_count); - rbd_assert(which >= img_request->next_completion); spin_lock_irq(&img_request->completion_lock); if (which != img_request->next_completion) From ef16065a9e97e4fbcecc42e9effa3d2f15119794 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 26 Feb 2015 17:49:14 -0800 Subject: [PATCH 036/204] Linux 3.10.70 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 81ede20061cf..402cbb7c27f1 100644 --- 
a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 69 +SUBLEVEL = 70 EXTRAVERSION = NAME = TOSSUG Baby Fish From 883ad7b29f4f26c4d1d9cc2461fc52571be9ecd6 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Mon, 2 Mar 2015 09:04:10 -0800 Subject: [PATCH 037/204] android: base-cfg: turn off /dev/mem and /dev/kmem Signed-off-by: Mark Salyzyn Bug: 19549480 Change-Id: Icd43da8712efab46aa70590311c0fb170a9e124c --- android/configs/android-base.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index d8503e450957..6c08830ae6ed 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -1,4 +1,6 @@ # KEEP ALPHABETICALLY SORTED +# CONFIG_DEVKMEM is not set +# CONFIG_DEVMEM is not set # CONFIG_INET_LRO is not set # CONFIG_MODULES is not set # CONFIG_OABI_COMPAT is not set From 02de954e6c1842beabcd85e7824526dd1bd45c0e Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 15 May 2014 16:38:41 -0700 Subject: [PATCH 038/204] net: ipv6: make "ip -6 route get mark xyz" work. Currently, "ip -6 route get mark xyz" ignores the mark passed in by userspace. Make it honour the mark, just like IPv4 does. [net-next commit 2e47b291953c35afa4e20a65475954c1a1b9afe1] Change-Id: Ief6cd1e1b7e43dc0d008b7e692be62cadc5cc7ca Signed-off-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bad36468dcd7..28a664d98745 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2587,6 +2587,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_MARK]) + fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) fl6.flowi6_uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); From dfca41467fa4570189f76e0fb9e0f911bc88ceb8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 19 Jul 2013 11:31:36 +0200 Subject: [PATCH 039/204] um: siginfo cleanup Currently we use both struct siginfo and siginfo_t. Let's use struct siginfo internally to avoid ongoing compiler warning. We are allowed to do so because struct siginfo and siginfo_t are equivalent. [cherry-pick of upstream 9a8c1359571c5d5e2fbc43cf457a6486b70a70cb] Change-Id: I564775c9ed515c39ffff7d7d600a85d50291f31d Signed-off-by: Richard Weinberger Signed-off-by: Lorenzo Colitti --- arch/um/include/shared/frame_kern.h | 8 ++++---- arch/um/kernel/signal.c | 4 ++-- arch/um/os-Linux/signal.c | 8 ++++---- arch/um/os-Linux/skas/process.c | 10 +++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/um/include/shared/frame_kern.h b/arch/um/include/shared/frame_kern.h index e584e40ee832..f2ca5702a4e2 100644 --- a/arch/um/include/shared/frame_kern.h +++ b/arch/um/include/shared/frame_kern.h @@ -6,13 +6,13 @@ #ifndef __FRAME_KERN_H_ #define __FRAME_KERN_H_ -extern int setup_signal_stack_sc(unsigned long stack_top, int sig, +extern int setup_signal_stack_sc(unsigned long stack_top, int sig, struct k_sigaction *ka, - struct pt_regs *regs, + struct pt_regs *regs, sigset_t *mask); -extern int setup_signal_stack_si(unsigned long stack_top, int sig, +extern int setup_signal_stack_si(unsigned long stack_top, int sig, struct k_sigaction *ka, - struct pt_regs *regs, siginfo_t *info, 
+ struct pt_regs *regs, struct siginfo *info, sigset_t *mask); #endif diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 3e831b3fd07b..f57e02e7910f 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -19,7 +19,7 @@ EXPORT_SYMBOL(unblock_signals); * OK, we're invoking a handler */ static void handle_signal(struct pt_regs *regs, unsigned long signr, - struct k_sigaction *ka, siginfo_t *info) + struct k_sigaction *ka, struct siginfo *info) { sigset_t *oldset = sigmask_to_save(); int singlestep = 0; @@ -71,7 +71,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, static int kern_do_signal(struct pt_regs *regs) { struct k_sigaction ka_copy; - siginfo_t info; + struct siginfo info; int sig, handled_sig = 0; while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) { diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 9d9f1b4bf826..905924b773d3 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -25,7 +25,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGIO] = sigio_handler, [SIGVTALRM] = timer_handler }; -static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc) +static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { struct uml_pt_regs r; int save_errno = errno; @@ -61,7 +61,7 @@ static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc) static int signals_enabled; static unsigned int signals_pending; -void sig_handler(int sig, siginfo_t *si, mcontext_t *mc) +void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { int enabled; @@ -120,7 +120,7 @@ void set_sigstack(void *sig_stack, int size) panic("enabling signal stack failed, errno = %d\n", errno); } -static void (*handlers[_NSIG])(int sig, siginfo_t *si, mcontext_t *mc) = { +static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { [SIGSEGV] = sig_handler, [SIGBUS] = sig_handler, [SIGILL] = 
sig_handler, @@ -162,7 +162,7 @@ static void hard_handler(int sig, siginfo_t *si, void *p) while ((sig = ffs(pending)) != 0){ sig--; pending &= ~(1 << sig); - (*handlers[sig])(sig, si, mc); + (*handlers[sig])(sig, (struct siginfo *)si, mc); } /* diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 4625949bf1e4..908579f2b0ab 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -409,7 +409,7 @@ void userspace(struct uml_pt_regs *regs) if (WIFSTOPPED(status)) { int sig = WSTOPSIG(status); - ptrace(PTRACE_GETSIGINFO, pid, 0, &si); + ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); switch (sig) { case SIGSEGV: @@ -417,7 +417,7 @@ void userspace(struct uml_pt_regs *regs) !ptrace_faultinfo) { get_skas_faultinfo(pid, &regs->faultinfo); - (*sig_info[SIGSEGV])(SIGSEGV, &si, + (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, regs); } else handle_segv(pid, regs); @@ -426,14 +426,14 @@ void userspace(struct uml_pt_regs *regs) handle_trap(pid, regs, local_using_sysemu); break; case SIGTRAP: - relay_signal(SIGTRAP, &si, regs); + relay_signal(SIGTRAP, (struct siginfo *)&si, regs); break; case SIGVTALRM: now = os_nsecs(); if (now < nsecs) break; block_signals(); - (*sig_info[sig])(sig, &si, regs); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs); unblock_signals(); nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + @@ -447,7 +447,7 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: block_signals(); - (*sig_info[sig])(sig, &si, regs); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs); unblock_signals(); break; default: From 0f708442d160846f3e0b0f390c0534e801f7b5f5 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 19 Jul 2013 11:35:32 +0200 Subject: [PATCH 040/204] um: remove dead code "me" is not used. 
[cherry-pick of upstream 9e82d450531c79b18ab18c9b9645cdd9db31ee98] Change-Id: Ifc3550184931dddf8feebd6c3137e60b97f6a0f1 Signed-off-by: Richard Weinberger Signed-off-by: Lorenzo Colitti --- arch/x86/um/signal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index ae7319db18ee..5e04a1c899fa 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -508,7 +508,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, { struct rt_sigframe __user *frame; int err = 0; - struct task_struct *me = current; frame = (struct rt_sigframe __user *) round_down(stack_top - sizeof(struct rt_sigframe), 16); From 9c086b4cf266e9ac1afabb86ff9ef54407b344e2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 15 Apr 2014 16:25:34 -0700 Subject: [PATCH 041/204] ipv4, fib: pass LOOPBACK_IFINDEX instead of 0 to flowi4_iif MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As suggested by Julian: Simply, flowi4_iif must not contain 0, it does not look logical to ignore all ip rules with specified iif. because in fib_rule_match() we do: if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; flowi4_iif should be LOOPBACK_IFINDEX by default. 
We need to move LOOPBACK_IFINDEX to include/net/flow.h: 1) It is mostly used by flowi_iif 2) Fix the following compile error if we use it in flow.h by the later patches: In file included from include/linux/netfilter.h:277:0, from include/net/netns/netfilter.h:5, from include/net/net_namespace.h:21, from include/linux/netdevice.h:43, from include/linux/icmpv6.h:12, from include/linux/ipv6.h:61, from include/net/ipv6.h:16, from include/linux/sunrpc/clnt.h:27, from include/linux/nfs_fs.h:30, from init/do_mounts.c:32: include/net/flow.h: In function ‘flowi4_init_output’: include/net/flow.h:84:32: error: ‘LOOPBACK_IFINDEX’ undeclared (first use in this function) [Backport of net-next 6a662719c9868b3d6c7d26b3a085f0cd3cc15e64] Change-Id: Ib7a0a08d78c03800488afa1b2c170cb70e34cfd9 Cc: Eric Biederman Cc: Julian Anastasov Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Lorenzo Colitti --- include/net/flow.h | 10 +++++++++- include/net/net_namespace.h | 9 +-------- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 1 + net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/ipt_rpfilter.c | 5 +---- net/ipv6/ip6mr.c | 2 +- 7 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index c91e2aae3fb1..1426681f7cf3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -12,6 +12,14 @@ #include #include +/* + * ifindex generation is per-net namespace, and loopback is + * always the 1st device in ns (see net_dev_init), thus any + * loopback device should get ifindex 1 + */ + +#define LOOPBACK_IFINDEX 1 + struct flowi_common { int flowic_oif; int flowic_iif; @@ -85,7 +93,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, kuid_t uid) { fl4->flowi4_oif = oif; - fl4->flowi4_iif = 0; + fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_mark = mark; fl4->flowi4_tos = tos; fl4->flowi4_scope = scope; diff --git a/include/net/net_namespace.h
b/include/net/net_namespace.h index b17697827482..b064d6dd14fb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -120,14 +121,6 @@ struct net { atomic_t rt_genid; }; -/* - * ifindex generation is per-net namespace, and loopback is - * always the 1st device in ns (see net_dev_init), thus any - * loopback device should get ifindex 1 - */ - -#define LOOPBACK_IFINDEX 1 - #include /* Init's network namespace */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ffffeb448ec4..5ae6d9449847 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -250,7 +250,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = oif; + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8f6cb7a87cd6..567285bb83d2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -629,6 +629,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, .daddr = nh->nh_gw, .flowi4_scope = cfg->fc_scope + 1, .flowi4_oif = nh->nh_oif, + .flowi4_iif = LOOPBACK_IFINDEX, }; /* It is not necessary, but requires a bit of thinking */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d9610ae7855..2c538ec7d3ea 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -451,7 +451,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) struct mr_table *mrt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_iif = skb->skb_iif, + .flowi4_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, .flowi4_mark = skb->mark, }; int err; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c49dcd0284a0..4bfaedf9b34e 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -89,11 +89,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ipv4_is_multicast(iph->daddr)) { if (ipv4_is_zeronet(iph->saddr)) return ipv4_is_local_multicast(iph->daddr) ^ invert; - flow.flowi4_iif = 0; - } else { - flow.flowi4_iif = LOOPBACK_IFINDEX; } - + flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_oif = 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fcf..7a3bd3b26c38 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -693,7 +693,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct mr6_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; int err; From 78bae7615d4d1c8114a2259262648afceb9ef221 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 May 2013 01:17:58 -0400 Subject: [PATCH 042/204] [readdir] convert squashfs Signed-off-by: Al Viro --- fs/squashfs/dir.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 57dc70ebbb19..f7f527bf8c10 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -100,7 +100,7 @@ static int get_dir_index_using_offset(struct super_block *sb, } -static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) +static int squashfs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; @@ -127,11 +127,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) * It also means that the external f_pos is offset by 3 from the * on-disk directory f_pos. 
*/ - while (file->f_pos < 3) { + while (ctx->pos < 3) { char *name; int i_ino; - if (file->f_pos == 0) { + if (ctx->pos == 0) { name = "."; size = 1; i_ino = inode->i_ino; @@ -141,24 +141,18 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) i_ino = squashfs_i(inode)->parent; } - TRACE("Calling filldir(%p, %s, %d, %lld, %d, %d)\n", - dirent, name, size, file->f_pos, i_ino, - squashfs_filetype_table[1]); - - if (filldir(dirent, name, size, file->f_pos, i_ino, - squashfs_filetype_table[1]) < 0) { - TRACE("Filldir returned less than 0\n"); + if (!dir_emit(ctx, name, size, i_ino, + squashfs_filetype_table[1])) goto finish; - } - file->f_pos += size; + ctx->pos += size; } length = get_dir_index_using_offset(inode->i_sb, &block, &offset, squashfs_i(inode)->dir_idx_start, squashfs_i(inode)->dir_idx_offset, squashfs_i(inode)->dir_idx_cnt, - file->f_pos); + ctx->pos); while (length < i_size_read(inode)) { /* @@ -198,7 +192,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) length += sizeof(*dire) + size; - if (file->f_pos >= length) + if (ctx->pos >= length) continue; dire->name[size] = '\0'; @@ -206,22 +200,12 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) ((short) le16_to_cpu(dire->inode_number)); type = le16_to_cpu(dire->type); - TRACE("Calling filldir(%p, %s, %d, %lld, %x:%x, %d, %d)" - "\n", dirent, dire->name, size, - file->f_pos, - le32_to_cpu(dirh.start_block), - le16_to_cpu(dire->offset), + if (!dir_emit(ctx, dire->name, size, inode_number, - squashfs_filetype_table[type]); - - if (filldir(dirent, dire->name, size, file->f_pos, - inode_number, - squashfs_filetype_table[type]) < 0) { - TRACE("Filldir returned less than 0\n"); + squashfs_filetype_table[type])) goto finish; - } - file->f_pos = length; + ctx->pos = length; } } @@ -238,6 +222,6 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) const struct file_operations 
squashfs_dir_ops = { .read = generic_read_dir, - .readdir = squashfs_readdir, + .iterate = squashfs_readdir, .llseek = default_llseek, }; From c301a0e047e401d41b26db1009d08e088ae2365a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 May 2013 13:52:59 -0400 Subject: [PATCH 043/204] [readdir] introduce iterate_dir() and dir_context iterate_dir(): new helper, replacing vfs_readdir(). struct dir_context: contains the readdir callback (and will get more stuff in it), embedded into whatever data that callback wants to deal with; eventually, we'll be passing it to ->readdir() replacement instead of (data,filldir) pair. Signed-off-by: Al Viro --- Documentation/filesystems/porting | 3 +++ arch/alpha/kernel/osf_sys.c | 4 +++- arch/parisc/hpux/fs.c | 4 +++- fs/compat.c | 12 +++++++++--- fs/ecryptfs/file.c | 4 +++- fs/exportfs/expfs.c | 4 +++- fs/nfsd/nfs4recover.c | 13 +++++++++---- fs/nfsd/vfs.c | 4 +++- fs/readdir.c | 21 +++++++++++++-------- include/linux/fs.h | 4 ++++ 10 files changed, 53 insertions(+), 20 deletions(-) diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 4db22f6491e0..85a4a033bae7 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -445,3 +445,6 @@ object doesn't exist. It's remote/distributed ones that might care... [mandatory] FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate() in your dentry operations instead. 
+-- +[mandatory] + vfs_readdir() is gone; switch to iterate_dir() instead diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index b9e37ad6fa19..ac19c7299d8e 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -96,6 +96,7 @@ struct osf_dirent { }; struct osf_dirent_callback { + struct dir_context ctx; struct osf_dirent __user *dirent; long __user *basep; unsigned int count; @@ -155,8 +156,9 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, buf.basep = basep; buf.count = count; buf.error = 0; + buf.ctx.actor = osf_filldir; - error = vfs_readdir(arg.file, osf_filldir, &buf); + error = iterate_dir(arg.file, &buf.ctx); if (error >= 0) error = buf.error; if (count != buf.count) diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 838b479a42c4..fc2cbee86e34 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -60,6 +60,7 @@ struct hpux_dirent { }; struct getdents_callback { + struct dir_context ctx; struct hpux_dirent __user *current_dir; struct hpux_dirent __user *previous; int count; @@ -121,8 +122,9 @@ int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned i buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = filldir; - error = vfs_readdir(arg.file, filldir, &buf); + error = iterate_dir(arg.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; diff --git a/fs/compat.c b/fs/compat.c index fc3b55dce184..2279b59e81f2 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -832,6 +832,7 @@ struct compat_old_linux_dirent { }; struct compat_readdir_callback { + struct dir_context ctx; struct compat_old_linux_dirent __user *dirent; int result; }; @@ -880,8 +881,9 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, buf.result = 0; buf.dirent = dirent; + buf.ctx.actor = compat_fillonedir; - error = vfs_readdir(f.file, compat_fillonedir, &buf); + error = iterate_dir(f.file, &buf.ctx); if (buf.result) error = buf.result; @@ -897,6 +899,7 
@@ struct compat_linux_dirent { }; struct compat_getdents_callback { + struct dir_context ctx; struct compat_linux_dirent __user *current_dir; struct compat_linux_dirent __user *previous; int count; @@ -965,8 +968,9 @@ asmlinkage long compat_sys_getdents(unsigned int fd, buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = compat_filldir; - error = vfs_readdir(f.file, compat_filldir, &buf); + error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; @@ -983,6 +987,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 struct compat_getdents_callback64 { + struct dir_context ctx; struct linux_dirent64 __user *current_dir; struct linux_dirent64 __user *previous; int count; @@ -1050,8 +1055,9 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = compat_filldir64; - error = vfs_readdir(f.file, compat_filldir64, &buf); + error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index a7abbea2c096..041379a646b3 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -68,6 +68,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, } struct ecryptfs_getdents_callback { + struct dir_context ctx; void *dirent; struct dentry *dentry; filldir_t filldir; @@ -126,7 +127,8 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) buf.filldir = filldir; buf.filldir_called = 0; buf.entries_written = 0; - rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); + buf.ctx.actor = ecryptfs_filldir; + rc = iterate_dir(lower_file, &buf.ctx); file->f_pos = lower_file->f_pos; if (rc < 0) goto out; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 262fc9940982..7cb190426cec 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -212,6 +212,7 @@ 
reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) } struct getdents_callback { + struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ unsigned long ino; /* the inum we are looking for */ @@ -278,10 +279,11 @@ static int get_name(const struct path *path, char *name, struct dentry *child) buffer.ino = child->d_inode->i_ino; buffer.found = 0; buffer.sequence = 0; + buffer.ctx.actor = filldir_one; while (1) { int old_seq = buffer.sequence; - error = vfs_readdir(file, filldir_one, &buffer); + error = iterate_dir(file, &buffer.ctx); if (buffer.found) { error = 0; break; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4e9a21db867a..4f8cc6ba7c28 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -263,7 +263,10 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; struct dentry *dir = nn->rec_file->f_path.dentry; - LIST_HEAD(names); + struct { + struct dir_context ctx; + struct list_head names; + } ctx; int status; status = nfs4_save_creds(&original_cred); @@ -276,11 +279,13 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) return status; } - status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names); + INIT_LIST_HEAD(&ctx.names); + ctx.ctx.actor = nfsd4_build_namelist; + status = iterate_dir(nn->rec_file, &ctx.ctx); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - while (!list_empty(&names)) { + while (!list_empty(&ctx.names)) { struct name_list *entry; - entry = list_entry(names.next, struct name_list, list); + entry = list_entry(ctx.names.next, struct name_list, list); if (!status) { struct dentry *dentry; dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 84ce601d8063..f939ba9bf8e8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1912,6 +1912,7 @@ struct buffered_dirent { }; struct readdir_data { + struct dir_context ctx; char 
*dirent; size_t used; int full; @@ -1949,6 +1950,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, int size; loff_t offset; + buf.ctx.actor = nfsd_buffered_filldir; buf.dirent = (void *)__get_free_page(GFP_KERNEL); if (!buf.dirent) return nfserrno(-ENOMEM); @@ -1963,7 +1965,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, buf.used = 0; buf.full = 0; - host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); + host_err = iterate_dir(file, &buf.ctx); if (buf.full) host_err = 0; diff --git a/fs/readdir.c b/fs/readdir.c index fee38e04fae4..5b620a2b45e6 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -20,7 +20,7 @@ #include -int vfs_readdir(struct file *file, filldir_t filler, void *buf) +int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; @@ -37,15 +37,14 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf) res = -ENOENT; if (!IS_DEADDIR(inode)) { - res = file->f_op->readdir(file, buf, filler); + res = file->f_op->readdir(file, ctx, ctx->actor); file_accessed(file); } mutex_unlock(&inode->i_mutex); out: return res; } - -EXPORT_SYMBOL(vfs_readdir); +EXPORT_SYMBOL(iterate_dir); /* * Traditional linux readdir() handling.. 
@@ -66,6 +65,7 @@ struct old_linux_dirent { }; struct readdir_callback { + struct dir_context ctx; struct old_linux_dirent __user * dirent; int result; }; @@ -73,7 +73,7 @@ struct readdir_callback { static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct readdir_callback * buf = (struct readdir_callback *) __buf; + struct readdir_callback *buf = (struct readdir_callback *) __buf; struct old_linux_dirent __user * dirent; unsigned long d_ino; @@ -112,10 +112,11 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (!f.file) return -EBADF; + buf.ctx.actor = fillonedir; buf.result = 0; buf.dirent = dirent; - error = vfs_readdir(f.file, fillonedir, &buf); + error = iterate_dir(f.file, &buf.ctx); if (buf.result) error = buf.result; @@ -137,6 +138,7 @@ struct linux_dirent { }; struct getdents_callback { + struct dir_context ctx; struct linux_dirent __user * current_dir; struct linux_dirent __user * previous; int count; @@ -205,8 +207,9 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = filldir; - error = vfs_readdir(f.file, filldir, &buf); + error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; @@ -221,6 +224,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, } struct getdents_callback64 { + struct dir_context ctx; struct linux_dirent64 __user * current_dir; struct linux_dirent64 __user * previous; int count; @@ -285,8 +289,9 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = filldir64; - error = vfs_readdir(f.file, filldir64, &buf); + error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; diff --git a/include/linux/fs.h b/include/linux/fs.h index 65c2be22b601..643e5b6cbaf5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1506,6 +1506,9 @@ int fiemap_check_flags(struct 
fiemap_extent_info *fieinfo, u32 fs_flags); * to have different dirent layouts depending on the binary type. */ typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); +struct dir_context { + filldir_t actor; +}; struct block_device_operations; /* These macros are for out of kernel modules to test that @@ -2494,6 +2497,7 @@ loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); extern int vfs_readdir(struct file *, filldir_t, void *); +extern int iterate_dir(struct file *, struct dir_context *); extern int vfs_stat(const char __user *, struct kstat *); extern int vfs_lstat(const char __user *, struct kstat *); From 83fd542759010949ac7d9638b615fac1bb9744e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 May 2013 18:49:12 -0400 Subject: [PATCH 044/204] [readdir] introduce ->iterate(), ctx->pos, dir_emit() New method - ->iterate(file, ctx). That's the replacement for ->readdir(); it takes callback from ctx->actor, uses ctx->pos instead of file->f_pos and calls dir_emit(ctx, ...) instead of filldir(data, ...). It does *not* update file->f_pos (or look at it, for that matter); iterate_dir() does the update. Note that dir_emit() takes the offset from ctx->pos (and eventually filldir_t will lose that argument). 
Signed-off-by: Al Viro --- arch/parisc/hpux/fs.c | 2 +- fs/coda/dir.c | 19 +++++++++++++++---- fs/compat.c | 4 ++-- fs/exportfs/expfs.c | 2 +- fs/nfsd/nfs4recover.c | 14 ++++++++------ fs/readdir.c | 15 +++++++++++---- include/linux/fs.h | 9 +++++++++ 7 files changed, 47 insertions(+), 18 deletions(-) diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index fc2cbee86e34..eca8230267cc 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -129,7 +129,7 @@ int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned i error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(arg.file->f_pos, &lastdirent->d_off)) + if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index b7d3a05c062c..fc66861b3598 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -391,8 +391,7 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) if (!host_file->f_op) return -ENOTDIR; - if (host_file->f_op->readdir) - { + if (host_file->f_op->readdir) { /* potemkin case: we were handed a directory inode. * We can't use vfs_readdir because we have to keep the file * position in sync between the coda_file and the host_file. 
@@ -410,8 +409,20 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) coda_file->f_pos = host_file->f_pos; mutex_unlock(&host_inode->i_mutex); - } - else /* Venus: we must read Venus dirents from a file */ + } else if (host_file->f_op->iterate) { + struct inode *host_inode = file_inode(host_file); + struct dir_context *ctx = buf; + + mutex_lock(&host_inode->i_mutex); + ret = -ENOENT; + if (!IS_DEADDIR(host_inode)) { + ret = host_file->f_op->iterate(host_file, ctx); + file_accessed(host_file); + } + mutex_unlock(&host_inode->i_mutex); + + coda_file->f_pos = ctx->pos; + } else /* Venus: we must read Venus dirents from a file */ ret = coda_venus_readdir(coda_file, buf, filldir); return ret; diff --git a/fs/compat.c b/fs/compat.c index 2279b59e81f2..69ca1e301766 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -975,7 +975,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(f.file->f_pos, &lastdirent->d_off)) + if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; @@ -1062,7 +1062,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, error = buf.error; lastdirent = buf.previous; if (lastdirent) { - typeof(lastdirent->d_off) d_off = f.file->f_pos; + typeof(lastdirent->d_off) d_off = buf.ctx.pos; if (__put_user_unaligned(d_off, &lastdirent->d_off)) error = -EFAULT; else diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 7cb190426cec..6c8ef1dd4bdf 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -272,7 +272,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child) goto out; error = -EINVAL; - if (!file->f_op->readdir) + if (!file->f_op->readdir && !file->f_op->iterate) goto out_close; buffer.name = name; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4f8cc6ba7c28..2fa2e2eb190b 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -240,11 
+240,16 @@ struct name_list { struct list_head list; }; +struct nfs4_dir_ctx { + struct dir_context ctx; + struct list_head names; +}; + static int nfsd4_build_namelist(void *arg, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct list_head *names = arg; + struct nfs4_dir_ctx *ctx = arg; struct name_list *entry; if (namlen != HEXDIR_LEN - 1) @@ -254,7 +259,7 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, return -ENOMEM; memcpy(entry->name, name, HEXDIR_LEN - 1); entry->name[HEXDIR_LEN - 1] = '\0'; - list_add(&entry->list, names); + list_add(&entry->list, &ctx->names); return 0; } @@ -263,10 +268,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; struct dentry *dir = nn->rec_file->f_path.dentry; - struct { - struct dir_context ctx; - struct list_head names; - } ctx; + struct nfs4_dir_ctx ctx; int status; status = nfs4_save_creds(&original_cred); diff --git a/fs/readdir.c b/fs/readdir.c index 5b620a2b45e6..5d6578affbbf 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; - if (!file->f_op || !file->f_op->readdir) + if (!file->f_op || (!file->f_op->readdir && !file->f_op->iterate)) goto out; res = security_file_permission(file, MAY_READ); @@ -37,7 +37,14 @@ int iterate_dir(struct file *file, struct dir_context *ctx) res = -ENOENT; if (!IS_DEADDIR(inode)) { - res = file->f_op->readdir(file, ctx, ctx->actor); + if (file->f_op->iterate) { + ctx->pos = file->f_pos; + res = file->f_op->iterate(file, ctx); + file->f_pos = ctx->pos; + } else { + res = file->f_op->readdir(file, ctx, ctx->actor); + ctx->pos = file->f_pos; + } file_accessed(file); } mutex_unlock(&inode->i_mutex); @@ -214,7 +221,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(f.file->f_pos, 
&lastdirent->d_off)) + if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; @@ -296,7 +303,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, error = buf.error; lastdirent = buf.previous; if (lastdirent) { - typeof(lastdirent->d_off) d_off = f.file->f_pos; + typeof(lastdirent->d_off) d_off = buf.ctx.pos; if (__put_user(d_off, &lastdirent->d_off)) error = -EFAULT; else diff --git a/include/linux/fs.h b/include/linux/fs.h index 643e5b6cbaf5..b9641ae68da8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1508,7 +1508,15 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct dir_context { filldir_t actor; + loff_t pos; }; + +static inline bool dir_emit(struct dir_context *ctx, + const char *name, int namelen, + u64 ino, unsigned type) +{ + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; +} struct block_device_operations; /* These macros are for out of kernel modules to test that @@ -1525,6 +1533,7 @@ struct file_operations { ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); + int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); From b2497fc3057ae27db9aa29579f16ae5afb6d6d08 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 22 May 2013 22:22:04 -0400 Subject: [PATCH 045/204] [readdir] constify ->actor Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 13 ++++++------- arch/parisc/hpux/fs.c | 12 +++++------- fs/compat.c | 33 ++++++++++++++------------------- fs/exportfs/expfs.c | 10 +++++----- fs/gfs2/export.c | 6 ++++-- 
fs/nfsd/nfs4recover.c | 7 ++++--- fs/nfsd/vfs.c | 7 ++++--- fs/readdir.c | 33 ++++++++++++++------------------- include/linux/fs.h | 2 +- 9 files changed, 57 insertions(+), 66 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index ac19c7299d8e..1402fcc11c2c 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -147,17 +147,16 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, { int error; struct fd arg = fdget(fd); - struct osf_dirent_callback buf; + struct osf_dirent_callback buf = { + .ctx.actor = osf_filldir, + .dirent = dirent, + .basep = basep, + .count = count + }; if (!arg.file) return -EBADF; - buf.dirent = dirent; - buf.basep = basep; - buf.count = count; - buf.error = 0; - buf.ctx.actor = osf_filldir; - error = iterate_dir(arg.file, &buf.ctx); if (error >= 0) error = buf.error; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index eca8230267cc..88d0962de65a 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -111,19 +111,17 @@ int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned i { struct fd arg; struct hpux_dirent __user * lastdirent; - struct getdents_callback buf; + struct getdents_callback buf = { + .ctx.actor = filldir, + .current_dir = dirent, + .count = count + }; int error; arg = fdget(fd); if (!arg.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = filldir; - error = iterate_dir(arg.file, &buf.ctx); if (error >= 0) error = buf.error; diff --git a/fs/compat.c b/fs/compat.c index 69ca1e301766..6af20de2c1a3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -874,15 +874,14 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, { int error; struct fd f = fdget(fd); - struct compat_readdir_callback buf; + struct compat_readdir_callback buf = { + .ctx.actor = compat_fillonedir, + .dirent = dirent + }; if (!f.file) return -EBADF; - buf.result = 0; - buf.dirent = 
dirent; - buf.ctx.actor = compat_fillonedir; - error = iterate_dir(f.file, &buf.ctx); if (buf.result) error = buf.result; @@ -954,7 +953,11 @@ asmlinkage long compat_sys_getdents(unsigned int fd, { struct fd f; struct compat_linux_dirent __user * lastdirent; - struct compat_getdents_callback buf; + struct compat_getdents_callback buf = { + .ctx.actor = compat_filldir, + .current_dir = dirent, + .count = count + }; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) @@ -964,12 +967,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd, if (!f.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = compat_filldir; - error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; @@ -1041,7 +1038,11 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, { struct fd f; struct linux_dirent64 __user * lastdirent; - struct compat_getdents_callback64 buf; + struct compat_getdents_callback64 buf = { + .ctx.actor = compat_filldir64, + .current_dir = dirent, + .count = count + }; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) @@ -1051,12 +1052,6 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, if (!f.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = compat_filldir64; - error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 6c8ef1dd4bdf..43b448ddc3dc 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -255,7 +255,11 @@ static int get_name(const struct path *path, char *name, struct dentry *child) struct inode *dir = path->dentry->d_inode; int error; struct file *file; - struct getdents_callback buffer; + struct getdents_callback buffer = { + .ctx.actor = filldir_one, + .name = name, + .ino = child->d_inode->i_ino + }; error = -ENOTDIR; if (!dir || !S_ISDIR(dir->i_mode)) @@ -275,11 +279,7 @@ static int 
get_name(const struct path *path, char *name, struct dentry *child) if (!file->f_op->readdir && !file->f_op->iterate) goto out_close; - buffer.name = name; - buffer.ino = child->d_inode->i_ino; - buffer.found = 0; buffer.sequence = 0; - buffer.ctx.actor = filldir_one; while (1) { int old_seq = buffer.sequence; diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 9973df4ff565..7102c7a5af4d 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -88,7 +88,10 @@ static int gfs2_get_name(struct dentry *parent, char *name, struct inode *dir = parent->d_inode; struct inode *inode = child->d_inode; struct gfs2_inode *dip, *ip; - struct get_name_filldir gnfd; + struct get_name_filldir gnfd = { + .ctx.actor = get_name_filldir, + .name = name + }; struct gfs2_holder gh; u64 offset = 0; int error; @@ -106,7 +109,6 @@ static int gfs2_get_name(struct dentry *parent, char *name, *name = 0; gnfd.inum.no_addr = ip->i_no_addr; gnfd.inum.no_formal_ino = ip->i_no_formal_ino; - gnfd.name = name; error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); if (error) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 2fa2e2eb190b..105a3b080d12 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -268,7 +268,10 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; struct dentry *dir = nn->rec_file->f_path.dentry; - struct nfs4_dir_ctx ctx; + struct nfs4_dir_ctx ctx = { + .ctx.actor = nfsd4_build_namelist, + .names = LIST_HEAD_INIT(ctx.names) + }; int status; status = nfs4_save_creds(&original_cred); @@ -281,8 +284,6 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) return status; } - INIT_LIST_HEAD(&ctx.names); - ctx.ctx.actor = nfsd4_build_namelist; status = iterate_dir(nn->rec_file, &ctx.ctx); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); while (!list_empty(&ctx.names)) { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f939ba9bf8e8..a6bc8a7423db 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c 
@@ -1944,14 +1944,15 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, struct readdir_cd *cdp, loff_t *offsetp) { - struct readdir_data buf; struct buffered_dirent *de; int host_err; int size; loff_t offset; + struct readdir_data buf = { + .ctx.actor = nfsd_buffered_filldir, + .dirent = (void *)__get_free_page(GFP_KERNEL) + }; - buf.ctx.actor = nfsd_buffered_filldir; - buf.dirent = (void *)__get_free_page(GFP_KERNEL); if (!buf.dirent) return nfserrno(-ENOMEM); diff --git a/fs/readdir.c b/fs/readdir.c index 5d6578affbbf..d46eca8567a4 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -114,15 +114,14 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, { int error; struct fd f = fdget(fd); - struct readdir_callback buf; + struct readdir_callback buf = { + .ctx.actor = fillonedir, + .dirent = dirent + }; if (!f.file) return -EBADF; - buf.ctx.actor = fillonedir; - buf.result = 0; - buf.dirent = dirent; - error = iterate_dir(f.file, &buf.ctx); if (buf.result) error = buf.result; @@ -200,7 +199,11 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, { struct fd f; struct linux_dirent __user * lastdirent; - struct getdents_callback buf; + struct getdents_callback buf = { + .ctx.actor = filldir, + .count = count, + .current_dir = dirent + }; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) @@ -210,12 +213,6 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, if (!f.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = filldir; - error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; @@ -282,7 +279,11 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, { struct fd f; struct linux_dirent64 __user * lastdirent; - struct getdents_callback64 buf; + struct getdents_callback64 buf = { + .ctx.actor = filldir64, + .count = count, + .current_dir = dirent + }; int error; if (!access_ok(VERIFY_WRITE, dirent, 
count)) @@ -292,12 +293,6 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, if (!f.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = filldir64; - error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; diff --git a/include/linux/fs.h b/include/linux/fs.h index b9641ae68da8..9257703686d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1507,7 +1507,7 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); */ typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct dir_context { - filldir_t actor; + const filldir_t actor; loff_t pos; }; From 639839d89f7b7fd15160389f57edf06b93205b2d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Jul 2013 15:20:25 +0300 Subject: [PATCH 046/204] Squashfs: sanity check information from disk We read the size of the name from the disk, but a larger name than expected would cause memory corruption. Signed-off-by: Dan Carpenter Signed-off-by: Phillip Lougher --- fs/squashfs/namei.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 7834a517f7f4..f866d42a8b6f 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -79,7 +79,8 @@ static int get_dir_index_using_name(struct super_block *sb, int len) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int i, size, length = 0, err; + int i, length = 0, err; + unsigned int size; struct squashfs_dir_index *index; char *str; @@ -103,6 +104,10 @@ static int get_dir_index_using_name(struct super_block *sb, size = le32_to_cpu(index->size) + 1; + if (size > SQUASHFS_NAME_LEN) { + err = -EINVAL; + break; + } err = squashfs_read_metadata(sb, index->name, &index_start, &index_offset, size); From 706a7943a802fedc861d39598a4399999209e341 Mon Sep 17 00:00:00 2001 From: Manish Sharma Date: Wed, 4 Sep 2013 22:31:23 +0530 Subject: [PATCH 047/204] Squashfs: Optimized 
uncompressed buffer loop Merged the two for loops. We might get a little gain by overlapping wait_on_bh and the memcpy operations. Signed-off-by: Manish Sharma Signed-off-by: Phillip Lougher --- fs/squashfs/block.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index fb50652e4e11..41d108ecc9be 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -167,17 +167,14 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, /* * Block is uncompressed. */ - int i, in, pg_offset = 0; - - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - } + int in, pg_offset = 0; for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { page++; From edd2496ccf4a46cfb4c2d20ff9b93ab29b055e1b Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Sep 2013 04:02:53 +0100 Subject: [PATCH 048/204] Squashfs: fix corruption check in get_dir_index_using_name() Patch "Squashfs: sanity check information from disk" from Dan Carpenter adds a missing check for corruption in the "size" field while reading the directory index from disk. It, however, sets err to -EINVAL, this value is not used later, and so setting it is completely redundant. So remove it. Errors in reading the index are deliberately non-fatal. If we get an error in reading the index we just return the part of the index we have managed to read - the index isn't essential, just quicker. 
Signed-off-by: Phillip Lougher --- fs/squashfs/namei.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index f866d42a8b6f..342a5aa5a0e4 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -104,10 +104,8 @@ static int get_dir_index_using_name(struct super_block *sb, size = le32_to_cpu(index->size) + 1; - if (size > SQUASHFS_NAME_LEN) { - err = -EINVAL; + if (size > SQUASHFS_NAME_LEN) break; - } err = squashfs_read_metadata(sb, index->name, &index_start, &index_offset, size); From 98bdbcf71ac52afdbf832489c6077790689c9018 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Sep 2013 04:21:52 +0100 Subject: [PATCH 049/204] Squashfs: fix corruption checks in squashfs_lookup() The dir_count and size fields when read from disk are sanity checked for correctness. However, the sanity checks only check the values are not greater than expected. As dir_count and size were incorrectly defined as signed ints, this can lead to corrupted values appearing as negative which are not trapped. Signed-off-by: Phillip Lougher --- fs/squashfs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 342a5aa5a0e4..67cad77fefb4 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -147,7 +147,8 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, struct squashfs_dir_entry *dire; u64 block = squashfs_i(dir)->start + msblk->directory_table; int offset = squashfs_i(dir)->offset; - int err, length, dir_count, size; + int err, length; + unsigned int dir_count, size; TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset); From 4ff8d316da73165acdda9682ac0413b2a141c6d6 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Sep 2013 04:38:43 +0100 Subject: [PATCH 050/204] Squashfs: fix corruption checks in squashfs_readdir() The dir_count and size fields when read from disk are sanity checked for correctness. 
However, the sanity checks only check the values are not greater than expected. As dir_count and size were incorrectly defined as signed ints, this can lead to corrupted values appearing as negative which are not trapped. Signed-off-by: Phillip Lougher --- fs/squashfs/dir.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index f7f527bf8c10..119208422260 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -105,9 +105,8 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; u64 block = squashfs_i(inode)->start + msblk->directory_table; - int offset = squashfs_i(inode)->offset, length, dir_count, size, - type, err; - unsigned int inode_number; + int offset = squashfs_i(inode)->offset, length, type, err; + unsigned int inode_number, dir_count, size; struct squashfs_dir_header dirh; struct squashfs_dir_entry *dire; From 4fed250b48dc5033a0fcada7aa2ca77f507c7ddc Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Sep 2013 04:52:52 +0100 Subject: [PATCH 051/204] Squashfs: add corruption check in get_dir_index_using_offset() We read the size (of the name) field from disk. This value should be sanity checked for correctness to avoid blindly reading huge amounts of unnecessary data from disk on corruption. Note, here we're not actually reading the name into a buffer, but skipping it, and so corruption doesn't cause buffer overflow, merely lots of unnecessary amounts of data to be read. 
Signed-off-by: Phillip Lougher --- fs/squashfs/dir.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 119208422260..bd7155b198a9 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -54,6 +54,7 @@ static int get_dir_index_using_offset(struct super_block *sb, { struct squashfs_sb_info *msblk = sb->s_fs_info; int err, i, index, length = 0; + unsigned int size; struct squashfs_dir_index dir_index; TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n", @@ -81,8 +82,14 @@ static int get_dir_index_using_offset(struct super_block *sb, */ break; + size = le32_to_cpu(dir_index.size) + 1; + + /* size should never be larger than SQUASHFS_NAME_LEN */ + if (size > SQUASHFS_NAME_LEN) + break; + err = squashfs_read_metadata(sb, NULL, &index_start, - &index_offset, le32_to_cpu(dir_index.size) + 1); + &index_offset, size); if (err < 0) break; From bb5919cc205072075aab3639a9c62302db148d25 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 4 Sep 2013 02:58:12 +0100 Subject: [PATCH 052/204] Squashfs: add corruption check for type in squashfs_readdir() We read the type field from disk. This value should be sanity checked for correctness to avoid an out of bounds access when reading the squashfs_filetype_table array. 
Signed-off-by: Phillip Lougher --- fs/squashfs/dir.c | 7 +++++-- fs/squashfs/squashfs_fs.h | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index bd7155b198a9..d8c2d747be28 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -112,8 +112,8 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; u64 block = squashfs_i(inode)->start + msblk->directory_table; - int offset = squashfs_i(inode)->offset, length, type, err; - unsigned int inode_number, dir_count, size; + int offset = squashfs_i(inode)->offset, length, err; + unsigned int inode_number, dir_count, size, type; struct squashfs_dir_header dirh; struct squashfs_dir_entry *dire; @@ -206,6 +206,9 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx) ((short) le16_to_cpu(dire->inode_number)); type = le16_to_cpu(dire->type); + if (type > SQUASHFS_MAX_DIR_TYPE) + goto failed_read; + if (!dir_emit(ctx, dire->name, size, inode_number, squashfs_filetype_table[type])) diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 9e2349d07cb1..4b2beda49498 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -87,7 +87,7 @@ #define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, \ SQUASHFS_COMP_OPT) -/* Max number of types and file types */ +/* Inode types including extended types */ #define SQUASHFS_DIR_TYPE 1 #define SQUASHFS_REG_TYPE 2 #define SQUASHFS_SYMLINK_TYPE 3 @@ -103,6 +103,9 @@ #define SQUASHFS_LFIFO_TYPE 13 #define SQUASHFS_LSOCKET_TYPE 14 +/* Max type value stored in directory entry */ +#define SQUASHFS_MAX_DIR_TYPE 7 + /* Xattr types */ #define SQUASHFS_XATTR_USER 0 #define SQUASHFS_XATTR_TRUSTED 1 From 06e4ac0b0a206086bff4cb34c07f5a87e87aef4c Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 13 Nov 2013 02:56:26 +0000 Subject: [PATCH 053/204] Squashfs: Refactor 
decompressor interface and code The decompressor interface and code were written from the point of view of single-threaded operation. In doing so it mixed a lot of single-threaded, implementation-specific aspects into the decompressor code and elsewhere, which makes it difficult to seamlessly support multiple different decompressor implementations. This patch does the following: 1. It removes compressor_options parsing from the decompressor init() function. This allows the decompressor init() function to be dynamically called to instantiate multiple decompressors, without the compressor options needing to be read and parsed each time. 2. It moves threading and all sleeping operations out of the decompressors. In doing so, it makes the decompressors non-blocking wrappers which only deal with interfacing with the decompressor implementation. 3. It splits decompressor.[ch] into decompressor generic functions in decompressor.[ch], and moves the single-threaded decompressor implementation into decompressor_single.c.
The result of this patch is Squashfs should now be able to support multiple decompressors by adding new decompressor_xxx.c files with specialised implementations of the functions in decompressor_single.c Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/Makefile | 2 +- fs/squashfs/block.c | 11 ++-- fs/squashfs/decompressor.c | 49 +++++++++++------ fs/squashfs/decompressor.h | 21 +++----- fs/squashfs/decompressor_single.c | 86 +++++++++++++++++++++++++++++ fs/squashfs/lzo_wrapper.c | 24 ++------- fs/squashfs/squashfs.h | 9 +++- fs/squashfs/squashfs_fs_sb.h | 3 +- fs/squashfs/super.c | 10 ++-- fs/squashfs/xz_wrapper.c | 89 +++++++++++++++++-------------- fs/squashfs/zlib_wrapper.c | 50 ++++++----------- 11 files changed, 217 insertions(+), 137 deletions(-) create mode 100644 fs/squashfs/decompressor_single.c diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 110b0476f3b4..c223c8439c21 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-y += namei.o super.o symlink.o decompressor.o decompressor_single.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 41d108ecc9be..4dd402597f22 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -93,7 +93,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, struct buffer_head **bh; int offset = index & ((1 << msblk->devblksize_log2) - 1); u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, page = 0, avail; + int bytes, compressed, b = 0, k = 0, page = 0, avail, i; bh = kcalloc(((srclength + msblk->devblksize - 1) >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); 
@@ -158,6 +158,12 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, ll_rw_block(READ, b - 1, bh + 1); } + for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + } + if (compressed) { length = squashfs_decompress(msblk, buffer, bh, b, offset, length, srclength, pages); @@ -172,9 +178,6 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { page++; diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 3f6271d86abc..234291f79ba5 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -37,29 +37,29 @@ */ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { - NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 + NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 }; #ifndef CONFIG_SQUASHFS_LZO static const struct squashfs_decompressor squashfs_lzo_comp_ops = { - NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 + NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 }; #endif #ifndef CONFIG_SQUASHFS_XZ static const struct squashfs_decompressor squashfs_xz_comp_ops = { - NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 + NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 }; #endif #ifndef CONFIG_SQUASHFS_ZLIB static const struct squashfs_decompressor squashfs_zlib_comp_ops = { - NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 + NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 }; #endif static const struct squashfs_decompressor squashfs_unknown_comp_ops = { - NULL, NULL, NULL, 0, "unknown", 0 + NULL, NULL, NULL, NULL, 0, "unknown", 0 }; static const struct squashfs_decompressor *decompressor[] = { @@ -83,10 +83,10 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) } -void 
*squashfs_decompressor_init(struct super_block *sb, unsigned short flags) +static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *strm, *buffer = NULL; + void *buffer = NULL, *comp_opts; int length = 0; /* @@ -94,23 +94,40 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) */ if (SQUASHFS_COMP_OPTS(flags)) { buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (buffer == NULL) - return ERR_PTR(-ENOMEM); + if (buffer == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } length = squashfs_read_data(sb, &buffer, sizeof(struct squashfs_super_block), 0, NULL, - PAGE_CACHE_SIZE, 1); + PAGE_CACHE_SIZE, 1); if (length < 0) { - strm = ERR_PTR(length); - goto finished; + comp_opts = ERR_PTR(length); + goto out; } } - strm = msblk->decompressor->init(msblk, buffer, length); + comp_opts = squashfs_comp_opts(msblk, buffer, length); -finished: +out: kfree(buffer); - - return strm; + return comp_opts; +} + + +void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + void *stream, *comp_opts = get_comp_opts(sb, flags); + + if (IS_ERR(comp_opts)) + return comp_opts; + + stream = squashfs_decompressor_create(msblk, comp_opts); + if (IS_ERR(stream)) + kfree(comp_opts); + + return stream; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 330073e29029..6cdb20a3878a 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -24,28 +24,21 @@ */ struct squashfs_decompressor { - void *(*init)(struct squashfs_sb_info *, void *, int); + void *(*init)(struct squashfs_sb_info *, void *); + void *(*comp_opts)(struct squashfs_sb_info *, void *, int); void (*free)(void *); - int (*decompress)(struct squashfs_sb_info *, void **, + int (*decompress)(struct squashfs_sb_info *, void *, void **, struct buffer_head **, int, int, int, int, int); int id; char *name; int 
supported; }; -static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, - void *s) +static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int length) { - if (msblk->decompressor) - msblk->decompressor->free(s); -} - -static inline int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) -{ - return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + return msblk->decompressor->comp_opts ? + msblk->decompressor->comp_opts(msblk, buff, length) : NULL; } #ifdef CONFIG_SQUASHFS_XZ diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c new file mode 100644 index 000000000000..f857cf6f22d4 --- /dev/null +++ b/fs/squashfs/decompressor_single.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements single-threaded decompression in the + * decompressor framework + */ + +struct squashfs_stream { + void *stream; + struct mutex mutex; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + int err = -ENOMEM; + + stream = kmalloc(sizeof(*stream), GFP_KERNEL); + if (stream == NULL) + goto out; + + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + + kfree(comp_opts); + mutex_init(&stream->mutex); + return stream; + +out: + kfree(stream); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + + if (stream) { + msblk->decompressor->free(stream->stream); + kfree(stream); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + + mutex_lock(&stream->mutex); + res = msblk->decompressor->decompress(msblk, stream->stream, buffer, + bh, b, offset, length, srclength, pages); + mutex_unlock(&stream->mutex); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return 1; +} diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 00f4dfc5f088..75c3b5779172 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -37,7 +37,7 @@ struct squashfs_lzo { void *output; }; -static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) +static void *lzo_init(struct squashfs_sb_info *msblk, void *buff) { int 
block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); @@ -74,22 +74,16 @@ static void lzo_free(void *strm) } -static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) { - struct squashfs_lzo *stream = msblk->stream; + struct squashfs_lzo *stream = strm; void *buff = stream->input; int avail, i, bytes = length, res; size_t out_len = srclength; - mutex_lock(&msblk->read_data_mutex); - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - avail = min(bytes, msblk->devblksize - offset); memcpy(buff, bh[i]->b_data + offset, avail); buff += avail; @@ -111,17 +105,9 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, bytes -= avail; } - mutex_unlock(&msblk->read_data_mutex); return res; -block_release: - for (; i < b; i++) - put_bh(bh[i]); - failed: - mutex_unlock(&msblk->read_data_mutex); - - ERROR("lzo decompression failed, data probably corrupt\n"); return -EIO; } diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d1266516ed08..2e2751df8452 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int); /* decompressor.c */ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); -extern void *squashfs_decompressor_init(struct super_block *, unsigned short); +extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); + +/* decompressor_xxx.c */ +extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); +extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); +extern int squashfs_decompress(struct squashfs_sb_info *, void **, + struct 
buffer_head **, int, int, int, int, int); +extern int squashfs_max_decompressors(void); /* export.c */ extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 52934a22f296..9cdcf4150d59 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -63,10 +63,9 @@ struct squashfs_sb_info { __le64 *id_table; __le64 *fragment_index; __le64 *xattr_id_table; - struct mutex read_data_mutex; struct mutex meta_index_mutex; struct meta_index *meta_index; - void *stream; + struct squashfs_stream *stream; __le64 *inode_lookup_table; u64 inode_table; u64 directory_table; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 60553a9053ca..202df6312d4e 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); msblk->devblksize_log2 = ffz(~msblk->devblksize); - mutex_init(&msblk->read_data_mutex); mutex_init(&msblk->meta_index_mutex); /* @@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; /* Allocate read_page block */ - msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); + msblk->read_page = squashfs_cache_init("data", + squashfs_max_decompressors(), msblk->block_size); if (msblk->read_page == NULL) { ERROR("Failed to allocate read_page block\n"); goto failed_mount; } - msblk->stream = squashfs_decompressor_init(sb, flags); + msblk->stream = squashfs_decompressor_setup(sb, flags); if (IS_ERR(msblk->stream)) { err = PTR_ERR(msblk->stream); msblk->stream = NULL; @@ -336,7 +336,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); - 
squashfs_decompressor_free(msblk, msblk->stream); + squashfs_decompressor_destroy(msblk); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); @@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); - squashfs_decompressor_free(sbi, sbi->stream); + squashfs_decompressor_destroy(sbi); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 1760b7d108f6..5d1d07cca6b4 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -38,38 +38,63 @@ struct squashfs_xz { struct xz_buf buf; }; -struct comp_opts { +struct disk_comp_opts { __le32 dictionary_size; __le32 flags; }; -static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, - int len) +struct comp_opts { + int dict_size; +}; + +static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int len) { - struct comp_opts *comp_opts = buff; - struct squashfs_xz *stream; - int dict_size = msblk->block_size; - int err, n; + struct disk_comp_opts *comp_opts = buff; + struct comp_opts *opts; + int err = 0, n; + + opts = kmalloc(sizeof(*opts), GFP_KERNEL); + if (opts == NULL) { + err = -ENOMEM; + goto out2; + } if (comp_opts) { /* check compressor options are the expected length */ if (len < sizeof(*comp_opts)) { err = -EIO; - goto failed; + goto out; } - dict_size = le32_to_cpu(comp_opts->dictionary_size); + opts->dict_size = le32_to_cpu(comp_opts->dictionary_size); /* the dictionary size should be 2^n or 2^n+2^(n+1) */ - n = ffs(dict_size) - 1; - if (dict_size != (1 << n) && dict_size != (1 << n) + + n = ffs(opts->dict_size) - 1; + if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) + (1 << (n + 1))) { err = -EIO; - goto failed; + goto out; } - } + } else + /* use defaults */ + opts->dict_size = 
max_t(int, msblk->block_size, + SQUASHFS_METADATA_SIZE); - dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); + return opts; + +out: + kfree(opts); +out2: + return ERR_PTR(err); +} + + +static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) +{ + struct comp_opts *comp_opts = buff; + struct squashfs_xz *stream; + int err; stream = kmalloc(sizeof(*stream), GFP_KERNEL); if (stream == NULL) { @@ -77,7 +102,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, goto failed; } - stream->state = xz_dec_init(XZ_PREALLOC, dict_size); + stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size); if (stream->state == NULL) { kfree(stream); err = -ENOMEM; @@ -103,15 +128,13 @@ static void squashfs_xz_free(void *strm) } -static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) { enum xz_ret xz_err; int avail, total = 0, k = 0, page = 0; - struct squashfs_xz *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + struct squashfs_xz *stream = strm; xz_dec_reset(stream->state); stream->buf.in_pos = 0; @@ -124,10 +147,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (stream->buf.in_pos == stream->buf.in_size && k < b) { avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; @@ -147,23 +166,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (xz_err == XZ_OK); - if (xz_err != XZ_STREAM_END) { - ERROR("xz_dec_run error, data probably corrupt\n"); - goto 
release_mutex; - } + if (xz_err != XZ_STREAM_END || k < b) + goto out; - if (k < b) { - ERROR("xz_uncompress error, input remaining\n"); - goto release_mutex; - } - - total += stream->buf.out_pos; - mutex_unlock(&msblk->read_data_mutex); - return total; - -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return total + stream->buf.out_pos; +out: for (; k < b; k++) put_bh(bh[k]); @@ -172,6 +180,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, const struct squashfs_decompressor squashfs_xz_comp_ops = { .init = squashfs_xz_init, + .comp_opts = squashfs_xz_comp_opts, .free = squashfs_xz_free, .decompress = squashfs_xz_uncompress, .id = XZ_COMPRESSION, diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 55d918fd2d86..bb049027d15c 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -33,7 +33,7 @@ #include "squashfs.h" #include "decompressor.h" -static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) +static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) { z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); if (stream == NULL) @@ -61,15 +61,13 @@ static void zlib_free(void *strm) } -static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) { int zlib_err, zlib_init = 0; int k = 0, page = 0; - z_stream *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + z_stream *stream = strm; stream->avail_out = 0; stream->avail_in = 0; @@ -78,10 +76,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (stream->avail_in == 0 && k < b) { int avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if 
(!buffer_uptodate(bh[k])) - goto release_mutex; - stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; @@ -94,12 +88,8 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (!zlib_init) { zlib_err = zlib_inflateInit(stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflateInit returned unexpected " - "result 0x%x, srclength %d\n", - zlib_err, srclength); - goto release_mutex; - } + if (zlib_err != Z_OK) + goto out; zlib_init = 1; } @@ -109,29 +99,19 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (zlib_err == Z_OK); - if (zlib_err != Z_STREAM_END) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + if (zlib_err != Z_STREAM_END) + goto out; zlib_err = zlib_inflateEnd(stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + if (zlib_err != Z_OK) + goto out; - if (k < b) { - ERROR("zlib_uncompress error, data remaining\n"); - goto release_mutex; - } + if (k < b) + goto out; - length = stream->total_out; - mutex_unlock(&msblk->read_data_mutex); - return length; - -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return stream->total_out; +out: for (; k < b; k++) put_bh(bh[k]); From c0f8b08ad95ca0724ac465c391aef3fc456f5aa9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 28 Oct 2013 14:26:30 +0900 Subject: [PATCH 054/204] squashfs: Enhance parallel I/O Now squashfs have used for only one stream buffer for decompression so it hurts parallel read performance so this patch supports multiple decompressor to enhance performance parallel I/O. Four 1G file dd read on KVM machine which has 2 CPU and 4G memory. 
dd if=test/test1.dat of=/dev/null & dd if=test/test2.dat of=/dev/null & dd if=test/test3.dat of=/dev/null & dd if=test/test4.dat of=/dev/null & old : 1m39s -> new : 9s * From v1 * Change comp_strm with decomp_strm - Phillip * Change/add comments - Phillip Signed-off-by: Minchan Kim Signed-off-by: Phillip Lougher --- fs/squashfs/Kconfig | 13 ++ fs/squashfs/Makefile | 9 +- fs/squashfs/decompressor_multi.c | 200 +++++++++++++++++++++++++++++++ 3 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 fs/squashfs/decompressor_multi.c diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index c70111ebefd4..1c6d340fc61f 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -63,6 +63,19 @@ config SQUASHFS_LZO If unsure, say N. +config SQUASHFS_MULTI_DECOMPRESSOR + bool "Use multiple decompressors for handling parallel I/O" + depends on SQUASHFS + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + If you have a parallel I/O workload and your system has enough memory, + using this option may improve overall I/O performance. + + If unsure, say N. 
+ config SQUASHFS_XZ bool "Include support for XZ compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index c223c8439c21..dfebc3b12d61 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,8 +4,15 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o decompressor_single.o +squashfs-y += namei.o super.o symlink.o decompressor.o + squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o + +ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR + squashfs-y += decompressor_multi.o +else + squashfs-y += decompressor_single.o +endif diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c new file mode 100644 index 000000000000..462731db5130 --- /dev/null +++ b/fs/squashfs/decompressor_multi.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2013 + * Minchan Kim + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression in the + * decompressor framework + */ + + +/* + * The reason that multiply two is that a CPU can request new I/O + * while it is waiting previous request. 
+ */ +#define MAX_DECOMPRESSOR (num_online_cpus() * 2) + + +int squashfs_max_decompressors(void) +{ + return MAX_DECOMPRESSOR; +} + + +struct squashfs_stream { + void *comp_opts; + struct list_head strm_list; + struct mutex mutex; + int avail_decomp; + wait_queue_head_t wait; +}; + + +struct decomp_stream { + void *stream; + struct list_head list; +}; + + +static void put_decomp_stream(struct decomp_stream *decomp_strm, + struct squashfs_stream *stream) +{ + mutex_lock(&stream->mutex); + list_add(&decomp_strm->list, &stream->strm_list); + mutex_unlock(&stream->mutex); + wake_up(&stream->wait); +} + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct decomp_stream *decomp_strm = NULL; + int err = -ENOMEM; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + goto out; + + stream->comp_opts = comp_opts; + mutex_init(&stream->mutex); + INIT_LIST_HEAD(&stream->strm_list); + init_waitqueue_head(&stream->wait); + + /* + * We should have a decompressor at least as default + * so if we fail to allocate new decompressor dynamically, + * we could always fall back to default decompressor and + * file system works. 
+ */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto out; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + err = PTR_ERR(decomp_strm->stream); + goto out; + } + + list_add(&decomp_strm->list, &stream->strm_list); + stream->avail_decomp = 1; + return stream; + +out: + kfree(decomp_strm); + kfree(stream); + return ERR_PTR(err); +} + + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + if (stream) { + struct decomp_stream *decomp_strm; + + while (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + msblk->decompressor->free(decomp_strm->stream); + kfree(decomp_strm); + stream->avail_decomp--; + } + } + + WARN_ON(stream->avail_decomp); + kfree(stream->comp_opts); + kfree(stream); +} + + +static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, + struct squashfs_stream *stream) +{ + struct decomp_stream *decomp_strm; + + while (1) { + mutex_lock(&stream->mutex); + + /* There is available decomp_stream */ + if (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + mutex_unlock(&stream->mutex); + break; + } + + /* + * If there is no available decomp and already full, + * let's wait for releasing decomp from other users. 
+ */ + if (stream->avail_decomp >= MAX_DECOMPRESSOR) + goto wait; + + /* Let's allocate new decomp */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto wait; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + kfree(decomp_strm); + goto wait; + } + + stream->avail_decomp++; + WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); + + mutex_unlock(&stream->mutex); + break; +wait: + /* + * If system memory is tough, let's for other's + * releasing instead of hurting VM because it could + * make page cache thrashing. + */ + mutex_unlock(&stream->mutex); + wait_event(stream->wait, + !list_empty(&stream->strm_list)); + } + + return decomp_strm; +} + + +int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); + res = msblk->decompressor->decompress(msblk, decomp_stream->stream, + buffer, bh, b, offset, length, srclength, pages); + put_decomp_stream(decomp_stream, stream); + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + return res; +} From 887bd836dc38e36525154e7dc592e5bc5e7e322c Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 18 Nov 2013 02:31:36 +0000 Subject: [PATCH 055/204] Squashfs: add multi-threaded decompression using percpu variable Add a multi-threaded decompression implementation which uses percpu variables. Using percpu variables has advantages and disadvantages over implementations which do not use percpu variables. Advantages: * the nature of percpu variables ensures decompression is load-balanced across the multiple cores. * simplicity. Disadvantages: it limits decompression to one thread per core. 
Signed-off-by: Phillip Lougher --- fs/squashfs/Kconfig | 57 ++++++++++---- fs/squashfs/Makefile | 10 +-- fs/squashfs/decompressor_multi_percpu.c | 98 +++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 20 deletions(-) create mode 100644 fs/squashfs/decompressor_multi_percpu.c diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 1c6d340fc61f..159bd6676dc2 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,50 @@ config SQUASHFS If unsure, say N. +choice + prompt "Decompressor parallelisation options" + depends on SQUASHFS + help + Squashfs now supports three parallelisation options for + decompression. Each one exhibits various trade-offs between + decompression performance and CPU and memory usage. + + If in doubt, select "Single threaded compression" + +config SQUASHFS_DECOMP_SINGLE + bool "Single threaded compression" + help + Traditionally Squashfs has used single-threaded decompression. + Only one block (data or metadata) can be decompressed at any + one time. This limits CPU and memory usage to a minimum. + +config SQUASHFS_DECOMP_MULTI + bool "Use multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + If you have a parallel I/O workload and your system has enough memory, + using this option may improve overall I/O performance. + + This decompressor implementation uses up to two parallel + decompressors per core. It dynamically allocates decompressors + on a demand basis. + +config SQUASHFS_DECOMP_MULTI_PERCPU + bool "Use percpu multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + This decompressor implementation uses a maximum of one + decompressor per core. 
It uses percpu variables to ensure + decompression is load-balanced across the cores. + +endchoice + config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS @@ -63,19 +107,6 @@ config SQUASHFS_LZO If unsure, say N. -config SQUASHFS_MULTI_DECOMPRESSOR - bool "Use multiple decompressors for handling parallel I/O" - depends on SQUASHFS - help - By default Squashfs uses a single decompressor but it gives - poor performance on parallel I/O workloads when using multiple CPU - machines due to waiting on decompressor availability. - - If you have a parallel I/O workload and your system has enough memory, - using this option may improve overall I/O performance. - - If unsure, say N. - config SQUASHFS_XZ bool "Include support for XZ compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index dfebc3b12d61..5833b96ee69c 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,14 +5,10 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o - +squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o - -ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR - squashfs-y += decompressor_multi.o -else - squashfs-y += decompressor_single.o -endif diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c new file mode 100644 index 000000000000..0e7b679bc4ad --- /dev/null +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the 
terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression using percpu + * variables, one thread per cpu core. + */ + +struct squashfs_stream { + void *stream; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct squashfs_stream __percpu *percpu; + int err, cpu; + + percpu = alloc_percpu(struct squashfs_stream); + if (percpu == NULL) + return ERR_PTR(-ENOMEM); + + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + } + + kfree(comp_opts); + return (__force void *) percpu; + +out: + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + if (!IS_ERR_OR_NULL(stream->stream)) + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream; + int cpu; + + if (msblk->stream) { + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream = get_cpu_ptr(percpu); + int res = msblk->decompressor->decompress(msblk, stream->stream, buffer, + bh, b, offset, length, srclength, 
pages); + put_cpu_ptr(stream); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return num_possible_cpus(); +} From dad13d68547bf13dde9a4ef7e24659e10250b7b1 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 18 Nov 2013 02:59:12 +0000 Subject: [PATCH 056/204] Squashfs: Generalise paging handling in the decompressors Further generalise the decompressors by adding a page handler abstraction. This adds helpers to allow the decompressors to access and process the output buffers in an implementation independent manner. This allows different types of output buffer to be passed to the decompressors, with the implementation specific aspects handled at decompression time, but without the knowledge being held in the decompressor wrapper code. This will allow the decompressors to handle Squashfs cache buffers, and page cache pages. This patch adds the abstraction and an implementation for the caches.
Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/block.c | 27 ++++++++------ fs/squashfs/cache.c | 28 +++++++++++--- fs/squashfs/decompressor.c | 14 +++++-- fs/squashfs/decompressor.h | 5 ++- fs/squashfs/decompressor_multi.c | 7 ++-- fs/squashfs/decompressor_multi_percpu.c | 9 ++--- fs/squashfs/decompressor_single.c | 9 ++--- fs/squashfs/lzo_wrapper.c | 27 +++++++++----- fs/squashfs/page_actor.h | 49 +++++++++++++++++++++++++ fs/squashfs/squashfs.h | 8 ++-- fs/squashfs/squashfs_fs_sb.h | 1 + fs/squashfs/xz_wrapper.c | 22 ++++++----- fs/squashfs/zlib_wrapper.c | 24 +++++++----- 13 files changed, 163 insertions(+), 67 deletions(-) create mode 100644 fs/squashfs/page_actor.h diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 4dd402597f22..0cea9b9236d0 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -36,6 +36,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" /* * Read the metadata block length, this is stored in the first two @@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb, * generated a larger block - this does occasionally happen with compression * algorithms). 
*/ -int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, - int length, u64 *next_index, int srclength, int pages) +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head **bh; int offset = index & ((1 << msblk->devblksize_log2) - 1); u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, page = 0, avail, i; + int bytes, compressed, b = 0, k = 0, avail, i; - bh = kcalloc(((srclength + msblk->devblksize - 1) + bh = kcalloc(((output->length + msblk->devblksize - 1) >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); if (bh == NULL) return -ENOMEM; @@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, *next_index = index + length; TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", - index, compressed ? "" : "un", length, srclength); + index, compressed ? "" : "un", length, output->length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto read_failure; @@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed ? "" : "un", length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto block_release; @@ -165,8 +166,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, } if (compressed) { - length = squashfs_decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + length = squashfs_decompress(msblk, bh, b, offset, length, + output); if (length < 0) goto read_failure; } else { @@ -174,19 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, * Block is uncompressed. 
*/ int in, pg_offset = 0; + void *data = squashfs_first_page(output); for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { - page++; + data = squashfs_next_page(output); pg_offset = 0; } avail = min_t(int, in, PAGE_CACHE_SIZE - pg_offset); - memcpy(buffer[page] + pg_offset, - bh[k]->b_data + offset, avail); + memcpy(data + pg_offset, bh[k]->b_data + offset, + avail); in -= avail; pg_offset += avail; offset += avail; @@ -194,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, offset = 0; put_bh(bh[k]); } + squashfs_finish_page(output); } kfree(bh); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index af0b73802592..1cb70a0b2168 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -56,6 +56,7 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" +#include "page_actor.h" /* * Look-up block in cache, and increment usage count. If not in cache, read @@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, entry->error = 0; spin_unlock(&cache->lock); - entry->length = squashfs_read_data(sb, entry->data, - block, length, &entry->next_index, - cache->block_size, cache->pages); + entry->length = squashfs_read_data(sb, block, length, + &entry->next_index, entry->actor); spin_lock(&cache->lock); @@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) kfree(cache->entry[i].data[j]); kfree(cache->entry[i].data); } + kfree(cache->entry[i].actor); } kfree(cache->entry); @@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, goto cleanup; } } + + entry->actor = squashfs_page_actor_init(entry->data, + cache->pages, 0); + if (entry->actor == NULL) { + ERROR("Failed to allocate %s cache entry\n", name); + goto cleanup; + } } return cache; @@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) int 
pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; int i, res; void *table, *buffer, **data; + struct squashfs_page_actor *actor; table = buffer = kmalloc(length, GFP_KERNEL); if (table == NULL) @@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) goto failed; } + actor = squashfs_page_actor_init(data, pages, length); + if (actor == NULL) { + res = -ENOMEM; + goto failed2; + } + for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) data[i] = buffer; - res = squashfs_read_data(sb, data, block, length | - SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); + res = squashfs_read_data(sb, block, length | + SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); kfree(data); + kfree(actor); if (res < 0) goto failed; return table; +failed2: + kfree(data); failed: kfree(table); return ERR_PTR(res); diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 234291f79ba5..ac22fe73b0ad 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -30,6 +30,7 @@ #include "squashfs_fs_sb.h" #include "decompressor.h" #include "squashfs.h" +#include "page_actor.h" /* * This file (and decompressor.h) implements a decompressor framework for @@ -87,6 +88,7 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; void *buffer = NULL, *comp_opts; + struct squashfs_page_actor *actor = NULL; int length = 0; /* @@ -99,9 +101,14 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags) goto out; } - length = squashfs_read_data(sb, &buffer, - sizeof(struct squashfs_super_block), 0, NULL, - PAGE_CACHE_SIZE, 1); + actor = squashfs_page_actor_init(&buffer, 1, 0); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); if (length < 0) { comp_opts = ERR_PTR(length); @@ -112,6 +119,7 @@ static void *get_comp_opts(struct super_block 
*sb, unsigned short flags) comp_opts = squashfs_comp_opts(msblk, buffer, length); out: + kfree(actor); kfree(buffer); return comp_opts; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 6cdb20a3878a..af0985321808 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -27,8 +27,9 @@ struct squashfs_decompressor { void *(*init)(struct squashfs_sb_info *, void *); void *(*comp_opts)(struct squashfs_sb_info *, void *, int); void (*free)(void *); - int (*decompress)(struct squashfs_sb_info *, void *, void **, - struct buffer_head **, int, int, int, int, int); + int (*decompress)(struct squashfs_sb_info *, void *, + struct buffer_head **, int, int, int, + struct squashfs_page_actor *); int id; char *name; int supported; diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index 462731db5130..ae54675a3526 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -183,15 +183,14 @@ static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, } -int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) { int res; struct squashfs_stream *stream = msblk->stream; struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); res = msblk->decompressor->decompress(msblk, decomp_stream->stream, - buffer, bh, b, offset, length, srclength, pages); + bh, b, offset, length, output); put_decomp_stream(decomp_stream, stream); if (res < 0) ERROR("%s decompression failed, data probably corrupt\n", diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index 0e7b679bc4ad..23a9c28ad8ea 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ 
b/fs/squashfs/decompressor_multi_percpu.c @@ -74,15 +74,14 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) } } -int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) { struct squashfs_stream __percpu *percpu = (struct squashfs_stream __percpu *) msblk->stream; struct squashfs_stream *stream = get_cpu_ptr(percpu); - int res = msblk->decompressor->decompress(msblk, stream->stream, buffer, - bh, b, offset, length, srclength, pages); + int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); put_cpu_ptr(stream); if (res < 0) diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c index f857cf6f22d4..a6c75929a00e 100644 --- a/fs/squashfs/decompressor_single.c +++ b/fs/squashfs/decompressor_single.c @@ -61,16 +61,15 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) } } -int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) { int res; struct squashfs_stream *stream = msblk->stream; mutex_lock(&stream->mutex); - res = msblk->decompressor->decompress(msblk, stream->stream, buffer, - bh, b, offset, length, srclength, pages); + res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); mutex_unlock(&stream->mutex); if (res < 0) diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 75c3b5779172..244b9fbfff7b 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -31,6 +31,7 @@ #include 
"squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_lzo { void *input; @@ -75,13 +76,13 @@ static void lzo_free(void *strm) static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { struct squashfs_lzo *stream = strm; - void *buff = stream->input; + void *buff = stream->input, *data; int avail, i, bytes = length, res; - size_t out_len = srclength; + size_t out_len = output->length; for (i = 0; i < b; i++) { avail = min(bytes, msblk->devblksize - offset); @@ -98,12 +99,20 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, goto failed; res = bytes = (int)out_len; - for (i = 0, buff = stream->output; bytes && i < pages; i++) { - avail = min_t(int, bytes, PAGE_CACHE_SIZE); - memcpy(buffer[i], buff, avail); - buff += avail; - bytes -= avail; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } else { + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } } + squashfs_finish_page(output); return res; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h new file mode 100644 index 000000000000..5b0ba5a7133a --- /dev/null +++ b/fs/squashfs/page_actor.h @@ -0,0 +1,49 @@ +#ifndef PAGE_ACTOR_H +#define PAGE_ACTOR_H +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +struct squashfs_page_actor { + void **page; + int pages; + int length; + int next_page; +}; + +static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + return actor; +} + +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->page[0]; +} + +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->next_page == actor->pages ? NULL : + actor->page[actor->next_page++]; +} + +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} +#endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 2e2751df8452..6a97e63ca173 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,8 +28,8 @@ #define WARNING(s, args...) 
pr_warning("SQUASHFS: "s, ## args) /* block.c */ -extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, - int, int); +extern int squashfs_read_data(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -53,8 +53,8 @@ extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); /* decompressor_xxx.c */ extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); -extern int squashfs_decompress(struct squashfs_sb_info *, void **, - struct buffer_head **, int, int, int, int, int); +extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, + int, int, int, struct squashfs_page_actor *); extern int squashfs_max_decompressors(void); /* export.c */ diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 9cdcf4150d59..1da565cb50c3 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -50,6 +50,7 @@ struct squashfs_cache_entry { wait_queue_head_t wait_queue; struct squashfs_cache *cache; void **data; + struct squashfs_page_actor *actor; }; struct squashfs_sb_info { diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 5d1d07cca6b4..c609624e4b8a 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -32,6 +32,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_xz { struct xz_dec *state; @@ -129,11 +130,11 @@ static void squashfs_xz_free(void *strm) static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { enum xz_ret xz_err; - int avail, total = 0, k = 0, 
page = 0; + int avail, total = 0, k = 0; struct squashfs_xz *stream = strm; xz_dec_reset(stream->state); @@ -141,7 +142,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->buf.in_size = 0; stream->buf.out_pos = 0; stream->buf.out_size = PAGE_CACHE_SIZE; - stream->buf.out = buffer[page++]; + stream->buf.out = squashfs_first_page(output); do { if (stream->buf.in_pos == stream->buf.in_size && k < b) { @@ -153,11 +154,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, offset = 0; } - if (stream->buf.out_pos == stream->buf.out_size - && page < pages) { - stream->buf.out = buffer[page++]; - stream->buf.out_pos = 0; - total += PAGE_CACHE_SIZE; + if (stream->buf.out_pos == stream->buf.out_size) { + stream->buf.out = squashfs_next_page(output); + if (stream->buf.out != NULL) { + stream->buf.out_pos = 0; + total += PAGE_CACHE_SIZE; + } } xz_err = xz_dec_run(stream->state, &stream->buf); @@ -166,6 +168,8 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, put_bh(bh[k++]); } while (xz_err == XZ_OK); + squashfs_finish_page(output); + if (xz_err != XZ_STREAM_END || k < b) goto out; diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index bb049027d15c..8727caba6882 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -32,6 +32,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) { @@ -62,14 +63,14 @@ static void zlib_free(void *strm) static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { - int zlib_err, zlib_init = 0; - int k = 0, page = 0; + int zlib_err, zlib_init = 0, k = 0; z_stream *stream = strm; - 
stream->avail_out = 0; + stream->avail_out = PAGE_CACHE_SIZE; + stream->next_out = squashfs_first_page(output); stream->avail_in = 0; do { @@ -81,15 +82,18 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, offset = 0; } - if (stream->avail_out == 0 && page < pages) { - stream->next_out = buffer[page++]; - stream->avail_out = PAGE_CACHE_SIZE; + if (stream->avail_out == 0) { + stream->next_out = squashfs_next_page(output); + if (stream->next_out != NULL) + stream->avail_out = PAGE_CACHE_SIZE; } if (!zlib_init) { zlib_err = zlib_inflateInit(stream); - if (zlib_err != Z_OK) + if (zlib_err != Z_OK) { + squashfs_finish_page(output); goto out; + } zlib_init = 1; } @@ -99,6 +103,8 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, put_bh(bh[k++]); } while (zlib_err == Z_OK); + squashfs_finish_page(output); + if (zlib_err != Z_STREAM_END) goto out; From 4ff4dc4d1508e64cb642dc1d1380085af47d1d6e Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 31 Oct 2013 19:24:27 +0000 Subject: [PATCH 057/204] Squashfs: Restructure squashfs_readpage() Restructure squashfs_readpage() splitting it into separate functions for datablocks, fragments and sparse blocks. Move the memcpying (from squashfs cache entry) implementation of squashfs_readpage_block into file_cache.c This allows different implementations to be supported. 
Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/Makefile | 2 +- fs/squashfs/file.c | 142 ++++++++++++++++++++------------------- fs/squashfs/file_cache.c | 38 +++++++++++ fs/squashfs/squashfs.h | 7 ++ 4 files changed, 118 insertions(+), 71 deletions(-) create mode 100644 fs/squashfs/file_cache.c diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 5833b96ee69c..e01ba1126c89 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-y += namei.o super.o symlink.o decompressor.o file_cache.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 8ca62c28fe12..e5c9689062ba 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return le32_to_cpu(size); } - -static int squashfs_readpage(struct file *file, struct page *page) +/* Copy data into page cache */ +void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, + int bytes, int offset) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes, i, offset = 0, sparse = 0; - struct squashfs_cache_entry *buffer = NULL; void *pageaddr; - - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); - int start_index = page->index & ~mask; - int end_index = start_index | mask; - int file_end = i_size_read(inode) >> msblk->block_log; - - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); 
- - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - goto out; - - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - /* - * Reading a datablock from disk. Need to read block list - * to get location and block size. - */ - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) - goto error_out; - - if (bsize == 0) { /* hole */ - bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - sparse = 1; - } else { - /* - * Read and decompress datablock. - */ - buffer = squashfs_get_datablock(inode->i_sb, - block, bsize); - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x" - "\n", block, bsize); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = buffer->length; - } - } else { - /* - * Datablock is stored inside a fragment (tail-end packed - * block). - */ - buffer = squashfs_get_fragment(inode->i_sb, - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x\n", - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = i_size_read(inode) & (msblk->block_size - 1); - offset = squashfs_i(inode)->fragment_offset; - } + int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = page->index & ~mask, end_index = start_index | mask; /* * Loop copying datablock into pages. As the datablock likely covers @@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page) for (i = start_index; i <= end_index && bytes > 0; i++, bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { struct page *push_page; - int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); + int avail = buffer ? 
min_t(int, bytes, PAGE_CACHE_SIZE) : 0; TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); @@ -475,11 +413,75 @@ static int squashfs_readpage(struct file *file, struct page *page) if (i != page->index) page_cache_release(push_page); } +} - if (!sparse) - squashfs_cache_put(buffer); +/* Read datablock stored packed inside a fragment (tail-end packed block) */ +static int squashfs_readpage_fragment(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + int res = buffer->error; + if (res) + ERROR("Unable to read page, block %llx, size %x\n", + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + else + squashfs_copy_cache(page, buffer, i_size_read(inode) & + (msblk->block_size - 1), + squashfs_i(inode)->fragment_offset); + + squashfs_cache_put(buffer); + return res; +} + +static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int bytes = index == file_end ? 
+ (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + + squashfs_copy_cache(page, NULL, bytes, 0); return 0; +} + +static int squashfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); + int file_end = i_size_read(inode) >> msblk->block_log; + int res; + void *pageaddr; + + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(inode)->start); + + if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + goto out; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; + + if (bsize == 0) + res = squashfs_readpage_sparse(page, index, file_end); + else + res = squashfs_readpage_block(page, block, bsize); + } else + res = squashfs_readpage_fragment(page); + + if (!res) + return 0; error_out: SetPageError(page); diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 000000000000..f2310d2a2019 --- /dev/null +++ b/fs/squashfs/file_cache.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* Read separately compressed datablock and memcopy into page cache */ +int squashfs_readpage_block(struct page *page, u64 block, int bsize) +{ + struct inode *i = page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + else + squashfs_copy_cache(page, buffer, buffer->length, 0); + + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 6a97e63ca173..9e1bb79f7e6f 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -66,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); +/* file.c */ +void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, + int); + +/* file_xxx.c */ +extern int squashfs_readpage_block(struct page *, u64, int); + /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, From a7b16c939e7759d65246b5363aeca1fac5999b62 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 13 Nov 2013 02:04:19 +0000 Subject: [PATCH 058/204] Squashfs: Directly decompress into the page cache for file data This introduces an implementation of squashfs_readpage_block() that directly decompresses into the page cache. This uses the previously added page handler abstraction to push down the necessary kmap_atomic/kunmap_atomic operations on the page cache buffers into the decompressors. This enables direct copying into the page cache without using the slow kmap/kunmap calls. 
The code detects when multiple threads are racing in squashfs_readpage() to decompress the same block, and avoids this regression by falling back to using an intermediate buffer. This patch enhances the performance of Squashfs significantly when multiple processes are accessing the filesystem simultaneously because it not only reduces memcopying, but it more importantly eliminates the lock contention on the intermediate buffer. Using single-thread decompression. dd if=file1 of=/dev/null bs=4096 & dd if=file2 of=/dev/null bs=4096 & dd if=file3 of=/dev/null bs=4096 & dd if=file4 of=/dev/null bs=4096 Before: 629145600 bytes (629 MB) copied, 45.8046 s, 13.7 MB/s After: 629145600 bytes (629 MB) copied, 9.29414 s, 67.7 MB/s Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/Kconfig | 28 ++++++ fs/squashfs/Makefile | 4 +- fs/squashfs/file_direct.c | 173 ++++++++++++++++++++++++++++++++++++++ fs/squashfs/page_actor.c | 100 ++++++++++++++++++++++ fs/squashfs/page_actor.h | 32 +++++++ 5 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 fs/squashfs/file_direct.c create mode 100644 fs/squashfs/page_actor.c diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 159bd6676dc2..b6fa8657dcbc 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,34 @@ config SQUASHFS If unsure, say N. +choice + prompt "File decompression options" + depends on SQUASHFS + help + Squashfs now supports two options for decompressing file + data. Traditionally Squashfs has decompressed into an + intermediate buffer and then memcopied it into the page cache. + Squashfs now supports the ability to decompress directly into + the page cache. + + If unsure, select "Decompress file data into an intermediate buffer" + +config SQUASHFS_FILE_CACHE + bool "Decompress file data into an intermediate buffer" + help + Decompress file data into an intermediate buffer and then + memcopy it into the page cache. 
+ +config SQUASHFS_FILE_DIRECT + bool "Decompress files directly into the page cache" + help + Directly decompress file data into the page cache. + Doing so can significantly improve performance because + it eliminates a memcpy and it also removes the lock contention + on the single buffer. + +endchoice + choice prompt "Decompressor parallelisation options" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index e01ba1126c89..4132520b4ff2 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,7 +4,9 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o file_cache.o +squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c new file mode 100644 index 000000000000..2943b2bfae48 --- /dev/null +++ b/fs/squashfs/file_direct.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" +#include "page_actor.h" + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page); + +/* Read separately compressed datablock directly into page cache */ +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) + +{ + struct inode *inode = target_page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + + int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = target_page->index & ~mask; + int end_index = start_index | mask; + int i, n, pages, missing_pages, bytes, res = -ENOMEM; + struct page **page; + struct squashfs_page_actor *actor; + void *pageaddr; + + if (end_index > file_end) + end_index = file_end; + + pages = end_index - start_index + 1; + + page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); + if (page == NULL) + return res; + + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(page, pages, 0); + if (actor == NULL) + goto out; + + /* Try to grab all the pages covered by the Squashfs block */ + for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + page[i] = (n == target_page->index) ? 
target_page : + grab_cache_page_nowait(target_page->mapping, n); + + if (page[i] == NULL) { + missing_pages++; + continue; + } + + if (PageUptodate(page[i])) { + unlock_page(page[i]); + page_cache_release(page[i]); + page[i] = NULL; + missing_pages++; + } + } + + if (missing_pages) { + /* + * Couldn't get one or more pages, this page has either + * been VM reclaimed, but others are still in the page cache + * and uptodate, or we're racing with another thread in + * squashfs_readpage also trying to grab them. Fall back to + * using an intermediate buffer. + */ + res = squashfs_read_cache(target_page, block, bsize, pages, + page); + goto out; + } + + /* Decompress directly into the page cache buffers */ + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + if (res < 0) + goto mark_errored; + + /* Last page may have trailing bytes not filled */ + bytes = res % PAGE_CACHE_SIZE; + if (bytes) { + pageaddr = kmap_atomic(page[pages - 1]); + memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + kunmap_atomic(pageaddr); + } + + /* Mark pages as uptodate, unlock and release */ + for (i = 0; i < pages; i++) { + flush_dcache_page(page[i]); + SetPageUptodate(page[i]); + unlock_page(page[i]); + if (page[i] != target_page) + page_cache_release(page[i]); + } + + kfree(actor); + kfree(page); + + return 0; + +mark_errored: + /* Decompression failed, mark pages as errored. 
Target_page is + * dealt with by the caller + */ + for (i = 0; i < pages; i++) { + if (page[i] == target_page) + continue; + flush_dcache_page(page[i]); + SetPageError(page[i]); + unlock_page(page[i]); + page_cache_release(page[i]); + } + +out: + kfree(actor); + kfree(page); + return res; +} + + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page) +{ + struct inode *i = target_page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int bytes = buffer->length, res = buffer->error, n, offset = 0; + void *pageaddr; + + if (res) { + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + goto out; + } + + for (n = 0; n < pages && bytes > 0; n++, + bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { + int avail = min_t(int, bytes, PAGE_CACHE_SIZE); + + if (page[n] == NULL) + continue; + + pageaddr = kmap_atomic(page[n]); + squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + kunmap_atomic(pageaddr); + flush_dcache_page(page[n]); + SetPageUptodate(page[n]); + unlock_page(page[n]); + if (page[n] != target_page) + page_cache_release(page[n]); + } + +out: + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c new file mode 100644 index 000000000000..5a1c11f56441 --- /dev/null +++ b/fs/squashfs/page_actor.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include "page_actor.h" + +/* + * This file contains implementations of page_actor for decompressing into + * an intermediate buffer, and for decompressing directly into the + * page cache. + * + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). 
+ */ + +/* Implementation of page_actor for decompressing into intermediate buffer */ +static void *cache_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->buffer[0]; +} + +static void *cache_next_page(struct squashfs_page_actor *actor) +{ + if (actor->next_page == actor->pages) + return NULL; + + return actor->buffer[actor->next_page++]; +} + +static void cache_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} + +struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->buffer = buffer; + actor->pages = pages; + actor->next_page = 0; + actor->squashfs_first_page = cache_first_page; + actor->squashfs_next_page = cache_next_page; + actor->squashfs_finish_page = cache_finish_page; + return actor; +} + +/* Implementation of page_actor for decompressing directly into page cache. */ +static void *direct_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->pageaddr = kmap_atomic(actor->page[0]); +} + +static void *direct_next_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); + + return actor->pageaddr = actor->next_page == actor->pages ? NULL : + kmap_atomic(actor->page[actor->next_page++]); +} + +static void direct_finish_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); +} + +struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? 
: pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + actor->pageaddr = NULL; + actor->squashfs_first_page = direct_first_page; + actor->squashfs_next_page = direct_next_page; + actor->squashfs_finish_page = direct_finish_page; + return actor; +} diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 5b0ba5a7133a..26dd82008b82 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -8,6 +8,7 @@ * the COPYING file in the top-level directory. */ +#ifndef CONFIG_SQUASHFS_FILE_DIRECT struct squashfs_page_actor { void **page; int pages; @@ -46,4 +47,35 @@ static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { /* empty */ } +#else +struct squashfs_page_actor { + union { + void **buffer; + struct page **page; + }; + void *pageaddr; + void *(*squashfs_first_page)(struct squashfs_page_actor *); + void *(*squashfs_next_page)(struct squashfs_page_actor *); + void (*squashfs_finish_page)(struct squashfs_page_actor *); + int pages; + int length; + int next_page; +}; + +extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page + **, int, int); +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_first_page(actor); +} +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_next_page(actor); +} +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + actor->squashfs_finish_page(actor); +} +#endif #endif From 6a1e8fd1e4dc7c53d67f4091bea0cfd2139901d0 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Sun, 10 Nov 2013 00:02:29 +0000 Subject: [PATCH 059/204] Squashfs: Check stream is not NULL in decompressor_multi.c Fix static checker complaint that stream is not checked in squashfs_decompressor_destroy(). 
Reported-by: Dan Carpenter Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/decompressor_multi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index ae54675a3526..d6008a636479 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -119,11 +119,10 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) kfree(decomp_strm); stream->avail_decomp--; } + WARN_ON(stream->avail_decomp); + kfree(stream->comp_opts); + kfree(stream); } - - WARN_ON(stream->avail_decomp); - kfree(stream->comp_opts); - kfree(stream); } From 3a2109763d60cf4d0d92f7ae3c6e2c268fb7de04 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Sun, 24 Nov 2013 00:40:49 +0000 Subject: [PATCH 060/204] Squashfs: fix failure to unlock pages on decompress error Direct decompression into the page cache. If we fall back to using an intermediate buffer (because we cannot grab all the page cache pages) and we get a decompress fail, we forgot to release the pages. 
Reported-by: Roman Peniaev Signed-off-by: Phillip Lougher --- fs/squashfs/file_direct.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 2943b2bfae48..62a0de6632e1 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -84,6 +84,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) */ res = squashfs_read_cache(target_page, block, bsize, pages, page); + if (res < 0) + goto mark_errored; + goto out; } @@ -119,7 +122,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) * dealt with by the caller */ for (i = 0; i < pages; i++) { - if (page[i] == target_page) + if (page[i] == NULL || page[i] == target_page) continue; flush_dcache_page(page[i]); SetPageError(page[i]); unlock_page(page[i]); page_cache_release(page[i]); From e57e5b786fcfe069afdfa8ae3abb1287ffe3d687 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 13 Mar 2014 10:14:33 -0400 Subject: [PATCH 061/204] fs: push sync_filesystem() down to the file system's remount_fs() Previously, the no-op "mount -o mount /dev/xxx" operation when the file system is already mounted read-write causes an implied, unconditional syncfs(). This seems pretty stupid, and it's certainly not documented or guaranteed to do this, nor is it particularly useful, except in the case where the file system was mounted rw and is getting remounted read-only. However, it's possible that there might be some file systems that are actually depending on this behavior. In most file systems, it's probably fine to only call sync_filesystem() when transitioning from read-write to read-only, and there are some file systems where this is not needed at all (for example, for a pseudo-filesystem or something like romfs).
Signed-off-by: "Theodore Ts'o" Cc: linux-fsdevel@vger.kernel.org Cc: Christoph Hellwig Cc: Artem Bityutskiy Cc: Adrian Hunter Cc: Evgeniy Dushistov Cc: Jan Kara Cc: OGAWA Hirofumi Cc: Anders Larsen Cc: Phillip Lougher Cc: Kees Cook Cc: Mikulas Patocka Cc: Petr Vandrovec Cc: xfs@oss.sgi.com Cc: linux-btrfs@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: codalist@coda.cs.cmu.edu Cc: linux-ext4@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: fuse-devel@lists.sourceforge.net Cc: cluster-devel@redhat.com Cc: linux-mtd@lists.infradead.org Cc: jfs-discussion@lists.sourceforge.net Cc: linux-nfs@vger.kernel.org Cc: linux-nilfs@vger.kernel.org Cc: linux-ntfs-dev@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Cc: reiserfs-devel@vger.kernel.org --- fs/adfs/super.c | 1 + fs/affs/super.c | 1 + fs/befs/linuxvfs.c | 1 + fs/btrfs/super.c | 1 + fs/cifs/cifsfs.c | 1 + fs/coda/inode.c | 1 + fs/cramfs/inode.c | 1 + fs/debugfs/inode.c | 1 + fs/devpts/inode.c | 1 + fs/efs/super.c | 1 + fs/ext2/super.c | 1 + fs/ext3/super.c | 2 ++ fs/ext4/super.c | 2 ++ fs/f2fs/super.c | 1 - fs/fat/inode.c | 2 ++ fs/freevxfs/vxfs_super.c | 1 + fs/fuse/inode.c | 1 + fs/gfs2/super.c | 2 ++ fs/hfs/super.c | 1 + fs/hfsplus/super.c | 1 + fs/hpfs/super.c | 2 ++ fs/jffs2/super.c | 1 + fs/jfs/super.c | 1 + fs/minix/inode.c | 1 + fs/ncpfs/inode.c | 1 + fs/nfs/super.c | 2 ++ fs/nilfs2/super.c | 1 + fs/ntfs/super.c | 2 ++ fs/ocfs2/super.c | 2 ++ fs/openpromfs/inode.c | 1 + fs/proc/root.c | 2 ++ fs/pstore/inode.c | 1 + fs/qnx4/inode.c | 1 + fs/qnx6/inode.c | 1 + fs/reiserfs/super.c | 1 + fs/romfs/super.c | 1 + fs/squashfs/super.c | 1 + fs/sysv/inode.c | 1 + fs/ubifs/super.c | 1 + fs/udf/super.c | 1 + fs/ufs/super.c | 1 + fs/xfs/xfs_super.c | 1 + 42 files changed, 50 insertions(+), 1 deletion(-) diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 0ff4bae2c2a2..479ef5a9c5d3 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -213,6 +213,7 @@ static int 
parse_options(struct super_block *sb, char *options) static int adfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NODIRATIME; return parse_options(sb, data); } diff --git a/fs/affs/super.c b/fs/affs/super.c index 45161a832bbc..01c6e07acbff 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -549,6 +549,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); + sync_filesystem(sb); *flags |= MS_NODIRATIME; memcpy(volume, sbi->s_volume, 32); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index f95dddced968..7192a7e99373 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -908,6 +908,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) static int befs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); if (!(*flags & MS_RDONLY)) return -EINVAL; return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f0857e092a3c..461731c7fa3a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1239,6 +1239,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) unsigned int old_metadata_ratio = fs_info->metadata_ratio; int ret; + sync_filesystem(sb); btrfs_remount_prepare(fs_info); ret = btrfs_parse_options(root, data); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3752b9f6d9e4..0d7daf714c4b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -521,6 +521,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) static int cifs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NODIRATIME; return 0; } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 4dcc0d81a7aa..0aa4c4d75ec6 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -96,6 +96,7 @@ void coda_destroy_inodecache(void) static int coda_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NOATIME; 
return 0; } diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 35b1c7bd18b7..c0148585670d 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -227,6 +227,7 @@ static void cramfs_put_super(struct super_block *sb) static int cramfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4888cb3fdef7..60ef5f96ae3d 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -218,6 +218,7 @@ static int debugfs_remount(struct super_block *sb, int *flags, char *data) int err; struct debugfs_fs_info *fsi = sb->s_fs_info; + sync_filesystem(sb); err = debugfs_parse_options(data, &fsi->mount_opts); if (err) goto fail; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 073d30b9d1ac..5638ff720f66 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -313,6 +313,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) struct pts_fs_info *fsi = DEVPTS_SB(sb); struct pts_mount_opts *opts = &fsi->mount_opts; + sync_filesystem(sb); err = parse_mount_options(data, PARSE_REMOUNT, opts); /* diff --git a/fs/efs/super.c b/fs/efs/super.c index c6f57a74a559..4709692cd7a0 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -113,6 +113,7 @@ static void efs_put_super(struct super_block *s) static int efs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 288534920fe5..bc47f47a99fe 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1254,6 +1254,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) unsigned long old_sb_flags; int err; + sync_filesystem(sb); spin_lock(&sbi->s_lock); /* Store the old options */ diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6356665a74bb..246bd04207bf 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2595,6 +2595,8 @@ static int ext3_remount (struct 
super_block * sb, int * flags, char * data) int i; #endif + sync_filesystem(sb); + /* Store the original options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94cc84db7c9a..48c8af5fe91d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4617,6 +4617,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) #endif char *orig_data = kstrdup(data, GFP_KERNEL); + sync_filesystem(sb); + /* Store the original options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8555f7df82c7..03ab8b830940 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -239,7 +239,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) seq_puts(seq, ",disable_ext_identify"); - seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a14dd4c0528a..ebdc6656c067 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -632,6 +632,8 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); + sync_filesystem(sb); + /* make sure we update state on remount. 
*/ new_rdonly = *flags & MS_RDONLY; if (new_rdonly != (sb->s_flags & MS_RDONLY)) { diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index e37eb274e492..7ca8c75d50d3 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -124,6 +124,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) static int vxfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde14a08..a796d1c52a65 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -135,6 +135,7 @@ static void fuse_evict_inode(struct inode *inode) static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); if (*flags & MS_MANDLOCK) return -EINVAL; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5639dec66c4..db7fff5a29c1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1142,6 +1142,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) struct gfs2_tune *gt = &sdp->sd_tune; int error; + sync_filesystem(sb); + spin_lock(&gt->gt_spin); args.ar_commit = gt->gt_logd_secs; args.ar_quota_quantum = gt->gt_quota_quantum; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 2d2039e754cd..eee7206c38d1 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -112,6 +112,7 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NODIRATIME; if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 4c4d142cf890..1b9414f701e6 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -323,6 +323,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfsplus_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return
0; if (!(*flags & MS_RDONLY)) { diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a0617e706957..fc8037639064 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -395,6 +395,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); + sync_filesystem(s); + *flags |= MS_NOATIME; hpfs_lock(s); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 0defb1cc2a35..0918f0e2e266 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -243,6 +243,7 @@ static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data) struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); int err; + sync_filesystem(sb); err = jffs2_parse_options(c, data); if (err) return -EINVAL; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 788e0a9c1fb0..b7486dafa1cd 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -413,6 +413,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) int flag = JFS_SBI(sb)->flag; int ret; + sync_filesystem(sb); if (!parse_options(data, sb, &newLVSize, &flag)) { return -EINVAL; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index df122496f328..a54d08865e59 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -123,6 +123,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) struct minix_sb_info * sbi = minix_sb(sb); struct minix_super_block * ms; + sync_filesystem(sb); ms = sbi->s_ms; if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 26910c8154da..3f54348b926b 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -99,6 +99,7 @@ static void destroy_inodecache(void) static int ncp_remount(struct super_block *sb, int *flags, char* data) { + sync_filesystem(sb); *flags |= MS_NODIRATIME; return 0; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2d7525fbcf25..d85c1b819145 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2133,6 +2133,8 @@ 
nfs_remount(struct super_block *sb, int *flags, char *raw_data) struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; u32 nfsvers = nfss->nfs_client->rpc_ops->version; + sync_filesystem(sb); + /* * Userspace mount programs that send binary options generally send * them populated with default values. We have no way to know which diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c7d1f9f18b09..4b0a8d4a8345 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1114,6 +1114,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) unsigned long old_mount_opt; int err; + sync_filesystem(sb); old_sb_flags = sb->s_flags; old_mount_opt = nilfs->ns_mount_opt; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 82650d52d916..bd5610d48242 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -468,6 +468,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_debug("Entering with remount options string: %s", opt); + sync_filesystem(sb); + #ifndef NTFS_RW /* For read-only compiled driver, enforce read-only flag. 
*/ *flags |= MS_RDONLY; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 01b85165552b..616bde667d8d 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -632,6 +632,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) struct ocfs2_super *osb = OCFS2_SB(sb); u32 tmp; + sync_filesystem(sb); + if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || !ocfs2_check_set_options(sb, &parsed_options)) { ret = -EINVAL; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 75885ffde44e..f4026aba26f3 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -375,6 +375,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) static int openprom_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NOATIME; return 0; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 41a6ea93f486..85c5018b5b89 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) int proc_remount(struct super_block *sb, int *flags, char *data) { struct pid_namespace *pid = sb->s_fs_info; + + sync_filesystem(sb); return !proc_parse_options(data, pid); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 57bbfeaab663..0a57f914de2e 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -247,6 +247,7 @@ static void parse_options(char *options) static int pstore_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); parse_options(data); return 0; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 2e8caa62da78..3410e9f657ca 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -46,6 +46,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) { struct qnx4_sb_info *qs; + sync_filesystem(sb); qs = qnx4_sb(sb); qs->Version = QNX4_VERSION; *flags |= MS_RDONLY; diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 8d941edfefa1..65cdaab3ed49 100644 --- a/fs/qnx6/inode.c 
+++ b/fs/qnx6/inode.c @@ -55,6 +55,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) static int qnx6_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f8a23c3078f8..9fc9d56533fe 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1318,6 +1318,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif + sync_filesystem(s); reiserfs_write_lock(s); #ifdef CONFIG_QUOTA diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 15cbc41ee365..ae839482c341 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -435,6 +435,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) */ static int romfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 202df6312d4e..031c8d67fd51 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -371,6 +371,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int squashfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index c327d4ee1235..4742e58f3fc5 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -60,6 +60,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); + sync_filesystem(sb); if (sbi->s_forced_ro) *flags |= MS_RDONLY; return 0; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f21acf0ef01f..0d0036750c71 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1840,6 +1840,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) int err; struct ubifs_info *c = sb->s_fs_info; + sync_filesystem(sb); dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, 
*flags); err = ubifs_parse_options(c, data, 1); diff --git a/fs/udf/super.c b/fs/udf/super.c index 9ac4057a86c9..3b1a4bea332d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -629,6 +629,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) struct udf_options uopt; struct udf_sb_info *sbi = UDF_SB(sb); int error = 0; + sync_filesystem(sb); uopt.flags = sbi->s_flags; uopt.uid = sbi->s_uid; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 329f2f53b7ed..b8c6791f046f 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1280,6 +1280,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) unsigned new_mount_opt, ufstype; unsigned flags; + sync_filesystem(sb); lock_ufs(sb); mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3033ba5e9762..478c0ad5e36f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1218,6 +1218,7 @@ xfs_fs_remount( char *p; int error; + sync_filesystem(sb); while ((p = strsep(&options, ",")) != NULL) { int token; From d933d04a83dbd77eb7b1fc56e01ca45186cebb24 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:05:52 -0700 Subject: [PATCH 062/204] fs/squashfs/squashfs.h: replace pr_warning by pr_warn Update the last pr_warning callsite in fs branch Signed-off-by: Fabian Frederick Cc: Phillip Lougher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/squashfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 9e1bb79f7e6f..887d6d270080 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -25,7 +25,7 @@ #define ERROR(s, args...) pr_err("SQUASHFS error: "s, ## args) -#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) +#define WARNING(s, args...) 
pr_warn("SQUASHFS: "s, ## args) /* block.c */ extern int squashfs_read_data(struct super_block *, u64, int, u64 *, From 7fcc243f303f04272db61110e70ed2dfde565378 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 6 Aug 2014 16:03:50 -0700 Subject: [PATCH 063/204] fs/squashfs/file_direct.c: replace count*size kmalloc by kmalloc_array kmalloc_array() manages count*sizeof overflow. Signed-off-by: Fabian Frederick Cc: Phillip Lougher Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/file_direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 62a0de6632e1..43e7a7eddac0 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) pages = end_index - start_index + 1; - page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); + page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); if (page == NULL) return res; From b9281218c2d20a89b50bd6c152c9ec398527deca Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 6 Aug 2014 16:03:52 -0700 Subject: [PATCH 064/204] fs/squashfs/super.c: logging cleanup - Convert printk to pr_foo() - Add pr_fmt for future logging entries - Coalesce formats Signed-off-by: Fabian Frederick Cc: Phillip Lougher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 031c8d67fd51..5056babe00df 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -27,6 +27,8 @@ * the filesystem. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> @@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void) return err; } - printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) " - "Phillip Lougher\n"); + pr_info("version 4.0 (2009/01/31) Phillip Lougher\n"); return 0; } From 5c913a40d243d609a087703d9217e69553258866 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 27 Nov 2014 06:49:14 +0000 Subject: [PATCH 065/204] Squashfs: add LZ4 compression support Add support for reading file systems compressed with the LZ4 compression algorithm. This patch adds the LZ4 decompressor wrapper code. Signed-off-by: Phillip Lougher --- fs/squashfs/lz4_wrapper.c | 142 ++++++++++++++++++++++++++++++++++++++ fs/squashfs/squashfs_fs.h | 1 + 2 files changed, 143 insertions(+) create mode 100644 fs/squashfs/lz4_wrapper.c diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c new file mode 100644 index 000000000000..c31e2bc9c081 --- /dev/null +++ b/fs/squashfs/lz4_wrapper.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2013, 2014 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory.
+ */ + +#include <linux/buffer_head.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/lz4.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs.h" +#include "decompressor.h" +#include "page_actor.h" + +#define LZ4_LEGACY 1 + +struct lz4_comp_opts { + __le32 version; + __le32 flags; +}; + +struct squashfs_lz4 { + void *input; + void *output; +}; + + +static void *lz4_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int len) +{ + struct lz4_comp_opts *comp_opts = buff; + + /* LZ4 compressed filesystems always have compression options */ + if (comp_opts == NULL || len < sizeof(*comp_opts)) + return ERR_PTR(-EIO); + + if (le32_to_cpu(comp_opts->version) != LZ4_LEGACY) { + /* LZ4 format currently used by the kernel is the 'legacy' + * format */ + ERROR("Unknown LZ4 version\n"); + return ERR_PTR(-EINVAL); + } + + return NULL; +} + + +static void *lz4_init(struct squashfs_sb_info *msblk, void *buff) +{ + int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); + struct squashfs_lz4 *stream; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (stream == NULL) + goto failed; + stream->input = vmalloc(block_size); + if (stream->input == NULL) + goto failed2; + stream->output = vmalloc(block_size); + if (stream->output == NULL) + goto failed3; + + return stream; + +failed3: + vfree(stream->input); +failed2: + kfree(stream); +failed: + ERROR("Failed to initialise LZ4 decompressor\n"); + return ERR_PTR(-ENOMEM); +} + + +static void lz4_free(void *strm) +{ + struct squashfs_lz4 *stream = strm; + + if (stream) { + vfree(stream->input); + vfree(stream->output); + } + kfree(stream); +} + + +static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) +{ + struct squashfs_lz4 *stream = strm; + void *buff = stream->input, *data; + int avail, i, bytes = length, res; + size_t dest_len = output->length; + + for (i = 0; i < b; i++) { + avail = min(bytes,
msblk->devblksize - offset); + memcpy(buff, bh[i]->b_data + offset, avail); + buff += avail; + bytes -= avail; + offset = 0; + put_bh(bh[i]); + } + + res = lz4_decompress_unknownoutputsize(stream->input, length, + stream->output, &dest_len); + if (res) + return -EIO; + + bytes = dest_len; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } + squashfs_finish_page(output); + + return dest_len; +} + +const struct squashfs_decompressor squashfs_lz4_comp_ops = { + .init = lz4_init, + .comp_opts = lz4_comp_opts, + .free = lz4_free, + .decompress = lz4_uncompress, + .id = LZ4_COMPRESSION, + .name = "lz4", + .supported = 1 +}; diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 4b2beda49498..506f4ba5b983 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -240,6 +240,7 @@ struct meta_index { #define LZMA_COMPRESSION 2 #define LZO_COMPRESSION 3 #define XZ_COMPRESSION 4 +#define LZ4_COMPRESSION 5 struct squashfs_super_block { __le32 s_magic; From 163b9ffb69cea2390525aa64e85a089967b2bd61 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 27 Nov 2014 18:48:44 +0000 Subject: [PATCH 066/204] Squashfs: Add LZ4 compression configuration option Add the glue code, and also update the documentation. 
Signed-off-by: Phillip Lougher --- Documentation/filesystems/squashfs.txt | 8 ++++---- fs/squashfs/Kconfig | 15 +++++++++++++++ fs/squashfs/Makefile | 1 + fs/squashfs/decompressor.c | 7 +++++++ fs/squashfs/decompressor.h | 4 ++++ 5 files changed, 31 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt index 403c090aca39..e5274f84dc56 100644 --- a/Documentation/filesystems/squashfs.txt +++ b/Documentation/filesystems/squashfs.txt @@ -2,10 +2,10 @@ SQUASHFS 4.0 FILESYSTEM ======================= Squashfs is a compressed read-only filesystem for Linux. -It uses zlib/lzo/xz compression to compress files, inodes and directories. -Inodes in the system are very small and all blocks are packed to minimise -data overhead. Block sizes greater than 4K are supported up to a maximum -of 1Mbytes (default block size 128K). +It uses zlib, lz4, lzo, or xz compression to compress files, inodes and +directories. Inodes in the system are very small and all blocks are packed to +minimise data overhead. Block sizes greater than 4K are supported up to a +maximum of 1Mbytes (default block size 128K). Squashfs is intended for general read-only filesystem use, for archival use (i.e. in cases where a .tar.gz file may be used), and in constrained diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index b6fa8657dcbc..ffb093e72b6c 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -120,6 +120,21 @@ config SQUASHFS_ZLIB If unsure, say Y. +config SQUASHFS_LZ4 + bool "Include support for LZ4 compressed file systems" + depends on SQUASHFS + select LZ4_DECOMPRESS + help + Saying Y here includes support for reading Squashfs file systems + compressed with LZ4 compression. LZ4 compression is mainly + aimed at embedded systems with slower CPUs where the overheads + of zlib are too high. + + LZ4 is not the standard compression used in Squashfs and so most + file systems will be readable without selecting this option. 
+ + If unsure, say N. + config SQUASHFS_LZO bool "Include support for LZO compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 4132520b4ff2..246a6f329d89 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -11,6 +11,7 @@ squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o +squashfs-$(CONFIG_SQUASHFS_LZ4) += lz4_wrapper.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index ac22fe73b0ad..e9034bf6e5ae 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -41,6 +41,12 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 }; +#ifndef CONFIG_SQUASHFS_LZ4 +static const struct squashfs_decompressor squashfs_lz4_comp_ops = { + NULL, NULL, NULL, NULL, LZ4_COMPRESSION, "lz4", 0 +}; +#endif + #ifndef CONFIG_SQUASHFS_LZO static const struct squashfs_decompressor squashfs_lzo_comp_ops = { NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 @@ -65,6 +71,7 @@ static const struct squashfs_decompressor squashfs_unknown_comp_ops = { static const struct squashfs_decompressor *decompressor[] = { &squashfs_zlib_comp_ops, + &squashfs_lz4_comp_ops, &squashfs_lzo_comp_ops, &squashfs_xz_comp_ops, &squashfs_lzma_unsupported_comp_ops, diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index af0985321808..a25713c031a5 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -46,6 +46,10 @@ static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, extern const struct squashfs_decompressor 
squashfs_xz_comp_ops; #endif +#ifdef CONFIG_SQUASHFS_LZ4 +extern const struct squashfs_decompressor squashfs_lz4_comp_ops; +#endif + #ifdef CONFIG_SQUASHFS_LZO extern const struct squashfs_decompressor squashfs_lzo_comp_ops; #endif From d10a716c35b315791a740a362e63a8928c8e473f Mon Sep 17 00:00:00 2001 From: Kyungsik Lee Date: Mon, 8 Jul 2013 16:01:45 -0700 Subject: [PATCH 067/204] decompressor: add LZ4 decompressor module Add support for LZ4 decompression in the Linux Kernel. LZ4 Decompression APIs for kernel are based on LZ4 implementation by Yann Collet. Benchmark Results(PATCH v3) Compiler: Linaro ARM gcc 4.6.2 1. ARMv7, 1.5GHz based board Kernel: linux 3.4 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.7MB 20.1MB/s, 25.2MB/s(UA) LZ4 7.3MB 29.1MB/s, 45.6MB/s(UA) 2. ARMv7, 1.7GHz based board Kernel: linux 3.7 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.0MB 34.1MB/s, 52.2MB/s(UA) LZ4 6.5MB 86.7MB/s - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch set is for adding support for LZ4-compressed Kernel. LZ4 is a very fast lossless compression algorithm and it also features an extremely fast decoder [1]. But we have five of decompressors already and one question which does arise, however, is that of where do we stop adding new ones? This issue had been discussed and came to the conclusion [2]. Russell King said that we should have: - one decompressor which is the fastest - one decompressor for the highest compression ratio - one popular decompressor (eg conventional gzip) If we have a replacement one for one of these, then it should do exactly that: replace it. The benchmark shows that an 8% increase in image size vs a 66% increase in decompression speed compared to LZO(which has been known as the fastest decompressor in the Kernel). Therefore the "fast but may not be small" compression title has clearly been taken by LZ4 [3]. 
[1] http://code.google.com/p/lz4/ [2] http://thread.gmane.org/gmane.linux.kbuild.devel/9157 [3] http://thread.gmane.org/gmane.linux.kbuild.devel/9347 LZ4 homepage: http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository: http://code.google.com/p/lz4/ Signed-off-by: Kyungsik Lee Signed-off-by: Yann Collet Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Russell King Cc: Borislav Petkov Cc: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 51 ++++++ lib/lz4/lz4_decompress.c | 326 +++++++++++++++++++++++++++++++++++++++ lib/lz4/lz4defs.h | 94 +++++++++++ 3 files changed, 471 insertions(+) create mode 100644 include/linux/lz4.h create mode 100644 lib/lz4/lz4_decompress.c create mode 100644 lib/lz4/lz4defs.h diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 000000000000..7f6c75a093f8 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,51 @@ +#ifndef __LZ4_H__ +#define __LZ4_H__ +/* + * LZ4 Kernel Interface + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * lz4_compressbound() + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +static inline size_t lz4_compressbound(size_t isize) +{ + return isize + (isize / 255) + 16; +} + +/* + * lz4_decompress() + * src : source address of the compressed data + * src_len : is the input size, which is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated.
+ * slightly faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len); + +/* + * lz4_decompress_unknownoutputsize() + * src : source address of the compressed data + * src_len : is the input size, therefore the compressed size + * dest : output buffer address of the decompressed data + * dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after + * decompress done + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + */ +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len); +#endif diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c new file mode 100644 index 000000000000..dcc89753af65 --- /dev/null +++ b/lib/lz4/lz4_decompress.c @@ -0,0 +1,326 @@ +/* + * LZ4 Decompressor for Linux kernel + * + * Copyright (C) 2013 LG Electronics Co., Ltd. (http://www.lge.com/) + * + * Based on LZ4 implementation by Yann Collet. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#ifndef STATIC +#include <linux/module.h> +#include <linux/kernel.h> +#endif +#include <linux/lz4.h> + +#include <asm/unaligned.h> + +#include "lz4defs.h" + +static int lz4_uncompress(const char *source, char *dest, int osize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *ref; + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + osize; + BYTE *cpy; + unsigned token; + size_t length; + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + while (1) { + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + size_t len; + + len = *ip++; + for (; len == 255; length += 255) + len = *ip++; + length += len; + } + + /* copy literals */ + cpy = op + length; + if (unlikely(cpy > oend - COPYLENGTH)) { + /* + * Error: not enough place for another match + * (min 4) + 5 literals + */ + if (cpy != oend) + goto _output_error; + + memcpy(op, ip, length); + ip += length; + break; /* EOF */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + +
/* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + + /* Error: offset create reference outside destination buffer */ + if (unlikely(ref < (BYTE *const) dest)) + goto _output_error; + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + for (; *ip == 255; length += 255) + ip++; + length += *ip++; + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op-ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > (oend - COPYLENGTH)) { + + /* Error: request to write beyond destination buffer */ + if (cpy > oend) + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)ip) - source); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, + int isize, size_t maxoutputsize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *ref; + + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + maxoutputsize; + BYTE *cpy; + + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + /* Main Loop */ + while (ip < iend) { + + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) 
{ + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + + if (cpy > oend) + goto _output_error;/* writes beyond buffer */ + + if (ip + length != iend) + goto _output_error;/* + * Error: LZ4 format requires + * to consume all input + * at this stage + */ + memcpy(op, ip, length); + op += length; + break;/* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const) dest) + goto _output_error; + /* + * Error : offset creates reference + * outside of destination buffer + */ + + /* get matchlength */ + length = (token & ML_MASK); + if (length == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op - ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE-4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + goto _output_error; /* write outside of buf */ + + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *) op) - dest); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *) ip) - source)); +} + +int 
lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len) +{ + int ret = -1; + int input_len = 0; + + input_len = lz4_uncompress(src, dest, actual_dest_len); + if (input_len < 0) + goto exit_0; + *src_len = input_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress); +#endif + +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len) +{ + int ret = -1; + int out_len = 0; + + out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, + *dest_len); + if (out_len < 0) + goto exit_0; + *dest_len = out_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Decompressor"); +#endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h new file mode 100644 index 000000000000..43ac31d63f36 --- /dev/null +++ b/lib/lz4/lz4defs.h @@ -0,0 +1,94 @@ +/* + * lz4defs.h -- architecture specific defines + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* + * Detects 64 bits mode + */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ + || defined(__ppc64__) || defined(__LP64__)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Architecture-specific macros + */ +#define BYTE u8 +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ + || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ + && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; + +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + +#define PUT4(s, d) (A32(d) = A32(s)) +#define PUT8(s, d) (A64(d) = A64(s)) +#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#define PUT4(s, d) \ + put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) +#define PUT8(s, d) \ + put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) +#endif + +#define COPYLENGTH 8 +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) + +#if LZ4_ARCH64/* 64-bit */ +#define STEPSIZE 8 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT8(s, d); \ + d += 8; \ + s += 8; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) + +#define LZ4_SECURECOPY(s, d, e) \ + do { \ + if (d < e) { \ + LZ4_WILDCOPY(s, d, e); \ + } \ + } while (0) + +#else /* 32-bit */ +#define STEPSIZE 4 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT4(s, d); \ + d += 4; \ + s += 4; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) \ + do { \ + LZ4_COPYSTEP(s, d); \ + LZ4_COPYSTEP(s, d); \ + } while (0) + +#define LZ4_SECURECOPY LZ4_WILDCOPY +#endif + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) + +#define LZ4_WILDCOPY(s, d, e) \ + do { \ + LZ4_COPYPACKET(s, d); \ + } while (d < e) From 7207f784ce8414af2393dd239d9e453b2225854c Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Mon, 8 Jul 2013 16:01:49 -0700 Subject: [PATCH 068/204] lib: add lz4 compressor module This 
patchset is for supporting LZ4 compression and the crypto API using it. As shown below, the size of the data is a little bit bigger, but the compression speed is faster when unaligned memory access is enabled. We can use lz4 de/compression through the crypto API as well. Also, it will be useful for other potential users of lz4 compression. lz4 Compression Benchmark: Compiler: ARM gcc 4.6.4 ARMv7, 1 GHz based board Kernel: linux 3.4 Uncompressed data Size: 101 MB Compressed Size compression Speed LZO 72.1MB 32.1MB/s, 33.0MB/s(UA) LZ4 75.1MB 30.4MB/s, 35.9MB/s(UA) LZ4HC 59.8MB 2.4MB/s, 2.5MB/s(UA) - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch: Add support for LZ4 compression in the Linux Kernel. LZ4 Compression APIs for kernel are based on LZ4 implementation by Yann Collet and were changed for kernel coding style. LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository : http://code.google.com/p/lz4/ svn revision : r90 Two APIs are added: lz4_compress() supports basic lz4 compression, whereas lz4hc_compress() supports high compression, where CPU performance gets lower but the compression ratio gets higher. Also, we require pre-allocated working memory of the defined size, and the destination buffer must be allocated with the size returned by lz4_compressbound(). [akpm@linux-foundation.org: make lz4_compresshcctx() static] Signed-off-by: Chanho Min Cc: "Darrick J.
Wong" Cc: Bob Pearson Cc: Richard Weinberger Cc: Herbert Xu Cc: Yann Collet Cc: Kyungsik Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 36 +++ lib/Kconfig | 9 + lib/Makefile | 3 + lib/lz4/Makefile | 3 + lib/lz4/lz4_compress.c | 443 ++++++++++++++++++++++++++++++++ lib/lz4/lz4defs.h | 66 ++++- lib/lz4/lz4hc_compress.c | 539 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 1097 insertions(+), 2 deletions(-) create mode 100644 lib/lz4/Makefile create mode 100644 lib/lz4/lz4_compress.c create mode 100644 lib/lz4/lz4hc_compress.c diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 7f6c75a093f8..d21c13f10a64 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -9,6 +9,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *)) +#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *)) /* * lz4_compressbound() @@ -20,6 +22,40 @@ static inline size_t lz4_compressbound(size_t isize) return isize + (isize / 255) + 16; } +/* + * lz4_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + + /* + * lz4hc_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. 
+ * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* * lz4_decompress() * src : source address of the compressed data diff --git a/lib/Kconfig b/lib/Kconfig index fe01d418b09a..06d94d885877 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -189,6 +189,15 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +config LZ4_COMPRESS + tristate + +config LZ4HC_COMPRESS + tristate + +config LZ4_DECOMPRESS + tristate + source "lib/xz/Kconfig" # diff --git a/lib/Makefile b/lib/Makefile index c55a037a354e..4bd29d037f22 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -75,6 +75,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_LZ4_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile new file mode 100644 index 000000000000..8085d04e9309 --- /dev/null +++ b/lib/lz4/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c new file mode 100644 index 000000000000..fd94058bd7f9 --- /dev/null +++ b/lib/lz4/lz4_compress.c @@ -0,0 +1,443 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. 
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +/* + * LZ4_compressCtx : + * ----------------- + * Compress 'isize' bytes from 'source' into an output buffer 'dest' of + * maximum size 'maxOutputSize'. * If it cannot achieve it, compression + * will stop, and result of the function will be zero. 
+ * return : the number of bytes written in buffer 'dest', or 0 if the + * compression fails + */ +static inline int lz4_compressctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + HTYPE *hashtable = (HTYPE *)ctx; + const u8 *ip = (u8 *)source; +#if LZ4_ARCH64 + const BYTE * const base = ip; +#else + const int base = 0; +#endif + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; + forwardh = LZ4_HASH_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (unlikely(forwardip > mflimit)) + goto _last_literals; + + forwardh = LZ4_HASH_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = ip - base; + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + + if (length >= (int)RUN_MASK) { + int len; + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); 
+_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + /* Encode MatchLength */ + length = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) + return 0; + if (length >= (int)ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length -= 510) { + *op++ = 255; + *op++ = 255; + } + if (length > 254) { + length -= 255; + *op++ = 255; + } + *op++ = (u8)length; + } else + *token += length; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + + /* Test next position */ + ref = base + hashtable[LZ4_HASH_VALUE(ip)]; + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (((char *)op - dest) + lastrun + 1 + + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) + return 0; + + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = 
(lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + + /* End */ + return (int)(((char *)op) - dest); +} + +static inline int lz4_compress64kctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + u16 *hashtable = (u16 *)ctx; + const u8 *ip = (u8 *) source; + const u8 *anchor = ip; + const u8 *const base = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int len, length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (forwardip > mflimit) + goto _last_literals; + + forwardh = LZ4_HASH64K_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = (u16)(ip - base); + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) + && (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start 
Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + + while (ip < MATCHLIMIT - (STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + + /* Encode MatchLength */ + len = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return 0; + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (u8)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + + /* Test next position */ + ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; + hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) + return 0; + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int)(((char *)op) - dest); +} + +int lz4_compress(const unsigned char *src, 
size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + if (src_len < LZ4_64KLIMIT) + out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + else + out_len = lz4_compressctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + + return 0; +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 43ac31d63f36..abcecdc2d0f2 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -22,23 +22,40 @@ * Architecture-specific macros */ #define BYTE u8 +typedef struct _U16_S { u16 v; } U16_S; +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) -typedef struct _U32_S { u32 v; } U32_S; -typedef struct _U64_S { u64 v; } U64_S; +#define A16(x) (((U16_S *)(x))->v) #define A32(x) (((U32_S *)(x))->v) #define A64(x) (((U64_S *)(x))->v) #define PUT4(s, d) (A32(d) = A32(s)) #define PUT8(s, d) (A64(d) = A64(s)) +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + A16(p) = v; \ + p += 2; \ + } while (0) #else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ +#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) +#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) +#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) + #define PUT4(s, d) \ put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) #define PUT8(s, d) \ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned(v, (u16 *)(p)); \ + p += 2; \ + } while (0) #endif #define COPYLENGTH 8 @@ -46,6 +63,29 @@ typedef struct _U64_S { u64 v; } U64_S; #define ML_MASK ((1U << ML_BITS) - 1) #define RUN_BITS (8 - 
ML_BITS) #define RUN_MASK ((1U << RUN_BITS) - 1) +#define MEMORY_USAGE 14 +#define MINMATCH 4 +#define SKIPSTRENGTH 6 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) +#define MAXD_LOG 16 +#define MAXD (1 << MAXD_LOG) +#define MAXD_MASK (u32)(MAXD - 1) +#define MAX_DISTANCE (MAXD - 1) +#define HASH_LOG (MAXD_LOG - 1) +#define HASHTABLESIZE (1 << HASH_LOG) +#define MAX_NB_ATTEMPTS 256 +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) +#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - (MEMORY_USAGE-2))) +#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASHLOG64K)) +#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASH_LOG)) #if LZ4_ARCH64/* 64-bit */ #define STEPSIZE 8 @@ -65,6 +105,13 @@ typedef struct _U64_S { u64 v; } U64_S; LZ4_WILDCOPY(s, d, e); \ } \ } while (0) +#define HTYPE u32 + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) +#endif #else /* 32-bit */ #define STEPSIZE 4 @@ -83,6 +130,14 @@ typedef struct _U64_S { u64 v; } U64_S; } while (0) #define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const u8* + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) +#endif + #endif #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ @@ -92,3 +147,10 @@ typedef struct _U64_S { u64 v; } U64_S; do { \ LZ4_COPYPACKET(s, d); \ } while (d < e) + +#define LZ4_BLINDCOPY(s, d, l) \ + do { \ + u8 *e = (d) + l; \ + LZ4_WILDCOPY(s, d, e); \ + d = e; \ + } while (0) diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c new file mode 100644 index 000000000000..eb1a74f5e368 --- /dev/null +++ b/lib/lz4/lz4hc_compress.c @@ -0,0 
+1,539 @@ +/* + * LZ4 HC - High Compression Mode of LZ4 + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +struct lz4hc_data { + const u8 *base; + HTYPE hashtable[HASHTABLESIZE]; + u16 chaintable[MAXD]; + const u8 *nexttoupdate; +} __attribute__((__packed__)); + +static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) +{ + memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); + memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); + +#if LZ4_ARCH64 + hc4->nexttoupdate = base + 1; +#else + hc4->nexttoupdate = base; +#endif + hc4->base = base; + return 1; +} + +/* Update chains up to ip (excluded) */ +static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) +{ + u16 *chaintable = hc4->chaintable; + HTYPE *hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + + while (hc4->nexttoupdate < ip) { + const u8 *p = hc4->nexttoupdate; + size_t delta = p - (hashtable[HASH_VALUE(p)] + base); + if (delta > MAX_DISTANCE) + delta = MAX_DISTANCE; + chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; + hashtable[HASH_VALUE(p)] = (p) - base; + hc4->nexttoupdate++; + } +} + +static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, + const u8 *const matchlimit) +{ + const u8 *p1t = p1; + + while (p1t < matchlimit - (STEPSIZE - 1)) { +#if LZ4_ARCH64 + u64 diff = A64(p2) ^ A64(p1t); +#else + u32 diff = A32(p2) ^ A32(p1t); +#endif + if (!diff) { + p1t += STEPSIZE; + p2 += STEPSIZE; + continue; + } + p1t += LZ4_NBCOMMONBYTES(diff); + return p1t - p1; + } +#if LZ4_ARCH64 + if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { + p1t += 4; + p2 += 4; + } +#endif + + if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { + p1t += 2; + p2 += 2; + } + if ((p1t < matchlimit) && (*p2 == *p1t)) + p1t++; + 
return p1t - p1; +} + +static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; + const u8 *ref; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + int nbattempts = MAX_NB_ATTEMPTS; + size_t repl = 0, ml = 0; + u16 delta; + + /* HC4 match finder */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + /* potential repetition */ + if (ref >= ip-4) { + /* confirmed */ + if (A32(ref) == A32(ip)) { + delta = (u16)(ip-ref); + repl = ml = lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + *matchpos = ref; + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + while ((ref >= ip - MAX_DISTANCE) && nbattempts) { + nbattempts--; + if (*(ref + ml) == *(ip + ml)) { + if (A32(ref) == A32(ip)) { + size_t mlt = + lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { + ml = mlt; + *matchpos = ref; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + /* Complete table */ + if (repl) { + const BYTE *ptr = ip; + const BYTE *end; + end = ip + repl - (MINMATCH-1); + /* Pre-Load */ + while (ptr < end - delta) { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + ptr++; + } + do { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + /* Head of chain */ + hashtable[HASH_VALUE(ptr)] = (ptr) - base; + ptr++; + } while (ptr < end); + hc4->nexttoupdate = end; + } + + return (int)ml; +} + +static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, + const u8 **matchpos, const u8 **startpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + const u8 *ref; + 
int nbattempts = MAX_NB_ATTEMPTS; + int delta = (int)(ip - startlimit); + + /* First Match */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) + && (nbattempts)) { + nbattempts--; + if (*(startlimit + longest) == *(ref - delta + longest)) { + if (A32(ref) == A32(ip)) { + const u8 *reft = ref + MINMATCH; + const u8 *ipt = ip + MINMATCH; + const u8 *startt = ip; + + while (ipt < matchlimit-(STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(reft) ^ A64(ipt); + #else + u32 diff = A32(reft) ^ A32(ipt); + #endif + + if (!diff) { + ipt += STEPSIZE; + reft += STEPSIZE; + continue; + } + ipt += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ipt < (matchlimit - 3)) + && (A32(reft) == A32(ipt))) { + ipt += 4; + reft += 4; + } + ipt += 2; + #endif + if ((ipt < (matchlimit - 1)) + && (A16(reft) == A16(ipt))) { + reft += 2; + } + if ((ipt < matchlimit) && (*reft == *ipt)) + ipt++; +_endcount: + reft = ref; + + while ((startt > startlimit) + && (reft > hc4->base) + && (startt[-1] == reft[-1])) { + startt--; + reft--; + } + + if ((ipt - startt) > longest) { + longest = (int)(ipt - startt); + *matchpos = reft; + *startpos = startt; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + return longest; +} + +static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, + int ml, const u8 *ref) +{ + int length, len; + u8 *token; + + /* Encode Literal length */ + length = (int)(*ip - *anchor); + token = (*op)++; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *(*op)++ = 255; + *(*op)++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(*anchor, *op, length); + + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); + + /* Encode MatchLength */ + len = (int)(ml - MINMATCH); + if (len >= (int)ML_MASK) { + *token 
+= ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *(*op)++ = 255; + *(*op)++ = 255; + } + if (len > 254) { + len -= 255; + *(*op)++ = 255; + } + *(*op)++ = (u8)len; + } else + *token += len; + + /* Prepare next loop */ + *ip += ml; + *anchor = *ip; + + return 0; +} + +static int lz4_compresshcctx(struct lz4hc_data *ctx, + const char *source, + char *dest, + int isize) +{ + const u8 *ip = (const u8 *)source; + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + const u8 *const matchlimit = (iend - LASTLITERALS); + + u8 *op = (u8 *)dest; + + int ml, ml2, ml3, ml0; + const u8 *ref = NULL; + const u8 *start2 = NULL; + const u8 *ref2 = NULL; + const u8 *start3 = NULL; + const u8 *ref3 = NULL; + const u8 *start0; + const u8 *ref0; + int lastrun; + + ip++; + + /* Main Loop */ + while (ip < mflimit) { + ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); + if (!ml) { + ip++; + continue; + } + + /* saved, in case we would skip too much */ + start0 = ip; + ref0 = ref; + ml0 = ml; +_search2: + if (ip+ml < mflimit) + ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, + ip + 1, matchlimit, ml, &ref2, &start2); + else + ml2 = ml; + /* No better match */ + if (ml2 == ml) { + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + continue; + } + + if (start0 < ip) { + /* empirical */ + if (start2 < ip + ml0) { + ip = start0; + ref = ref0; + ml = ml0; + } + } + /* + * Here, start0==ip + * First Match too small : removed + */ + if ((start2 - ip) < 3) { + ml = ml2; + ip = start2; + ref = ref2; + goto _search2; + } + +_search3: + /* + * Currently we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) + */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) + new_ml = OPTIMAL_ML; + if (ip + new_ml > start2 + ml2 - MINMATCH) + new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += 
correction; + ref2 += correction; + ml2 -= correction; + } + } + /* + * Now, we have start2 = ip+new_ml, + * with new_ml=min(ml, OPTIMAL_ML=18) + */ + if (start2 + ml2 < mflimit) + ml3 = lz4hc_insertandgetwidermatch(ctx, + start2 + ml2 - 3, start2, matchlimit, + ml2, &ref3, &start3); + else + ml3 = ml2; + + /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) + ml = (int)(start2 - ip); + + /* Now, encode 2 sequences */ + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start2; + lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); + continue; + } + + /* Not enough space for match 2 : remove it */ + if (start3 < ip + ml + 3) { + /* + * can write Seq1 immediately ==> Seq2 is removed, + * so Seq3 becomes Seq1 + */ + if (start3 >= (ip + ml)) { + if (start2 < ip + ml) { + int correction = + (int)(ip + ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least + * the first one ip & ref are known; Now for ml + */ + if (start2 < ip + ml) { + if ((start2 - ip) < (int)ML_MASK) { + int correction; + if (ml > OPTIMAL_ML) + ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) + ml = (int)(start2 - ip) + ml2 + - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else + ml = (int)(start2 - ip); + } + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _search3; + } + + /* Encode Last Literals */ + lastrun = 
(int)(iend - anchor); + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8) lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int) (((char *)op) - dest); +} + +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; + lz4hc_init(hc4, (const u8 *)src); + out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, + (char *)dst, (int)src_len); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + return 0; + +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4hc_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC compressor"); From 92b78405e5264ad7913c2c94192b768277f93607 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 11 Sep 2013 14:26:32 -0700 Subject: [PATCH 069/204] lz4: fix compression/decompression signedness mismatch LZ4 compression and decompression functions require different in signedness input/output parameters: unsigned char for compression and signed char for decompression. Change decompression API to require "(const) unsigned char *". Signed-off-by: Sergey Senozhatsky Cc: Kyungsik Lee Cc: Geert Uytterhoeven Cc: Yann Collet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 8 ++++---- lib/lz4/lz4_decompress.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/lz4.h b/include/linux/lz4.h index d21c13f10a64..4356686b0a39 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len, * note : Destination buffer must be already allocated. 
* slightly faster than lz4_decompress_unknownoutputsize() */ -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len); +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); /* * lz4_decompress_unknownoutputsize() @@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, * Error if return (< 0) * note : Destination buffer must be already allocated. */ -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len); +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len); #endif diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index dcc89753af65..677d1ea4833d 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -283,8 +283,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, return (int) (-(((char *) ip) - source)); } -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len) +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len) { int ret = -1; int input_len = 0; @@ -302,8 +302,8 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, EXPORT_SYMBOL_GPL(lz4_decompress); #endif -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len) +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len) { int ret = -1; int out_len = 0; From 428e6d675d83323d05649d901cc0fa0069f8e825 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 3 Mar 2015 23:16:16 +0900 Subject: [PATCH 070/204] net: ping: Return EAFNOSUPPORT when appropriate. 1. For an IPv4 ping socket, ping_check_bind_addr does not check the family of the socket address that's passed in. 
Instead, make it behave like inet_bind, which enforces either that the address family is AF_INET, or that the family is AF_UNSPEC and the address is 0.0.0.0. 2. For an IPv6 ping socket, ping_check_bind_addr returns EINVAL if the socket family is not AF_INET6. Return EAFNOSUPPORT instead, for consistency with inet6_bind. 3. Make ping_v4_sendmsg and ping_v6_sendmsg return EAFNOSUPPORT instead of EINVAL if an incorrect socket address structure is passed in. 4. Make IPv6 ping sockets be IPv6-only. The code does not support IPv4, and it cannot easily be made to support IPv4 because the protocol numbers for ICMP and ICMPv6 are different. This makes connect(::ffff:192.0.2.1) fail with EAFNOSUPPORT instead of making the socket unusable. Among other things, this fixes an oops that can be triggered by: int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = in6addr_any, }; bind(s, (struct sockaddr *) &sin6, sizeof(sin6)); [backport of net 9145736d4862145684009d6a72a6e61324a9439e] Change-Id: If06ca86d9f1e4593c0d6df174caca3487c57a241 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- net/ipv4/ping.c | 13 ++++++++++++- net/ipv6/ping.c | 5 +++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 634994af080c..8642f0044dfa 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -256,6 +256,9 @@ int ping_init_sock(struct sock *sk) kgid_t low, high; int ret = 0; + if (sk->sk_family == AF_INET6) + inet6_sk(sk)->ipv6only = 1; + inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; @@ -302,6 +305,11 @@ int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, if (addr_len < sizeof(*addr)) return -EINVAL; + if (addr->sin_family != AF_INET && + !(addr->sin_family == AF_UNSPEC && + addr->sin_addr.s_addr == htonl(INADDR_ANY))) + return -EAFNOSUPPORT; + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); @@ -326,6 +334,9 @@ int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, if (addr_len < sizeof(*addr)) return -EINVAL; + if (addr->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); @@ -708,7 +719,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) - return -EINVAL; + return -EAFNOSUPPORT; daddr = usin->sin_addr.s_addr; /* no remote port */ } else { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 38ceca8a6358..b5f23f48ca74 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -126,9 +126,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (msg->msg_name) { struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; - if (msg->msg_namelen < sizeof(struct sockaddr_in6) || - u->sin6_family != AF_INET6) { + if (msg->msg_namelen < sizeof(*u)) return -EINVAL; + if (u->sin6_family != AF_INET6) { + return 
-EAFNOSUPPORT; } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != u->sin6_scope_id) { From 740006ef285f77b02b016b9a21453e56db1915ed Mon Sep 17 00:00:00 2001 From: Petr Cermak Date: Wed, 18 Feb 2015 10:39:10 +0000 Subject: [PATCH 071/204] fs/proc/task_mmu.c: add user-space support for resetting mm->hiwater_rss (peak RSS) Peak resident size of a process can be reset back to the process's current rss value by writing "5" to /proc/pid/clear_refs. The driving use-case for this would be getting the peak RSS value, which can be retrieved from the VmHWM field in /proc/pid/status, per benchmark iteration or test scenario. Origin: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=695f055936938c674473ea071ca7359a863551e7 [akpm@linux-foundation.org: clarify behaviour in documentation] Signed-off-by: Petr Cermak Cc: Bjorn Helgaas Cc: Primiano Tucci Cc: Petr Cermak Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: I543a7640639d9916e813af875003fe3ee3a6bfe0 --- Documentation/filesystems/proc.txt | 4 ++++ fs/proc/task_mmu.c | 17 ++++++++++++++++- include/linux/mm.h | 5 +++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e0eb9d287312..22c3b9c17e5b 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -490,6 +490,10 @@ To clear the bits for the file mapped pages associated with the process > echo 3 > /proc/PID/clear_refs Any other value written to /proc/PID/clear_refs will have no effect. +To reset the peak resident set size ("high water mark") to the process's +current value: + > echo 5 > /proc/PID/clear_refs + The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags using /proc/kpageflags and number of times a page is mapped using /proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e2c925fa7827..e5bc1c7e2667 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -784,6 +784,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, #define CLEAR_REFS_ALL 1 #define CLEAR_REFS_ANON 2 #define CLEAR_REFS_MAPPED 3 +#define CLEAR_REFS_MM_HIWATER_RSS 5 static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) @@ -803,7 +804,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, rv = kstrtoint(strstrip(buffer), 10, &type); if (rv < 0) return rv; - if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) + if ((type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) && + type != CLEAR_REFS_MM_HIWATER_RSS) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) @@ -814,6 +816,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .pmd_entry = clear_refs_pte_range, .mm = mm, }; + + if (type == CLEAR_REFS_MM_HIWATER_RSS) { + /* + * Writing 5 to /proc/pid/clear_refs resets the peak + * resident set size to this mm's current rss value. 
+ */ + down_write(&mm->mmap_sem); + reset_mm_hiwater_rss(mm); + up_write(&mm->mmap_sem); + goto out_mm; + } + down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; @@ -837,6 +851,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, } flush_tlb_mm(mm); up_read(&mm->mmap_sem); +out_mm: mmput(mm); } put_task_struct(task); diff --git a/include/linux/mm.h b/include/linux/mm.h index ff7f6375f33f..2d8d22cf15ce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1166,6 +1166,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm) mm->hiwater_vm = mm->total_vm; } +static inline void reset_mm_hiwater_rss(struct mm_struct *mm) +{ + mm->hiwater_rss = get_mm_rss(mm); +} + static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, struct mm_struct *mm) { From 31e48a8de983179e0aa005b27fc949c07d3eb44a Mon Sep 17 00:00:00 2001 From: Adam Lee Date: Wed, 28 Jan 2015 15:30:27 -0500 Subject: [PATCH 072/204] Bluetooth: ath3k: workaround the compatibility issue with xHCI controller commit c561a5753dd631920c4459a067d22679b3d110d6 upstream. BugLink: https://bugs.launchpad.net/bugs/1400215 ath3k devices fail to load firmwares on xHCI buses, but work well on EHCI, this might be a compatibility issue between xHCI and ath3k chips. As my testing result, those chips will work on xHCI buses again with this patch. This workaround is from Qualcomm, they also did some workarounds in Windows driver. 
Signed-off-by: Adam Lee Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 2acabdaecec8..dad8891ecbfa 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -151,6 +151,8 @@ static struct usb_device_id ath3k_blist_tbl[] = { #define USB_REQ_DFU_DNLOAD 1 #define BULK_SIZE 4096 #define FW_HDR_SIZE 20 +#define TIMEGAP_USEC_MIN 50 +#define TIMEGAP_USEC_MAX 100 static int ath3k_load_firmware(struct usb_device *udev, const struct firmware *firmware) @@ -181,6 +183,9 @@ static int ath3k_load_firmware(struct usb_device *udev, count -= 20; while (count) { + /* workaround the compatibility issue with xHCI controller*/ + usleep_range(TIMEGAP_USEC_MIN, TIMEGAP_USEC_MAX); + size = min_t(uint, count, BULK_SIZE); pipe = usb_sndbulkpipe(udev, 0x02); memcpy(send_buf, firmware->data + sent, size); @@ -277,6 +282,9 @@ static int ath3k_load_fwfile(struct usb_device *udev, count -= size; while (count) { + /* workaround the compatibility issue with xHCI controller*/ + usleep_range(TIMEGAP_USEC_MIN, TIMEGAP_USEC_MAX); + size = min_t(uint, count, BULK_SIZE); pipe = usb_sndbulkpipe(udev, 0x02); From 8997bc45956a7c206326326eac2207c5dd4dc207 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:29:05 +1100 Subject: [PATCH 073/204] xfs: ensure buffer types are set correctly commit 0d612fb570b71ea2e49554a770cff4c489018b2c upstream. Jan Kara reported that log recovery was finding buffers with invalid types in them. This should not happen, and indicates a bug in the logging of buffers. To catch this, add asserts to the buffer formatting code to ensure that the buffer type is in range when the transaction is committed. 
We don't set a type on buffers being marked stale - they are not going to get replayed, the format item exists only for recovery to be able to prevent replay of the buffer, so the type does not matter. Hence that needs special casing here. Reported-by: Jan Kara Tested-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf_item.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 4ec431777048..e0451f4201cf 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -296,6 +296,10 @@ xfs_buf_item_format( ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || (bip->bli_flags & XFS_BLI_STALE)); + ASSERT((bip->bli_flags & XFS_BLI_STALE) || + (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF + && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF)); + /* * If it is an inode buffer, transfer the in-memory state to the From 70c0c8b3d5844839658fd0c1127a9641127d5098 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:29:40 +1100 Subject: [PATCH 074/204] xfs: inode unlink does not set AGI buffer type commit f19b872b086711bb4b22c3a0f52f16aa920bcc61 upstream. This leads to log recovery throwing errors like: XFS (md0): Mounting V5 Filesystem XFS (md0): Starting recovery (logdev: internal) XFS (md0): Unknown buffer type 0! XFS (md0): _xfs_buf_ioapply: no ops on block 0xaea8802/0x1 ffff8800ffc53800: 58 41 47 49 ..... Which is the AGI buffer magic number. Ensure that we set the type appropriately in both unlink list addition and removal. 
Tested-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7f7be5f98f52..f010ab4594f1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1655,6 +1655,7 @@ xfs_iunlink( agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); + xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); return 0; @@ -1746,6 +1747,7 @@ xfs_iunlink_remove( agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); + xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); } else { From 66c4da6566ec1cf89349911fd83b9540e985e058 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:30:23 +1100 Subject: [PATCH 075/204] xfs: set superblock buffer type correctly commit 3443a3bca54588f43286b725d8648d33a38c86f1 upstream. When the superblock is modified in a transaction, the commonly modified fields are not actually copied to the superblock buffer to avoid the buffer lock becoming a serialisation point. However, there are some other operations that modify the superblock fields within the transaction that don't directly log to the superblock but rely on the changes to be applied during the transaction commit (to minimise the buffer lock hold time). When we do this, we fail to mark the buffer log item as being a superblock buffer and that can lead to the buffer not being marked with the correct type in the log and hence causing recovery issues. Fix it by setting the type correctly, similar to xfs_mod_sb()...
Tested-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_trans.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2fd7c1ff1d21..b5d5beb7df3a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1100,6 +1100,7 @@ xfs_trans_apply_sb_deltas( whole = 1; } + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); if (whole) /* * Log the whole thing, the fields are noncontiguous. From 65c62025ac749e2597fcdc5e200479ba7ef26e9d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Feb 2015 14:08:32 -0800 Subject: [PATCH 076/204] fsnotify: fix handling of renames in audit commit 6ee8e25fc3e916193bce4ebb43d5439e1e2144ab upstream. Commit e9fd702a58c4 ("audit: convert audit watches to use fsnotify instead of inotify") broke handling of renames in audit. Audit code wants to update inode number of an inode corresponding to watched name in a directory. When something gets renamed into a directory to a watched name, inotify previously passed moved inode to audit code however new fsnotify code passes directory inode where the change happened. That confuses audit and it starts watching parent directory instead of a file in a directory. This can be observed for example by doing: cd /tmp touch foo bar auditctl -w /tmp/foo touch foo mv bar foo touch foo In audit log we see events like: type=CONFIG_CHANGE msg=audit(1423563584.155:90): auid=1000 ses=2 op="updated rules" path="/tmp/foo" key=(null) list=4 res=1 ... type=PATH msg=audit(1423563584.155:91): item=2 name="bar" inode=1046884 dev=08:0 2 mode=0100644 ouid=0 ogid=0 rdev=00:00 nametype=DELETE type=PATH msg=audit(1423563584.155:91): item=3 name="foo" inode=1046842 dev=08:0 2 mode=0100644 ouid=0 ogid=0 rdev=00:00 nametype=DELETE type=PATH msg=audit(1423563584.155:91): item=4 name="foo" inode=1046884 dev=08:0 2 mode=0100644 ouid=0 ogid=0 rdev=00:00 nametype=CREATE ... 
and that's it - we see event for the first touch after creating the audit rule, we see events for rename but we don't see any event for the last touch. However we start seeing events for unrelated stuff happening in /tmp. Fix the problem by passing moved inode as data in the FS_MOVED_FROM and FS_MOVED_TO events instead of the directory where the change happens. This doesn't introduce any new problems because no one besides audit_watch.c cares about the passed value: fs/notify/fanotify/fanotify.c cares only about FSNOTIFY_EVENT_PATH events. fs/notify/dnotify/dnotify.c doesn't care about passed 'data' value at all. fs/notify/inotify/inotify_fsnotify.c uses 'data' only for FSNOTIFY_EVENT_PATH. kernel/audit_tree.c doesn't care about passed 'data' at all. kernel/audit_watch.c expects moved inode as 'data'. Fixes: e9fd702a58c49db ("audit: convert audit watches to use fsnotify instead of inotify") Signed-off-by: Jan Kara Cc: Paul Moore Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/fsnotify.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a78680a92dba..661c0aeef1c4 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -101,8 +101,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, new_dir_mask |= FS_ISDIR; } - fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); - fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie); + fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name, + fs_cookie); + fsnotify(new_dir, new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_name, + fs_cookie); if (target) fsnotify_link_count(target); From a7596982b02d15570974252f6b1aeaf4e5589e1a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 29 Jan 2015 21:34:00 +0200 Subject: [PATCH 077/204] iwlwifi: pcie:
disable the SCD_BASE_ADDR when we resume from WoWLAN commit cd8f438405032ac8ff88bd8f2eca5e0c0063b14b upstream. The base address of the scheduler in the device's memory (SRAM) comes from two different sources. The periphery register and the alive notification from the firmware. We have a check in iwl_pcie_tx_start that ensures that they are the same. When we resume from WoWLAN, the firmware may have crashed for whatever reason. In that case, the whole device may be reset which means that the periphery register will hold a meaningless value. When we come to compare trans_pcie->scd_base_addr (which really holds the value we had when we loaded the WoWLAN firmware upon suspend) and the current value of the register, we don't see a match unsurprisingly. Trick the check to avoid a loud yet harmless WARN. Note that when the WoWLAN has crashed, we will see that in iwl_trans_pcie_d3_resume which will let the op_mode know. Once the op_mode is informed that the WowLAN firmware has crashed, it can't do much besides resetting the whole device. Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/tx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index 48acfc620191..f05962c32497 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -720,7 +720,12 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans) iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG, trans_pcie->kw.dma >> 4); - iwl_pcie_tx_start(trans, trans_pcie->scd_base_addr); + /* + * Send 0 as the scd_base_addr since the device may have be reset + * while we were in WoWLAN in which case SCD_SRAM_BASE_ADDR will + * contain garbage. 
+ */ + iwl_pcie_tx_start(trans, 0); } /* From 12faeccac04d9a018b662561204cb31c64aa3590 Mon Sep 17 00:00:00 2001 From: Eyal Shapira Date: Fri, 16 Jan 2015 11:09:30 +0200 Subject: [PATCH 078/204] iwlwifi: mvm: validate tid and sta_id in ba_notif commit 2cee4762c528a9bd2cdff793197bf591a2196c11 upstream. These are coming from the FW and are used to access arrays. Bad values can cause an out of bounds access so discard such ba_notifs and warn. Signed-off-by: Eyal Shapira Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/tx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index 4ec8385e4307..3dd0e5bfa0fc 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -832,6 +832,11 @@ int iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, sta_id = ba_notif->sta_id; tid = ba_notif->tid; + if (WARN_ONCE(sta_id >= IWL_MVM_STATION_COUNT || + tid >= IWL_MAX_TID_COUNT, + "sta_id %d tid %d", sta_id, tid)) + return 0; + rcu_read_lock(); sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_id]); From fce2d025479af5e1fa6717480c7853cdfb8b71aa Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 27 Jan 2015 15:06:57 +0200 Subject: [PATCH 079/204] iwlwifi: mvm: fix failure path when power_update fails in add_interface commit fd66fc1cafd72ddf27dbec3a5e29e99839d1bc84 upstream. When iwl_mvm_power_update_mac() is called, we have already added the mac context, so if this call fails we should remove the mac. 
Fixes: commit e5e7aa8e2561 ('iwlwifi: mvm: refactor power code') Signed-off-by: Luciano Coelho Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 88b9c0964696..9341339da333 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -544,7 +544,7 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, ret = iwl_mvm_mac_ctxt_add(mvm, vif); if (ret) - goto out_release; + goto out_remove_mac; /* * Update power state on the new interface. Admittedly, based on From 4313b9cb8964781bed89316a3c929f56b9651f9d Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 29 Jan 2015 12:48:20 +0200 Subject: [PATCH 080/204] iwlwifi: mvm: always use mac color zero commit 5523d11cc46393a1e61b7ef4a0b2d4e7ed9521e4 upstream. We don't really need to use different mac colors when adding mac contexts, because they're not used anywhere. In fact, the firmware doesn't accept 255 as a valid color, so we get into a SYSASSERT 0x3401 when we reach that. Remove the color increment to use always zero and avoid reaching 255. Signed-off-by: Luciano Coelho Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 9341339da333..e6660d692008 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -360,9 +360,6 @@ static void iwl_mvm_cleanup_iterator(void *data, u8 *mac, mvmvif->uploaded = false; mvmvif->ap_sta_id = IWL_MVM_STATION_COUNT; - /* does this make sense at all? 
*/ - mvmvif->color++; - spin_lock_bh(&mvm->time_event_lock); iwl_mvm_te_clear_data(mvm, &mvmvif->time_event_data); spin_unlock_bh(&mvm->time_event_lock); From de13322c2802b274eaff42c6027ee32e0567b75a Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Fri, 20 Feb 2015 11:45:11 -0600 Subject: [PATCH 081/204] HID: i2c-hid: Limit reads to wMaxInputLength bytes for input events commit 6d00f37e49d95e640a3937a4a1ae07dbe92a10cb upstream. d1c7e29e8d27 (HID: i2c-hid: prevent buffer overflow in early IRQ) changed hid_get_input() to read ihid->bufsize bytes, which can be more than wMaxInputLength. This is the case with the Dell XPS 13 9343, and it is causing events to be missed. In some cases the missed events are releases, which can cause the cursor to jump or freeze, among other problems. Limit the number of bytes read to min(wMaxInputLength, ihid->bufsize) to prevent such problems. Fixes: d1c7e29e8d27 "HID: i2c-hid: prevent buffer overflow in early IRQ" Signed-off-by: Seth Forshee Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 469daa04dadb..ccc2f36bb334 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -341,7 +341,10 @@ static int i2c_hid_hwreset(struct i2c_client *client) static void i2c_hid_get_input(struct i2c_hid *ihid) { int ret, ret_size; - int size = ihid->bufsize; + int size = le16_to_cpu(ihid->hdesc.wMaxInputLength); + + if (size > ihid->bufsize) + size = ihid->bufsize; ret = i2c_master_recv(ihid->client, ihid->inbuf, size); if (ret != size) { From fbc0c467414464bcb7d6a5303f448fdd246e9f71 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 2 Dec 2014 17:35:04 +0100 Subject: [PATCH 082/204] PCI: Generate uppercase hex for modalias var in uevent commit 145b3fe579db66fbe999a2bc3fd5b63dffe9636d upstream. 
Some implementations of modprobe fail to load the driver for a PCI device automatically because the "interface" part of the modalias from the kernel is lowercase, and the modalias from file2alias is uppercase. The "interface" is the low-order byte of the Class Code, defined in PCI r3.0, Appendix D. Most interface types defined in the spec do not use alpha characters, so they won't be affected. For example, 00h, 01h, 10h, 20h, etc. are unaffected. Print the "interface" byte of the Class Code in uppercase hex, as we already do for the Vendor ID, Device ID, Class, etc. Commit 89ec3dcf17fd ("PCI: Generate uppercase hex for modalias interface class") fixed only half of the problem. Some udev implementations rely on the uevent file and not the modalias file. Fixes: d1ded203adf1 ("PCI: add MODALIAS to hotplug event for pci devices") Fixes: 89ec3dcf17fd ("PCI: Generate uppercase hex for modalias interface class") Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Bjorn Helgaas Acked-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 66aabde82727..5548a13617ab 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1267,7 +1267,7 @@ static int pci_uevent(struct device *dev, struct kobj_uevent_env *env) if (add_uevent_var(env, "PCI_SLOT_NAME=%s", pci_name(pdev))) return -ENOMEM; - if (add_uevent_var(env, "MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x", + if (add_uevent_var(env, "MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X", pdev->vendor, pdev->device, pdev->subsystem_vendor, pdev->subsystem_device, (u8)(pdev->class >> 16), (u8)(pdev->class >> 8), From c1b940de40bc1cc28dbbe41c7416ef5031e093f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 19 Jan 2015 17:53:20 +0900 Subject: [PATCH 083/204] PCI: Fix infinite loop with ROM image of size 0 MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 16b036af31e1456cb69243a5a0c9ef801ecd1f17 upstream. If the image size would ever read as 0, pci_get_rom_size() could keep processing the same image over and over again. Exit the loop if we ever read a length of zero. This fixes a soft lockup on boot when the radeon driver calls pci_get_rom_size() on an AMD Radeon R7 250X PCIe discrete graphics card. [bhelgaas: changelog, reference] Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1386973 Reported-by: Federico Signed-off-by: Michel Dänzer Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/pci/rom.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index c5d0a08a8747..d6d499782fb4 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -69,6 +69,7 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size) { void __iomem *image; int last_image; + unsigned length; image = rom; do { @@ -91,9 +92,9 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size) if (readb(pds + 3) != 'R') break; last_image = readb(pds + 21) & 0x80; - /* this length is reliable */ - image += readw(pds + 16) * 512; - } while (!last_image); + length = readw(pds + 16); + image += length * 512; + } while (length && !last_image); /* never return a size larger than the PCI resource window */ /* there are known ROMs that get the size wrong */ From ea9bc74573040bb38b143a743281bc808a2136ba Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 9 Feb 2015 13:38:17 -0500 Subject: [PATCH 084/204] cpufreq: speedstep-smi: enable interrupts when waiting commit d4d4eda23794c701442e55129dd4f8f2fefd5e4d upstream. On Dell Latitude C600 laptop with Pentium 3 850MHz processor, the speedstep-smi driver sometimes loads and sometimes doesn't load with "change to state X failed" message. 
The hardware sometimes refuses to change frequency and in this case, we need to retry later. I found out that we need to enable interrupts while waiting. When we enable interrupts, the hardware blockage that prevents frequency transition resolves and the transition is possible. With disabled interrupts, the blockage doesn't resolve (no matter how long we wait). The exact reasons for this hardware behavior are unknown. This patch enables interrupts in the function speedstep_set_state that can be called with disabled interrupts. However, this function is called with disabled interrupts only from speedstep_get_freqs, so it shouldn't cause any problem. Signed-off-by: Mikulas Patocka Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/speedstep-lib.c | 3 +++ drivers/cpufreq/speedstep-smi.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index 7047821a7f8a..4ab7a2156672 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -400,6 +400,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, pr_debug("previous speed is %u\n", prev_speed); + preempt_disable(); local_irq_save(flags); /* switch to low state */ @@ -464,6 +465,8 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, out: local_irq_restore(flags); + preempt_enable(); + return ret; } EXPORT_SYMBOL_GPL(speedstep_get_freqs); diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c index f5a6b70ee6c0..2844009135f8 100644 --- a/drivers/cpufreq/speedstep-smi.c +++ b/drivers/cpufreq/speedstep-smi.c @@ -188,6 +188,7 @@ static void speedstep_set_state(unsigned int state) return; /* Disable IRQs */ + preempt_disable(); local_irq_save(flags); command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); @@ -198,9 +199,19 @@ static void speedstep_set_state(unsigned int state) do { if (retry) { + /* + * We need to enable
interrupts, otherwise the blockage + * won't resolve. + * + * We disable preemption so that other processes don't + * run. If other processes were running, they could + * submit more DMA requests, making the blockage worse. + */ pr_debug("retry %u, previous result %u, waiting...\n", retry, result); + local_irq_enable(); mdelay(retry * 50); + local_irq_disable(); } retry++; __asm__ __volatile__( @@ -217,6 +228,7 @@ static void speedstep_set_state(unsigned int state) /* enable IRQs */ local_irq_restore(flags); + preempt_enable(); if (new_state == state) pr_debug("change to %u MHz succeeded after %u tries " From 4cf981513778209b86dc90ec8fb479929aef8d50 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Fri, 2 Jan 2015 10:56:28 -0300 Subject: [PATCH 085/204] lmedm04: Fix usb_submit_urb BOGUS urb xfer, pipe 1 != type 3 in interrupt urb commit 15e1ce33182d1d5dbd8efe8d382b9352dc857527 upstream. A quirk of some older firmwares that report endpoint pipe type as PIPE_BULK but the endpoint otherwise functions as interrupt. Check if usb_endpoint_type is USB_ENDPOINT_XFER_BULK and set as usb_rcvbulkpipe.
Signed-off-by: Malcolm Priestley Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/lmedm04.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index b3fd0ffa3c3f..fc28d514bff0 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -350,6 +350,7 @@ static int lme2510_int_read(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap_to_d(adap); struct lme2510_state *lme_int = adap_to_priv(adap); + struct usb_host_endpoint *ep; lme_int->lme_urb = usb_alloc_urb(0, GFP_ATOMIC); @@ -371,6 +372,12 @@ static int lme2510_int_read(struct dvb_usb_adapter *adap) adap, 8); + /* Quirk of pipe reporting PIPE_BULK but behaves as interrupt */ + ep = usb_pipe_endpoint(d->udev, lme_int->lme_urb->pipe); + + if (usb_endpoint_type(&ep->desc) == USB_ENDPOINT_XFER_BULK) + lme_int->lme_urb->pipe = usb_rcvbulkpipe(d->udev, 0xa), + lme_int->lme_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_submit_urb(lme_int->lme_urb, GFP_ATOMIC); From 331e036da19ecd0e1d4e0feb78226f99c04fdc4b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 9 Feb 2015 16:51:40 +0300 Subject: [PATCH 086/204] ALSA: off by one bug in snd_riptide_joystick_probe() commit e4940626defdf6c92da1052ad3f12741c1a28c90 upstream. The problem here is that we check: if (dev >= SNDRV_CARDS) Then we increment "dev". if (!joystick_port[dev++]) Then we use it as an offset into a array with SNDRV_CARDS elements. if (!request_region(joystick_port[dev], 8, "Riptide gameport")) { This has 3 effects: 1) If you use the module option to specify the joystick port then it has to be shifted one space over. 2) The wrong error message will be printed on failure if you have over 32 cards. 3) Static checkers will correctly complain that are off by one. 
Fixes: db1005ec6ff8 ('ALSA: riptide - Fix joystick resource handling') Signed-off-by: Dan Carpenter Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/riptide/riptide.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index 63c1c8041554..fa66ba30470e 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -2032,32 +2032,43 @@ snd_riptide_joystick_probe(struct pci_dev *pci, const struct pci_device_id *id) { static int dev; struct gameport *gameport; + int ret; if (dev >= SNDRV_CARDS) return -ENODEV; + if (!enable[dev]) { - dev++; - return -ENOENT; + ret = -ENOENT; + goto inc_dev; } - if (!joystick_port[dev++]) - return 0; + if (!joystick_port[dev]) { + ret = 0; + goto inc_dev; + } gameport = gameport_allocate_port(); - if (!gameport) - return -ENOMEM; + if (!gameport) { + ret = -ENOMEM; + goto inc_dev; + } if (!request_region(joystick_port[dev], 8, "Riptide gameport")) { snd_printk(KERN_WARNING "Riptide: cannot grab gameport 0x%x\n", joystick_port[dev]); gameport_free_port(gameport); - return -EBUSY; + ret = -EBUSY; + goto inc_dev; } gameport->io = joystick_port[dev]; gameport_register_port(gameport); pci_set_drvdata(pci, gameport); - return 0; + + ret = 0; +inc_dev: + dev++; + return ret; } static void snd_riptide_joystick_remove(struct pci_dev *pci) From 3962a253b21474940c8b858ec0aff0428c2c3e82 Mon Sep 17 00:00:00 2001 From: Adrian Knoth Date: Tue, 10 Feb 2015 11:33:50 +0100 Subject: [PATCH 087/204] ALSA: hdspm - Constrain periods to 2 on older cards commit f0153c3d948c1764f6c920a0675d86fc1d75813e upstream. RME RayDAT and AIO use a fixed buffer size of 16384 samples. With period sizes of 32-4096, this translates to 4-512 periods. The older RME cards have a variable buffer size but require exactly two periods. This patch enforces nperiods=2 on those cards. 
Signed-off-by: Adrian Knoth Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/rme9652/hdspm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 9ea05e956474..dd910d249987 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -5789,6 +5789,9 @@ static int snd_hdspm_playback_open(struct snd_pcm_substream *substream) snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, 64, 8192); + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIODS, + 2, 2); break; } @@ -5863,6 +5866,9 @@ static int snd_hdspm_capture_open(struct snd_pcm_substream *substream) snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, 64, 8192); + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIODS, + 2, 2); break; } From 4315974144b8b18eeeb958f43a9d50825a5a4254 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 27 Jan 2015 16:51:54 +0100 Subject: [PATCH 088/204] power_supply: 88pm860x: Fix leaked power supply on probe fail commit 24727b45b484e8937dcde53fa8d1aa70ac30ec0c upstream. Driver forgot to unregister power supply if request_threaded_irq() failed in probe(). In such case the memory associated with power supply leaked. 
Signed-off-by: Krzysztof Kozlowski Fixes: a830d28b48bf ("power_supply: Enable battery-charger for 88pm860x") Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/88pm860x_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/88pm860x_charger.c b/drivers/power/88pm860x_charger.c index 36fb4b5a4b0d..1ef35ab79d0f 100644 --- a/drivers/power/88pm860x_charger.c +++ b/drivers/power/88pm860x_charger.c @@ -711,6 +711,7 @@ static int pm860x_charger_probe(struct platform_device *pdev) return 0; out_irq: + power_supply_unregister(&info->usb); while (--i >= 0) free_irq(info->irq[i], info); out: From 5d37544fb1dedda1974bf406c0033bbbfa5944af Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 28 Jan 2015 19:54:12 +0800 Subject: [PATCH 089/204] mmc: sdhci-pxav3: fix setting of pdata->clk_delay_cycles commit 14460dbaf7a5a0488963fdb8232ad5c8a8cca7b7 upstream. Current code checks "clk_delay_cycles > 0" to know whether the optional "mrvl,clk-delay-cycles" is set or not. But of_property_read_u32() doesn't touch clk_delay_cycles if the property is not set. And the type of clk_delay_cycles is u32, so we may always set pdata->clk_delay_cycles as a random value. This patch fixes this problem by checking the return value of of_property_read_u32() to know whether the optional clk-delay-cycles is set or not.
Signed-off-by: Jisheng Zhang Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pxav3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index 1ae358e0662d..4edb24bf56f8 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -201,8 +201,8 @@ static struct sdhci_pxa_platdata *pxav3_get_mmc_pdata(struct device *dev) if (!pdata) return NULL; - of_property_read_u32(np, "mrvl,clk-delay-cycles", &clk_delay_cycles); - if (clk_delay_cycles > 0) + if (!of_property_read_u32(np, "mrvl,clk-delay-cycles", + &clk_delay_cycles)) pdata->clk_delay_cycles = clk_delay_cycles; return pdata; From 1a2d3f26253901627f4b5ef8866e3adea434b4c8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 14 Jan 2015 13:08:57 -0500 Subject: [PATCH 090/204] nfs: don't call blocking operations while !TASK_RUNNING commit 6ffa30d3f734d4f6b478081dfc09592021028f90 upstream. Bruce reported seeing this warning pop when mounting using v4.1: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1121 at kernel/sched/core.c:7300 __might_sleep+0xbd/0xd0() do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait+0x2f/0x90 Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc fscache ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hwdep snd_pcm snd_timer ppdev joydev snd virtio_console virtio_balloon pcspkr serio_raw parport_pc parport pvpanic floppy soundcore i2c_piix4 virtio_blk virtio_net qxl drm_kms_helper ttm drm 
virtio_pci virtio_ring ata_generic virtio pata_acpi CPU: 1 PID: 1121 Comm: nfsv4.1-svc Not tainted 3.19.0-rc4+ #25 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014 0000000000000000 000000004e5e3f73 ffff8800b998fb48 ffffffff8186ac78 0000000000000000 ffff8800b998fba0 ffff8800b998fb88 ffffffff810ac9da ffff8800b998fb68 ffffffff81c923e7 00000000000004d9 0000000000000000 Call Trace: [] dump_stack+0x4c/0x65 [] warn_slowpath_common+0x8a/0xc0 [] warn_slowpath_fmt+0x55/0x70 [] ? prepare_to_wait+0x2f/0x90 [] ? prepare_to_wait+0x2f/0x90 [] __might_sleep+0xbd/0xd0 [] kmem_cache_alloc_trace+0x243/0x430 [] ? groups_alloc+0x3e/0x130 [] groups_alloc+0x3e/0x130 [] svcauth_unix_accept+0x16e/0x290 [sunrpc] [] svc_authenticate+0xe1/0xf0 [sunrpc] [] svc_process_common+0x244/0x6a0 [sunrpc] [] bc_svc_process+0x1c4/0x260 [sunrpc] [] nfs41_callback_svc+0x128/0x1f0 [nfsv4] [] ? wait_woken+0xc0/0xc0 [] ? nfs4_callback_svc+0x60/0x60 [nfsv4] [] kthread+0x11f/0x140 [] ? local_clock+0x15/0x30 [] ? kthread_create_on_node+0x250/0x250 [] ret_from_fork+0x7c/0xb0 [] ? kthread_create_on_node+0x250/0x250 ---[ end trace 675220a11e30f4f2 ]--- nfs41_callback_svc does most of its work while in TASK_INTERRUPTIBLE, which is just wrong. Fix that by finishing the wait immediately if we've found that the list has something on it. Also, we don't expect this kthread to accept signals, so we should be using a TASK_UNINTERRUPTIBLE sleep instead. That however, opens us up hung task warnings from the watchdog, so have the schedule_timeout wake up every 60s if there's no callback activity. Reported-by: "J. 
Bruce Fields" Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/callback.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index cff089a412c7..e05c96ebb27d 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -128,22 +128,24 @@ nfs41_callback_svc(void *vrqstp) if (try_to_freeze()) continue; - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { req = list_first_entry(&serv->sv_cb_list, struct rpc_rqst, rq_bc_list); list_del(&req->rq_bc_list); spin_unlock_bh(&serv->sv_cb_lock); + finish_wait(&serv->sv_cb_waitq, &wq); dprintk("Invoking bc_svc_process()\n"); error = bc_svc_process(serv, req, rqstp); dprintk("bc_svc_process() returned w/ error code= %d\n", error); } else { spin_unlock_bh(&serv->sv_cb_lock); - schedule(); + /* schedule_timeout to game the hung task watchdog */ + schedule_timeout(60 * HZ); + finish_wait(&serv->sv_cb_waitq, &wq); } - finish_wait(&serv->sv_cb_waitq, &wq); } return 0; } From 7528bb2ef8466d7a1ff5f0e316b96383349a7d60 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 29 May 2014 10:16:32 +0100 Subject: [PATCH 091/204] MIPS: KVM: Deliver guest interrupts after local_irq_disable() commit 044f0f03eca0110e1835b2ea038a484b93950328 upstream. When about to run the guest, deliver guest interrupts after disabling host interrupts. This should prevent an hrtimer interrupt from being handled after delivering guest interrupts, and therefore not delivering the guest timer interrupt until after the next guest exit. 
Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Gleb Natapov Cc: kvm@vger.kernel.org Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: Sanjay Lal Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/kvm_mips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index 2c7b3ade8ec0..f957a8ac979b 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -413,11 +413,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; } + local_irq_disable(); /* Check if we have any exceptions/interrupts pending */ kvm_mips_deliver_interrupts(vcpu, kvm_read_c0_guest_cause(vcpu->arch.cop0)); - local_irq_disable(); kvm_guest_enter(); r = __kvm_mips_vcpu_run(run, vcpu); From 0d998434cec4071f7ea4ec5f7c53aee681504e58 Mon Sep 17 00:00:00 2001 From: Vikram Mulukutla Date: Wed, 17 Dec 2014 18:50:56 -0800 Subject: [PATCH 092/204] tracing: Fix unmapping loop in tracing_mark_write commit 7215853e985a4bef1a6c14e00e89dfec84f1e457 upstream. Commit 6edb2a8a385f0cdef51dae37ff23e74d76d8a6ce introduced an array map_pages that contains the addresses returned by kmap_atomic. However, when unmapping those pages, map_pages[0] is unmapped before map_pages[1], breaking the nesting requirement as specified in the documentation for kmap_atomic/kunmap_atomic. This was caught by the highmem debug code present in kunmap_atomic. Fix the loop to do the unmapping properly. 
Link: http://lkml.kernel.org/r/1418871056-6614-1-git-send-email-markivx@codeaurora.org Reviewed-by: Stephen Boyd Reported-by: Lime Yang Signed-off-by: Vikram Mulukutla Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 18cdf91b2f85..8d7e8098e768 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4588,7 +4588,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, *fpos += written; out_unlock: - for (i = 0; i < nr_pages; i++){ + for (i = nr_pages - 1; i >= 0; i--) { kunmap_atomic(map_page[i]); put_page(pages[i]); } From 49d9336fac46885eb0b6ddf5072f3e20bdfdbdce Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Jan 2015 03:06:22 +0100 Subject: [PATCH 093/204] ARM: 8284/1: sa1100: clear RCSR_SMR on resume commit e461894dc2ce7778ccde1c3483c9b15a85a7fc5f upstream. StrongARM core uses RCSR SMR bit to tell to bootloader that it was reset by entering the sleep mode. After we have resumed, there is little point in having that bit enabled. Moreover, if this bit is set before reboot, the bootloader can become confused. Thus clear the SMR bit on resume just before clearing the scratchpad (resume address) register. 
Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-sa1100/pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index 6645d1e31f14..34853d5dfda2 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -81,6 +81,7 @@ static int sa11x0_pm_enter(suspend_state_t state) /* * Ensure not to come back here if it wasn't intended */ + RCSR = RCSR_SMR; PSPR = 0; /* From 8716dbb11fc845a2fa28d889efe638b9dae86daf Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Wed, 24 Sep 2014 22:41:10 +0000 Subject: [PATCH 094/204] tpm_tis: verify interrupt during init commit 448e9c55c12d6bd4fa90a7e31d802e045666d7c8 upstream. Some machines, such as the Acer C720 and Toshiba CB35, have TPMs that do not send IRQs while also having an ACPI TPM entry indicating that they will be sent. These machines freeze on resume while the tpm_tis module waits for an IRQ, eventually timing out. When in interrupt mode, the tpm_tis module should receive an IRQ during module init. Fall back to polling mode if none is received when expected. 
Signed-off-by: Scot Doyle Tested-by: Michael Mullin Reviewed-by: Jason Gunthorpe [phuewe: minor checkpatch fixed] Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis.c | 76 +++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 14 deletions(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 72f21377fa02..323d02d33c70 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -75,6 +75,10 @@ enum tis_defaults { #define TPM_DID_VID(l) (0x0F00 | ((l) << 12)) #define TPM_RID(l) (0x0F04 | ((l) << 12)) +struct priv_data { + bool irq_tested; +}; + static LIST_HEAD(tis_chips); static DEFINE_MUTEX(tis_lock); @@ -338,12 +342,27 @@ static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len) return rc; } +static void disable_interrupts(struct tpm_chip *chip) +{ + u32 intmask; + + intmask = + ioread32(chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor.locality)); + intmask &= ~TPM_GLOBAL_INT_ENABLE; + iowrite32(intmask, + chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor.locality)); + free_irq(chip->vendor.irq, chip); + chip->vendor.irq = 0; +} + /* * If interrupts are used (signaled by an irq set in the vendor structure) * tpm.c can skip polling for the data to be available as the interrupt is * waited for here */ -static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) +static int tpm_tis_send_main(struct tpm_chip *chip, u8 *buf, size_t len) { int rc; u32 ordinal; @@ -373,6 +392,30 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) return rc; } +static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + int rc, irq; + struct priv_data *priv = chip->vendor.priv; + + if (!chip->vendor.irq || priv->irq_tested) + return tpm_tis_send_main(chip, buf, len); + + /* Verify receipt of the expected IRQ */ + irq = chip->vendor.irq; + chip->vendor.irq = 0; + rc = tpm_tis_send_main(chip, buf, len); + chip->vendor.irq = irq; + if 
(!priv->irq_tested) + msleep(1); + if (!priv->irq_tested) { + disable_interrupts(chip); + dev_err(chip->dev, + FW_BUG "TPM interrupt not working, polling instead\n"); + } + priv->irq_tested = true; + return rc; +} + struct tis_vendor_timeout_override { u32 did_vid; unsigned long timeout_us[4]; @@ -546,6 +589,7 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id) if (interrupt == 0) return IRQ_NONE; + ((struct priv_data *)chip->vendor.priv)->irq_tested = true; if (interrupt & TPM_INTF_DATA_AVAIL_INT) wake_up_interruptible(&chip->vendor.read_queue); if (interrupt & TPM_INTF_LOCALITY_CHANGE_INT) @@ -575,9 +619,14 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, u32 vendor, intfcaps, intmask; int rc, i, irq_s, irq_e, probe; struct tpm_chip *chip; + struct priv_data *priv; + priv = devm_kzalloc(dev, sizeof(struct priv_data), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; if (!(chip = tpm_register_hardware(dev, &tpm_tis))) return -ENODEV; + chip->vendor.priv = priv; chip->vendor.iobase = ioremap(start, len); if (!chip->vendor.iobase) { @@ -646,19 +695,6 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, if (intfcaps & TPM_INTF_DATA_AVAIL_INT) dev_dbg(dev, "\tData Avail Int Support\n"); - /* get the timeouts before testing for irqs */ - if (tpm_get_timeouts(chip)) { - dev_err(dev, "Could not get TPM timeouts and durations\n"); - rc = -ENODEV; - goto out_err; - } - - if (tpm_do_selftest(chip)) { - dev_err(dev, "TPM self test failed\n"); - rc = -ENODEV; - goto out_err; - } - /* INTERRUPT Setup */ init_waitqueue_head(&chip->vendor.read_queue); init_waitqueue_head(&chip->vendor.int_queue); @@ -760,6 +796,18 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, } } + if (tpm_get_timeouts(chip)) { + dev_err(dev, "Could not get TPM timeouts and durations\n"); + rc = -ENODEV; + goto out_err; + } + + if (tpm_do_selftest(chip)) { + dev_err(dev, "TPM self test failed\n"); + rc = -ENODEV; + goto out_err; + } + 
INIT_LIST_HEAD(&chip->vendor.list); mutex_lock(&tis_lock); list_add(&chip->vendor.list, &tis_chips); From 1c243c211c1141a14e5e418de314ed466c513ac6 Mon Sep 17 00:00:00 2001 From: "Hon Ching (Vicky) Lo" Date: Sun, 30 Nov 2014 15:01:28 +0100 Subject: [PATCH 095/204] tpm: Fix NULL return in tpm_ibmvtpm_get_desired_dma commit 84eb186bc37c0900b53077ca21cf6dd15823a232 upstream. There was an oops in tpm_ibmvtpm_get_desired_dma, which caused kernel panic during boot when vTPM is enabled in Power partition configured in AMS mode. vio_bus_probe calls vio_cmo_bus_probe which calls tpm_ibmvtpm_get_desired_dma to get the size needed for DMA allocation. The problem is, vio_cmo_bus_probe is called before calling probe, which for vtpm is tpm_ibmvtpm_probe and it's this function that initializes and sets up vtpm's CRQ and gets required data values. Therefore, since this has not yet been done, NULL is returned in attempt to get the size for DMA allocation. We added a NULL check. In addition, a default buffer size will be set when NULL is returned. Signed-off-by: Hon Ching (Vicky) Lo Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_ibmvtpm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 56b07c35a13e..010d814dd9f5 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -307,6 +307,14 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev) static unsigned long tpm_ibmvtpm_get_desired_dma(struct vio_dev *vdev) { struct ibmvtpm_dev *ibmvtpm = ibmvtpm_get_data(&vdev->dev); + + /* ibmvtpm initializes at probe time, so the data we are + * asking for may not be set yet. Estimate that 4K required + * for TCE-mapped buffer in addition to CRQ. 
+ */ + if (!ibmvtpm) + return CRQ_RES_BUF_SIZE + PAGE_SIZE; + return CRQ_RES_BUF_SIZE + ibmvtpm->rtce_size; } From 6280501c3e6346652bb9a1f2a77148f5aa5a37a6 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:46 +0100 Subject: [PATCH 096/204] tpm/tpm_i2c_stm_st33: Fix potential bug in tpm_stm_i2c_send commit 1ba3b0b6f218072afe8372d12f1b6bf26a26008e upstream. When sending data in tpm_stm_i2c_send, each loop iteration send buf. Send buf + i instead as the goal of this for loop is to send a number of byte from buf that fit in burstcnt. Once those byte are sent, we are supposed to send the next ones. The driver was working because the burstcount value returns always the maximum size for a TPM command or response. (0x800 for a command and 0x400 for a response). Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_i2c_stm_st33.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 156bd3c72770..3925f7b86841 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -488,7 +488,7 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, if (burstcnt < 0) return burstcnt; size = min_t(int, len - i - 1, burstcnt); - ret = I2C_WRITE_DATA(client, TPM_DATA_FIFO, buf, size); + ret = I2C_WRITE_DATA(client, TPM_DATA_FIFO, buf + i, size); if (ret < 0) goto out_err; From c213da80aa303730ca6d99a30f10043428c0e354 Mon Sep 17 00:00:00 2001 From: honclo Date: Thu, 12 Feb 2015 21:02:24 -0500 Subject: [PATCH 097/204] Added Little Endian support to vtpm module commit eb71f8a5e33fa1066fb92f0111ab366a341e1f6c upstream. The tpm_ibmvtpm module is affected by an unaligned access problem. ibmvtpm_crq_get_version failed with rc=-4 during boot when vTPM is enabled in Power partition, which supports both little endian and big endian modes. 
We added little endian support to fix this problem: 1) added cpu_to_be64 calls to ensure BE data is sent from an LE OS. 2) added be16_to_cpu and be32_to_cpu calls to make sure data received is in LE format on a LE OS. Signed-off-by: Hon Ching(Vicky) Lo Signed-off-by: Joy Latten [phuewe: manually applied the patch :( ] Reviewed-by: Ashley Lai Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_ibmvtpm.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 010d814dd9f5..538856f3e68a 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -148,7 +148,8 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) crq.len = (u16)count; crq.data = ibmvtpm->rtce_dma_handle; - rc = ibmvtpm_send_crq(ibmvtpm->vdev, word[0], word[1]); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(word[0]), + cpu_to_be64(word[1])); if (rc != H_SUCCESS) { dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc); rc = 0; @@ -186,7 +187,8 @@ static int ibmvtpm_crq_get_rtce_size(struct ibmvtpm_dev *ibmvtpm) crq.valid = (u8)IBMVTPM_VALID_CMD; crq.msg = (u8)VTPM_GET_RTCE_BUFFER_SIZE; - rc = ibmvtpm_send_crq(ibmvtpm->vdev, buf[0], buf[1]); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]), + cpu_to_be64(buf[1])); if (rc != H_SUCCESS) dev_err(ibmvtpm->dev, "ibmvtpm_crq_get_rtce_size failed rc=%d\n", rc); @@ -212,7 +214,8 @@ static int ibmvtpm_crq_get_version(struct ibmvtpm_dev *ibmvtpm) crq.valid = (u8)IBMVTPM_VALID_CMD; crq.msg = (u8)VTPM_GET_VERSION; - rc = ibmvtpm_send_crq(ibmvtpm->vdev, buf[0], buf[1]); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]), + cpu_to_be64(buf[1])); if (rc != H_SUCCESS) dev_err(ibmvtpm->dev, "ibmvtpm_crq_get_version failed rc=%d\n", rc); @@ -335,7 +338,8 @@ static int tpm_ibmvtpm_suspend(struct device *dev) crq.valid = (u8)IBMVTPM_VALID_CMD; crq.msg = 
(u8)VTPM_PREPARE_TO_SUSPEND; - rc = ibmvtpm_send_crq(ibmvtpm->vdev, buf[0], buf[1]); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]), + cpu_to_be64(buf[1])); if (rc != H_SUCCESS) dev_err(ibmvtpm->dev, "tpm_ibmvtpm_suspend failed rc=%d\n", rc); @@ -519,11 +523,11 @@ static void ibmvtpm_crq_process(struct ibmvtpm_crq *crq, case IBMVTPM_VALID_CMD: switch (crq->msg) { case VTPM_GET_RTCE_BUFFER_SIZE_RES: - if (crq->len <= 0) { + if (be16_to_cpu(crq->len) <= 0) { dev_err(ibmvtpm->dev, "Invalid rtce size\n"); return; } - ibmvtpm->rtce_size = crq->len; + ibmvtpm->rtce_size = be16_to_cpu(crq->len); ibmvtpm->rtce_buf = kmalloc(ibmvtpm->rtce_size, GFP_KERNEL); if (!ibmvtpm->rtce_buf) { @@ -544,11 +548,11 @@ static void ibmvtpm_crq_process(struct ibmvtpm_crq *crq, return; case VTPM_GET_VERSION_RES: - ibmvtpm->vtpm_version = crq->data; + ibmvtpm->vtpm_version = be32_to_cpu(crq->data); return; case VTPM_TPM_COMMAND_RES: /* len of the data in rtce buffer */ - ibmvtpm->res_len = crq->len; + ibmvtpm->res_len = be16_to_cpu(crq->len); wake_up_interruptible(&ibmvtpm->wq); return; default: From 72b19f30985230979d812ae65a3fd4c28067a589 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Feb 2015 17:27:55 -0500 Subject: [PATCH 098/204] NFSv4.1: Fix a kfree() of uninitialised pointers in decode_cb_sequence_args commit d8ba1f971497c19cf80da1ea5391a46a5f9fbd41 upstream. If the call to decode_rc_list() fails due to a memory allocation error, then we need to truncate the array size to ensure that we only call kfree() on those pointer that were allocated. 
Reported-by: David Ramos Fixes: 4aece6a19cf7f ("nfs41: cb_sequence xdr implementation") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/callback_xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a35582c9d444..e98ecf8d2588 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -464,8 +464,10 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, for (i = 0; i < args->csa_nrclists; i++) { status = decode_rc_list(xdr, &args->csa_rclists[i]); - if (status) + if (status) { + args->csa_nrclists = i; goto out_free; + } } } status = 0; From b1a9198768efe67b11af1b72df379a4d75e8479e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 22 Jan 2015 00:56:53 -0800 Subject: [PATCH 099/204] iscsi-target: Drop problematic active_ts_list usage commit 3fd7b60f2c7418239d586e359e0c6d8503e10646 upstream. This patch drops legacy active_ts_list usage within iscsi_target_tq.c code. It was originally used to track the active thread sets during iscsi-target shutdown, and is no longer used by modern upstream code. Two people have reported list corruption using traditional iscsi-target and iser-target with the following backtrace, that appears to be related to iscsi_thread_set->ts_list being used across both active_ts_list and inactive_ts_list. 
[ 60.782534] ------------[ cut here ]------------ [ 60.782543] WARNING: CPU: 0 PID: 9430 at lib/list_debug.c:53 __list_del_entry+0x63/0xd0() [ 60.782545] list_del corruption, ffff88045b00d180->next is LIST_POISON1 (dead000000100100) [ 60.782546] Modules linked in: ib_srpt tcm_qla2xxx qla2xxx tcm_loop tcm_fc libfc scsi_transport_fc scsi_tgt ib_isert rdma_cm iw_cm ib_addr iscsi_target_mod target_core_pscsi target_core_file target_core_iblock target_core_mod configfs ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables bridge stp llc autofs4 sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 ib_ipoib ib_cm ib_uverbs ib_umad mlx4_en mlx4_ib ib_sa ib_mad ib_core mlx4_core dm_mirror dm_region_hash dm_log dm_mod vhost_net macvtap macvlan vhost tun kvm_intel kvm uinput iTCO_wdt iTCO_vendor_support microcode serio_raw pcspkr sb_edac edac_core sg i2c_i801 lpc_ich mfd_core mtip32xx igb i2c_algo_bit i2c_core ptp pps_core ioatdma dca wmi ext3(F) jbd(F) mbcache(F) sd_mod(F) crc_t10dif(F) crct10dif_common(F) ahci(F) libahci(F) isci(F) libsas(F) scsi_transport_sas(F) [last unloaded: speedstep_lib] [ 60.782597] CPU: 0 PID: 9430 Comm: iscsi_ttx Tainted: GF 3.12.19+ #2 [ 60.782598] Hardware name: Supermicro X9DRX+-F/X9DRX+-F, BIOS 3.00 07/09/2013 [ 60.782599] 0000000000000035 ffff88044de31d08 ffffffff81553ae7 0000000000000035 [ 60.782602] ffff88044de31d58 ffff88044de31d48 ffffffff8104d1cc 0000000000000002 [ 60.782605] ffff88045b00d180 ffff88045b00d0c0 ffff88045b00d0c0 ffff88044de31e58 [ 60.782607] Call Trace: [ 60.782611] [] dump_stack+0x49/0x62 [ 60.782615] [] warn_slowpath_common+0x8c/0xc0 [ 60.782618] [] warn_slowpath_fmt+0x46/0x50 [ 60.782620] [] __list_del_entry+0x63/0xd0 [ 60.782622] [] list_del+0x11/0x40 [ 60.782630] [] iscsi_del_ts_from_active_list+0x29/0x50 [iscsi_target_mod] [ 60.782635] [] 
iscsi_tx_thread_pre_handler+0xa1/0x180 [iscsi_target_mod] [ 60.782642] [] iscsi_target_tx_thread+0x4e/0x220 [iscsi_target_mod] [ 60.782647] [] ? iscsit_handle_snack+0x190/0x190 [iscsi_target_mod] [ 60.782652] [] ? iscsit_handle_snack+0x190/0x190 [iscsi_target_mod] [ 60.782655] [] kthread+0xce/0xe0 [ 60.782657] [] ? kthread_freezable_should_stop+0x70/0x70 [ 60.782660] [] ret_from_fork+0x7c/0xb0 [ 60.782662] [] ? kthread_freezable_should_stop+0x70/0x70 [ 60.782663] ---[ end trace 9662f4a661d33965 ]--- Since this code is no longer used, go ahead and drop the problematic usage all-together. Reported-by: Gavin Guo Reported-by: Moussa Ba Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_tq.c | 28 +++++--------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c index 81289520f96b..bd53364b75e8 100644 --- a/drivers/target/iscsi/iscsi_target_tq.c +++ b/drivers/target/iscsi/iscsi_target_tq.c @@ -26,36 +26,22 @@ #include "iscsi_target_tq.h" #include "iscsi_target.h" -static LIST_HEAD(active_ts_list); static LIST_HEAD(inactive_ts_list); -static DEFINE_SPINLOCK(active_ts_lock); static DEFINE_SPINLOCK(inactive_ts_lock); static DEFINE_SPINLOCK(ts_bitmap_lock); -static void iscsi_add_ts_to_active_list(struct iscsi_thread_set *ts) -{ - spin_lock(&active_ts_lock); - list_add_tail(&ts->ts_list, &active_ts_list); - iscsit_global->active_ts++; - spin_unlock(&active_ts_lock); -} - static void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *ts) { + if (!list_empty(&ts->ts_list)) { + WARN_ON(1); + return; + } spin_lock(&inactive_ts_lock); list_add_tail(&ts->ts_list, &inactive_ts_list); iscsit_global->inactive_ts++; spin_unlock(&inactive_ts_lock); } -static void iscsi_del_ts_from_active_list(struct iscsi_thread_set *ts) -{ - spin_lock(&active_ts_lock); - list_del(&ts->ts_list); - iscsit_global->active_ts--; - 
spin_unlock(&active_ts_lock); -} - static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void) { struct iscsi_thread_set *ts; @@ -68,7 +54,7 @@ static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void) ts = list_first_entry(&inactive_ts_list, struct iscsi_thread_set, ts_list); - list_del(&ts->ts_list); + list_del_init(&ts->ts_list); iscsit_global->inactive_ts--; spin_unlock(&inactive_ts_lock); @@ -219,8 +205,6 @@ static void iscsi_deallocate_extra_thread_sets(void) void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts) { - iscsi_add_ts_to_active_list(ts); - spin_lock_bh(&ts->ts_state_lock); conn->thread_set = ts; ts->conn = conn; @@ -423,7 +407,6 @@ struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts) if (ts->delay_inactive && (--ts->thread_count == 0)) { spin_unlock_bh(&ts->ts_state_lock); - iscsi_del_ts_from_active_list(ts); if (!iscsit_global->in_shutdown) iscsi_deallocate_extra_thread_sets(); @@ -476,7 +459,6 @@ struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts) if (ts->delay_inactive && (--ts->thread_count == 0)) { spin_unlock_bh(&ts->ts_state_lock); - iscsi_del_ts_from_active_list(ts); if (!iscsit_global->in_shutdown) iscsi_deallocate_extra_thread_sets(); From a8ace7cca0c77f9140a25ed175e4d0aaa5d77566 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 9 Feb 2015 16:42:49 +0300 Subject: [PATCH 100/204] cfq-iosched: handle failure of cfq group allocation commit 69abaffec7d47a083739b79e3066cb3730eba72e upstream. Cfq_lookup_create_cfqg() allocates struct blkcg_gq using GFP_ATOMIC. In cfq_find_alloc_queue() possible allocation failure is not handled. As a result kernel oopses on NULL pointer dereference when cfq_link_cfqq_cfqg() calls cfqg_get() for NULL pointer. Bug was introduced in v3.5 in commit cd1604fab4f9 ("blkcg: factor out blkio_group creation"). Prior to that commit cfq group lookup had returned pointer to root group as fallback. 
This patch handles this error using existing fallback oom_cfqq. Signed-off-by: Konstantin Khlebnikov Acked-by: Tejun Heo Acked-by: Vivek Goyal Fixes: cd1604fab4f9 ("blkcg: factor out blkio_group creation") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/cfq-iosched.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c981097dd634..537244c2c7ef 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3575,6 +3575,11 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, blkcg = bio_blkcg(bio); cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); + if (!cfqg) { + cfqq = &cfqd->oom_cfqq; + goto out; + } + cfqq = cic_to_cfqq(cic, is_sync); /* @@ -3611,7 +3616,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, } else cfqq = &cfqd->oom_cfqq; } - +out: if (new_cfqq) kmem_cache_free(cfq_pool, new_cfqq); From 8a9a6a13371bac27f6cd7c9fc52f9b1ee98c2ba4 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 12 Jan 2015 15:21:01 -0500 Subject: [PATCH 101/204] cfq-iosched: fix incorrect filing of rt async cfqq commit c6ce194325cef342313e3d27620411ce90a89c50 upstream. Hi, If you can manage to submit an async write as the first async I/O from the context of a process with realtime scheduling priority, then a cfq_queue is allocated, but filed into the wrong async_cfqq bucket. It ends up in the best effort array, but actually has realtime I/O scheduling priority set in cfqq->ioprio. The reason is that cfq_get_queue assumes the default scheduling class and priority when there is no information present (i.e. when the async cfqq is created): static struct cfq_queue * cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, struct bio *bio, gfp_t gfp_mask) { const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio); cic->ioprio starts out as 0, which is "invalid". 
So, class of 0 (IOPRIO_CLASS_NONE) is passed to cfq_async_queue_prio like so: async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); static struct cfq_queue ** cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) { switch (ioprio_class) { case IOPRIO_CLASS_RT: return &cfqd->async_cfqq[0][ioprio]; case IOPRIO_CLASS_NONE: ioprio = IOPRIO_NORM; /* fall through */ case IOPRIO_CLASS_BE: return &cfqd->async_cfqq[1][ioprio]; case IOPRIO_CLASS_IDLE: return &cfqd->async_idle_cfqq; default: BUG(); } } Here, instead of returning a class mapped from the process' scheduling priority, we get back the bucket associated with IOPRIO_CLASS_BE. Now, there is no queue allocated there yet, so we create it: cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, bio, gfp_mask); That function ends up doing this: cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); cfq_init_prio_data(cfqq, cic); cfq_init_cfqq marks the priority as having changed. Then, cfq_init_prio data does this: ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); switch (ioprio_class) { default: printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); case IOPRIO_CLASS_NONE: /* * no prio set, inherit CPU scheduling settings */ cfqq->ioprio = task_nice_ioprio(tsk); cfqq->ioprio_class = task_nice_ioclass(tsk); break; So we basically have two code paths that treat IOPRIO_CLASS_NONE differently, which results in an RT async cfqq filed into a best effort bucket. Attached is a patch which fixes the problem. I'm not sure how to make it cleaner. Suggestions would be welcome. 
Signed-off-by: Jeff Moyer Tested-by: Hidehiro Kawai Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/cfq-iosched.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 537244c2c7ef..69111c5c352c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3646,12 +3646,17 @@ static struct cfq_queue * cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, struct bio *bio, gfp_t gfp_mask) { - const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); - const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio); + int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio); + int ioprio = IOPRIO_PRIO_DATA(cic->ioprio); struct cfq_queue **async_cfqq = NULL; struct cfq_queue *cfqq = NULL; if (!is_sync) { + if (!ioprio_valid(cic->ioprio)) { + struct task_struct *tsk = current; + ioprio = task_nice_ioprio(tsk); + ioprio_class = task_nice_ioclass(tsk); + } async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio); cfqq = *async_cfqq; } From 9d8039b13aea028b16676c28fcac358ec24e7b3b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 7 Jan 2015 18:04:18 +0200 Subject: [PATCH 102/204] axonram: Fix bug in direct_access commit 91117a20245b59f70b563523edbf998a62fc6383 upstream. The 'pfn' returned by axonram was completely bogus, and has been since 2008. 
Signed-off-by: Matthew Wilcox Reviewed-by: Jan Kara Reviewed-by: Mathieu Desnoyers Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/axonram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 1c16141c031c..1fea24944ff4 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -155,7 +155,7 @@ axon_ram_direct_access(struct block_device *device, sector_t sector, } *kaddr = (void *)(bank->ph_addr + offset); - *pfn = virt_to_phys(kaddr) >> PAGE_SHIFT; + *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; return 0; } From 4324af6a14ad1c0553a35d82a17d2a6066e98b79 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 19 Jan 2015 13:05:03 -0500 Subject: [PATCH 103/204] tty: Prevent untrappable signals from malicious program commit 37480a05685ed5b8e1b9bf5e5c53b5810258b149 upstream. Commit 26df6d13406d1a5 ("tty: Add EXTPROC support for LINEMODE") allows a process which has opened a pty master to send _any_ signal to the process group of the pty slave. Although potentially exploitable by a malicious program running a setuid program on a pty slave, it's unknown if this exploit currently exists. Limit to signals actually used. 
Cc: Theodore Ts'o Cc: Howard Chu Cc: One Thousand Gnomes Cc: Jiri Slaby Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index abfd99089781..7cb36813aac2 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -215,6 +215,9 @@ static int pty_signal(struct tty_struct *tty, int sig) unsigned long flags; struct pid *pgrp; + if (sig != SIGINT && sig != SIGQUIT && sig != SIGTSTP) + return -EINVAL; + if (tty->link) { spin_lock_irqsave(&tty->link->ctrl_lock, flags); pgrp = get_pid(tty->link->pgrp); From 931c8f77302707c736236b3d8b4c4ad0854b51c8 Mon Sep 17 00:00:00 2001 From: Lennart Sorensen Date: Wed, 21 Jan 2015 15:24:27 -0500 Subject: [PATCH 104/204] USB: cp210x: add ID for RUGGEDCOM USB Serial Console commit a6f0331236fa75afba14bbcf6668d42cebb55c43 upstream. Added the USB serial console device ID for Siemens Ruggedcom devices which have a USB port for their serial console. 
Signed-off-by: Len Sorensen Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 8e15acd204ef..f48f5dfab245 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -56,6 +56,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ + { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ { USB_DEVICE(0x0BED, 0x1101) }, /* MEI series 2000 Combo Acceptor */ { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */ From c237545ea8eab3d3e7647bde17634f51327847ff Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 30 Jan 2015 12:58:26 -0500 Subject: [PATCH 105/204] USB: fix use-after-free bug in usb_hcd_unlink_urb() commit c99197902da284b4b723451c1471c45b18537cde upstream. The usb_hcd_unlink_urb() routine in hcd.c contains two possible use-after-free errors. The dev_dbg() statement at the end of the routine dereferences urb and urb->dev even though both structures may have been deallocated. This patch fixes the problem by storing urb->dev in a local variable (avoiding the dereference of urb) and moving the dev_dbg() up before the usb_put_dev() call. 
Signed-off-by: Alan Stern Reported-by: Joe Lawrence Tested-by: Joe Lawrence Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index f6e5ceb03afb..cbfd3d14fa5a 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1593,6 +1593,7 @@ static int unlink1(struct usb_hcd *hcd, struct urb *urb, int status) int usb_hcd_unlink_urb (struct urb *urb, int status) { struct usb_hcd *hcd; + struct usb_device *udev = urb->dev; int retval = -EIDRM; unsigned long flags; @@ -1604,20 +1605,19 @@ int usb_hcd_unlink_urb (struct urb *urb, int status) spin_lock_irqsave(&hcd_urb_unlink_lock, flags); if (atomic_read(&urb->use_count) > 0) { retval = 0; - usb_get_dev(urb->dev); + usb_get_dev(udev); } spin_unlock_irqrestore(&hcd_urb_unlink_lock, flags); if (retval == 0) { hcd = bus_to_hcd(urb->dev->bus); retval = unlink1(hcd, urb, status); - usb_put_dev(urb->dev); + if (retval == 0) + retval = -EINPROGRESS; + else if (retval != -EIDRM && retval != -EBUSY) + dev_dbg(&udev->dev, "hcd_unlink_urb %p fail %d\n", + urb, retval); + usb_put_dev(udev); } - - if (retval == 0) - retval = -EINPROGRESS; - else if (retval != -EIDRM && retval != -EBUSY) - dev_dbg(&urb->dev->dev, "hcd_unlink_urb %p fail %d\n", - urb, retval); return retval; } From 8b1d57fdf3dd846e4b797a913aecd7f832abb629 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 5 Dec 2014 15:13:54 +0100 Subject: [PATCH 106/204] usb: core: buffer: smallest buffer should start at ARCH_DMA_MINALIGN commit 5efd2ea8c9f4f12916ffc8ba636792ce052f6911 upstream. the following error pops up during "testusb -a -t 10" | musb-hdrc musb-hdrc.1.auto: dma_pool_free buffer-128, f134e000/be842000 (bad dma) hcd_buffer_create() creates a few buffers, the smallest has 32 bytes of size. ARCH_KMALLOC_MINALIGN is set to 64 bytes. 
This combo results in hcd_buffer_alloc() returning memory which is 32 bytes aligned and it might be identified by buffer_offset() as another buffer. This means the buffer which is on a 32 byte boundary will not get freed, instead it tries to free another buffer with the error message. This patch fixes the issue by creating the smallest DMA buffer with the size of ARCH_KMALLOC_MINALIGN (or 32 in case ARCH_KMALLOC_MINALIGN is smaller). This might be 32, 64 or even 128 bytes. The next three pools will have the size 128, 512 and 2048. In case the smallest pool is 128 bytes then we have only three pools instead of four (and zero the first entry in the array). The last pool size is always 2048 bytes which is the assumed PAGE_SIZE / 2 of 4096. I doubt it makes sense to continue using PAGE_SIZE / 2 where we would end up with 8KiB buffer in case we have 16KiB pages. Instead I think it makes sense to have a common size(s) and extend them if there is need to. There is a BUILD_BUG_ON() now in case someone has a minalign of more than 128 bytes. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/buffer.c | 26 +++++++++++++++++--------- drivers/usb/core/usb.c | 1 + include/linux/usb/hcd.h | 1 + 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index b0585e623ba9..19fa68a732f3 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -22,17 +22,25 @@ */ /* FIXME tune these based on pool statistics ... */ -static const size_t pool_max[HCD_BUFFER_POOLS] = { - /* platforms without dma-friendly caches might need to - * prevent cacheline sharing... - */ - 32, - 128, - 512, - PAGE_SIZE / 2 - /* bigger --> allocate pages */ +static size_t pool_max[HCD_BUFFER_POOLS] = { + 32, 128, 512, 2048, }; +void __init usb_init_pool_max(void) +{ + /* + * The pool_max values must never be smaller than + * ARCH_KMALLOC_MINALIGN. 
+ */ + if (ARCH_KMALLOC_MINALIGN <= 32) + ; /* Original value is okay */ + else if (ARCH_KMALLOC_MINALIGN <= 64) + pool_max[0] = 64; + else if (ARCH_KMALLOC_MINALIGN <= 128) + pool_max[0] = 0; /* Don't use this pool */ + else + BUILD_BUG(); /* We don't allow this */ +} /* SETUP primitives */ diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index b10da720f2b4..e54b1bdcd66e 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -1003,6 +1003,7 @@ static int __init usb_init(void) pr_info("%s: USB support disabled\n", usbcore_name); return 0; } + usb_init_pool_max(); retval = usb_debugfs_init(); if (retval) diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 0fdff28d5015..287b906f5d59 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -416,6 +416,7 @@ extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif /* CONFIG_PCI */ /* pci-ish (pdev null is ok) buffer alloc/mapping support */ +void usb_init_pool_max(void); int hcd_buffer_create(struct usb_hcd *hcd); void hcd_buffer_destroy(struct usb_hcd *hcd); From cabab528e7641cc210791af946031ff98d06046d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 23 Jan 2015 17:07:21 -0500 Subject: [PATCH 107/204] vt: provide notifications on selection changes commit 19e3ae6b4f07a87822c1c9e7ed99d31860e701af upstream. The vcs device's poll/fasync support relies on the vt notifier to signal changes to the screen content. Notifier invocations were missing for changes that comes through the selection interface though. Fix that. Tested with BRLTTY 5.2. 
Signed-off-by: Nicolas Pitre Cc: Dave Mielke Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 0d1b3757cfb6..db9d69fa1085 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -498,6 +498,7 @@ void invert_screen(struct vc_data *vc, int offset, int count, int viewed) #endif if (DO_UPDATE(vc)) do_update_region(vc, (unsigned long) p, count); + notify_update(vc); } /* used by selection: complement pointer position */ @@ -514,6 +515,7 @@ void complement_pos(struct vc_data *vc, int offset) scr_writew(old, screenpos(vc, old_offset, 1)); if (DO_UPDATE(vc)) vc->vc_sw->con_putc(vc, old, oldy, oldx); + notify_update(vc); } old_offset = offset; @@ -531,8 +533,8 @@ void complement_pos(struct vc_data *vc, int offset) oldy = (offset >> 1) / vc->vc_cols; vc->vc_sw->con_putc(vc, new, oldy, oldx); } + notify_update(vc); } - } static void insert_char(struct vc_data *vc, unsigned int nr) From e3dd19196c47778f7e23e5db3eda22f69de31d45 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 4 Dec 2014 14:10:00 +0300 Subject: [PATCH 108/204] ARM: pxa: add regulator_has_full_constraints to corgi board file commit 271e80176aae4e5b481f4bb92df9768c6075bbca upstream. Add regulator_has_full_constraints() call to corgi board file to let regulator core know that we do not have any additional regulators left. This lets it substitute unprovided regulators with dummy ones. 
This fixes the following warnings that can be seen on corgi if regulators are enabled: ads7846 spi1.0: unable to get regulator: -517 spi spi1.0: Driver ads7846 requests probe deferral wm8731 0-001b: Failed to get supply 'AVDD': -517 wm8731 0-001b: Failed to request supplies: -517 wm8731 0-001b: ASoC: failed to probe component -517 corgi-audio corgi-audio: ASoC: failed to instantiate card -517 Signed-off-by: Dmitry Eremin-Solenikov Acked-by: Mark Brown Signed-off-by: Robert Jarzmik Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/corgi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index a5b8fead7d61..9cb7ea776ecd 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -711,6 +712,8 @@ static void __init corgi_init(void) sharpsl_nand_partitions[1].size = 53 * 1024 * 1024; platform_add_devices(devices, ARRAY_SIZE(devices)); + + regulator_has_full_constraints(); } static void __init fixup_corgi(struct tag *tags, char **cmdline, From 9e35c538b978e909a7805997f5944d5a0b7f5e95 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 4 Dec 2014 14:10:01 +0300 Subject: [PATCH 109/204] ARM: pxa: add regulator_has_full_constraints to poodle board file commit 9bc78f32c2e430aebf6def965b316aa95e37a20c upstream. Add regulator_has_full_constraints() call to poodle board file to let regulator core know that we do not have any additional regulators left. This lets it substitute unprovided regulators with dummy ones. 
This fixes the following warnings that can be seen on poodle if regulators are enabled: ads7846 spi1.0: unable to get regulator: -517 spi spi1.0: Driver ads7846 requests probe deferral wm8731 0-001b: Failed to get supply 'AVDD': -517 wm8731 0-001b: Failed to request supplies: -517 wm8731 0-001b: ASoC: failed to probe component -517 Signed-off-by: Dmitry Eremin-Solenikov Acked-by: Mark Brown Signed-off-by: Robert Jarzmik Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/poodle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 50ccd5f1d560..362c05fffc28 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -452,6 +453,7 @@ static void __init poodle_init(void) pxa_set_i2c_info(NULL); i2c_register_board_info(0, ARRAY_AND_SIZE(poodle_i2c_devices)); poodle_init_spi(); + regulator_has_full_constraints(); } static void __init fixup_poodle(struct tag *tags, char **cmdline, From ab92b84e8d3efaff78ca896ae2a0f3b2edc871b0 Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Mon, 29 Sep 2014 15:36:57 -0700 Subject: [PATCH 110/204] kdb: fix incorrect counts in KDB summary command output commit 146755923262037fc4c54abc28c04b1103f3cc51 upstream. The output of KDB 'summary' command should report MemTotal, MemFree and Buffers output in kB. Current codes report in unit of pages. A define of K(x) as is defined in the code, but not used. This patch would apply the define to convert the values to kB. Please include me on Cc on replies. I do not subscribe to linux-kernel. 
Signed-off-by: Jay Lan Signed-off-by: Jason Wessel Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 00eb8f7fbf41..545241de23bf 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2532,7 +2532,7 @@ static int kdb_summary(int argc, const char **argv) #define K(x) ((x) << (PAGE_SHIFT - 10)) kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" "Buffers: %8lu kB\n", - val.totalram, val.freeram, val.bufferram); + K(val.totalram), K(val.freeram), K(val.bufferram)); return 0; } From 72e2d609260a5a7515d4c9d2d709759fc282e552 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 9 Feb 2015 23:30:36 -0800 Subject: [PATCH 111/204] ntp: Fixup adjtimex freq validation on 32-bit systems commit 29183a70b0b828500816bd794b3fe192fce89f73 upstream. Additional validation of adjtimex freq values to avoid potential multiplication overflows was added in commit 5e5aeb4367b (time: adjtimex: Validate the ADJ_FREQUENCY values) Unfortunately the patch used LONG_MAX/MIN instead of LLONG_MAX/MIN, which was fine on 64-bit systems, but being much smaller on 32-bit systems caused false positives resulting in most direct frequency adjustments to fail w/ EINVAL. ntpd only does direct frequency adjustments at startup, so the issue was not as easily observed there, but other time sync applications like ptpd and chrony were more affected by the bug. See bugs: https://bugzilla.kernel.org/show_bug.cgi?id=92481 https://bugzilla.redhat.com/show_bug.cgi?id=1188074 This patch changes the checks to use LLONG_MAX for clarity, and additionally the checks are disabled on 32-bit systems since LLONG_MAX/PPM_SCALE is always larger than the 32-bit long freq value, so multiplication overflows aren't possible there. 
Reported-by: Josh Boyer Reported-by: George Joseph Tested-by: George Joseph Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Sasha Levin Link: http://lkml.kernel.org/r/1423553436-29747-1-git-send-email-john.stultz@linaro.org [ Prettified the changelog and the comments a bit. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/time/ntp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 28db9bedc857..6211d5d6d465 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -631,10 +631,14 @@ int ntp_validate_timex(struct timex *txc) if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) return -EPERM; - if (txc->modes & ADJ_FREQUENCY) { - if (LONG_MIN / PPM_SCALE > txc->freq) + /* + * Check for potential multiplication overflows that can + * only happen on 64-bit systems: + */ + if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { + if (LLONG_MIN / PPM_SCALE > txc->freq) return -EINVAL; - if (LONG_MAX / PPM_SCALE < txc->freq) + if (LLONG_MAX / PPM_SCALE < txc->freq) return -EINVAL; } From 16eff7f2f472966415d6d0f598fe11da31d24432 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 12 Feb 2015 21:10:11 +0300 Subject: [PATCH 112/204] ARC: fix page address calculation if PAGE_OFFSET != LINUX_LINK_BASE commit 06f34e1c28f3608b0ce5b310e41102d3fe7b65a1 upstream. We used to calculate page address differently in 2 cases: 1. In virt_to_page(x) we do --->8--- mem_map + (x - CONFIG_LINUX_LINK_BASE) >> PAGE_SHIFT --->8--- 2. In pte_page(x) we do --->8--- mem_map + (pte_val(x) - PAGE_OFFSET) >> PAGE_SHIFT --->8--- That leads to problems in case PAGE_OFFSET != CONFIG_LINUX_LINK_BASE - different pages will be selected depending on where and how we calculate page address. 
In particular in the STAR 9000853582 when gdb attempted to read memory of another process it got improper page in get_user_pages() because this is exactly one of the places where we search for a page by pte_page(). The fix is trivial - we need to calculate page address similarly in both cases. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 95b1522212a7..ecf23eaa20c1 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -270,7 +270,8 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) #define pte_page(x) (mem_map + \ - (unsigned long)(((pte_val(x) - PAGE_OFFSET) >> PAGE_SHIFT))) + (unsigned long)(((pte_val(x) - CONFIG_LINUX_LINK_BASE) >> \ + PAGE_SHIFT))) #define mk_pte(page, pgprot) \ ({ \ From ef6bb317ad2f9fc585fcb87e6393763ea9850265 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 4 Feb 2015 17:06:37 +0000 Subject: [PATCH 113/204] KVM: MIPS: Don't leak FPU/DSP to guest commit f798217dfd038af981a18bbe4bc57027a08bb182 upstream. The FPU and DSP are enabled via the CP0 Status CU1 and MX bits by kvm_mips_set_c0_status() on a guest exit, presumably in case there is active state that needs saving if pre-emption occurs. However neither of these bits are cleared again when returning to the guest. This effectively gives the guest access to the FPU/DSP hardware after the first guest exit even though it is not aware of its presence, allowing FP instructions in guest user code to intermittently actually execute instead of trapping into the guest OS for emulation. It will then read & manipulate the hardware FP registers which technically belong to the user process (e.g. QEMU), or are stale from another user process. 
It can also crash the guest OS by causing an FP exception, for which a guest exception handler won't have been registered. First lets save and disable the FPU (and MSA) state with lose_fpu(1) before entering the guest. This simplifies the problem, especially for when guest FPU/MSA support is added in the future, and prevents FR=1 FPU state being live when the FR bit gets cleared for the guest, which according to the architecture causes the contents of the FPU and vector registers to become UNPREDICTABLE. We can then safely remove the enabling of the FPU in kvm_mips_set_c0_status(), since there should never be any active FPU or MSA state to save at pre-emption, which should plug the FPU leak. DSP state is always live rather than being lazily restored, so for that it is simpler to just clear the MX bit again when re-entering the guest. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Sanjay Lal Cc: Gleb Natapov Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: # v3.10+: 044f0f03eca0: MIPS: KVM: Deliver guest interrupts Cc: # v3.10+ Signed-off-by: Paolo Bonzini Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/kvm_locore.S | 2 +- arch/mips/kvm/kvm_mips.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S index dca2aa665993..920b63210806 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/kvm_locore.S @@ -431,7 +431,7 @@ __kvm_mips_return_to_guest: /* Setup status register for running guest in UM */ .set at or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) - and v1, v1, ~ST0_CU0 + and v1, v1, ~(ST0_CU0 | ST0_MX) .set noat mtc0 v1, CP0_STATUS ehb diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index f957a8ac979b..843ec38fec7b 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -413,6 +414,8 @@ int 
kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; } + lose_fpu(1); + local_irq_disable(); /* Check if we have any exceptions/interrupts pending */ kvm_mips_deliver_interrupts(vcpu, @@ -1017,9 +1020,6 @@ void kvm_mips_set_c0_status(void) { uint32_t status = read_c0_status(); - if (cpu_has_fpu) - status |= (ST0_CU1); - if (cpu_has_dsp) status |= (ST0_MX); From b7e4884e64bcfefd359812a96505ecfa67c3ce8d Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 4 Nov 2014 21:30:44 -0200 Subject: [PATCH 114/204] KVM: x86: update masterclock values on TSC writes commit 7f187922ddf6b67f2999a76dcb71663097b75497 upstream. When the guest writes to the TSC, the masterclock TSC copy must be updated as well along with the TSC_OFFSET update, otherwise a negative tsc_timestamp is calculated at kvm_guest_time_update. Once "if (!vcpus_matched && ka->use_master_clock)" is simplified to "if (ka->use_master_clock)", the corresponding "if (!ka->use_master_clock)" becomes redundant, so remove the do_request boolean and collapse everything into a single condition. 
Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index adfc30d9f9f4..41ba726c1ce2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1182,21 +1182,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 bool vcpus_matched; - bool do_request = false; struct kvm_arch *ka = &vcpu->kvm->arch; struct pvclock_gtod_data *gtod = &pvclock_gtod_data; vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == atomic_read(&vcpu->kvm->online_vcpus)); - if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC) - if (!ka->use_master_clock) - do_request = 1; - - if (!vcpus_matched && ka->use_master_clock) - do_request = 1; - - if (do_request) + /* + * Once the masterclock is enabled, always perform request in + * order to update it. + * + * In order to enable masterclock, the host clocksource must be TSC + * and the vcpus need to have matched TSCs. When that happens, + * perform request to enable masterclock. + */ + if (ka->use_master_clock || + (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched)) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, From 09fc2667f76b53fabf9af2cc6ebd936fecfe6ffe Mon Sep 17 00:00:00 2001 From: Martin Vajnar Date: Wed, 24 Dec 2014 00:27:57 +0100 Subject: [PATCH 115/204] hx4700: regulator: declare full constraints commit a52d209336f8fc7483a8c7f4a8a7d2a8e1692a6c upstream. Since the removal of CONFIG_REGULATOR_DUMMY option, the touchscreen stopped working. This patch enables the "replacement" for REGULATOR_DUMMY and allows the touchscreen to work even though there is no regulator for "vcc". 
Signed-off-by: Martin Vajnar Signed-off-by: Robert Jarzmik Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/hx4700.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 133109ec7332..a07accfb3aec 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -891,6 +891,8 @@ static void __init hx4700_init(void) mdelay(10); gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 1); mdelay(10); + + regulator_has_full_constraints(); } MACHINE_START(H4700, "HP iPAQ HX4700") From 424180f54384dd24cbd81e503d41eaa531ca0580 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 23 Feb 2015 15:13:40 +0000 Subject: [PATCH 116/204] arm64: compat Fix siginfo_t -> compat_siginfo_t conversion on big endian commit 9d42d48a342aee208c1154696196497fdc556bbf upstream. The native (64-bit) sigval_t union contains sival_int (32-bit) and sival_ptr (64-bit). When a compat application invokes a syscall that takes a sigval_t value (as part of a larger structure, e.g. compat_sys_mq_notify, compat_sys_timer_create), the compat_sigval_t union is converted to the native sigval_t with sival_int overlapping with either the least or the most significant half of sival_ptr, depending on endianness. When the corresponding signal is delivered to a compat application, on big endian the current (compat_uptr_t)sival_ptr cast always returns 0 since sival_int corresponds to the top part of sival_ptr. This patch fixes copy_siginfo_to_user32() so that sival_int is copied to the compat_siginfo_t structure. 
Reported-by: Bamvor Jian Zhang Tested-by: Bamvor Jian Zhang Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal32.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e393174fe859..3d478102b1c0 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -179,8 +179,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) case __SI_TIMER: err |= __put_user(from->si_tid, &to->si_tid); err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, - &to->si_ptr); + err |= __put_user(from->si_int, &to->si_int); break; case __SI_POLL: err |= __put_user(from->si_band, &to->si_band); @@ -209,7 +208,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) case __SI_MESGQ: /* But this is */ err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr); + err |= __put_user(from->si_int, &to->si_int); break; default: /* this is just in case for now ... */ err |= __put_user(from->si_pid, &to->si_pid); From dd17ef27db2d79b4cb2ade1dc03f5ee151b0240b Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 19 Feb 2015 01:52:25 +0000 Subject: [PATCH 117/204] gpio: tps65912: fix wrong container_of arguments commit 2f97c20e5f7c3582c7310f65a04465bfb0fd0e85 upstream. The gpio_chip operations receive a pointer to the gpio_chip struct, which is contained in the driver's private struct, yet the container_of calls in those functions point to the mfd struct defined in include/linux/mfd/tps65912.h. 
Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-tps65912.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-tps65912.c b/drivers/gpio/gpio-tps65912.c index 30a5844a7dca..845730469945 100644 --- a/drivers/gpio/gpio-tps65912.c +++ b/drivers/gpio/gpio-tps65912.c @@ -26,9 +26,12 @@ struct tps65912_gpio_data { struct gpio_chip gpio_chip; }; +#define to_tgd(gc) container_of(gc, struct tps65912_gpio_data, gpio_chip) + static int tps65912_gpio_get(struct gpio_chip *gc, unsigned offset) { - struct tps65912 *tps65912 = container_of(gc, struct tps65912, gpio); + struct tps65912_gpio_data *tps65912_gpio = to_tgd(gc); + struct tps65912 *tps65912 = tps65912_gpio->tps65912; int val; val = tps65912_reg_read(tps65912, TPS65912_GPIO1 + offset); @@ -42,7 +45,8 @@ static int tps65912_gpio_get(struct gpio_chip *gc, unsigned offset) static void tps65912_gpio_set(struct gpio_chip *gc, unsigned offset, int value) { - struct tps65912 *tps65912 = container_of(gc, struct tps65912, gpio); + struct tps65912_gpio_data *tps65912_gpio = to_tgd(gc); + struct tps65912 *tps65912 = tps65912_gpio->tps65912; if (value) tps65912_set_bits(tps65912, TPS65912_GPIO1 + offset, @@ -55,7 +59,8 @@ static void tps65912_gpio_set(struct gpio_chip *gc, unsigned offset, static int tps65912_gpio_output(struct gpio_chip *gc, unsigned offset, int value) { - struct tps65912 *tps65912 = container_of(gc, struct tps65912, gpio); + struct tps65912_gpio_data *tps65912_gpio = to_tgd(gc); + struct tps65912 *tps65912 = tps65912_gpio->tps65912; /* Set the initial value */ tps65912_gpio_set(gc, offset, value); @@ -66,7 +71,8 @@ static int tps65912_gpio_output(struct gpio_chip *gc, unsigned offset, static int tps65912_gpio_input(struct gpio_chip *gc, unsigned offset) { - struct tps65912 *tps65912 = container_of(gc, struct tps65912, gpio); + struct tps65912_gpio_data *tps65912_gpio = to_tgd(gc); + struct 
tps65912 *tps65912 = tps65912_gpio->tps65912; return tps65912_clear_bits(tps65912, TPS65912_GPIO1 + offset, GPIO_CFG_MASK); From cb96928e7520aa9c68074afd7229a2020005d132 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 24 Feb 2015 12:25:25 +0000 Subject: [PATCH 118/204] metag: Fix KSTK_EIP() and KSTK_ESP() macros commit c2996cb29bfb73927a79dc96e598a718e843f01a upstream. The KSTK_EIP() and KSTK_ESP() macros should return the user program counter (PC) and stack pointer (A0StP) of the given task. These are used to determine which VMA corresponds to the user stack in /proc//maps, and for the user PC & A0StP in /proc//stat. However for Meta the PC & A0StP from the task's kernel context are used, resulting in broken output. For example in following /proc//maps output, the 3afff000-3b021000 VMA should be described as the stack: # cat /proc/self/maps ... 100b0000-100b1000 rwxp 00000000 00:00 0 [heap] 3afff000-3b021000 rwxp 00000000 00:00 0 And in the following /proc//stat output, the PC is in kernel code (1074234964 = 0x40078654) and the A0StP is in the kernel heap (1335981392 = 0x4fa17550): # cat /proc/self/stat 51 (cat) R ... 1335981392 1074234964 ... Fix the definitions of KSTK_EIP() and KSTK_ESP() to use task_pt_regs(tsk)->ctx rather than (tsk)->thread.kernel_context. This gets the registers from the user context stored after the thread info at the base of the kernel stack, which is from the last entry into the kernel from userland, regardless of where in the kernel the task may have been interrupted, which results in the following more correct /proc//maps output: # cat /proc/self/maps ... 0800b000-08070000 r-xp 00000000 00:02 207 /lib/libuClibc-0.9.34-git.so ... 100b0000-100b1000 rwxp 00000000 00:00 0 [heap] 3afff000-3b021000 rwxp 00000000 00:00 0 [stack] And /proc//stat now correctly reports the PC in libuClibc (134320308 = 0x80190b4) and the A0StP in the [stack] region (989864576 = 0x3b002280): # cat /proc/self/stat 51 (cat) R ... 989864576 134320308 ... 
Reported-by: Alexey Brodkin Reported-by: Vineet Gupta Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/include/asm/processor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index 579e3d93a5ca..b88e9cbdc64b 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -149,8 +149,8 @@ extern void exit_thread(void); unsigned long get_wchan(struct task_struct *p); -#define KSTK_EIP(tsk) ((tsk)->thread.kernel_context->CurrPC) -#define KSTK_ESP(tsk) ((tsk)->thread.kernel_context->AX[0].U0) +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ctx.CurrPC) +#define KSTK_ESP(tsk) (task_pt_regs(tsk)->ctx.AX[0].U0) #define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0) From 58f0e96a4358f164f3857ee0d609d1b75a3ccf7d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 18 Feb 2015 11:35:14 +1100 Subject: [PATCH 119/204] md/raid5: Fix livelock when array is both resyncing and degraded. commit 26ac107378c4742978216be1005b7291b799c7b2 upstream. Commit a7854487cd7128a30a7f4f5259de9f67d5efb95f: md: When RAID5 is dirty, force reconstruct-write instead of read-modify-write. Causes an RCW cycle to be forced even when the array is degraded. A degraded array cannot support RCW as that requires reading all data blocks, and one may be missing. Forcing an RCW when it is not possible causes a live-lock and the code spins, repeatedly deciding to do something that cannot succeed. So change the condition to only force RCW on non-degraded arrays. 
Reported-by: Manibalan P Bisected-by: Jes Sorensen Tested-by: Jes Sorensen Signed-off-by: NeilBrown Fixes: a7854487cd7128a30a7f4f5259de9f67d5efb95f Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4daf5c03b33b..1b6986ce5da9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2853,7 +2853,8 @@ static void handle_stripe_dirtying(struct r5conf *conf, * generate correct data from the parity. */ if (conf->max_degraded == 2 || - (recovery_cp < MaxSector && sh->sector >= recovery_cp)) { + (recovery_cp < MaxSector && sh->sector >= recovery_cp && + s->failed == 0)) { /* Calculate the real rcw later - for now make it * look like rcw is cheaper */ From b581e762b1a452ac94d452117a6c953f4d011767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hodek?= Date: Mon, 23 Feb 2015 11:00:38 +1100 Subject: [PATCH 120/204] md/raid1: fix read balance when a drive is write-mostly. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d1901ef099c38afd11add4cfb3312c02ef21ec4a upstream. When a drive is marked write-mostly it should only be the target of reads if there is no other option. This behaviour was broken by commit 9dedf60313fa4dddfd5b9b226a0ef12a512bf9dc md/raid1: read balance chooses idlest disk for SSD which causes a write-mostly device to be *preferred* in some cases. Restore correct behaviour by checking and setting best_dist_disk and best_pending_disk rather than best_disk. We only need to test one of these as they are both changed from -1 to >=0 at the same time. As we leave min_pending and best_dist unchanged, any non-write-mostly device will appear better than the write-mostly device. 
Reported-by: Tomáš Hodek Reported-by: Dark Penguin Signed-off-by: NeilBrown Link: http://marc.info/?l=linux-raid&m=135982797322422 Fixes: 9dedf60313fa4dddfd5b9b226a0ef12a512bf9dc Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e885dbf08c40..86ac4a4ccc01 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -557,7 +557,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect if (test_bit(WriteMostly, &rdev->flags)) { /* Don't balance among write-mostly, just * use the first as a last resort */ - if (best_disk < 0) { + if (best_dist_disk < 0) { if (is_badblock(rdev, this_sector, sectors, &first_bad, &bad_sectors)) { if (first_bad < this_sector) @@ -566,7 +566,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect best_good_sectors = first_bad - this_sector; } else best_good_sectors = sectors; - best_disk = disk; + best_dist_disk = disk; + best_pending_disk = disk; } continue; } From 6192e21a91dca9f225f028079835aa04601e1309 Mon Sep 17 00:00:00 2001 From: Chen Jie Date: Tue, 10 Feb 2015 12:49:48 -0800 Subject: [PATCH 121/204] jffs2: fix handling of corrupted summary length commit 164c24063a3eadee11b46575c5482b2f1417be49 upstream. sm->offset may be wrong even though the magic is right, because the offset is not protected by a CRC. 
Badness at c00c7580 [verbose debug info unavailable] NIP: c00c7580 LR: c00c718c CTR: 00000014 REGS: df07bb40 TRAP: 0700 Not tainted (2.6.34.13-WR4.3.0.0_standard) MSR: 00029000 CR: 22084f84 XER: 00000000 TASK = df84d6e0[908] 'mount' THREAD: df07a000 GPR00: 00000001 df07bbf0 df84d6e0 00000000 00000001 00000000 df07bb58 00000041 GPR08: 00000041 c0638860 00000000 00000010 22084f88 100636c8 df814ff8 00000000 GPR16: df84d6e0 dfa558cc c05adb90 00000048 c0452d30 00000000 000240d0 000040d0 GPR24: 00000014 c05ae734 c05be2e0 00000000 00000001 00000000 00000000 c05ae730 NIP [c00c7580] __alloc_pages_nodemask+0x4d0/0x638 LR [c00c718c] __alloc_pages_nodemask+0xdc/0x638 Call Trace: [df07bbf0] [c00c718c] __alloc_pages_nodemask+0xdc/0x638 (unreliable) [df07bc90] [c00c7708] __get_free_pages+0x20/0x48 [df07bca0] [c00f4a40] __kmalloc+0x15c/0x1ec [df07bcd0] [c01fc880] jffs2_scan_medium+0xa58/0x14d0 [df07bd70] [c01ff38c] jffs2_do_mount_fs+0x1f4/0x6b4 [df07bdb0] [c020144c] jffs2_do_fill_super+0xa8/0x260 [df07bdd0] [c020230c] jffs2_fill_super+0x104/0x184 [df07be00] [c0335814] get_sb_mtd_aux+0x9c/0xec [df07be20] [c033596c] get_sb_mtd+0x84/0x1e8 [df07be60] [c0201ed0] jffs2_get_sb+0x1c/0x2c [df07be70] [c0103898] vfs_kern_mount+0x78/0x1e8 [df07bea0] [c0103a58] do_kern_mount+0x40/0x100 [df07bec0] [c011fe90] do_mount+0x240/0x890 [df07bf10] [c0120570] sys_mount+0x90/0xd8 [df07bf40] [c00110d8] ret_from_syscall+0x0/0x4 === Exception: c01 at 0xff61a34 LR = 0x100135f0 Instruction dump: 38800005 38600000 48010f41 4bfffe1c 4bfc2d15 4bfffe8c 72e90200 4082fc28 3d20c064 39298860 8809000d 68000001 <0f000000> 2f800000 419efc0c 38000001 mount: mounting /dev/mtdblock3 on /common failed: Input/output error Signed-off-by: Chen Jie Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/scan.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 7654e87b0428..9ad5ba4b299b 100644 --- a/fs/jffs2/scan.c +++ 
b/fs/jffs2/scan.c @@ -510,6 +510,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo sumlen = c->sector_size - je32_to_cpu(sm->offset); sumptr = buf + buf_size - sumlen; + /* sm->offset maybe wrong but MAGIC maybe right */ + if (sumlen > c->sector_size) + goto full_scan; + /* Now, make sure the summary itself is available */ if (sumlen > buf_size) { /* Need to kmalloc for this. */ @@ -544,6 +548,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo } } +full_scan: buf_ofs = jeb->offset; if (!buf_size) { From 65e63ea91b18e634c53e68a846608fcf8d571418 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 16 Feb 2015 17:16:45 -0200 Subject: [PATCH 122/204] blk-throttle: check stats_cpu before reading it from sysfs commit 045c47ca306acf30c740c285a77a4b4bda6be7c5 upstream. When reading blkio.throttle.io_serviced in a recently created blkio cgroup, it's possible to race against the creation of a throttle policy, which delays the allocation of stats_cpu. Like other functions in the throttle code, just checking for a NULL stats_cpu prevents the following oops caused by that race. 
[ 1117.285199] Unable to handle kernel paging request for data at address 0x7fb4d0020 [ 1117.285252] Faulting instruction address: 0xc0000000003efa2c [ 1137.733921] Oops: Kernel access of bad area, sig: 11 [#1] [ 1137.733945] SMP NR_CPUS=2048 NUMA PowerNV [ 1137.734025] Modules linked in: bridge stp llc kvm_hv kvm binfmt_misc autofs4 [ 1137.734102] CPU: 3 PID: 5302 Comm: blkcgroup Not tainted 3.19.0 #5 [ 1137.734132] task: c000000f1d188b00 ti: c000000f1d210000 task.ti: c000000f1d210000 [ 1137.734167] NIP: c0000000003efa2c LR: c0000000003ef9f0 CTR: c0000000003ef980 [ 1137.734202] REGS: c000000f1d213500 TRAP: 0300 Not tainted (3.19.0) [ 1137.734230] MSR: 9000000000009032 CR: 42008884 XER: 20000000 [ 1137.734325] CFAR: 0000000000008458 DAR: 00000007fb4d0020 DSISR: 40000000 SOFTE: 0 GPR00: c0000000003ed3a0 c000000f1d213780 c000000000c59538 0000000000000000 GPR04: 0000000000000800 0000000000000000 0000000000000000 0000000000000000 GPR08: ffffffffffffffff 00000007fb4d0020 00000007fb4d0000 c000000000780808 GPR12: 0000000022000888 c00000000fdc0d80 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 000001003e120200 c000000f1d5b0cc0 0000000000000200 0000000000000000 GPR24: 0000000000000001 c000000000c269e0 0000000000000020 c000000f1d5b0c80 GPR28: c000000000ca3a08 c000000000ca3dec c000000f1c667e00 c000000f1d213850 [ 1137.734886] NIP [c0000000003efa2c] .tg_prfill_cpu_rwstat+0xac/0x180 [ 1137.734915] LR [c0000000003ef9f0] .tg_prfill_cpu_rwstat+0x70/0x180 [ 1137.734943] Call Trace: [ 1137.734952] [c000000f1d213780] [d000000005560520] 0xd000000005560520 (unreliable) [ 1137.734996] [c000000f1d2138a0] [c0000000003ed3a0] .blkcg_print_blkgs+0xe0/0x1a0 [ 1137.735039] [c000000f1d213960] [c0000000003efb50] .tg_print_cpu_rwstat+0x50/0x70 [ 1137.735082] [c000000f1d2139e0] [c000000000104b48] .cgroup_seqfile_show+0x58/0x150 [ 1137.735125] [c000000f1d213a70] [c0000000002749dc] .kernfs_seq_show+0x3c/0x50 [ 1137.735161] 
[c000000f1d213ae0] [c000000000218630] .seq_read+0xe0/0x510 [ 1137.735197] [c000000f1d213bd0] [c000000000275b04] .kernfs_fop_read+0x164/0x200 [ 1137.735240] [c000000f1d213c80] [c0000000001eb8e0] .__vfs_read+0x30/0x80 [ 1137.735276] [c000000f1d213cf0] [c0000000001eb9c4] .vfs_read+0x94/0x1b0 [ 1137.735312] [c000000f1d213d90] [c0000000001ebb38] .SyS_read+0x58/0x100 [ 1137.735349] [c000000f1d213e30] [c000000000009218] syscall_exit+0x0/0x98 [ 1137.735383] Instruction dump: [ 1137.735405] 7c6307b4 7f891800 409d00b8 60000000 60420000 3d420004 392a63b0 786a1f24 [ 1137.735471] 7d49502a e93e01c8 7d495214 7d2ad214 <7cead02a> e9090008 e9490010 e9290018 And here is one code that allows to easily reproduce this, although this has first been found by running docker. void run(pid_t pid) { int n; int status; int fd; char *buffer; buffer = memalign(BUFFER_ALIGN, BUFFER_SIZE); n = snprintf(buffer, BUFFER_SIZE, "%d\n", pid); fd = open(CGPATH "/test/tasks", O_WRONLY); write(fd, buffer, n); close(fd); if (fork() > 0) { fd = open("/dev/sda", O_RDONLY | O_DIRECT); read(fd, buffer, 512); close(fd); wait(&status); } else { fd = open(CGPATH "/test/blkio.throttle.io_serviced", O_RDONLY); n = read(fd, buffer, BUFFER_SIZE); close(fd); } free(buffer); exit(0); } void test(void) { int status; mkdir(CGPATH "/test", 0666); if (fork() > 0) wait(&status); else run(getpid()); rmdir(CGPATH "/test"); } int main(int argc, char **argv) { int i; for (i = 0; i < NR_TESTS; i++) test(); return 0; } Reported-by: Ricardo Marin Matinata Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-throttle.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 31146225f3d0..7cddfe6baf66 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -942,6 +942,9 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, struct blkg_rwstat rwstat = { }, tmp; int i, cpu; + if (tg->stats_cpu == NULL) + return 0; 
+ for_each_possible_cpu(cpu) { struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); From 4f2e84da8a809db7747dd9712a120a44bebd92f3 Mon Sep 17 00:00:00 2001 From: Hector Marco-Gisbert Date: Sat, 14 Feb 2015 09:33:50 -0800 Subject: [PATCH 123/204] x86, mm/ASLR: Fix stack randomization on 64-bit systems commit 4e7c22d447bb6d7e37bfe39ff658486ae78e8d77 upstream. The issue is that the stack for processes is not properly randomized on 64 bit architectures due to an integer overflow. The affected function is randomize_stack_top() in file "fs/binfmt_elf.c": static unsigned long randomize_stack_top(unsigned long stack_top) { unsigned int random_variable = 0; if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { random_variable = get_random_int() & STACK_RND_MASK; random_variable <<= PAGE_SHIFT; } #ifdef CONFIG_STACK_GROWSUP return PAGE_ALIGN(stack_top) + random_variable; #else return PAGE_ALIGN(stack_top) - random_variable; #endif } Note that it declares the "random_variable" variable as "unsigned int". The result of the shifting operation between STACK_RND_MASK (which is 0x3fffff on x86_64, 22 bits) and PAGE_SHIFT (which is 12 on x86_64): random_variable <<= PAGE_SHIFT; does not fit in 32 bits, so the two leftmost bits are dropped when storing the result in the "random_variable". This variable shall be at least 34 bits long to hold the (22+12) result. These two dropped bits have an impact on the entropy of process stack. Concretely, the total stack entropy is reduced by a factor of four: from 2^30 to 2^28 (one fourth of the expected entropy). This patch restores the entropy by correcting the types involved in the operations in the functions randomize_stack_top() and stack_maxrandom_size(). 
The successful fix can be tested with: $ for i in `seq 1 10`; do cat /proc/self/maps | grep stack; done 7ffeda566000-7ffeda587000 rw-p 00000000 00:00 0 [stack] 7fff5a332000-7fff5a353000 rw-p 00000000 00:00 0 [stack] 7ffcdb7a1000-7ffcdb7c2000 rw-p 00000000 00:00 0 [stack] 7ffd5e2c4000-7ffd5e2e5000 rw-p 00000000 00:00 0 [stack] ... Once corrected, the leading bytes should be between 7ffc and 7fff, rather than always being 7fff. Signed-off-by: Hector Marco-Gisbert Signed-off-by: Ismael Ripoll [ Rebased, fixed 80 char bugs, cleaned up commit message, added test example and CVE ] Signed-off-by: Kees Cook Cc: Linus Torvalds Cc: Andrew Morton Cc: Al Viro Fixes: CVE-2015-1593 Link: http://lkml.kernel.org/r/20150214173350.GA18393@www.outflux.net Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mmap.c | 6 +++--- fs/binfmt_elf.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 5c1ae28825cd..75f9e5d80d02 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -35,12 +35,12 @@ struct __read_mostly va_alignment va_align = { .flags = -1, }; -static unsigned int stack_maxrandom_size(void) +static unsigned long stack_maxrandom_size(void) { - unsigned int max = 0; + unsigned long max = 0; if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { - max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; + max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT; } return max; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3aac8e9edac3..3c4d8797ea9a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -552,11 +552,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, static unsigned long randomize_stack_top(unsigned long stack_top) { - unsigned int random_variable = 0; + unsigned long random_variable = 0; if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { - random_variable = get_random_int() & STACK_RND_MASK; + 
random_variable = (unsigned long) get_random_int(); + random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; } #ifdef CONFIG_STACK_GROWSUP From 813a631f08c7112f12a3da9f63da632c925a8b37 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 10 Feb 2015 10:02:59 +0000 Subject: [PATCH 124/204] MIPS: Export FP functions used by lose_fpu(1) for KVM commit 3ce465e04bfd8de9956d515d6e9587faac3375dc upstream. Export the _save_fp asm function used by the lose_fpu(1) macro to GPL modules so that KVM can make use of it when it is built as a module. This fixes the following build error when CONFIG_KVM=m due to commit f798217dfd03 ("KVM: MIPS: Don't leak FPU/DSP to guest"): ERROR: "_save_fp" [arch/mips/kvm/kvm.ko] undefined! Signed-off-by: James Hogan Fixes: f798217dfd03 (KVM: MIPS: Don't leak FPU/DSP to guest) Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Paul Burton Cc: Gleb Natapov Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9260/ Signed-off-by: Ralf Baechle [james.hogan@imgtec.com: Only export when CPU_R4K_FPU=y prior to v3.16, so as not to break the Octeon build which excludes FPU support. KVM depends on MIPS32r2 anyway.] 
Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/mips_ksyms.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c index 6e58e97fcd39..cedeb5686eb5 100644 --- a/arch/mips/kernel/mips_ksyms.c +++ b/arch/mips/kernel/mips_ksyms.c @@ -14,6 +14,7 @@ #include #include #include +#include extern void *__bzero(void *__s, size_t __count); extern long __strncpy_from_user_nocheck_asm(char *__to, @@ -25,6 +26,13 @@ extern long __strlen_user_asm(const char *s); extern long __strnlen_user_nocheck_asm(const char *s); extern long __strnlen_user_asm(const char *s); +/* + * Core architecture code + */ +#ifdef CONFIG_CPU_R4K_FPU +EXPORT_SYMBOL_GPL(_save_fp); +#endif + /* * String functions */ From 5d3c6d27f48ce3b501c988bd0ab2232a0d4612c6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 18 Jun 2014 13:02:12 +0400 Subject: [PATCH 125/204] libceph: assert both regular and lingering lists in __remove_osd() commit 7c6e6fc53e7335570ed82f77656cedce1502744e upstream. It is important that both regular and lingering requests lists are empty when the OSD is removed. 
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3663a305daf7..d2e44eeb83ec 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -969,6 +969,8 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { dout("__remove_osd %p\n", osd); BUG_ON(!list_empty(&osd->o_requests)); + BUG_ON(!list_empty(&osd->o_linger_requests)); + rb_erase(&osd->o_node, &osdc->osds); list_del_init(&osd->o_osd_lru); ceph_con_close(&osd->o_con); From 54ff4c89a5445fa8f313a338c1cf5478317df154 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 5 Nov 2014 19:33:44 +0300 Subject: [PATCH 126/204] libceph: change from BUG to WARN for __remove_osd() asserts commit cc9f1f518cec079289d11d732efa490306b1ddad upstream. No reason to use BUG_ON for osd request list assertions. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d2e44eeb83ec..3f34304efd8a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -968,8 +968,8 @@ static void put_osd(struct ceph_osd *osd) static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { dout("__remove_osd %p\n", osd); - BUG_ON(!list_empty(&osd->o_requests)); - BUG_ON(!list_empty(&osd->o_linger_requests)); + WARN_ON(!list_empty(&osd->o_requests)); + WARN_ON(!list_empty(&osd->o_linger_requests)); rb_erase(&osd->o_node, &osdc->osds); list_del_init(&osd->o_osd_lru); From 6af167fbe6c42fda5203b8095b92669dd0a687d4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 17 Feb 2015 19:37:15 +0300 Subject: [PATCH 127/204] libceph: fix double __remove_osd() problem commit 7eb71e0351fbb1b242ae70abb7bb17107fe2f792 upstream. 
It turns out it's possible to get __remove_osd() called twice on the same OSD. That doesn't sit well with rb_erase() - depending on the shape of the tree we can get a NULL dereference, a soft lockup or a random crash at some point in the future as we end up touching freed memory. One scenario that I was able to reproduce is as follows: con_fault_finish() osd_reset() ceph_osdc_handle_map() kick_requests() reset_changed_osds() __reset_osd() __remove_osd() __kick_osd_requests() __reset_osd() __remove_osd() <-- !!! A case can be made that osd refcounting is imperfect and reworking it would be a proper resolution, but for now Sage and I decided to fix this by adding a safe guard around __remove_osd(). Fixes: http://tracker.ceph.com/issues/8087 Cc: Sage Weil Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3f34304efd8a..1b51d9216e7e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -967,14 +967,24 @@ static void put_osd(struct ceph_osd *osd) */ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { - dout("__remove_osd %p\n", osd); + dout("%s %p osd%d\n", __func__, osd, osd->o_osd); WARN_ON(!list_empty(&osd->o_requests)); WARN_ON(!list_empty(&osd->o_linger_requests)); - rb_erase(&osd->o_node, &osdc->osds); list_del_init(&osd->o_osd_lru); - ceph_con_close(&osd->o_con); - put_osd(osd); + rb_erase(&osd->o_node, &osdc->osds); + RB_CLEAR_NODE(&osd->o_node); +} + +static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) +{ + dout("%s %p osd%d\n", __func__, osd, osd->o_osd); + + if (!RB_EMPTY_NODE(&osd->o_node)) { + ceph_con_close(&osd->o_con); + __remove_osd(osdc, osd); + put_osd(osd); + } } static void remove_all_osds(struct ceph_osd_client *osdc) @@ -984,7 +994,7 @@ static void 
remove_all_osds(struct ceph_osd_client *osdc) while (!RB_EMPTY_ROOT(&osdc->osds)) { struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), struct ceph_osd, o_node); - __remove_osd(osdc, osd); + remove_osd(osdc, osd); } mutex_unlock(&osdc->request_mutex); } @@ -1014,7 +1024,7 @@ static void remove_old_osds(struct ceph_osd_client *osdc) list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { if (time_before(jiffies, osd->lru_ttl)) break; - __remove_osd(osdc, osd); + remove_osd(osdc, osd); } mutex_unlock(&osdc->request_mutex); } @@ -1029,8 +1039,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { - __remove_osd(osdc, osd); - + remove_osd(osdc, osd); return -ENODEV; } @@ -1612,6 +1621,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) { struct rb_node *p, *n; + dout("%s %p\n", __func__, osdc); for (p = rb_first(&osdc->osds); p; p = n) { struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); From 389fb5fb0b8b812ce0e853d5eca748b08fc73289 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 Mar 2015 14:42:00 -0800 Subject: [PATCH 128/204] Linux 3.10.71 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 402cbb7c27f1..d8b42f71ea5a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 70 +SUBLEVEL = 71 EXTRAVERSION = NAME = TOSSUG Baby Fish From 4f1d2937133c62cd7d5228960e9cdfb175522072 Mon Sep 17 00:00:00 2001 From: Rom Lemarchand Date: Sat, 7 Mar 2015 09:33:41 -0800 Subject: [PATCH 129/204] Revert "Grants system server access to /proc/<pid>/oom_adj for Android applications." This reverts commit aa3305f2ba5976a95637c69c63409fdf007e1414.
Bug: 19636629 Change-Id: I8dcd6a12cc591cb410d6a799c7e875726495504f --- fs/proc/base.c | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 2d00fdde9345..c3834dad09b3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -139,12 +139,6 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = show } ) -/* ANDROID is for special files in /proc. */ -#define ANDROID(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFREG|(MODE)), \ - &proc_##OTYPE##_inode_operations, \ - &proc_##OTYPE##_operations, {}) - /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -1006,35 +1000,6 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf, return err < 0 ? err : count; } -static int oom_adjust_permission(struct inode *inode, int mask) -{ - uid_t uid; - struct task_struct *p; - - p = get_proc_task(inode); - if(p) { - uid = task_uid(p); - put_task_struct(p); - } - - /* - * System Server (uid == 1000) is granted access to oom_adj of all - * android applications (uid > 10000) as and services (uid >= 1000) - */ - if (p && (current_fsuid() == 1000) && (uid >= 1000)) { - if (inode->i_mode >> 6 & mask) { - return 0; - } - } - - /* Fall back to default. 
*/ - return generic_permission(inode, mask); -} - -static const struct inode_operations proc_oom_adj_inode_operations = { - .permission = oom_adjust_permission, -}; - static const struct file_operations proc_oom_adj_operations = { .read = oom_adj_read, .write = oom_adj_write, @@ -2732,7 +2697,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - ANDROID("oom_adj", S_IRUGO|S_IWUSR, oom_adj), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), From 7b4fff3195f2938fdb859471a8ffaa467ffc5167 Mon Sep 17 00:00:00 2001 From: Rom Lemarchand Date: Sat, 7 Mar 2015 09:38:05 -0800 Subject: [PATCH 130/204] proc: make oom adjustment files user read-only Make oom_adj and oom_score_adj user read-only. Bug: 19636629 Change-Id: I055bb172d5b4d3d856e25918f3c5de8edf31e4a3 Signed-off-by: Rom Lemarchand --- fs/proc/base.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index c3834dad09b3..b5553afa778e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2697,8 +2697,8 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), @@ -3053,8 +3053,8 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - 
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), From 062a6b5efee1a4e97fae2569517d35af9ed99148 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 26 Feb 2015 13:47:41 +0000 Subject: [PATCH 131/204] dm-verity: Add modes and emit uevent on corrupted blocks Add a device specific mode to dm-verity for handling corrupted blocks: DM_VERITY_MODE_EIO is the default behavior, where reading a corrupted block results in -EIO. DM_VERITY_MODE_LOGGING only logs corrupted blocks, but does not block the read. DM_VERITY_MODE_RESTART calls kernel_restart when a corrupted block is discovered. Each mode sends a uevent to notify userspace of corruption and allow further recovery actions. Defaults to previous behavior, other modes can be enabled with an optional parameter added to the verity table. Change-Id: Ib72ae6ccb865594d28f3553bdcc5a40b1d7af390 Signed-off-by: Sami Tolvanen --- Documentation/device-mapper/verity.txt | 12 +++- drivers/md/dm-verity.c | 98 +++++++++++++++++++++++--- 2 files changed, 100 insertions(+), 10 deletions(-) diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index 9884681535ee..2929f6b1ccf1 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -10,7 +10,7 @@ Construction Parameters - + This is the type of the on-disk hash format. @@ -62,6 +62,16 @@ Construction Parameters The hexadecimal encoding of the salt value. + + Optional. The mode of operation. + + 0 is the normal mode of operation where a corrupted block will result in an + I/O error. 
+ + 1 is logging mode where corrupted blocks are logged and a uevent is sent to + notify user space. + + Theory of operation =================== diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index b948fd864d45..301a1dd2a08b 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -18,20 +18,36 @@ #include #include +#include #include #define DM_MSG_PREFIX "verity" +#define DM_VERITY_ENV_LENGTH 42 +#define DM_VERITY_ENV_VAR_NAME "VERITY_ERR_BLOCK_NR" + #define DM_VERITY_IO_VEC_INLINE 16 #define DM_VERITY_MEMPOOL_SIZE 4 #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 #define DM_VERITY_MAX_LEVELS 63 +#define DM_VERITY_MAX_CORRUPTED_ERRS 100 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); +enum verity_mode { + DM_VERITY_MODE_EIO = 0, + DM_VERITY_MODE_LOGGING = 1, + DM_VERITY_MODE_RESTART = 2 +}; + +enum verity_block_type { + DM_VERITY_BLOCK_TYPE_DATA, + DM_VERITY_BLOCK_TYPE_METADATA +}; + struct dm_verity { struct dm_dev *data_dev; struct dm_dev *hash_dev; @@ -54,6 +70,8 @@ struct dm_verity { unsigned digest_size; /* digest size for the current hash algorithm */ unsigned shash_descsize;/* the size of temporary space for crypto */ int hash_failed; /* set to 1 if hash of any block failed */ + enum verity_mode mode; /* mode for handling verification errors */ + unsigned corrupted_errs;/* Number of errors for corrupted blocks */ mempool_t *vec_mempool; /* mempool of bio vector */ @@ -179,6 +197,54 @@ static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits); } +/* + * Handle verification errors. 
+ */ +static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, + unsigned long long block) +{ + char verity_env[DM_VERITY_ENV_LENGTH]; + char *envp[] = { verity_env, NULL }; + const char *type_str = ""; + struct mapped_device *md = dm_table_get_md(v->ti->table); + + if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS) + goto out; + + ++v->corrupted_errs; + + switch (type) { + case DM_VERITY_BLOCK_TYPE_DATA: + type_str = "data"; + break; + case DM_VERITY_BLOCK_TYPE_METADATA: + type_str = "metadata"; + break; + default: + BUG(); + } + + DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name, + type_str, block); + + if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) + DMERR("%s: reached maximum errors", v->data_dev->name); + + snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu", + DM_VERITY_ENV_VAR_NAME, type, block); + + kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp); + +out: + if (v->mode == DM_VERITY_MODE_LOGGING) + return 0; + + if (v->mode == DM_VERITY_MODE_RESTART) + kernel_restart("dm-verity device corrupted"); + + return 1; +} + /* * Verify hash of a metadata block pertaining to the specified data block * ("block" argument) at a specified level ("level" argument). 
@@ -256,11 +322,13 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block, goto release_ret_r; } if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - DMERR_LIMIT("metadata block %llu is corrupted", - (unsigned long long)hash_block); v->hash_failed = 1; - r = -EIO; - goto release_ret_r; + + if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, + hash_block)) { + r = -EIO; + goto release_ret_r; + } } else aux->hash_verified = 1; } @@ -377,10 +445,11 @@ static int verity_verify_io(struct dm_verity_io *io) return r; } if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - DMERR_LIMIT("data block %llu is corrupted", - (unsigned long long)(io->block + b)); v->hash_failed = 1; - return -EIO; + + if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, + io->block + b)) + return -EIO; } } BUG_ON(vector != io->io_vec_size); @@ -689,8 +758,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - if (argc != 10) { - ti->error = "Invalid argument count: exactly 10 arguments required"; + if (argc < 10 || argc > 11) { + ti->error = "Invalid argument count: 10-11 arguments required"; r = -EINVAL; goto bad; } @@ -811,6 +880,17 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } } + if (argc > 10) { + if (sscanf(argv[10], "%d%c", &num, &dummy) != 1 || + num < DM_VERITY_MODE_EIO || + num > DM_VERITY_MODE_RESTART) { + ti->error = "Invalid mode"; + r = -EINVAL; + goto bad; + } + v->mode = num; + } + v->hash_per_block_bits = fls((1 << v->hash_dev_block_bits) / v->digest_size) - 1; From e48e8c45925185c02b23ae461671be29c91101d5 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Wed, 11 Mar 2015 14:32:24 -0700 Subject: [PATCH 132/204] mm: reorder can_do_mlock to fix audit denial A userspace call to mmap(MAP_LOCKED) may result in the successful locking of memory while also producing a confusing audit log denial. can_do_mlock checks capable and rlimit. 
If either of these return positive can_do_mlock returns true. The capable check leads to an LSM hook used by apparmor and selinux which produce the audit denial. Reordering so rlimit is checked first eliminates the denial on success, only recording a denial when the lock is unsuccessful as a result of the denial. Signed-off-by: Jeff Vander Stoep Acked-by: Nick Kralevich Cc: Jeff Vander Stoep Cc: Sasha Levin Cc: "Paul E. McKenney" Cc: Rik van Riel Cc: Vlastimil Babka Cc: Paul Cassella Signed-off-by: Andrew Morton --- mm/mlock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index 33861c780070..dd8a0c325a46 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -23,10 +23,10 @@ int can_do_mlock(void) { - if (capable(CAP_IPC_LOCK)) - return 1; if (rlimit(RLIMIT_MEMLOCK) != 0) return 1; + if (capable(CAP_IPC_LOCK)) + return 1; return 0; } EXPORT_SYMBOL(can_do_mlock); From 4be5c6a45a26cdfa7f6ad4a3c01cb69781f37535 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 13 Mar 2015 13:59:47 +0530 Subject: [PATCH 133/204] fs: ecryptfs: readdir: constify actor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit actor is a constant in dir_context struct and because of that we run into following build failure: ---------- fs/ecryptfs/file.c: In function ‘ecryptfs_readdir’: fs/ecryptfs/file.c:130:2: error: assignment of read-only member ‘actor’ make[2]: *** [fs/ecryptfs/file.o] Error 1 make[1]: *** [fs/ecryptfs] Error 2 make: *** [fs] Error 2 ---------- This fix is based on commit: b2497fc3057a([readdir] constify ->actor) Signed-off-by: Amit Pundir --- fs/ecryptfs/file.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 041379a646b3..faa26beb1e4e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -116,18 +116,18 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) int rc; struct file
*lower_file; struct inode *inode; - struct ecryptfs_getdents_callback buf; + struct ecryptfs_getdents_callback buf = { + .dirent = dirent, + .dentry = file->f_path.dentry, + .filldir = filldir, + .filldir_called = 0, + .entries_written = 0, + .ctx.actor = ecryptfs_filldir + }; lower_file = ecryptfs_file_to_lower(file); lower_file->f_pos = file->f_pos; inode = file_inode(file); - memset(&buf, 0, sizeof(buf)); - buf.dirent = dirent; - buf.dentry = file->f_path.dentry; - buf.filldir = filldir; - buf.filldir_called = 0; - buf.entries_written = 0; - buf.ctx.actor = ecryptfs_filldir; rc = iterate_dir(lower_file, &buf.ctx); file->f_pos = lower_file->f_pos; if (rc < 0) From 2d3bc982a1908dd121739918b97192a418717514 Mon Sep 17 00:00:00 2001 From: Frantisek Hrbata Date: Tue, 12 Nov 2013 15:11:24 -0800 Subject: [PATCH 134/204] gcov: move gcov structs definitions to a gcc version specific file Since also the gcov structures(gcov_info, gcov_fn_info, gcov_ctr_info) can change between gcc releases, as shown in gcc 4.7, they cannot be defined in a common header and need to be moved to a specific gcc implementation file. This also requires to make the gcov_info structure opaque for the common code and to introduce simple helpers for accessing data inside gcov_info.
Signed-off-by: Frantisek Hrbata Cc: Jan Stancek Cc: Kees Cook Acked-by: Peter Oberparleiter Cc: Rusty Russell Cc: Arnd Bergmann Cc: Andy Gospodarek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 8cbce376e3fdf4a21f59365aefbb52eac3c2e312) Signed-off-by: Mark Brown --- kernel/gcov/base.c | 26 +++++----- kernel/gcov/fs.c | 27 +++++----- kernel/gcov/gcc_3_4.c | 115 ++++++++++++++++++++++++++++++++++++++++++ kernel/gcov/gcov.h | 65 ++++-------------------- 4 files changed, 153 insertions(+), 80 deletions(-) diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 9b22d03cc581..912576a671d8 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -20,7 +20,6 @@ #include #include "gcov.h" -static struct gcov_info *gcov_info_head; static int gcov_events_enabled; static DEFINE_MUTEX(gcov_lock); @@ -34,7 +33,7 @@ void __gcov_init(struct gcov_info *info) mutex_lock(&gcov_lock); if (gcov_version == 0) { - gcov_version = info->version; + gcov_version = gcov_info_version(info); /* * Printing gcc's version magic may prove useful for debugging * incompatibility reports. @@ -45,8 +44,7 @@ void __gcov_init(struct gcov_info *info) * Add new profiling data structure to list and inform event * listener. */ - info->next = gcov_info_head; - gcov_info_head = info; + gcov_info_link(info); if (gcov_events_enabled) gcov_event(GCOV_ADD, info); mutex_unlock(&gcov_lock); @@ -91,13 +89,15 @@ EXPORT_SYMBOL(__gcov_merge_delta); */ void gcov_enable_events(void) { - struct gcov_info *info; + struct gcov_info *info = NULL; mutex_lock(&gcov_lock); gcov_events_enabled = 1; + /* Perform event callback for previously registered entries. 
*/ - for (info = gcov_info_head; info; info = info->next) + while ((info = gcov_info_next(info))) gcov_event(GCOV_ADD, info); + mutex_unlock(&gcov_lock); } @@ -112,25 +112,23 @@ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, void *data) { struct module *mod = data; - struct gcov_info *info; - struct gcov_info *prev; + struct gcov_info *info = NULL; + struct gcov_info *prev = NULL; if (event != MODULE_STATE_GOING) return NOTIFY_OK; mutex_lock(&gcov_lock); - prev = NULL; + /* Remove entries located in module from linked list. */ - for (info = gcov_info_head; info; info = info->next) { + while ((info = gcov_info_next(info))) { if (within(info, mod->module_core, mod->core_size)) { - if (prev) - prev->next = info->next; - else - gcov_info_head = info->next; + gcov_info_unlink(prev, info); if (gcov_events_enabled) gcov_event(GCOV_REMOVE, info); } else prev = info; } + mutex_unlock(&gcov_lock); return NOTIFY_OK; diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 9bd0934f6c33..27e12ce30e7d 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -242,7 +242,7 @@ static struct gcov_node *get_node_by_name(const char *name) list_for_each_entry(node, &all_head, all) { info = get_node_info(node); - if (info && (strcmp(info->filename, name) == 0)) + if (info && (strcmp(gcov_info_filename(info), name) == 0)) return node; } @@ -279,7 +279,7 @@ static ssize_t gcov_seq_write(struct file *file, const char __user *addr, seq = file->private_data; info = gcov_iter_get_info(seq->private); mutex_lock(&node_lock); - node = get_node_by_name(info->filename); + node = get_node_by_name(gcov_info_filename(info)); if (node) { /* Reset counts or remove node for unloaded modules. 
*/ if (node->num_loaded == 0) @@ -376,8 +376,9 @@ static void add_links(struct gcov_node *node, struct dentry *parent) if (!node->links) return; for (i = 0; i < num; i++) { - target = get_link_target(get_node_info(node)->filename, - &gcov_link[i]); + target = get_link_target( + gcov_info_filename(get_node_info(node)), + &gcov_link[i]); if (!target) goto out_err; basename = strrchr(target, '/'); @@ -576,7 +577,7 @@ static void add_node(struct gcov_info *info) struct gcov_node *parent; struct gcov_node *node; - filename = kstrdup(info->filename, GFP_KERNEL); + filename = kstrdup(gcov_info_filename(info), GFP_KERNEL); if (!filename) return; parent = &root_node; @@ -631,7 +632,7 @@ static void add_info(struct gcov_node *node, struct gcov_info *info) loaded_info = kcalloc(num + 1, sizeof(struct gcov_info *), GFP_KERNEL); if (!loaded_info) { pr_warning("could not add '%s' (out of memory)\n", - info->filename); + gcov_info_filename(info)); return; } memcpy(loaded_info, node->loaded_info, @@ -645,7 +646,8 @@ static void add_info(struct gcov_node *node, struct gcov_info *info) */ if (!gcov_info_is_compatible(node->unloaded_info, info)) { pr_warning("discarding saved data for %s " - "(incompatible version)\n", info->filename); + "(incompatible version)\n", + gcov_info_filename(info)); gcov_info_free(node->unloaded_info); node->unloaded_info = NULL; } @@ -656,7 +658,7 @@ static void add_info(struct gcov_node *node, struct gcov_info *info) */ if (!gcov_info_is_compatible(node->loaded_info[0], info)) { pr_warning("could not add '%s' (incompatible " - "version)\n", info->filename); + "version)\n", gcov_info_filename(info)); kfree(loaded_info); return; } @@ -692,7 +694,8 @@ static void save_info(struct gcov_node *node, struct gcov_info *info) node->unloaded_info = gcov_info_dup(info); if (!node->unloaded_info) { pr_warning("could not save data for '%s' " - "(out of memory)\n", info->filename); + "(out of memory)\n", + gcov_info_filename(info)); } } } @@ -708,7 +711,7 @@ static 
void remove_info(struct gcov_node *node, struct gcov_info *info) i = get_info_index(node, info); if (i < 0) { pr_warning("could not remove '%s' (not found)\n", - info->filename); + gcov_info_filename(info)); return; } if (gcov_persist) @@ -735,7 +738,7 @@ void gcov_event(enum gcov_action action, struct gcov_info *info) struct gcov_node *node; mutex_lock(&node_lock); - node = get_node_by_name(info->filename); + node = get_node_by_name(gcov_info_filename(info)); switch (action) { case GCOV_ADD: if (node) @@ -748,7 +751,7 @@ void gcov_event(enum gcov_action action, struct gcov_info *info) remove_info(node, info); else { pr_warning("could not remove '%s' (not found)\n", - info->filename); + gcov_info_filename(info)); } break; } diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c index ae5bb4260033..27bc88a35013 100644 --- a/kernel/gcov/gcc_3_4.c +++ b/kernel/gcov/gcc_3_4.c @@ -21,6 +21,121 @@ #include #include "gcov.h" +#define GCOV_COUNTERS 5 + +static struct gcov_info *gcov_info_head; + +/** + * struct gcov_fn_info - profiling meta data per function + * @ident: object file-unique function identifier + * @checksum: function checksum + * @n_ctrs: number of values per counter type belonging to this function + * + * This data is generated by gcc during compilation and doesn't change + * at run-time. + */ +struct gcov_fn_info { + unsigned int ident; + unsigned int checksum; + unsigned int n_ctrs[0]; +}; + +/** + * struct gcov_ctr_info - profiling data per counter type + * @num: number of counter values for this type + * @values: array of counter values for this type + * @merge: merge function for counter values of this type (unused) + * + * This data is generated by gcc during compilation and doesn't change + * at run-time with the exception of the values array. 
+ */ +struct gcov_ctr_info { + unsigned int num; + gcov_type *values; + void (*merge)(gcov_type *, unsigned int); +}; + +/** + * struct gcov_info - profiling data per object file + * @version: gcov version magic indicating the gcc version used for compilation + * @next: list head for a singly-linked list + * @stamp: time stamp + * @filename: name of the associated gcov data file + * @n_functions: number of instrumented functions + * @functions: function data + * @ctr_mask: mask specifying which counter types are active + * @counts: counter data per counter type + * + * This data is generated by gcc during compilation and doesn't change + * at run-time with the exception of the next pointer. + */ +struct gcov_info { + unsigned int version; + struct gcov_info *next; + unsigned int stamp; + const char *filename; + unsigned int n_functions; + const struct gcov_fn_info *functions; + unsigned int ctr_mask; + struct gcov_ctr_info counts[0]; +}; + +/** + * gcov_info_filename - return info filename + * @info: profiling data set + */ +const char *gcov_info_filename(struct gcov_info *info) +{ + return info->filename; +} + +/** + * gcov_info_version - return info version + * @info: profiling data set + */ +unsigned int gcov_info_version(struct gcov_info *info) +{ + return info->version; +} + +/** + * gcov_info_next - return next profiling data set + * @info: profiling data set + * + * Returns next gcov_info following @info or first gcov_info in the chain if + * @info is %NULL. 
+ */ +struct gcov_info *gcov_info_next(struct gcov_info *info) +{ + if (!info) + return gcov_info_head; + + return info->next; +} + +/** + * gcov_info_link - link/add profiling data set to the list + * @info: profiling data set + */ +void gcov_info_link(struct gcov_info *info) +{ + info->next = gcov_info_head; + gcov_info_head = info; +} + +/** + * gcov_info_unlink - unlink/remove profiling data set from the list + * @prev: previous profiling data set + * @info: profiling data set + */ +void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info) +{ + if (prev) + prev->next = info->next; + else + gcov_info_head = info->next; +} + /* Symbolic links to be created for each profiling data file. */ const struct gcov_link gcov_link[] = { { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h index 060073ebf7a6..92c8e22a29ed 100644 --- a/kernel/gcov/gcov.h +++ b/kernel/gcov/gcov.h @@ -21,7 +21,6 @@ * gcc and need to be kept as close to the original definition as possible to * remain compatible. */ -#define GCOV_COUNTERS 5 #define GCOV_DATA_MAGIC ((unsigned int) 0x67636461) #define GCOV_TAG_FUNCTION ((unsigned int) 0x01000000) #define GCOV_TAG_COUNTER_BASE ((unsigned int) 0x01a10000) @@ -34,60 +33,18 @@ typedef long gcov_type; typedef long long gcov_type; #endif -/** - * struct gcov_fn_info - profiling meta data per function - * @ident: object file-unique function identifier - * @checksum: function checksum - * @n_ctrs: number of values per counter type belonging to this function - * - * This data is generated by gcc during compilation and doesn't change - * at run-time. - */ -struct gcov_fn_info { - unsigned int ident; - unsigned int checksum; - unsigned int n_ctrs[0]; -}; +/* Opaque gcov_info. The gcov structures can change as for example in gcc 4.7 so + * we cannot use full definition here and they need to be placed in gcc specific + * implementation of gcov. 
This also means no direct access to the members in + * generic code and usage of the interface below.*/ +struct gcov_info; -/** - * struct gcov_ctr_info - profiling data per counter type - * @num: number of counter values for this type - * @values: array of counter values for this type - * @merge: merge function for counter values of this type (unused) - * - * This data is generated by gcc during compilation and doesn't change - * at run-time with the exception of the values array. - */ -struct gcov_ctr_info { - unsigned int num; - gcov_type *values; - void (*merge)(gcov_type *, unsigned int); -}; - -/** - * struct gcov_info - profiling data per object file - * @version: gcov version magic indicating the gcc version used for compilation - * @next: list head for a singly-linked list - * @stamp: time stamp - * @filename: name of the associated gcov data file - * @n_functions: number of instrumented functions - * @functions: function data - * @ctr_mask: mask specifying which counter types are active - * @counts: counter data per counter type - * - * This data is generated by gcc during compilation and doesn't change - * at run-time with the exception of the next pointer. - */ -struct gcov_info { - unsigned int version; - struct gcov_info *next; - unsigned int stamp; - const char *filename; - unsigned int n_functions; - const struct gcov_fn_info *functions; - unsigned int ctr_mask; - struct gcov_ctr_info counts[0]; -}; +/* Interface to access gcov_info data */ +const char *gcov_info_filename(struct gcov_info *info); +unsigned int gcov_info_version(struct gcov_info *info); +struct gcov_info *gcov_info_next(struct gcov_info *info); +void gcov_info_link(struct gcov_info *info); +void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info); /* Base interface. 
*/ enum gcov_action { From 8ad15c3280f52cbe9782c113edb49cdc5a8b67d5 Mon Sep 17 00:00:00 2001 From: Frantisek Hrbata Date: Tue, 12 Nov 2013 15:11:26 -0800 Subject: [PATCH 135/204] gcov: add support for gcc 4.7 gcov format The gcov in-memory format changed in gcc 4.7. The biggest change, which requires this special implementation, is that gcov_info no longer contains array of counters for each counter type for all functions and gcov_fn_info is not used for mapping of function's counters to these arrays(offset). Now each gcov_fn_info contains its counters, which makes things a little bit easier. This is heavily based on the previous gcc_3_4.c implementation and patches provided by Peter Oberparleiter. Specially the buffer gcda implementation for iterator. [akpm@linux-foundation.org: use kmemdup() and kcalloc()] [oberpar@linux.vnet.ibm.com: gcc_4_7.c needs vmalloc.h] Signed-off-by: Frantisek Hrbata Cc: Jan Stancek Cc: Kees Cook Reviewed-by: Peter Oberparleiter Cc: Rusty Russell Cc: Arnd Bergmann Cc: Andy Gospodarek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 5f41ea0386a53414d688cfcaa321a78310e5f7c1) Signed-off-by: Mark Brown --- kernel/gcov/base.c | 6 + kernel/gcov/gcc_4_7.c | 560 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 566 insertions(+) create mode 100644 kernel/gcov/gcc_4_7.c diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 912576a671d8..f45b75b713c0 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -79,6 +79,12 @@ void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters) } EXPORT_SYMBOL(__gcov_merge_delta); +void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters) +{ + /* Unused.
*/ +} +EXPORT_SYMBOL(__gcov_merge_ior); + /** * gcov_enable_events - enable event reporting through gcov_event() * diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c new file mode 100644 index 000000000000..2c6e4631c814 --- /dev/null +++ b/kernel/gcov/gcc_4_7.c @@ -0,0 +1,560 @@ +/* + * This code provides functions to handle gcc's profiling data format + * introduced with gcc 4.7. + * + * This file is based heavily on gcc_3_4.c file. + * + * For a better understanding, refer to gcc source: + * gcc/gcov-io.h + * libgcc/libgcov.c + * + * Uses gcc-internal data definitions. + */ + +#include +#include +#include +#include +#include +#include "gcov.h" + +#define GCOV_COUNTERS 8 +#define GCOV_TAG_FUNCTION_LENGTH 3 + +static struct gcov_info *gcov_info_head; + +/** + * struct gcov_ctr_info - information about counters for a single function + * @num: number of counter values for this type + * @values: array of counter values for this type + * + * This data is generated by gcc during compilation and doesn't change + * at run-time with the exception of the values array. + */ +struct gcov_ctr_info { + unsigned int num; + gcov_type *values; +}; + +/** + * struct gcov_fn_info - profiling meta data per function + * @key: comdat key + * @ident: unique ident of function + * @lineno_checksum: function lineo_checksum + * @cfg_checksum: function cfg checksum + * @ctrs: instrumented counters + * + * This data is generated by gcc during compilation and doesn't change + * at run-time. + * + * Information about a single function. This uses the trailing array + * idiom. The number of counters is determined from the merge pointer + * array in gcov_info. The key is used to detect which of a set of + * comdat functions was selected -- it points to the gcov_info object + * of the object file containing the selected comdat function. 
+ */ +struct gcov_fn_info { + const struct gcov_info *key; + unsigned int ident; + unsigned int lineno_checksum; + unsigned int cfg_checksum; + struct gcov_ctr_info ctrs[0]; +}; + +/** + * struct gcov_info - profiling data per object file + * @version: gcov version magic indicating the gcc version used for compilation + * @next: list head for a singly-linked list + * @stamp: uniquifying time stamp + * @filename: name of the associated gcov data file + * @merge: merge functions (null for unused counter type) + * @n_functions: number of instrumented functions + * @functions: pointer to pointers to function information + * + * This data is generated by gcc during compilation and doesn't change + * at run-time with the exception of the next pointer. + */ +struct gcov_info { + unsigned int version; + struct gcov_info *next; + unsigned int stamp; + const char *filename; + void (*merge[GCOV_COUNTERS])(gcov_type *, unsigned int); + unsigned int n_functions; + struct gcov_fn_info **functions; +}; + +/** + * gcov_info_filename - return info filename + * @info: profiling data set + */ +const char *gcov_info_filename(struct gcov_info *info) +{ + return info->filename; +} + +/** + * gcov_info_version - return info version + * @info: profiling data set + */ +unsigned int gcov_info_version(struct gcov_info *info) +{ + return info->version; +} + +/** + * gcov_info_next - return next profiling data set + * @info: profiling data set + * + * Returns next gcov_info following @info or first gcov_info in the chain if + * @info is %NULL. 
+ */ +struct gcov_info *gcov_info_next(struct gcov_info *info) +{ + if (!info) + return gcov_info_head; + + return info->next; +} + +/** + * gcov_info_link - link/add profiling data set to the list + * @info: profiling data set + */ +void gcov_info_link(struct gcov_info *info) +{ + info->next = gcov_info_head; + gcov_info_head = info; +} + +/** + * gcov_info_unlink - unlink/remove profiling data set from the list + * @prev: previous profiling data set + * @info: profiling data set + */ +void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info) +{ + if (prev) + prev->next = info->next; + else + gcov_info_head = info->next; +} + +/* Symbolic links to be created for each profiling data file. */ +const struct gcov_link gcov_link[] = { + { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ + { 0, NULL}, +}; + +/* + * Determine whether a counter is active. Doesn't change at run-time. + */ +static int counter_active(struct gcov_info *info, unsigned int type) +{ + return info->merge[type] ? 1 : 0; +} + +/* Determine number of active counters. Based on gcc magic. 
*/ +static unsigned int num_counter_active(struct gcov_info *info) +{ + unsigned int i; + unsigned int result = 0; + + for (i = 0; i < GCOV_COUNTERS; i++) { + if (counter_active(info, i)) + result++; + } + return result; +} + +/** + * gcov_info_reset - reset profiling data to zero + * @info: profiling data set + */ +void gcov_info_reset(struct gcov_info *info) +{ + struct gcov_ctr_info *ci_ptr; + unsigned int fi_idx; + unsigned int ct_idx; + + for (fi_idx = 0; fi_idx < info->n_functions; fi_idx++) { + ci_ptr = info->functions[fi_idx]->ctrs; + + for (ct_idx = 0; ct_idx < GCOV_COUNTERS; ct_idx++) { + if (!counter_active(info, ct_idx)) + continue; + + memset(ci_ptr->values, 0, + sizeof(gcov_type) * ci_ptr->num); + ci_ptr++; + } + } +} + +/** + * gcov_info_is_compatible - check if profiling data can be added + * @info1: first profiling data set + * @info2: second profiling data set + * + * Returns non-zero if profiling data can be added, zero otherwise. + */ +int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2) +{ + return (info1->stamp == info2->stamp); +} + +/** + * gcov_info_add - add up profiling data + * @dest: profiling data set to which data is added + * @source: profiling data set which is added + * + * Adds profiling counts of @source to @dest. 
+ */ +void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) +{ + struct gcov_ctr_info *dci_ptr; + struct gcov_ctr_info *sci_ptr; + unsigned int fi_idx; + unsigned int ct_idx; + unsigned int val_idx; + + for (fi_idx = 0; fi_idx < src->n_functions; fi_idx++) { + dci_ptr = dst->functions[fi_idx]->ctrs; + sci_ptr = src->functions[fi_idx]->ctrs; + + for (ct_idx = 0; ct_idx < GCOV_COUNTERS; ct_idx++) { + if (!counter_active(src, ct_idx)) + continue; + + for (val_idx = 0; val_idx < sci_ptr->num; val_idx++) + dci_ptr->values[val_idx] += + sci_ptr->values[val_idx]; + + dci_ptr++; + sci_ptr++; + } + } +} + +/** + * gcov_info_dup - duplicate profiling data set + * @info: profiling data set to duplicate + * + * Return newly allocated duplicate on success, %NULL on error. + */ +struct gcov_info *gcov_info_dup(struct gcov_info *info) +{ + struct gcov_info *dup; + struct gcov_ctr_info *dci_ptr; /* dst counter info */ + struct gcov_ctr_info *sci_ptr; /* src counter info */ + unsigned int active; + unsigned int fi_idx; /* function info idx */ + unsigned int ct_idx; /* counter type idx */ + size_t fi_size; /* function info size */ + size_t cv_size; /* counter values size */ + + dup = kmemdup(info, sizeof(*dup), GFP_KERNEL); + if (!dup) + return NULL; + + dup->next = NULL; + dup->filename = NULL; + dup->functions = NULL; + + dup->filename = kstrdup(info->filename, GFP_KERNEL); + if (!dup->filename) + goto err_free; + + dup->functions = kcalloc(info->n_functions, + sizeof(struct gcov_fn_info *), GFP_KERNEL); + if (!dup->functions) + goto err_free; + + active = num_counter_active(info); + fi_size = sizeof(struct gcov_fn_info); + fi_size += sizeof(struct gcov_ctr_info) * active; + + for (fi_idx = 0; fi_idx < info->n_functions; fi_idx++) { + dup->functions[fi_idx] = kzalloc(fi_size, GFP_KERNEL); + if (!dup->functions[fi_idx]) + goto err_free; + + *(dup->functions[fi_idx]) = *(info->functions[fi_idx]); + + sci_ptr = info->functions[fi_idx]->ctrs; + dci_ptr = 
dup->functions[fi_idx]->ctrs; + + for (ct_idx = 0; ct_idx < active; ct_idx++) { + + cv_size = sizeof(gcov_type) * sci_ptr->num; + + dci_ptr->values = vmalloc(cv_size); + + if (!dci_ptr->values) + goto err_free; + + dci_ptr->num = sci_ptr->num; + memcpy(dci_ptr->values, sci_ptr->values, cv_size); + + sci_ptr++; + dci_ptr++; + } + } + + return dup; +err_free: + gcov_info_free(dup); + return NULL; +} + +/** + * gcov_info_free - release memory for profiling data set duplicate + * @info: profiling data set duplicate to free + */ +void gcov_info_free(struct gcov_info *info) +{ + unsigned int active; + unsigned int fi_idx; + unsigned int ct_idx; + struct gcov_ctr_info *ci_ptr; + + if (!info->functions) + goto free_info; + + active = num_counter_active(info); + + for (fi_idx = 0; fi_idx < info->n_functions; fi_idx++) { + if (!info->functions[fi_idx]) + continue; + + ci_ptr = info->functions[fi_idx]->ctrs; + + for (ct_idx = 0; ct_idx < active; ct_idx++, ci_ptr++) + vfree(ci_ptr->values); + + kfree(info->functions[fi_idx]); + } + +free_info: + kfree(info->functions); + kfree(info->filename); + kfree(info); +} + +#define ITER_STRIDE PAGE_SIZE + +/** + * struct gcov_iterator - specifies current file position in logical records + * @info: associated profiling data + * @buffer: buffer containing file data + * @size: size of buffer + * @pos: current position in file + */ +struct gcov_iterator { + struct gcov_info *info; + void *buffer; + size_t size; + loff_t pos; +}; + +/** + * store_gcov_u32 - store 32 bit number in gcov format to buffer + * @buffer: target buffer or NULL + * @off: offset into the buffer + * @v: value to be stored + * + * Number format defined by gcc: numbers are recorded in the 32 bit + * unsigned binary form of the endianness of the machine generating the + * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't + * store anything. 
+ */ +static size_t store_gcov_u32(void *buffer, size_t off, u32 v) +{ + u32 *data; + + if (buffer) { + data = buffer + off; + *data = v; + } + + return sizeof(*data); +} + +/** + * store_gcov_u64 - store 64 bit number in gcov format to buffer + * @buffer: target buffer or NULL + * @off: offset into the buffer + * @v: value to be stored + * + * Number format defined by gcc: numbers are recorded in the 32 bit + * unsigned binary form of the endianness of the machine generating the + * file. 64 bit numbers are stored as two 32 bit numbers, the low part + * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store + * anything. + */ +static size_t store_gcov_u64(void *buffer, size_t off, u64 v) +{ + u32 *data; + + if (buffer) { + data = buffer + off; + + data[0] = (v & 0xffffffffUL); + data[1] = (v >> 32); + } + + return sizeof(*data) * 2; +} + +/** + * convert_to_gcda - convert profiling data set to gcda file format + * @buffer: the buffer to store file data or %NULL if no data should be stored + * @info: profiling data set to be converted + * + * Returns the number of bytes that were/would have been stored into the buffer. + */ +static size_t convert_to_gcda(char *buffer, struct gcov_info *info) +{ + struct gcov_fn_info *fi_ptr; + struct gcov_ctr_info *ci_ptr; + unsigned int fi_idx; + unsigned int ct_idx; + unsigned int cv_idx; + size_t pos = 0; + + /* File header. */ + pos += store_gcov_u32(buffer, pos, GCOV_DATA_MAGIC); + pos += store_gcov_u32(buffer, pos, info->version); + pos += store_gcov_u32(buffer, pos, info->stamp); + + for (fi_idx = 0; fi_idx < info->n_functions; fi_idx++) { + fi_ptr = info->functions[fi_idx]; + + /* Function record. 
*/ + pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION); + pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION_LENGTH); + pos += store_gcov_u32(buffer, pos, fi_ptr->ident); + pos += store_gcov_u32(buffer, pos, fi_ptr->lineno_checksum); + pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); + + ci_ptr = fi_ptr->ctrs; + + for (ct_idx = 0; ct_idx < GCOV_COUNTERS; ct_idx++) { + if (!counter_active(info, ct_idx)) + continue; + + /* Counter record. */ + pos += store_gcov_u32(buffer, pos, + GCOV_TAG_FOR_COUNTER(ct_idx)); + pos += store_gcov_u32(buffer, pos, ci_ptr->num * 2); + + for (cv_idx = 0; cv_idx < ci_ptr->num; cv_idx++) { + pos += store_gcov_u64(buffer, pos, + ci_ptr->values[cv_idx]); + } + + ci_ptr++; + } + } + + return pos; +} + +/** + * gcov_iter_new - allocate and initialize profiling data iterator + * @info: profiling data set to be iterated + * + * Return file iterator on success, %NULL otherwise. + */ +struct gcov_iterator *gcov_iter_new(struct gcov_info *info) +{ + struct gcov_iterator *iter; + + iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL); + if (!iter) + goto err_free; + + iter->info = info; + /* Dry-run to get the actual buffer size. 
*/ + iter->size = convert_to_gcda(NULL, info); + iter->buffer = vmalloc(iter->size); + if (!iter->buffer) + goto err_free; + + convert_to_gcda(iter->buffer, info); + + return iter; + +err_free: + kfree(iter); + return NULL; +} + + +/** + * gcov_iter_free - release memory for file iterator + * @iter: file iterator + */ +void gcov_iter_free(struct gcov_iterator *iter) +{ + vfree(iter->buffer); + kfree(iter); +} + +/** + * gcov_iter_get_info - return profiling data set for given file iterator + * @iter: file iterator + */ +struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter) +{ + return iter->info; +} + +/** + * gcov_iter_start - reset file iterator to starting position + * @iter: file iterator + */ +void gcov_iter_start(struct gcov_iterator *iter) +{ + iter->pos = 0; +} + +/** + * gcov_iter_next - advance file iterator to next logical record + * @iter: file iterator + * + * Return zero if new position is valid, non-zero if iterator has reached end. + */ +int gcov_iter_next(struct gcov_iterator *iter) +{ + if (iter->pos < iter->size) + iter->pos += ITER_STRIDE; + + if (iter->pos >= iter->size) + return -EINVAL; + + return 0; +} + +/** + * gcov_iter_write - write data for current pos to seq_file + * @iter: file iterator + * @seq: seq_file handle + * + * Return zero on success, non-zero otherwise. + */ +int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq) +{ + size_t len; + + if (iter->pos >= iter->size) + return -EINVAL; + + len = ITER_STRIDE; + if (iter->pos + len > iter->size) + len = iter->size - iter->pos; + + seq_write(seq, iter->buffer + iter->pos, len); + + return 0; +} From 20fde41812594096434ce2cc40d861d79f82bc36 Mon Sep 17 00:00:00 2001 From: Yuan Pengfei Date: Tue, 10 Jun 2014 15:18:39 -0700 Subject: [PATCH 136/204] gcov: add support for GCC 4.9 This patch handles the gcov-related changes in GCC 4.9: A new counter (time profile) is added. The total number is 9 now.
A new profile merge function __gcov_merge_time_profile is added. See gcc/gcov-io.h and libgcc/libgcov-merge.c For the first change, the layout of struct gcov_info is affected. For the second one, a dummy function is added to kernel/gcov/base.c similarly. Signed-off-by: Yuan Pengfei Acked-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit a992bf836f9c3039a16f4bd068d161c86c6c3e2c) Signed-off-by: Mark Brown --- kernel/gcov/base.c | 6 ++++++ kernel/gcov/gcc_4_7.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index f45b75b713c0..b358a802fd18 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -85,6 +85,12 @@ void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters) } EXPORT_SYMBOL(__gcov_merge_ior); +void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_time_profile); + /** * gcov_enable_events - enable event reporting through gcov_event() * diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 2c6e4631c814..826ba9fb5e32 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -18,7 +18,12 @@ #include #include "gcov.h" +#if __GNUC__ == 4 && __GNUC_MINOR__ >= 9 +#define GCOV_COUNTERS 9 +#else #define GCOV_COUNTERS 8 +#endif + #define GCOV_TAG_FUNCTION_LENGTH 3 static struct gcov_info *gcov_info_head; From bbdc58be70cfcac7c1bcdcdf44bebf664c3b3d22 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Thu, 18 Sep 2014 20:39:15 +0100 Subject: [PATCH 137/204] ARM: 8153/1: Enable gcov support on the ARM architecture Enable gcov support for ARM based on original patches by David Singleton and George G. Davis Riku - updated to patch to current mainline kernel. The patch has been submitted in 2010, 2012 - for symmetry, now in 2014 too. 
https://lwn.net/Articles/390419/ http://marc.info/?l=linux-arm-kernel&m=133823081813044 v2: remove arch/arm/kernel from gcov disabled files Cc: Andrey Ryabinin Cc: Naresh Kamboju Acked-by: Arnd Bergmann Signed-off-by: Riku Voipio Signed-off-by: Vincent Sanders Signed-off-by: Russell King (cherry picked from commit 75c349062a666deab57bdca8b5bd0779c9fb0d58) Signed-off-by: Mark Brown --- arch/arm/boot/bootp/Makefile | 2 ++ arch/arm/boot/compressed/Makefile | 2 ++ kernel/gcov/Kconfig | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/bootp/Makefile b/arch/arm/boot/bootp/Makefile index c394e305447c..5761f0039133 100644 --- a/arch/arm/boot/bootp/Makefile +++ b/arch/arm/boot/bootp/Makefile @@ -5,6 +5,8 @@ # architecture-specific flags and dependencies. # +GCOV_PROFILE := n + LDFLAGS_bootp :=-p --no-undefined -X \ --defsym initrd_phys=$(INITRD_PHYS) \ --defsym params_phys=$(PARAMS_PHYS) -T diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 120b83bfde20..e1fc76ee441b 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -37,6 +37,8 @@ ifeq ($(CONFIG_ARM_VIRT_EXT),y) OBJS += hyp-stub.o endif +GCOV_PROFILE := n + # # Architecture dependencies # diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index d4da55d1fb65..fceac858cafe 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -35,7 +35,7 @@ config GCOV_KERNEL config GCOV_PROFILE_ALL bool "Profile entire Kernel" depends on GCOV_KERNEL - depends on SUPERH || S390 || X86 || PPC || MICROBLAZE + depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM default n ---help--- This options activates profiling for the entire kernel. From addd567d85e1863dfd8bcb733fffb271af206440 Mon Sep 17 00:00:00 2001 From: Riku Voipio Date: Wed, 29 Oct 2014 14:50:24 -0700 Subject: [PATCH 138/204] gcov: add ARM64 to GCOV_PROFILE_ALL Following up the arm testing of gcov, turns out gcov on ARM64 works fine as well. 
Only change needed is adding ARM64 to Kconfig depends. Tested with qemu and mach-virt Signed-off-by: Riku Voipio Acked-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit f601de204465048bdf0d5537f630729622ebc3a6) Signed-off-by: Mark Brown --- kernel/gcov/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index fceac858cafe..9da70574dbcc 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -35,7 +35,7 @@ config GCOV_KERNEL config GCOV_PROFILE_ALL bool "Profile entire Kernel" depends on GCOV_KERNEL - depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM + depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM || ARM64 default n ---help--- This options activates profiling for the entire kernel. From 9c6152e67952d3645e9379b6b3e120e3308caca0 Mon Sep 17 00:00:00 2001 From: Frantisek Hrbata Date: Tue, 12 Nov 2013 15:11:27 -0800 Subject: [PATCH 139/204] gcov: compile specific gcov implementation based on gcc version Compile the correct gcov implementation file for the specific gcc version. 
Signed-off-by: Frantisek Hrbata Cc: Jan Stancek Cc: Kees Cook Acked-by: Peter Oberparleiter Cc: Rusty Russell Cc: Arnd Bergmann Cc: Andy Gospodarek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 17c568d60af5a810208baf116dc174a2005c6c3e) Signed-off-by: Mark Brown --- Documentation/gcov.txt | 4 ++++ kernel/gcov/Kconfig | 30 ++++++++++++++++++++++++++++++ kernel/gcov/Makefile | 32 +++++++++++++++++++++++++++++++- 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt index e7ca6478cd93..7b727783db7e 100644 --- a/Documentation/gcov.txt +++ b/Documentation/gcov.txt @@ -50,6 +50,10 @@ Configure the kernel with: CONFIG_DEBUG_FS=y CONFIG_GCOV_KERNEL=y +select the gcc's gcov format, default is autodetect based on gcc version: + + CONFIG_GCOV_FORMAT_AUTODETECT=y + and to get coverage data for the entire kernel: CONFIG_GCOV_PROFILE_ALL=y diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 9da70574dbcc..3b7408759bdf 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -46,4 +46,34 @@ config GCOV_PROFILE_ALL larger and run slower. Also be sure to exclude files from profiling which are not linked to the kernel image to prevent linker errors. +choice + prompt "Specify GCOV format" + depends on GCOV_KERNEL + default GCOV_FORMAT_AUTODETECT + ---help--- + The gcov format is usually determined by the GCC version, but there are + exceptions where format changes are integrated in lower-version GCCs. + In such a case use this option to adjust the format used in the kernel + accordingly. + + If unsure, choose "Autodetect". + +config GCOV_FORMAT_AUTODETECT + bool "Autodetect" + ---help--- + Select this option to use the format that corresponds to your GCC + version. + +config GCOV_FORMAT_3_4 + bool "GCC 3.4 format" + ---help--- + Select this option to use the format defined by GCC 3.4. 
+ +config GCOV_FORMAT_4_7 + bool "GCC 4.7 format" + ---help--- + Select this option to use the format defined by GCC 4.7. + +endchoice + endmenu diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index e97ca59e2520..52aa7e8de927 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -1,3 +1,33 @@ ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' -obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o +# if-lt +# Usage VAR := $(call if-lt, $(a), $(b)) +# Returns 1 if (a < b) +if-lt = $(shell [ $(1) -lt $(2) ] && echo 1) + +ifeq ($(CONFIG_GCOV_FORMAT_3_4),y) + cc-ver := 0304 +else ifeq ($(CONFIG_GCOV_FORMAT_4_7),y) + cc-ver := 0407 +else +# Use cc-version if available, otherwise set 0 +# +# scripts/Kbuild.include, which contains cc-version function, is not included +# during make clean "make -f scripts/Makefile.clean obj=kernel/gcov" +# Meaning cc-ver is empty causing if-lt test to fail with +# "/bin/sh: line 0: [: -lt: unary operator expected" error mesage. +# This has no affect on the clean phase, but the error message could be +# confusing/annoying. So this dummy workaround sets cc-ver to zero if cc-version +# is not available. We can probably move if-lt to Kbuild.include, so it's also +# not defined during clean or to include Kbuild.include in +# scripts/Makefile.clean. But the following workaround seems least invasive. + cc-ver := $(if $(call cc-version),$(call cc-version),0) +endif + +obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o + +ifeq ($(call if-lt, $(cc-ver), 0407),1) + obj-$(CONFIG_GCOV_KERNEL) += gcc_3_4.o +else + obj-$(CONFIG_GCOV_KERNEL) += gcc_4_7.o +endif From eb05fc8193e9632a1910c2a2dedbcaf88a5a361f Mon Sep 17 00:00:00 2001 From: Frantisek Hrbata Date: Mon, 14 Oct 2013 18:08:46 +1030 Subject: [PATCH 140/204] kernel: add support for init_array constructors This adds the .init_array section as yet another section with constructors. 
This is needed because gcc could add __gcov_init calls to .init_array or .ctors section, depending on gcc (and binutils) version . v2: - reuse mod->ctors for .init_array section for modules, because gcc uses .ctors or .init_array, but not both at the same time v3: - fail to load if that does happen somehow. Signed-off-by: Frantisek Hrbata Signed-off-by: Rusty Russell --- include/asm-generic/vmlinux.lds.h | 1 + kernel/module.c | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eb58d2d7d971..4b0f0121d0ab 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -495,6 +495,7 @@ #define KERNEL_CTORS() . = ALIGN(8); \ VMLINUX_SYMBOL(__ctors_start) = .; \ *(.ctors) \ + *(.init_array) \ VMLINUX_SYMBOL(__ctors_end) = .; #else #define KERNEL_CTORS() diff --git a/kernel/module.c b/kernel/module.c index cab4bce49c23..4cdf280c892b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2723,7 +2723,7 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) return 0; } -static void find_module_sections(struct module *mod, struct load_info *info) +static int find_module_sections(struct module *mod, struct load_info *info) { mod->kp = section_objs(info, "__param", sizeof(*mod->kp), &mod->num_kp); @@ -2753,6 +2753,18 @@ static void find_module_sections(struct module *mod, struct load_info *info) #ifdef CONFIG_CONSTRUCTORS mod->ctors = section_objs(info, ".ctors", sizeof(*mod->ctors), &mod->num_ctors); + if (!mod->ctors) + mod->ctors = section_objs(info, ".init_array", + sizeof(*mod->ctors), &mod->num_ctors); + else if (find_sec(info, ".init_array")) { + /* + * This shouldn't happen with same compiler and binutils + * building all parts of the module. 
+ */ + printk(KERN_WARNING "%s: has both .ctors and .init_array.\n", + mod->name); + return -EINVAL; + } #endif #ifdef CONFIG_TRACEPOINTS @@ -2791,6 +2803,8 @@ static void find_module_sections(struct module *mod, struct load_info *info) info->debug = section_objs(info, "__verbose", sizeof(*info->debug), &info->num_debug); + + return 0; } static int move_module(struct module *mod, struct load_info *info) @@ -3244,7 +3258,9 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Now we've got everything in the final locations, we can * find optional sections. */ - find_module_sections(mod, info); + err = find_module_sections(mod, info); + if (err) + goto free_unload; err = check_module_license_and_versions(mod); if (err) From 3d95e2f8f6c3666ffb3f5aee405ecc15a14ea570 Mon Sep 17 00:00:00 2001 From: Riku Voipio Date: Fri, 12 Dec 2014 16:57:44 -0800 Subject: [PATCH 141/204] gcov: enable GCOV_PROFILE_ALL from ARCH Kconfigs Following the suggestions from Andrew Morton and Stephen Rothwell, don't expand the ARCH list in kernel/gcov/Kconfig. Instead, define an ARCH_HAS_GCOV_PROFILE_ALL bool which architectures can enable. Set ARCH_HAS_GCOV_PROFILE_ALL on architectures where it was previously allowed + ARM64 which I tested.
Signed-off-by: Riku Voipio Cc: Peter Oberparleiter Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 957e3facd147510f2cf8780e38606f1d707f0e33) Signed-off-by: Mark Brown Conflicts: arch/arm/Kconfig arch/arm64/Kconfig arch/microblaze/Kconfig arch/s390/Kconfig arch/x86/Kconfig --- arch/arm/Kconfig | 1 + arch/arm64/Kconfig | 1 + arch/microblaze/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/x86/Kconfig | 1 + kernel/gcov/Kconfig | 5 ++++- 8 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 136f263ed47b..143846a2c517 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU select CPU_PM if (SUSPEND || CPU_IDLE) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..177723e0fee0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index d22a4ecffff4..2199bfbd4050 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -1,5 +1,6 @@ config MICROBLAZE def_bool y + select ARCH_HAS_GCOV_PROFILE_ALL select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_FUNCTION_TRACER diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c33e3ad2c8fd..661d77c7fd55 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -126,6 +126,7 @@ config PPC select HAVE_BPF_JIT if PPC64 select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG + 
select ARCH_HAS_GCOV_PROFILE_ALL select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE select GENERIC_TIME_VSYSCALL_OLD diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index da183c5a103c..b0a78940b7c4 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -62,6 +62,7 @@ config S390 def_bool y select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 8c868cf2cf93..69c567f98f82 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -15,6 +15,7 @@ config SUPERH select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) + select ARCH_HAS_GCOV_PROFILE_ALL select PERF_USE_VMALLOC select HAVE_DEBUG_KMEMLEAK select HAVE_KERNEL_GZIP diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe120da25625..3317b3c83f6d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,6 +21,7 @@ config X86_64 config X86 def_bool y select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_GCOV_PROFILE_ALL select HAVE_AOUT if X86_32 select HAVE_UNSTABLE_SCHED_CLOCK select ARCH_SUPPORTS_NUMA_BALANCING diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 3b7408759bdf..c92e44855ddd 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -32,10 +32,13 @@ config GCOV_KERNEL Note that the debugfs filesystem has to be mounted to access profiling data. +config ARCH_HAS_GCOV_PROFILE_ALL + def_bool n + config GCOV_PROFILE_ALL bool "Profile entire Kernel" depends on GCOV_KERNEL - depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM || ARM64 + depends on ARCH_HAS_GCOV_PROFILE_ALL default n ---help--- This options activates profiling for the entire kernel. 
From 1671763b751e35debe0d0e5b3877e393c4e6ec97 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 5 Feb 2015 18:44:04 +0100 Subject: [PATCH 142/204] rtnetlink: ifla_vf_policy: fix misuses of NLA_BINARY [ Upstream commit 364d5716a7adb91b731a35765d369602d68d2881 ] ifla_vf_policy[] is wrong in advertising its individual member types as NLA_BINARY since .type = NLA_BINARY in combination with .len declares the len member as *max* attribute length [0, len]. The issue is that when do_setvfinfo() is being called to set up a VF through ndo handler, we could set corrupted data if the attribute length is less than the size of the related structure itself. The intent is exactly the opposite, namely to make sure to pass at least data of minimum size of len. Fixes: ebc08a6f47ee ("rtnetlink: Add VF config code to rtnetlink") Cc: Mitch Williams Cc: Jeff Kirsher Signed-off-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 279b5dcf09ae..72fd097c3fc6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1138,14 +1138,10 @@ static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { }; static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { - [IFLA_VF_MAC] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_mac) }, - [IFLA_VF_VLAN] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_vlan) }, - [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_tx_rate) }, - [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, }; static const struct 
nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { From e5fc2a02354d8aa64e435e0240fb2b94f9edcc2a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 12 Feb 2015 16:14:08 -0800 Subject: [PATCH 143/204] ipv6: fix ipv6_cow_metrics for non DST_HOST case [ Upstream commit 3b4711757d7903ab6fa88a9e7ab8901b8227da60 ] ipv6_cow_metrics() currently assumes only DST_HOST routes require dynamic metrics allocation from inetpeer. The assumption breaks when ndisc discovered router with RTAX_MTU and RTAX_HOPLIMIT metric. Refer to ndisc_router_discovery() in ndisc.c and note that dst_metric_set() is called after the route is created. This patch creates the metrics array (by calling dst_cow_metrics_generic) in ipv6_cow_metrics(). Test: radvd.conf: interface qemubr0 { AdvLinkMTU 1300; AdvCurHopLimit 30; prefix fd00:face:face:face::/64 { AdvOnLink on; AdvAutonomous on; AdvRouterAddr off; }; }; Before: [root@qemu1 ~]# ip -6 r show | egrep -v unreachable fd00:face:face:face::/64 dev eth0 proto kernel metric 256 expires 27sec fe80::/64 dev eth0 proto kernel metric 256 default via fe80::74df:d0ff:fe23:8ef2 dev eth0 proto ra metric 1024 expires 27sec After: [root@qemu1 ~]# ip -6 r show | egrep -v unreachable fd00:face:face:face::/64 dev eth0 proto kernel metric 256 expires 27sec mtu 1300 fe80::/64 dev eth0 proto kernel metric 256 mtu 1300 default via fe80::74df:d0ff:fe23:8ef2 dev eth0 proto ra metric 1024 expires 27sec mtu 1300 hoplimit 30 Fixes: 8e2ec639173f325 (ipv6: don't use inetpeer to store metrics for routes.) Signed-off-by: Martin KaFai Lau Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 92274796eb71..d94d224f7e68 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -109,7 +109,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) u32 *p = NULL; if (!(rt->dst.flags & DST_HOST)) - return NULL; + return dst_cow_metrics_generic(dst, old); peer = rt6_get_peer_create(rt); if (peer) { From 140b057ced8f43b93c4d934eb94661782155f683 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 13 Feb 2015 13:56:53 -0800 Subject: [PATCH 144/204] rtnetlink: call ->dellink on failure when ->newlink exists [ Upstream commit 7afb8886a05be68e376655539a064ec672de8a8e ] Ignacy reported that when eth0 is down and add a vlan device on top of it like: ip link add link eth0 name eth0.1 up type vlan id 1 We will get a refcount leak: unregister_netdevice: waiting for eth0.1 to become free. Usage count = 2 The problem is when rtnl_configure_link() fails in rtnl_newlink(), we simply call unregister_device(), but for stacked device like vlan, we almost do nothing when we unregister the upper device, more work is done when we unregister the lower device, so call its ->dellink(). Reported-by: Ignacy Gawedzki Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 72fd097c3fc6..a67310e00b3f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1851,8 +1851,16 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; err = rtnl_configure_link(dev, ifm); - if (err < 0) - unregister_netdevice(dev); + if (err < 0) { + if (ops->newlink) { + LIST_HEAD(list_kill); + + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + } else { + unregister_netdevice(dev); + } + } out: put_net(dest_net); return err; From 54c7f8978bfe00fdc80e44a0451a1f8f8b8de638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacy=20Gaw=C4=99dzki?= Date: Fri, 13 Feb 2015 14:47:05 -0800 Subject: [PATCH 145/204] gen_stats.c: Duplicate xstats buffer for later use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1c4cff0cf55011792125b6041bc4e9713e46240f ] The gnet_stats_copy_app() function gets called, more often than not, with its second argument a pointer to an automatic variable in the caller's stack. Therefore, to avoid copying garbage afterwards when calling gnet_stats_finish_copy(), this data is better copied to a dynamically allocated memory that gets freed after use. [xiyou.wangcong@gmail.com: remove a useless kfree()] Signed-off-by: Ignacy Gawędzki Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/gen_stats.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index ddedf211e588..b96437b6e82b 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -32,6 +32,9 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) return 0; nla_put_failure: + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return -1; } @@ -205,7 +208,9 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) { if (d->compat_xstats) { - d->xstats = st; + d->xstats = kmemdup(st, len, GFP_ATOMIC); + if (!d->xstats) + goto err_out; d->xstats_len = len; } @@ -213,6 +218,11 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) return gnet_stats_copy(d, TCA_STATS_APP, st, len); return 0; + +err_out: + d->xstats_len = 0; + spin_unlock_bh(d->lock); + return -1; } EXPORT_SYMBOL(gnet_stats_copy_app); @@ -245,6 +255,9 @@ gnet_stats_finish_copy(struct gnet_dump *d) return -1; } + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return 0; } From e3569bbff393a7a89a42d72ac240a09b8f21ee4f Mon Sep 17 00:00:00 2001 From: Alexander Drozdov Date: Tue, 17 Feb 2015 13:33:46 +0300 Subject: [PATCH 146/204] ipv4: ip_check_defrag should correctly check return value of skb_copy_bits [ Upstream commit fba04a9e0c869498889b6445fd06cbe7da9bb834 ] skb_copy_bits() returns zero on success and negative value on error, so it is needed to invert the condition in ip_check_defrag(). Fixes: 1bf3751ec90c ("ipv4: ip_check_defrag must not modify skb before unsharing") Signed-off-by: Alexander Drozdov Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b66910aaef4d..5fff3d466e90 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -683,7 +683,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) if (skb->protocol != htons(ETH_P_IP)) return skb; - if (!skb_copy_bits(skb, 0, &iph, sizeof(iph))) + if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0) return skb; if (iph.ihl < 5 || iph.version != 4) From 4c274a9d02a9bba0f13f3a8c1f39b462aead4bd6 Mon Sep 17 00:00:00 2001 From: Alexander Drozdov Date: Thu, 5 Mar 2015 10:29:39 +0300 Subject: [PATCH 147/204] ipv4: ip_check_defrag should not assume that skb_network_offset is zero [ Upstream commit 3e32e733d1bbb3f227259dc782ef01d5706bdae0 ] ip_check_defrag() may be used by af_packet to defragment outgoing packets. skb_network_offset() of af_packet's outgoing packets is not zero. Signed-off-by: Alexander Drozdov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 5fff3d466e90..4c1884fed548 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -678,27 +678,30 @@ EXPORT_SYMBOL(ip_defrag); struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) { struct iphdr iph; + int netoff; u32 len; if (skb->protocol != htons(ETH_P_IP)) return skb; - if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0) + netoff = skb_network_offset(skb); + + if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0) return skb; if (iph.ihl < 5 || iph.version != 4) return skb; len = ntohs(iph.tot_len); - if (skb->len < len || len < (iph.ihl * 4)) + if (skb->len < netoff + len || len < (iph.ihl * 4)) return skb; if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, iph.ihl*4)) + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) return skb; - if (pskb_trim_rcsum(skb, len)) + if (pskb_trim_rcsum(skb, netoff + len)) return skb; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(skb, user)) From 65d6368f21038a4315d508923501283c5e0681a9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 17 Feb 2015 09:36:22 -0800 Subject: [PATCH 148/204] net: phy: Fix verification of EEE support in phy_init_eee [ Upstream commit 54da5a8be3c1e924c35480eb44c6e9b275f6444e ] phy_init_eee uses phy_find_setting(phydev->speed, phydev->duplex) to find a valid entry in the settings array for the given speed and duplex value. For full duplex 1000baseT, this will return the first matching entry, which is the entry for 1000baseKX_Full. If the phy eee does not support 1000baseKX_Full, this entry will not match, causing phy_init_eee to fail for no good reason. 
Fixes: 9a9c56cb34e6 ("net: phy: fix a bug when verify the EEE support") Fixes: 3e7077067e80c ("phy: Expand phy speed/duplex settings array") Cc: Giuseppe Cavallaro Signed-off-by: Guenter Roeck Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 38f0b312ff85..427e48a80efa 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -202,6 +202,25 @@ static inline int phy_find_valid(int idx, u32 features) return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; } +/** + * phy_check_valid - check if there is a valid PHY setting which matches + * speed, duplex, and feature mask + * @speed: speed to match + * @duplex: duplex to match + * @features: A mask of the valid settings + * + * Description: Returns true if there is a valid setting, false otherwise. + */ +static inline bool phy_check_valid(int speed, int duplex, u32 features) +{ + unsigned int idx; + + idx = phy_find_valid(phy_find_setting(speed, duplex), features); + + return settings[idx].speed == speed && settings[idx].duplex == duplex && + (settings[idx].setting & features); +} + /** * phy_sanitize_settings - make sure the PHY is set to supported speed and duplex * @phydev: the target phy_device struct @@ -1011,7 +1030,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) (phydev->interface == PHY_INTERFACE_MODE_RGMII))) { int eee_lp, eee_cap, eee_adv; u32 lp, cap, adv; - int idx, status; + int status; /* Read phy status to properly get the right settings */ status = phy_read_status(phydev); @@ -1043,8 +1062,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) adv = mmd_eee_adv_to_ethtool_adv_t(eee_adv); lp = mmd_eee_adv_to_ethtool_adv_t(eee_lp); - idx = phy_find_setting(phydev->speed, phydev->duplex); - if (!(lp & adv & settings[idx].setting)) + if 
(!phy_check_valid(phydev->speed, phydev->duplex, lp & adv)) goto eee_exit; if (clk_stop_enable) { From cae79d75dd639bca743b91d5532a09d90bc3492d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacy=20Gaw=C4=99dzki?= Date: Tue, 17 Feb 2015 20:15:20 +0100 Subject: [PATCH 149/204] ematch: Fix auto-loading of ematch modules. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 34eea79e2664b314cab6a30fc582fdfa7a1bb1df ] In tcf_em_validate(), after calling request_module() to load the kind-specific module, set em->ops to NULL before returning -EAGAIN, so that module_put() is not called again by tcf_em_tree_destroy(). Signed-off-by: Ignacy Gawędzki Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/ematch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 3a633debb6df..a2abc449ce8f 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -227,6 +227,7 @@ static int tcf_em_validate(struct tcf_proto *tp, * to replay the request. */ module_put(em->ops->owner); + em->ops = NULL; err = -EAGAIN; } #endif From cde81ed79fc1540a3c3e775515d72f441aef8e6c Mon Sep 17 00:00:00 2001 From: Matthew Thode Date: Tue, 17 Feb 2015 18:31:57 -0600 Subject: [PATCH 150/204] net: reject creation of netdev names with colons [ Upstream commit a4176a9391868bfa87705bcd2e3b49e9b9dd2996 ] colons are used as a separator in netdev device lookup in dev_ioctl.c Specific functions are SIOCGIFTXQLEN SIOCETHTOOL SIOCSIFNAME Signed-off-by: Matthew Thode Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index c310440309bb..aeca8dd88b2a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -927,7 +927,7 @@ bool dev_valid_name(const char *name) return false; while (*name) { - if (*name == '/' || isspace(*name)) + if (*name == '/' || *name == ':' || isspace(*name)) return false; name++; } From 55fde24a60ef618558fef13c2413afa5daf126df Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 23 Feb 2015 14:02:54 +0100 Subject: [PATCH 151/204] team: fix possible null pointer dereference in team_handle_frame [ Upstream commit 57e595631904c827cfa1a0f7bbd7cc9a49da5745 ] Currently following race is possible in team: CPU0 CPU1 team_port_del team_upper_dev_unlink priv_flags &= ~IFF_TEAM_PORT team_handle_frame team_port_get_rcu team_port_exists priv_flags & IFF_TEAM_PORT == 0 return NULL (instead of port got from rx_handler_data) netdev_rx_handler_unregister The thing is that the flag is removed before rx_handler is unregistered. If team_handle_frame is called in between, team_port_exists returns 0 and team_port_get_rcu will return NULL. So do not check the flag here. It is guaranteed by netdev_rx_handler_unregister that team_handle_frame will always see valid rx_handler_data pointer. Signed-off-by: Jiri Pirko Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 12222290c802..44d4d2a5f6c8 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -42,9 +42,7 @@ static struct team_port *team_port_get_rcu(const struct net_device *dev) { - struct team_port *port = rcu_dereference(dev->rx_handler_data); - - return team_port_exists(dev) ? 
port : NULL; + return rcu_dereference(dev->rx_handler_data); } static struct team_port *team_port_get_rtnl(const struct net_device *dev) From 83d2de946101424e79335d0d72c9288344704065 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 23 Feb 2015 18:12:56 +0000 Subject: [PATCH 152/204] net: compat: Ignore MSG_CMSG_COMPAT in compat_sys_{send, recv}msg [ Upstream commit d720d8cec563ce4e4fa44a613d4f2dcb1caf2998 ] With commit a7526eb5d06b (net: Unbreak compat_sys_{send,recv}msg), the MSG_CMSG_COMPAT flag is blocked at the compat syscall entry points, changing the kernel compat behaviour from the one before the commit it was trying to fix (1be374a0518a, net: Block MSG_CMSG_COMPAT in send(m)msg and recv(m)msg). On 32-bit kernels (!CONFIG_COMPAT), MSG_CMSG_COMPAT is 0 and the native 32-bit sys_sendmsg() allows flag 0x80000000 to be set (it is ignored by the kernel). However, on a 64-bit kernel, the compat ABI is different with commit a7526eb5d06b. This patch changes the compat_sys_{send,recv}msg behaviour to the one prior to commit 1be374a0518a. The problem was found running 32-bit LTP (sendmsg01) binary on an arm64 kernel. Arguably, LTP should not pass 0xffffffff as flags to sendmsg() but the general rule is not to break user ABI (even when the user behaviour is not entirely sane). Fixes: a7526eb5d06b (net: Unbreak compat_sys_{send,recv}msg) Cc: Andy Lutomirski Cc: David S. Miller Signed-off-by: Catalin Marinas Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/compat.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/net/compat.c b/net/compat.c index cbc1a2a26587..275af79c131b 100644 --- a/net/compat.c +++ b/net/compat.c @@ -738,24 +738,18 @@ static unsigned char nas[21] = { asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT); } asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } @@ -778,9 +772,6 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, int datagrams; struct timespec ktspec; - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - if (timeout == NULL) return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, NULL); From a83e4448958bd0542012648411b868c5a4d57b33 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Feb 2015 18:35:35 -0800 Subject: [PATCH 153/204] macvtap: make sure neighbour code can push ethernet header [ Upstream commit 2f1d8b9e8afa5a833d96afcd23abcb8cdf8d83ab ] Brian reported crashes using IPv6 traffic with macvtap/veth combo. I tracked the crashes in neigh_hh_output() -> memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); Neighbour code assumes headroom to push Ethernet header is at least 16 bytes. 
It appears macvtap has only 14 bytes available on arches where NET_IP_ALIGN is 0 (like x86) Effect is a corruption of 2 bytes right before skb->head, and possible crashes if accessing non existing memory. This fix should also increase IPv4 performance, as paranoid code in ip_finish_output2() wont have to call skb_realloc_headroom() Reported-by: Brian Rak Tested-by: Brian Rak Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 2d255ba911d5..8fc46fcaee54 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -658,12 +658,15 @@ static unsigned long iov_pages(const struct iovec *iv, int offset, return pages; } +/* Neighbour code has some assumptions on HH_DATA_MOD alignment */ +#define MACVTAP_RESERVE HH_DATA_OFF(ETH_HLEN) + /* Get packet from user space buffer */ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, const struct iovec *iv, unsigned long total_len, size_t count, int noblock) { - int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN); + int good_linear = SKB_MAX_HEAD(MACVTAP_RESERVE); struct sk_buff *skb; struct macvlan_dev *vlan; unsigned long len = total_len; @@ -722,7 +725,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, linear = vnet_hdr.hdr_len; } - skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, + skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen, linear, noblock, &err); if (!skb) goto err; From 538022e1ce05a1ebadfbd916413c1712593a1fdc Mon Sep 17 00:00:00 2001 From: Ben Shelton Date: Mon, 16 Feb 2015 13:47:06 -0600 Subject: [PATCH 154/204] usb: plusb: Add support for National Instruments host-to-host cable [ Upstream commit 42c972a1f390e3bc51ca1e434b7e28764992067f ] The National Instruments USB Host-to-Host Cable is based on the Prolific PL-25A1 chipset. 
Add its VID/PID so the plusb driver will recognize it. Signed-off-by: Ben Shelton Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/plusb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 0fcc8e65a068..74323e9d9004 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -136,6 +136,11 @@ static const struct usb_device_id products [] = { }, { USB_DEVICE(0x050d, 0x258a), /* Belkin F5U258/F5U279 (PL-25A1) */ .driver_info = (unsigned long) &prolific_info, +}, { + USB_DEVICE(0x3923, 0x7825), /* National Instruments USB + * Host-to-Host Cable + */ + .driver_info = (unsigned long) &prolific_info, }, { }, // END From 6b3130085888b4f1866d57dc19175bbd283a36a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Mon, 2 Mar 2015 18:27:11 +0100 Subject: [PATCH 155/204] udp: only allow UFO for packets from SOCK_DGRAM sockets [ Upstream commit acf8dd0a9d0b9e4cdb597c2f74802f79c699e802 ] If an over-MTU UDP datagram is sent through a SOCK_RAW socket to a UFO-capable device, ip_ufo_append_data() sets skb->ip_summed to CHECKSUM_PARTIAL unconditionally as all GSO code assumes transport layer checksum is to be computed on segmentation. However, in this case, skb->csum_start and skb->csum_offset are never set as raw socket transmit path bypasses udp_send_skb() where they are usually set. As a result, driver may access invalid memory when trying to calculate the checksum and store the result (as observed in virtio_net driver). Moreover, the very idea of modifying the userspace provided UDP header is IMHO against raw socket semantics (I wasn't able to find a document clearly stating this or the opposite, though). And while allowing CHECKSUM_NONE in the UFO case would be more efficient, it would be a bit too intrusive change just to handle a corner case like this. Therefore disallowing UFO for packets from sockets other than SOCK_DGRAM seems to be the best option.
Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 3 ++- net/ipv6/ip6_output.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index def18547748e..57e745086302 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -845,7 +845,8 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if (((length > mtu) || (skb && skb_has_frags(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (sk->sk_type == SOCK_DGRAM)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 071edcba4158..1ce7ea1f40b7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1286,7 +1286,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (((length > mtu) || (skb && skb_has_frags(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO) && + (sk->sk_type == SOCK_DGRAM)) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); From fc46dcb4a9c90a52fe279cb2f9d124c8b19fb569 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 4 Mar 2015 08:36:31 +0100 Subject: [PATCH 156/204] team: don't traverse port list using rcu in team_set_mac_address [ Upstream commit 9215f437b85da339a7dfe3db6e288637406f88b2 ] Currently the list is traversed using rcu variant. That is not correct since dev_set_mac_address can be called which eventually calls rtmsg_ifinfo_build_skb and there, skb allocation can sleep. So fix this by removing the rcu usage here.
Fixes: 3d249d4ca7 "net: introduce ethernet teaming device" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 44d4d2a5f6c8..14179a6593ef 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1521,11 +1521,11 @@ static int team_set_mac_address(struct net_device *dev, void *p) if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - rcu_read_lock(); - list_for_each_entry_rcu(port, &team->port_list, list) + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) if (team->ops.port_change_dev_addr) team->ops.port_change_dev_addr(team, port); - rcu_read_unlock(); + mutex_unlock(&team->lock); return 0; } From 1a25fb791ab61358715554597701d7d708be9c63 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:25:32 -0800 Subject: [PATCH 157/204] mm/hugetlb: add migration entry check in __unmap_hugepage_range commit 9fbc1f635fd0bd28cb32550211bf095753ac637a upstream. If __unmap_hugepage_range() tries to unmap the address range over which hugepage migration is on the way, we get the wrong page because pte_page() doesn't work for migration entries. This patch simply clears the pte for migration entries as we do for hwpoison entries. 
Fixes: 290408d4a2 ("hugetlb: hugepage migration core") Signed-off-by: Naoya Horiguchi Cc: Hugh Dickins Cc: James Hogan Cc: David Rientjes Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Luiz Capitulino Cc: Nishanth Aravamudan Cc: Lee Schermerhorn Cc: Steve Capper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7de4f67c81fe..d9bc87ca062b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2451,9 +2451,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, continue; /* - * HWPoisoned hugepage is already unmapped and dropped reference + * Migrating hugepage or HWPoisoned hugepage is already + * unmapped and its refcount is dropped, so just clear pte here. */ - if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { + if (unlikely(!pte_present(pte))) { huge_pte_clear(mm, address, ptep); continue; } From 992f1caea7af5c94e56bb7de089848470e1c000a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 11 Feb 2015 15:28:39 -0800 Subject: [PATCH 158/204] mm/mmap.c: fix arithmetic overflow in __vm_enough_memory() commit 5703b087dc8eaf47bfb399d6cf512d471beff405 upstream. I noticed, that "allowed" can easily overflow by falling below 0, because (total_vm / 32) can be larger than "allowed". The problem occurs in OVERCOMMIT_NONE mode. In this case, a huge allocation can success and overcommit the system (despite OVERCOMMIT_NONE mode). All subsequent allocations will fall (system-wide), so system become unusable. 
The problem was masked out by commit c9b1d0981fcc ("mm: limit growth of 3% hardcoded other user reserve"), but it's easy to reproduce it on older kernels: 1) set overcommit_memory sysctl to 2 2) mmap() large file multiple times (with VM_SHARED flag) 3) try to malloc() large amount of memory It also can be reproduced on newer kernels, but miss-configured sysctl_user_reserve_kbytes is required. Fix this issue by switching to signed arithmetic here. [akpm@linux-foundation.org: use min_t] Signed-off-by: Roman Gushchin Cc: Andrew Shewmaker Cc: Rik van Riel Cc: Konstantin Khlebnikov Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 43a7089c6a7c..70ff9b41c970 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL_GPL(vm_memory_committed); */ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { - unsigned long free, allowed, reserve; + long free, allowed, reserve; vm_acct_memory(pages); @@ -193,7 +193,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) */ if (mm) { reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); - allowed -= min(mm->total_vm / 32, reserve); + allowed -= min_t(long, mm->total_vm / 32, reserve); } if (percpu_counter_read_positive(&vm_committed_as) < allowed) From ae9c2f1fe9a11848a29f04e67940220a3985bfee Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 11 Feb 2015 15:28:42 -0800 Subject: [PATCH 159/204] mm/nommu.c: fix arithmetic overflow in __vm_enough_memory() commit 8138a67a5557ffea3a21dfd6f037842d4e748513 upstream. I noticed that "allowed" can easily overflow by falling below 0, because (total_vm / 32) can be larger than "allowed". The problem occurs in OVERCOMMIT_NONE mode. In this case, a huge allocation can success and overcommit the system (despite OVERCOMMIT_NONE mode). 
All subsequent allocations will fall (system-wide), so system become unusable. The problem was masked out by commit c9b1d0981fcc ("mm: limit growth of 3% hardcoded other user reserve"), but it's easy to reproduce it on older kernels: 1) set overcommit_memory sysctl to 2 2) mmap() large file multiple times (with VM_SHARED flag) 3) try to malloc() large amount of memory It also can be reproduced on newer kernels, but miss-configured sysctl_user_reserve_kbytes is required. Fix this issue by switching to signed arithmetic here. Signed-off-by: Roman Gushchin Cc: Andrew Shewmaker Cc: Rik van Riel Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/nommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/nommu.c b/mm/nommu.c index 298884dcd6e7..d9d07a5d2318 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1898,7 +1898,7 @@ EXPORT_SYMBOL(unmap_mapping_range); */ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { - unsigned long free, allowed, reserve; + long free, allowed, reserve; vm_acct_memory(pages); @@ -1963,7 +1963,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) */ if (mm) { reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); - allowed -= min(mm->total_vm / 32, reserve); + allowed -= min_t(long, mm->total_vm / 32, reserve); } if (percpu_counter_read_positive(&vm_committed_as) < allowed) From 2295074e4465734d4cb83cba15e055b4b2a87737 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 12 Feb 2015 14:59:50 -0800 Subject: [PATCH 160/204] mm/compaction: fix wrong order check in compact_finished() commit 372549c2a3778fd3df445819811c944ad54609ca upstream. What we want to check here is whether there is highorder freepage in buddy list of other migratetype in order to steal it without fragmentation. But, current code just checks cc->order which means allocation request order. So, this is wrong. 
Without this fix, non-movable synchronous compaction below pageblock order would not stop until compaction is complete, because the migratetype of most pageblocks is movable and the high order freepage made by compaction is usually on the movable type buddy list. There is a report related to this bug. See the link below. http://www.spinics.net/lists/linux-mm/msg81666.html Although the reported system still has a load spike coming from compaction, this makes that system completely stable and responsive according to the report. stress-highalloc test in mmtests with non movable order 7 allocation doesn't show any notable difference in allocation success rate, but it shows a higher compaction success rate. Compaction success rate (Compaction success * 100 / Compaction stalls, %) 18.47 : 28.94 Fixes: 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available") Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Reviewed-by: Zhang Yanfei Cc: Mel Gorman Cc: David Rientjes Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/compaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index fb797a32362f..eeaaa929d1de 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -881,7 +881,7 @@ static int compact_finished(struct zone *zone, return COMPACT_PARTIAL; /* Job done if allocation would set block type */ - if (cc->order >= pageblock_order && area->nr_free) + if (order >= pageblock_order && area->nr_free) return COMPACT_PARTIAL; } From 9113c468b621ddb74f2395564720862faa3a083d Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Thu, 12 Feb 2015 15:00:19 -0800 Subject: [PATCH 161/204] mm/memory.c: actually remap enough memory commit 9cb12d7b4ccaa976f97ce0c5fd0f1b6a83bc2a75 upstream. For whatever reason, generic_access_phys() only remaps one page, but actually allows access of arbitrary size.
It's quite easy to trigger large reads, like printing out a large structure with gdb, which leads to a crash. Fix it by remapping the correct size. Fixes: 28b2ee20c7cb ("access_process_vm device memory infrastructure") Signed-off-by: Grazvydas Ignotas Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 8b2d75f61b32..04232bb173f0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4088,7 +4088,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, if (follow_phys(vma, addr, write, &prot, &phys_addr)) return -EINVAL; - maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); + maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); if (write) memcpy_toio(maddr + offset, buf, len); else From 84ba11a6ee549b2727a3b83d3c1b455df1c7ebcd Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 13 Feb 2015 22:27:40 +0000 Subject: [PATCH 162/204] target: Check for LBA + sectors wrap-around in sbc_parse_cdb commit aa179935edea9a64dec4b757090c8106a3907ffa upstream. This patch adds a check to sbc_parse_cdb() in order to detect when an LBA + sector vs. end-of-device calculation wraps when the LBA is sufficiently large (e.g. 0xFFFFFFFFFFFFFFFF).
Cc: Martin Petersen Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_sbc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 92e6c510e5d0..70b0d265c37d 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -562,7 +562,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) unsigned long long end_lba; end_lba = dev->transport->get_blocks(dev) + 1; - if (cmd->t_task_lba + sectors > end_lba) { + if (((cmd->t_task_lba + sectors) < cmd->t_task_lba) || + ((cmd->t_task_lba + sectors) > end_lba)) { pr_err("cmd exceeds last lba %llu " "(lba %llu, sectors %u)\n", end_lba, cmd->t_task_lba, sectors); From 8dbaea2b3d24e3a77e6a853ff76c19c5a6052f4e Mon Sep 17 00:00:00 2001 From: Michael Scott Date: Tue, 10 Mar 2015 13:15:02 -0700 Subject: [PATCH 163/204] PM / QoS: remove duplicate call to pm_qos_update_target In 3.10.y backport patch 1dba303727f52ea062580b0a9b3f0c3b462769cf, the logic to call pm_qos_update_target was moved to __pm_qos_update_request. However, the original code was left in function pm_qos_update_request. Currently, if pm_qos_update_request is called where new_value != req->node.prio then pm_qos_update_target will be called twice in a row. Once in pm_qos_update_request and then again in the following call to __pm_qos_update_request. Removing the leftover code from pm_qos_update_request stops this second call to pm_qos_update_target where the work of removing / re-adding the new_value in the constraints list would be duplicated.
Signed-off-by: Michael Scott Signed-off-by: Greg Kroah-Hartman --- kernel/power/qos.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 25cf89bc659e..8703fc729fb4 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -369,12 +369,6 @@ void pm_qos_update_request(struct pm_qos_request *req, } cancel_delayed_work_sync(&req->work); - - if (new_value != req->node.prio) - pm_qos_update_target( - pm_qos_array[req->pm_qos_class]->constraints, - &req->node, PM_QOS_UPDATE_REQ, new_value); - __pm_qos_update_request(req, new_value); } EXPORT_SYMBOL_GPL(pm_qos_update_request); From 22e764ee4bafa7dbf5edd2580de006e32e671e93 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 5 Mar 2015 01:09:44 +0100 Subject: [PATCH 164/204] x86/asm/entry/64: Remove a bogus 'ret_from_fork' optimization commit 956421fbb74c3a6261903f3836c0740187cf038b upstream. 'ret_from_fork' checks TIF_IA32 to determine whether 'pt_regs' and the related state make sense for 'ret_from_sys_call'. This is entirely the wrong check. TS_COMPAT would make a little more sense, but there's really no point in keeping this optimization at all. This fixes a return to the wrong user CS if we came from int 0x80 in a 64-bit task. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4710be56d76ef994ddf59087aad98c000fbab9a4.1424989793.git.luto@amacapital.net [ Backported from tip:x86/asm. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 948b2e14df8c..6ed8f16fd61b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -557,11 +557,14 @@ ENTRY(ret_from_fork) testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 
jz 1f - testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET - jnz int_ret_from_sys_call - - RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET - jmp ret_from_sys_call # go to the SYSRET fastpath + /* + * By the time we get here, we have no idea whether our pt_regs, + * ti flags, and ti status came from the 64-bit SYSCALL fast path, + * the slow path, or one of the ia32entry paths. + * Use int_ret_from_sys_call to return, since it can safely handle + * all of the above. + */ + jmp int_ret_from_sys_call 1: subq $REST_SKIP, %rsp # leave space for volatiles From b5e10b06c525414503cd40b536604b0167c062e6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 23 Jan 2015 00:34:02 +0100 Subject: [PATCH 165/204] iio: imu: adis16400: Fix sign extension commit 19e353f2b344ad86cea6ebbc0002e5f903480a90 upstream. The intention is obviously to sign-extend a 12 bit quantity. But because of C's promotion rules, the assignment is equivalent to "val16 &= 0xfff;". Use the proper API for this. Signed-off-by: Rasmus Villemoes Acked-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis16400_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16400_core.c b/drivers/iio/imu/adis16400_core.c index d6ece2d17dec..7d60c85cc16d 100644 --- a/drivers/iio/imu/adis16400_core.c +++ b/drivers/iio/imu/adis16400_core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -447,7 +448,7 @@ static int adis16400_read_raw(struct iio_dev *indio_dev, mutex_unlock(&indio_dev->mlock); if (ret) return ret; - val16 = ((val16 & 0xFFF) << 4) >> 4; + val16 = sign_extend32(val16, 11); *val = val16; return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: From a042770a1f4fb346e360cbde61426288efb71688 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 24 Feb 2015 18:57:18 +0100 Subject: [PATCH 166/204] btrfs: fix lost return value due to variable shadowing commit 
1932b7be973b554ffe20a5bba6ffaed6fa995cdc upstream. A block-local variable stores error code but btrfs_get_blocks_direct may not return it in the end as there's a ret defined in the function scope. Fixes: d187663ef24c ("Btrfs: lock extents as we map them in DIO") Signed-off-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 187911fbabce..d20db6437723 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6825,7 +6825,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && em->block_start != EXTENT_MAP_HOLE)) { int type; - int ret; u64 block_start, orig_start, orig_block_len, ram_bytes; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) From fa41700e373fc52e3c4e5193008332bb558e9f03 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sun, 1 Mar 2015 20:36:00 +0000 Subject: [PATCH 167/204] Btrfs: fix data loss in the fast fsync path commit 3a8b36f378060d20062a0918e99fae39ff077bf0 upstream. When using the fast file fsync code path we can miss the fact that new writes happened since the last file fsync and therefore return without waiting for the IO to finish and write the new extents to the fsync log. Here's an example scenario where the fsync will miss the fact that new file data exists that wasn't yet durably persisted: 1. fs_info->last_trans_committed == N - 1 and current transaction is transaction N (fs_info->generation == N); 2. do a buffered write; 3. fsync our inode, this clears our inode's full sync flag, starts an ordered extent and waits for it to complete - when it completes at btrfs_finish_ordered_io(), the inode's last_trans is set to the value N (via btrfs_update_inode_fallback -> btrfs_update_inode -> btrfs_set_inode_last_trans); 4. 
transaction N is committed, so fs_info->last_trans_committed is now set to the value N and fs_info->generation remains with the value N; 5. do another buffered write, when this happens btrfs_file_write_iter sets our inode's last_trans to the value N + 1 (that is fs_info->generation + 1 == N + 1); 6. transaction N + 1 is started and fs_info->generation now has the value N + 1; 7. transaction N + 1 is committed, so fs_info->last_trans_committed is set to the value N + 1; 8. fsync our inode - because it doesn't have the full sync flag set, we only start the ordered extent, we don't wait for it to complete (only in a later phase) therefore its last_trans field has the value N + 1 set previously by btrfs_file_write_iter(), and so we have: inode->last_trans <= fs_info->last_trans_committed (N + 1) (N + 1) Which made us not log the last buffered write and exit the fsync handler immediately, returning success (0) to user space and resulting in data loss after a crash. This can actually be triggered deterministically and the following excerpt from a testcase I made for xfstests triggers the issue. It moves a dummy file across directories and then fsyncs the old parent directory - this is just to trigger a transaction commit, so moving files around isn't directly related to the issue but it was chosen because running 'sync' for example does more than just committing the current transaction, as it flushes/waits for all file data to be persisted. The issue can also happen at random periods, since the transaction kthread periodically commits the current transaction (about every 30 seconds by default). The body of the test is: _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create our main test file 'foo', the one we check for data loss. # By doing an fsync against our file, it makes btrfs clear the 'needs_full_sync' # bit from its flags (btrfs inode specific flags).
$XFS_IO_PROG -f -c "pwrite -S 0xaa 0 8K" \ -c "fsync" $SCRATCH_MNT/foo | _filter_xfs_io # Now create one other file and 2 directories. We will move this second file # from one directory to the other later because it forces btrfs to commit its # currently open transaction if we fsync the old parent directory. This is # necessary to trigger the data loss bug that affected btrfs. mkdir $SCRATCH_MNT/testdir_1 touch $SCRATCH_MNT/testdir_1/bar mkdir $SCRATCH_MNT/testdir_2 # Make sure everything is durably persisted. sync # Write more 8Kb of data to our file. $XFS_IO_PROG -c "pwrite -S 0xbb 8K 8K" $SCRATCH_MNT/foo | _filter_xfs_io # Move our 'bar' file into a new directory. mv $SCRATCH_MNT/testdir_1/bar $SCRATCH_MNT/testdir_2/bar # Fsync our first directory. Because it had a file moved into some other # directory, this made btrfs commit the currently open transaction. This is # a condition necessary to trigger the data loss bug. $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/testdir_1 # Now fsync our main test file. If the fsync succeeds, we expect the 8Kb of # data we wrote previously to be persisted and available if a crash happens. # This did not happen with btrfs, because of the transaction commit that # happened when we fsynced the parent directory. $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foo # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey # Now check that all data we wrote before are available. 
echo "File content after log replay:" od -t x1 $SCRATCH_MNT/foo status=0 exit The expected golden output for the test, which is what we get with this fix applied (or when running against ext3/4 and xfs), is: wrote 8192/8192 bytes at offset 0 XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 8192/8192 bytes at offset 8192 XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) File content after log replay: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0020000 bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb * 0040000 Without this fix applied, the output shows the test file does not have the second 8Kb extent that we successfully fsynced: wrote 8192/8192 bytes at offset 0 XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 8192/8192 bytes at offset 8192 XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) File content after log replay: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0020000 So fix this by skipping the fsync only if we're doing a full sync and if the inode's last_trans is <= fs_info->last_trans_committed, or if the inode is already in the log. Also remove setting the inode's last_trans in btrfs_file_write_iter since it's useless/unreliable. Also because btrfs_file_write_iter no longer sets inode->last_trans to fs_info->generation + 1, don't set last_trans to 0 if we bail out and don't bail out if last_trans is 0, otherwise something as simple as the following example wouldn't log the second write on the last fsync: 1. write to file 2. fsync file 3. fsync file |--> btrfs_inode_in_log() returns true and it set last_trans to 0 4. write to file |--> btrfs_file_write_iter() no longers sets last_trans, so it remained with a value of 0 5. fsync |--> inode->last_trans == 0, so it bails out without logging the second write A test case for xfstests will be sent soon. 
Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 56 ++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4205ba752d40..caaf30f9f27f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1593,22 +1593,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, mutex_unlock(&inode->i_mutex); /* - * we want to make sure fsync finds this change - * but we haven't joined a transaction running right now. - * - * Later on, someone is sure to update the inode and get the - * real transid recorded. - * - * We set last_trans now to the fs_info generation + 1, - * this will either be one more than the running transaction - * or the generation used for the next transaction if there isn't - * one running right now. - * * We also have to set last_sub_trans to the current log transid, * otherwise subsequent syncs to a file that's been synced in this * transaction will appear to have already occured. */ - BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; BTRFS_I(inode)->last_sub_trans = root->log_transid; if (num_written > 0 || num_written == -EIOCBQUEUED) { err = generic_write_sync(file, pos, num_written); @@ -1706,25 +1694,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* - * check the transaction that last modified this inode - * and see if its already been committed - */ - if (!BTRFS_I(inode)->last_trans) { - mutex_unlock(&inode->i_mutex); - goto out; - } - - /* - * if the last transaction that changed this file was before - * the current transaction, we can bail out now without any - * syncing + * If the last transaction that changed this file was before the current + * transaction and we have the full sync flag set in our inode, we can + * bail out now without any syncing. 
+ * + * Note that we can't bail out if the full sync flag isn't set. This is + * because when the full sync flag is set we start all ordered extents + * and wait for them to fully complete - when they complete they update + * the inode's last_trans field through: + * + * btrfs_finish_ordered_io() -> + * btrfs_update_inode_fallback() -> + * btrfs_update_inode() -> + * btrfs_set_inode_last_trans() + * + * So we are sure that last_trans is up to date and can do this check to + * bail out safely. For the fast path, when the full sync flag is not + * set in our inode, we can not do it because we start only our ordered + * extents and don't wait for them to complete (that is when + * btrfs_finish_ordered_io runs), so here at this point their last_trans + * value might be less than or equals to fs_info->last_trans_committed, + * and setting a speculative last_trans for an inode when a buffered + * write is made (such as fs_info->generation + 1 for example) would not + * be reliable since after setting the value and before fsync is called + * any number of transactions can start and commit (transaction kthread + * commits the current transaction periodically), and a transaction + * commit does not start nor waits for ordered extents to complete. */ smp_mb(); if (btrfs_inode_in_log(inode, root->fs_info->generation) || - BTRFS_I(inode)->last_trans <= - root->fs_info->last_trans_committed) { - BTRFS_I(inode)->last_trans = 0; - + (full_sync && BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed)) { /* * We'v had everything committed since the last time we were * modified so clear this flag in case it was set for whatever From edf2ec9971b81163e986556d7773e46b372264fd Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Tue, 3 Mar 2015 16:31:38 +0100 Subject: [PATCH 168/204] Btrfs:__add_inode_ref: out of bounds memory read when looking for extended ref. commit dd9ef135e3542ffc621c4eb7f0091870ec7a1504 upstream. 
Improper arithmetic when calculating the address of the extended ref could lead to an out-of-bounds memory read and a kernel panic. Signed-off-by: Quentin Casasnovas Reviewed-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index bca436330681..7d3331cbccba 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -943,7 +943,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, base = btrfs_item_ptr_offset(leaf, path->slots[0]); while (cur_offset < item_size) { - extref = (struct btrfs_inode_extref *)base + cur_offset; + extref = (struct btrfs_inode_extref *)(base + cur_offset); victim_name_len = btrfs_inode_extref_name_len(leaf, extref); From 61afd4acb82b97672a3ccdcf9e96dde60706f0cc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Feb 2015 17:04:47 +0100 Subject: [PATCH 169/204] KVM: emulate: fix CMPXCHG8B on 32-bit hosts commit 4ff6f8e61eb7f96d3ca535c6d240f863ccd6fb7d upstream. This has been broken for a long time: it broke first in 2.6.35, then was almost fixed in 2.6.36 but this one-liner slipped through the cracks. The bug shows up as an infinite loop in Windows 7 (and newer) boot on 32-bit hosts without EPT. Windows uses CMPXCHG8B to write to page tables, which causes a page fault if running without EPT; the emulator is then called from kvm_mmu_page_fault. The loop then happens if the higher 4 bytes are not 0; the common case for this is that the NX bit (bit 63) is 1.
Fixes: 6550e1f165f384f3a46b60a1be9aba4bc3c2adad Fixes: 16518d5ada690643453eb0aef3cc7841d3623c2d Reported-by: Erik Rull Tested-by: Erik Rull Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4c01f022c6ac..af88fa20dbe8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4732,7 +4732,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) goto done; } - ctxt->dst.orig_val = ctxt->dst.val; + /* Copy full 64-bit value for CMPXCHG8B. */ + ctxt->dst.orig_val64 = ctxt->dst.val64; special_insn: From 18e3cd7c4675d18e08352d3e69af1c5d6f05d7f6 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 24 Feb 2015 11:46:20 +0000 Subject: [PATCH 170/204] KVM: MIPS: Fix trace event to save PC directly commit b3cffac04eca9af46e1e23560a8ee22b1bd36d43 upstream. Currently the guest exit trace event saves the VCPU pointer to the structure, and the guest PC is retrieved by dereferencing it when the event is printed rather than directly from the trace record. This isn't safe as the printing may occur long afterwards, after the PC has changed and potentially after the VCPU has been freed. Usually this results in the same (wrong) PC being printed for multiple trace events. It also isn't portable as userland has no way to access the VCPU data structure when interpreting the trace record itself. Lets save the actual PC in the structure so that the correct value is accessible later. 
Fixes: 669e846e6c4e ("KVM/MIPS32: MIPS arch specific APIs for KVM") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Marcelo Tosatti Cc: Gleb Natapov Cc: Steven Rostedt Cc: Ingo Molnar Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Acked-by: Steven Rostedt Signed-off-by: Marcelo Tosatti Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h index bc9e0f406c08..e51621e36152 100644 --- a/arch/mips/kvm/trace.h +++ b/arch/mips/kvm/trace.h @@ -26,18 +26,18 @@ TRACE_EVENT(kvm_exit, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), TP_ARGS(vcpu, reason), TP_STRUCT__entry( - __field(struct kvm_vcpu *, vcpu) + __field(unsigned long, pc) __field(unsigned int, reason) ), TP_fast_assign( - __entry->vcpu = vcpu; + __entry->pc = vcpu->arch.pc; __entry->reason = reason; ), TP_printk("[%s]PC: 0x%08lx", kvm_mips_exit_types_str[__entry->reason], - __entry->vcpu->arch.pc) + __entry->pc) ); #endif /* _TRACE_KVM_H */ From e256bf1483582a189a5bd58437b704f15fb9b06c Mon Sep 17 00:00:00 2001 From: Michiel vd Garde Date: Fri, 27 Feb 2015 02:08:29 +0100 Subject: [PATCH 171/204] USB: serial: cp210x: Adding Seletek device id's commit 675af70856d7cc026be8b6ea7a8b9db10b8b38a1 upstream. These device ID's are not associated with the cp210x module currently, but should be. This patch allows the devices to operate upon connecting them to the usb bus as intended. 
Signed-off-by: Michiel van de Garde Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f48f5dfab245..79e9ea005c63 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -147,6 +147,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x166A, 0x0305) }, /* Clipsal C-5000CT2 C-Bus Spectrum Colour Touchscreen */ { USB_DEVICE(0x166A, 0x0401) }, /* Clipsal L51xx C-Bus Architectural Dimmer */ { USB_DEVICE(0x166A, 0x0101) }, /* Clipsal 5560884 C-Bus Multi-room Audio Matrix Switcher */ + { USB_DEVICE(0x16C0, 0x09B0) }, /* Lunatico Seletek */ + { USB_DEVICE(0x16C0, 0x09B1) }, /* Lunatico Seletek */ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */ { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */ From 92677959bdadb8f7dd2809c5eedc4cd8ca8aeee2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 13 Feb 2015 10:54:53 -0500 Subject: [PATCH 172/204] USB: usbfs: don't leak kernel data in siginfo commit f0c2b68198589249afd2b1f2c4e8de8c03e19c16 upstream. When a signal is delivered, the information in the siginfo structure is copied to userspace. Good security practice dictates that the unused fields in this structure should be initialized to 0 so that random kernel stack data isn't exposed to the user. This patch adds such an initialization to the two places where usbfs raises signals.
Signed-off-by: Alan Stern Reported-by: Dave Mielke Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ce773cca2bf5..78ddfb43750a 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -501,6 +501,7 @@ static void async_completed(struct urb *urb) as->status = urb->status; signr = as->signr; if (signr) { + memset(&sinfo, 0, sizeof(sinfo)); sinfo.si_signo = as->signr; sinfo.si_errno = as->status; sinfo.si_code = SI_ASYNCIO; @@ -2228,6 +2229,7 @@ static void usbdev_remove(struct usb_device *udev) wake_up_all(&ps->wait); list_del_init(&ps->list); if (ps->discsignr) { + memset(&sinfo, 0, sizeof(sinfo)); sinfo.si_signo = ps->discsignr; sinfo.si_errno = EPIPE; sinfo.si_code = SI_ASYNCIO; From 9bd014f32671970a89ae9a8ccb0f5dd171485a50 Mon Sep 17 00:00:00 2001 From: Max Mansfield Date: Mon, 2 Mar 2015 18:38:02 -0700 Subject: [PATCH 173/204] usb: ftdi_sio: Add jtag quirk support for Cyber Cortex AV boards commit c7d373c3f0da2b2b78c4b1ce5ae41485b3ef848c upstream. This patch integrates Cyber Cortex AV boards with the existing ftdi_jtag_quirk in order to use serial port 0 with JTAG which is required by the manufacturers' software. Steps: 2 [ftdi_sio_ids.h] 1. Defined the device PID [ftdi_sio.c] 2. Added a macro declaration to the ids array, in order to enable the jtag quirk for the device. 
Signed-off-by: Max Mansfield Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index c625f55667f1..cf127a080644 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -815,6 +815,8 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_ELSTER_UNICOM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_JTAGCABLEII_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) }, + { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID), diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index ac703a6e2115..e8d352615297 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -38,6 +38,9 @@ #define FTDI_LUMEL_PD12_PID 0x6002 +/* Cyber Cortex AV by Fabulous Silicon (http://fabuloussilicon.com) */ +#define CYBER_CORTEX_AV_PID 0x8698 + /* * Marvell OpenRD Base, Client * http://www.open-rd.org From 20ba9f7595d0e1b6551422ad1503d4e9eb650504 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 24 Feb 2015 18:27:01 +0200 Subject: [PATCH 174/204] xhci: Allocate correct amount of scratchpad buffers commit 6596a926b0b6c80b730a1dd2fa91908e0a539c37 upstream. Include the high order bit fields for Max scratchpad buffers when calculating how many scratchpad buffers are needed. I'm surprised this hasn't caused more issues; we never allocated more than 32 buffers even if xhci needed more. Either we got lucky and xhci never really used past that area, or then we got enough zeroed dma memory anyway.
Should be backported as far back as possible Reported-by: Tim Chen Tested-by: Tim Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 627fcd9388ca..11f12f1d5b29 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -88,9 +88,10 @@ struct xhci_cap_regs { #define HCS_IST(p) (((p) >> 0) & 0xf) /* bits 4:7, max number of Event Ring segments */ #define HCS_ERST_MAX(p) (((p) >> 4) & 0xf) +/* bits 21:25 Hi 5 bits of Scratchpad buffers SW must allocate for the HW */ /* bit 26 Scratchpad restore - for save/restore HW state - not used yet */ -/* bits 27:31 number of Scratchpad buffers SW must allocate for the HW */ -#define HCS_MAX_SCRATCHPAD(p) (((p) >> 27) & 0x1f) +/* bits 27:31 Lo 5 bits of Scratchpad buffers SW must allocate for the HW */ +#define HCS_MAX_SCRATCHPAD(p) ((((p) >> 16) & 0x3e0) | (((p) >> 27) & 0x1f)) /* HCSPARAMS3 - hcs_params3 - bitmasks */ /* bits 0:7, Max U1 to U0 latency for the roothub ports */ From 919977b109215485706287f49383977dea92f878 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Fri, 6 Mar 2015 17:14:21 +0200 Subject: [PATCH 175/204] xhci: fix reporting of 0-sized URBs in control endpoint commit 45ba2154d12fc43b70312198ec47085f10be801a upstream. When a control transfer has a short data stage, the xHCI controller generates two transfer events: a COMP_SHORT_TX event that specifies the untransferred amount, and a COMP_SUCCESS event. But when the data stage is not short, only the COMP_SUCCESS event occurs. Therefore, xhci-hcd must set urb->actual_length to urb->transfer_buffer_length while processing the COMP_SUCCESS event, unless urb->actual_length was set already by a previous COMP_SHORT_TX event. 
The driver checks this by seeing whether urb->actual_length == 0, but this alone is the wrong test, as it is entirely possible for a short transfer to have an urb->actual_length = 0. This patch changes the xhci driver to rely on a new td->urb_length_set flag, which is set to true when a COMP_SHORT_TX event is received and the URB length updated at that stage. This fixes a bug which affected the HSO plugin, which relies on URBs with urb->actual_length == 0 to halt re-submitting the RX URB in the control endpoint. Signed-off-by: Aleksander Morgado Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 10 ++++++++-- drivers/usb/host/xhci.h | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index eb45ac843712..9948890ef93e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2064,7 +2064,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, if (event_trb != ep_ring->dequeue) { /* The event was for the status stage */ if (event_trb == td->last_trb) { - if (td->urb->actual_length != 0) { + if (td->urb_length_set) { /* Don't overwrite a previously set error code */ if ((*status == -EINPROGRESS || *status == 0) && @@ -2078,7 +2078,13 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, td->urb->transfer_buffer_length; } } else { - /* Maybe the event was for the data stage? */ + /* + * Maybe the event was for the data stage? If so, update + * already the actual_length of the URB and flag it as + * set, so that it is not overwritten in the event for + * the last TRB. 
+ */ + td->urb_length_set = true; td->urb->actual_length = td->urb->transfer_buffer_length - EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 11f12f1d5b29..373d4dada565 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1,3 +1,4 @@ + /* * xHCI host controller driver * @@ -1259,6 +1260,8 @@ struct xhci_td { struct xhci_segment *start_seg; union xhci_trb *first_trb; union xhci_trb *last_trb; + /* actual_length of the URB has already been set */ + bool urb_length_set; }; /* xHCI command default timeout value */ From 7c5f4dde19242e34f04e04087df40f35a15754be Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Mar 2015 10:39:03 +0100 Subject: [PATCH 176/204] net: irda: fix wait_until_sent poll timeout commit 2c3fbe3cf28fbd7001545a92a83b4f8acfd9fa36 upstream. In case an infinite timeout (0) is requested, the irda wait_until_sent implementation would use a zero poll timeout rather than the default 200ms. Note that wait_until_sent is currently never called with a 0-timeout argument due to a bug in tty_wait_until_sent. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- net/irda/ircomm/ircomm_tty.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 41ac7938268b..2ee29ed13bd4 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -820,7 +820,9 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) orig_jiffies = jiffies; /* Set poll time to 200 ms */ - poll_time = IRDA_MIN(timeout, msecs_to_jiffies(200)); + poll_time = msecs_to_jiffies(200); + if (timeout) + poll_time = min_t(unsigned long, timeout, poll_time); spin_lock_irqsave(&self->spinlock, flags); while (self->tx_skb && self->tx_skb->len) { From da90e1a218120d6a04cda86b09899de98132ff04 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Mar 2015 10:39:05 +0100 Subject: [PATCH 177/204] USB: serial: fix infinite wait_until_sent timeout commit f528bf4f57e43d1af4b2a5c97f09e43e0338c105 upstream. Make sure to handle an infinite timeout (0). Note that wait_until_sent is currently never called with a 0-timeout argument due to a bug in tty_wait_until_sent. Fixes: dcf010503966 ("USB: serial: add generic wait_until_sent implementation") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 8335b484f14e..a10648d2596b 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -261,7 +261,8 @@ void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout) * character or at least one jiffy. 
*/ period = max_t(unsigned long, (10 * HZ / bps), 1); - period = min_t(unsigned long, period, timeout); + if (timeout) + period = min_t(unsigned long, period, timeout); dev_dbg(&port->dev, "%s - timeout = %u ms, period = %u ms\n", __func__, jiffies_to_msecs(timeout), @@ -271,7 +272,7 @@ void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout) schedule_timeout_interruptible(period); if (signal_pending(current)) break; - if (time_after(jiffies, expire)) + if (timeout && time_after(jiffies, expire)) break; } } From 565acebb005569dc8527a9b2ad2c904ba92bf9d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Mar 2015 10:39:06 +0100 Subject: [PATCH 178/204] TTY: fix tty_wait_until_sent on 64-bit machines commit 79fbf4a550ed6a22e1ae1516113e6c7fa5d56a53 upstream. Fix overflow bug in tty_wait_until_sent on 64-bit machines, where an infinite timeout (0) would be passed to the underlying tty-driver's wait_until_sent-operation as a negative timeout (-1), causing it to return immediately. This manifests itself for example as tcdrain() returning immediately, drivers not honouring the drain flags when setting terminal attributes, or even dropped data on close as a requested infinite closing-wait timeout would be ignored. The first symptom was reported by Asier LLANO who noted that tcdrain() returned prematurely when using the ftdi_sio usb-serial driver. Fix this by passing 0 rather than MAX_SCHEDULE_TIMEOUT (LONG_MAX) to the underlying tty driver. Note that the serial-core wait_until_sent-implementation is not affected by this bug due to a lucky chance (comparison to an unsigned maximum timeout), and neither is the cyclades one that had an explicit check for negative timeouts, but all other tty drivers appear to be affected. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: ZIV-Asier Llano Palacios Signed-off-by: Johan Hovold Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ioctl.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 088b4ca7d805..b46aca5cc23c 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -217,11 +217,17 @@ void tty_wait_until_sent(struct tty_struct *tty, long timeout) #endif if (!timeout) timeout = MAX_SCHEDULE_TIMEOUT; + if (wait_event_interruptible_timeout(tty->write_wait, - !tty_chars_in_buffer(tty), timeout) >= 0) { - if (tty->ops->wait_until_sent) - tty->ops->wait_until_sent(tty, timeout); + !tty_chars_in_buffer(tty), timeout) < 0) { + return; } + + if (timeout == MAX_SCHEDULE_TIMEOUT) + timeout = 0; + + if (tty->ops->wait_until_sent) + tty->ops->wait_until_sent(tty, timeout); } EXPORT_SYMBOL(tty_wait_until_sent); From 9fd948c1b776066a84386d146f3a4e848b976bd5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 18 Feb 2015 10:34:50 +0700 Subject: [PATCH 179/204] USB: serial: fix potential use-after-free after failed probe commit 07fdfc5e9f1c966be8722e8fa927e5ea140df5ce upstream. Fix return value in probe error path, which could end up returning success (0) on errors. This could in turn lead to use-after-free or double free (e.g. in port_remove) when the port device is removed. 
Fixes: c706ebdfc895 ("USB: usb-serial: call port_probe and port_remove at the right times") Signed-off-by: Johan Hovold Acked-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/bus.c b/drivers/usb/serial/bus.c index 7229b265870a..5c56efeaf202 100644 --- a/drivers/usb/serial/bus.c +++ b/drivers/usb/serial/bus.c @@ -76,7 +76,7 @@ static int usb_serial_device_probe(struct device *dev) retval = device_create_file(dev, &dev_attr_port_number); if (retval) { if (driver->port_remove) - retval = driver->port_remove(port); + driver->port_remove(port); goto exit_with_autopm; } From d91c5de58cd9ed6d37f14bde906bb307bf681ba1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Feb 2015 22:19:57 -0500 Subject: [PATCH 180/204] autofs4 copy_dev_ioctl(): keep the value of ->size we'd used for allocation commit 0a280962dc6e117e0e4baa668453f753579265d9 upstream. X-Coverup: just ask spender Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/autofs4/dev-ioctl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 743c7c2c949d..6aa8312ad89f 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -95,7 +95,7 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) */ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in) { - struct autofs_dev_ioctl tmp; + struct autofs_dev_ioctl tmp, *res; if (copy_from_user(&tmp, in, sizeof(tmp))) return ERR_PTR(-EFAULT); @@ -103,7 +103,11 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i if (tmp.size < sizeof(tmp)) return ERR_PTR(-EINVAL); - return memdup_user(in, tmp.size); + res = memdup_user(in, tmp.size); + if (!IS_ERR(res)) + res->size = tmp.size; + + return res; } static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) From 
db32c77427f773d625bc1e27720bd98cbb807185 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Feb 2015 22:05:11 -0500 Subject: [PATCH 181/204] debugfs: leave freeing a symlink body until inode eviction commit 0db59e59299f0b67450c5db21f7f316c8fb04e84 upstream. As it is, we have debugfs_remove() racing with symlink traversals. Supply ->evict_inode() and do freeing there - inode will remain pinned until we are done with the symlink body. And rip the idiocy with checking if dentry is positive right after we'd verified debugfs_positive(), which is a stronger check... Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c7c83ff0f752..7269ec329c01 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -245,10 +245,19 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void debugfs_evict_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + if (S_ISLNK(inode->i_mode)) + kfree(inode->i_private); +} + static const struct super_operations debugfs_super_operations = { .statfs = simple_statfs, .remount_fs = debugfs_remount, .show_options = debugfs_show_options, + .evict_inode = debugfs_evict_inode, }; static int debug_fill_super(struct super_block *sb, void *data, int silent) @@ -465,23 +474,14 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) int ret = 0; if (debugfs_positive(dentry)) { - if (dentry->d_inode) { - dget(dentry); - switch (dentry->d_inode->i_mode & S_IFMT) { - case S_IFDIR: - ret = simple_rmdir(parent->d_inode, dentry); - break; - case S_IFLNK: - kfree(dentry->d_inode->i_private); - /* fall through */ - default: - simple_unlink(parent->d_inode, dentry); - break; - } - if (!ret) - d_delete(dentry); - dput(dentry); - } + dget(dentry); + if (S_ISDIR(dentry->d_inode->i_mode)) + 
ret = simple_rmdir(parent->d_inode, dentry); + else + simple_unlink(parent->d_inode, dentry); + if (!ret) + d_delete(dentry); + dput(dentry); } return ret; } From cf6c05a77c5ef41bd72f45a4a008724ffd393668 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Feb 2015 22:16:11 -0500 Subject: [PATCH 182/204] procfs: fix race between symlink removals and traversals commit 7e0e953bb0cf649f93277ac8fb67ecbb7f7b04a9 upstream. use_pde()/unuse_pde() in ->follow_link()/->put_link() resp. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/proc/generic.c | 12 ------------ fs/proc/inode.c | 21 +++++++++++++++++++++ fs/proc/internal.h | 1 + 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a2596afffae6..846b1d7852ed 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -163,17 +162,6 @@ void proc_free_inum(unsigned int inum) spin_unlock_irqrestore(&proc_inum_lock, flags); } -static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - nd_set_link(nd, __PDE_DATA(dentry->d_inode)); - return NULL; -} - -static const struct inode_operations proc_link_inode_operations = { - .readlink = generic_readlink, - .follow_link = proc_follow_link, -}; - /* * As some entries in /proc are volatile, we want to * get rid of unused dentries. 
This could be made diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 073aea60cf8f..843b8ef04e84 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -373,6 +374,26 @@ static const struct file_operations proc_reg_file_ops_no_compat = { }; #endif +static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct proc_dir_entry *pde = PDE(dentry->d_inode); + if (unlikely(!use_pde(pde))) + return ERR_PTR(-EINVAL); + nd_set_link(nd, pde->data); + return pde; +} + +static void proc_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +{ + unuse_pde(p); +} + +const struct inode_operations proc_link_inode_operations = { + .readlink = generic_readlink, + .follow_link = proc_follow_link, + .put_link = proc_put_link, +}; + struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { struct inode *inode = new_inode_pseudo(sb); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d600fb098b6a..ec335ef62533 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -202,6 +202,7 @@ struct pde_opener { int closing; struct completion *c; }; +extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; From b4301ed560bcb78eae05bfbe76e516bb549861b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 7 Mar 2015 21:08:46 +0000 Subject: [PATCH 183/204] sunrpc: fix braino in ->poll() commit 1711fd9addf214823b993468567cab1f8254fc51 upstream. POLL_OUT isn't what callers of ->poll() are expecting to see; it's actually __SI_POLL | 2 and it's a siginfo code, not a poll bitmap bit... 
Signed-off-by: Al Viro Cc: Bruce Fields Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 80fe5c86efd1..231b71944c52 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -930,7 +930,7 @@ static unsigned int cache_poll(struct file *filp, poll_table *wait, poll_wait(filp, &queue_wait, wait); /* alway allow write */ - mask = POLL_OUT | POLLWRNORM; + mask = POLLOUT | POLLWRNORM; if (!rp) return mask; From 125c50411b6df22afee8bb353b524f96b2ea71e8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 27 Feb 2015 18:40:31 +0100 Subject: [PATCH 184/204] tty: fix up atime/mtime mess, take four commit f0bf0bd07943bfde8f5ac39a32664810a379c7d3 upstream. This problem was taken care of three times already in * b0de59b5733d18b0d1974a060860a8b5c1b36a2e (TTY: do not update atime/mtime on read/write), * 37b7f3c76595e23257f61bd80b223de8658617ee (TTY: fix atime/mtime regression), and * b0b885657b6c8ef63a46bc9299b2a7715d19acde (tty: fix up atime/mtime mess, take three) But it still misses one point. As John Paul correctly points out, we do not care about setting date. If somebody ever changes wall time backwards (by mistake for example), tty timestamps are never updated until the original wall time passes. So check the absolute difference of times and if it is larger than "8 seconds or so", always update the time. That means we will update immediately when changing time. Ergo, CAP_SYS_TIME can foul the check, but it was always that way. Thanks John for serving me this so nicely debugged. 
Signed-off-by: Jiri Slaby Reported-by: John Paul Perry Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index d35afccdb6c9..2967b6eb4c70 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -992,8 +992,8 @@ EXPORT_SYMBOL(start_tty); /* We limit tty time update visibility to every 8 seconds or so. */ static void tty_update_time(struct timespec *time) { - unsigned long sec = get_seconds() & ~7; - if ((long)(sec - time->tv_sec) > 0) + unsigned long sec = get_seconds(); + if (abs(sec - time->tv_sec) & ~7) time->tv_sec = sec; } From 0bfa6e5b16a31f4218c12d8ab7f2064d8ab96e2f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 18 Dec 2014 10:02:41 +0100 Subject: [PATCH 185/204] ALSA: pcm: Don't leave PREPARED state after draining commit 70372a7566b5e552dbe48abdac08c275081d8558 upstream. When a PCM draining is performed to an empty stream that has been already in PREPARED state, the current code just ignores and leaves as it is, although the drain is supposed to set all such streams to SETUP state. This patch covers that overlooked case. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 175dca44c97e..707405cbc5c9 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1404,6 +1404,8 @@ static int snd_pcm_do_drain_init(struct snd_pcm_substream *substream, int state) if (! 
snd_pcm_playback_empty(substream)) { snd_pcm_do_start(substream, SNDRV_PCM_STATE_DRAINING); snd_pcm_post_start(substream, SNDRV_PCM_STATE_DRAINING); + } else { + runtime->status->state = SNDRV_PCM_STATE_SETUP; } break; case SNDRV_PCM_STATE_RUNNING: From c7120339ac0391877be7013a8c5bf9dafeb3e984 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Feb 2015 13:01:37 +0100 Subject: [PATCH 186/204] ALSA: hda - Add pin configs for ASUS mobo with IDT 92HD73XX codec commit 6426460e5d87810e042962281fe3c1e8fc256162 upstream. BIOS doesn't seem to set up pins for 5.1 and the SPDIF out, so we need to give explicitly here. Reported-and-tested-by: Misan Thropos Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 4ae5767a2cf5..44d3fb95ebba 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -85,6 +85,7 @@ enum { STAC_ALIENWARE_M17X, STAC_92HD89XX_HP_FRONT_JACK, STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK, + STAC_92HD73XX_ASUS_MOBO, STAC_92HD73XX_MODELS }; @@ -1915,7 +1916,18 @@ static const struct hda_fixup stac92hd73xx_fixups[] = { [STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK] = { .type = HDA_FIXUP_PINS, .v.pins = stac92hd89xx_hp_z1_g2_right_mic_jack_pin_configs, - } + }, + [STAC_92HD73XX_ASUS_MOBO] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + /* enable 5.1 and SPDIF out */ + { 0x0c, 0x01014411 }, + { 0x0d, 0x01014410 }, + { 0x0e, 0x01014412 }, + { 0x22, 0x014b1180 }, + { } + } + }, }; static const struct hda_model_fixup stac92hd73xx_models[] = { @@ -1927,6 +1939,7 @@ static const struct hda_model_fixup stac92hd73xx_models[] = { { .id = STAC_DELL_M6_BOTH, .name = "dell-m6" }, { .id = STAC_DELL_EQ, .name = "dell-eq" }, { .id = STAC_ALIENWARE_M17X, .name = "alienware" }, + { .id = STAC_92HD73XX_ASUS_MOBO, .name = "asus-mobo" }, {} }; 
@@ -1979,6 +1992,8 @@ static const struct snd_pci_quirk stac92hd73xx_fixup_tbl[] = { "HP Z1 G2", STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2b17, "unknown HP", STAC_92HD89XX_HP_FRONT_JACK), + SND_PCI_QUIRK(PCI_VENDOR_ID_ASUSTEK, 0x83f8, "ASUS AT4NM10", + STAC_92HD73XX_ASUS_MOBO), {} /* terminator */ }; From 7e95941876b4eaeec3052dc5fa998ff5f7096e7d Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Wed, 11 Feb 2015 11:32:06 -0500 Subject: [PATCH 187/204] sg: fix read() error reporting commit 3b524a683af8991b4eab4182b947c65f0ce1421b upstream. Fix SCSI generic read() incorrectly returning success after detecting an error. Signed-off-by: Tony Battersby Acked-by: Douglas Gilbert Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index df5e961484e1..eb81c98386b9 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -522,7 +522,7 @@ static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) { sg_io_hdr_t *hp = &srp->header; - int err = 0; + int err = 0, err2; int len; if (count < SZ_SG_IO_HDR) { @@ -551,8 +551,8 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) goto err_out; } err_out: - err = sg_finish_rem_req(srp); - return (0 == err) ? count : err; + err2 = sg_finish_rem_req(srp); + return err ? : err2 ? : count; } static ssize_t From 1152730c698dffedb14eaac8146e6187266d6622 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 16 Jan 2015 08:55:27 -0500 Subject: [PATCH 188/204] IB/qib: Do not write EEPROM commit 18c0b82a3e4501511b08d0e8676fb08ac08734a3 upstream. This changeset removes all the code that allows the driver to write to the EEPROM and update the recorded error counters and power on hours. 
These two stats are unused and writing them exposes a timing risk which could leave the EEPROM in a bad state preventing further normal operation of the HCA. Reviewed-by: Mike Marciniszyn Signed-off-by: Mitko Haralanov Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib.h | 9 +- drivers/infiniband/hw/qib/qib_eeprom.c | 181 ------------------------ drivers/infiniband/hw/qib/qib_iba6120.c | 2 - drivers/infiniband/hw/qib/qib_iba7220.c | 2 - drivers/infiniband/hw/qib/qib_iba7322.c | 2 - drivers/infiniband/hw/qib/qib_init.c | 1 - drivers/infiniband/hw/qib/qib_sysfs.c | 24 ---- 7 files changed, 1 insertion(+), 220 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 4d11575c2010..d1b30c66d604 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1055,12 +1055,6 @@ struct qib_devdata { /* control high-level access to EEPROM */ struct mutex eep_lock; uint64_t traffic_wds; - /* active time is kept in seconds, but logged in hours */ - atomic_t active_time; - /* Below are nominal shadow of EEPROM, new since last EEPROM update */ - uint8_t eep_st_errs[QIB_EEP_LOG_CNT]; - uint8_t eep_st_new_errs[QIB_EEP_LOG_CNT]; - uint16_t eep_hrs; /* * masks for which bits of errs, hwerrs that cause * each of the counters to increment. 
@@ -1278,8 +1272,7 @@ int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr, void *buffer, int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, const void *buffer, int len); void qib_get_eeprom_info(struct qib_devdata *); -int qib_update_eeprom_log(struct qib_devdata *dd); -void qib_inc_eeprom_err(struct qib_devdata *dd, u32 eidx, u32 incr); +#define qib_inc_eeprom_err(dd, eidx, incr) void qib_dump_lookup_output_queue(struct qib_devdata *); void qib_force_pio_avail_update(struct qib_devdata *); void qib_clear_symerror_on_linkup(unsigned long opaque); diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 4d5d71aaa2b4..e2280b07df02 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -267,190 +267,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) "Board SN %s did not pass functional test: %s\n", dd->serial, ifp->if_comment); - memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT); - /* - * Power-on (actually "active") hours are kept as little-endian value - * in EEPROM, but as seconds in a (possibly as small as 24-bit) - * atomic_t while running. - */ - atomic_set(&dd->active_time, 0); - dd->eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); - done: vfree(buf); bail:; } -/** - * qib_update_eeprom_log - copy active-time and error counters to eeprom - * @dd: the qlogic_ib device - * - * Although the time is kept as seconds in the qib_devdata struct, it is - * rounded to hours for re-write, as we have only 16 bits in EEPROM. - * First-cut code reads whole (expected) struct qib_flash, modifies, - * re-writes. Future direction: read/write only what we need, assuming - * that the EEPROM had to have been "good enough" for driver init, and - * if not, we aren't making it worse. 
- * - */ -int qib_update_eeprom_log(struct qib_devdata *dd) -{ - void *buf; - struct qib_flash *ifp; - int len, hi_water; - uint32_t new_time, new_hrs; - u8 csum; - int ret, idx; - unsigned long flags; - - /* first, check if we actually need to do anything. */ - ret = 0; - for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) { - if (dd->eep_st_new_errs[idx]) { - ret = 1; - break; - } - } - new_time = atomic_read(&dd->active_time); - - if (ret == 0 && new_time < 3600) - goto bail; - - /* - * The quick-check above determined that there is something worthy - * of logging, so get current contents and do a more detailed idea. - * read full flash, not just currently used part, since it may have - * been written with a newer definition - */ - len = sizeof(struct qib_flash); - buf = vmalloc(len); - ret = 1; - if (!buf) { - qib_dev_err(dd, - "Couldn't allocate memory to read %u bytes from eeprom for logging\n", - len); - goto bail; - } - - /* Grab semaphore and read current EEPROM. If we get an - * error, let go, but if not, keep it until we finish write. 
- */ - ret = mutex_lock_interruptible(&dd->eep_lock); - if (ret) { - qib_dev_err(dd, "Unable to acquire EEPROM for logging\n"); - goto free_bail; - } - ret = qib_twsi_blk_rd(dd, dd->twsi_eeprom_dev, 0, buf, len); - if (ret) { - mutex_unlock(&dd->eep_lock); - qib_dev_err(dd, "Unable read EEPROM for logging\n"); - goto free_bail; - } - ifp = (struct qib_flash *)buf; - - csum = flash_csum(ifp, 0); - if (csum != ifp->if_csum) { - mutex_unlock(&dd->eep_lock); - qib_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", - csum, ifp->if_csum); - ret = 1; - goto free_bail; - } - hi_water = 0; - spin_lock_irqsave(&dd->eep_st_lock, flags); - for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) { - int new_val = dd->eep_st_new_errs[idx]; - if (new_val) { - /* - * If we have seen any errors, add to EEPROM values - * We need to saturate at 0xFF (255) and we also - * would need to adjust the checksum if we were - * trying to minimize EEPROM traffic - * Note that we add to actual current count in EEPROM, - * in case it was altered while we were running. - */ - new_val += ifp->if_errcntp[idx]; - if (new_val > 0xFF) - new_val = 0xFF; - if (ifp->if_errcntp[idx] != new_val) { - ifp->if_errcntp[idx] = new_val; - hi_water = offsetof(struct qib_flash, - if_errcntp) + idx; - } - /* - * update our shadow (used to minimize EEPROM - * traffic), to match what we are about to write. - */ - dd->eep_st_errs[idx] = new_val; - dd->eep_st_new_errs[idx] = 0; - } - } - /* - * Now update active-time. We would like to round to the nearest hour - * but unless atomic_t are sure to be proper signed ints we cannot, - * because we need to account for what we "transfer" to EEPROM and - * if we log an hour at 31 minutes, then we would need to set - * active_time to -29 to accurately count the _next_ hour. 
- */ - if (new_time >= 3600) { - new_hrs = new_time / 3600; - atomic_sub((new_hrs * 3600), &dd->active_time); - new_hrs += dd->eep_hrs; - if (new_hrs > 0xFFFF) - new_hrs = 0xFFFF; - dd->eep_hrs = new_hrs; - if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) { - ifp->if_powerhour[0] = new_hrs & 0xFF; - hi_water = offsetof(struct qib_flash, if_powerhour); - } - if ((new_hrs >> 8) != ifp->if_powerhour[1]) { - ifp->if_powerhour[1] = new_hrs >> 8; - hi_water = offsetof(struct qib_flash, if_powerhour) + 1; - } - } - /* - * There is a tiny possibility that we could somehow fail to write - * the EEPROM after updating our shadows, but problems from holding - * the spinlock too long are a much bigger issue. - */ - spin_unlock_irqrestore(&dd->eep_st_lock, flags); - if (hi_water) { - /* we made some change to the data, uopdate cksum and write */ - csum = flash_csum(ifp, 1); - ret = eeprom_write_with_enable(dd, 0, buf, hi_water + 1); - } - mutex_unlock(&dd->eep_lock); - if (ret) - qib_dev_err(dd, "Failed updating EEPROM\n"); - -free_bail: - vfree(buf); -bail: - return ret; -} - -/** - * qib_inc_eeprom_err - increment one of the four error counters - * that are logged to EEPROM. - * @dd: the qlogic_ib device - * @eidx: 0..3, the counter to increment - * @incr: how much to add - * - * Each counter is 8-bits, and saturates at 255 (0xFF). They - * are copied to the EEPROM (aka flash) whenever qib_update_eeprom_log() - * is called, but it can only be called in a context that allows sleep. - * This function can be called even at interrupt level. 
- */ -void qib_inc_eeprom_err(struct qib_devdata *dd, u32 eidx, u32 incr) -{ - uint new_val; - unsigned long flags; - - spin_lock_irqsave(&dd->eep_st_lock, flags); - new_val = dd->eep_st_new_errs[eidx] + incr; - if (new_val > 255) - new_val = 255; - dd->eep_st_new_errs[eidx] = new_val; - spin_unlock_irqrestore(&dd->eep_st_lock, flags); -} diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 0232ae56b1fa..4e2613325183 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -2682,8 +2682,6 @@ static void qib_get_6120_faststats(unsigned long opaque) spin_lock_irqsave(&dd->eep_st_lock, flags); traffic_wds -= dd->traffic_wds; dd->traffic_wds += traffic_wds; - if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) - atomic_add(5, &dd->active_time); /* S/B #define */ spin_unlock_irqrestore(&dd->eep_st_lock, flags); qib_chk_6120_errormask(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 64d0ecb90cdc..3dbabf3a5d6d 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -3299,8 +3299,6 @@ static void qib_get_7220_faststats(unsigned long opaque) spin_lock_irqsave(&dd->eep_st_lock, flags); traffic_wds -= dd->traffic_wds; dd->traffic_wds += traffic_wds; - if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) - atomic_add(5, &dd->active_time); /* S/B #define */ spin_unlock_irqrestore(&dd->eep_st_lock, flags); done: mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 14103ffb4839..5f5f20f42231 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -4854,8 +4854,6 @@ static void qib_get_7322_faststats(unsigned long opaque) spin_lock_irqsave(&ppd->dd->eep_st_lock, flags); traffic_wds -= ppd->dd->traffic_wds; ppd->dd->traffic_wds += traffic_wds; - 
if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) - atomic_add(ACTIVITY_TIMER, &ppd->dd->active_time); spin_unlock_irqrestore(&ppd->dd->eep_st_lock, flags); if (ppd->cpspec->qdr_dfe_on && (ppd->link_speed_active & QIB_IB_QDR) && diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 173f805790da..8f936e36dd53 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -892,7 +892,6 @@ static void qib_shutdown_device(struct qib_devdata *dd) } } - qib_update_eeprom_log(dd); } /** diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 3c8e4e3caca6..b9ccbda7817d 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -611,28 +611,6 @@ static ssize_t store_chip_reset(struct device *device, return ret < 0 ? ret : count; } -static ssize_t show_logged_errs(struct device *device, - struct device_attribute *attr, char *buf) -{ - struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); - struct qib_devdata *dd = dd_from_dev(dev); - int idx, count; - - /* force consistency with actual EEPROM */ - if (qib_update_eeprom_log(dd) != 0) - return -ENXIO; - - count = 0; - for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) { - count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", - dd->eep_st_errs[idx], - idx == (QIB_EEP_LOG_CNT - 1) ? '\n' : ' '); - } - - return count; -} - /* * Dump tempsense regs. in decimal, to ease shell-scripts. 
*/ @@ -679,7 +657,6 @@ static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); -static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); @@ -693,7 +670,6 @@ static struct device_attribute *qib_attributes[] = { &dev_attr_nfreectxts, &dev_attr_serial, &dev_attr_boardversion, - &dev_attr_logged_errors, &dev_attr_tempsense, &dev_attr_localbus_info, &dev_attr_chip_reset, From b6b14e987c1f1aed20c494da061b5a90b8265520 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Feb 2015 15:51:56 -0800 Subject: [PATCH 189/204] nilfs2: fix potential memory overrun on inode commit 957ed60b53b519064a54988c4e31e0087e47d091 upstream. Each inode of nilfs2 stores a root node of a b-tree, and it turned out to have a memory overrun issue: Each b-tree node of nilfs2 stores a set of key-value pairs and the number of them (in "bn_nchildren" member of nilfs_btree_node struct), as well as a few other "bn_*" members. Since the value of "bn_nchildren" is used for operations on the key-values within the b-tree node, it can cause memory access overrun if a large number is incorrectly set to "bn_nchildren". For instance, nilfs_btree_node_lookup() function determines the range of binary search with it, and too large "bn_nchildren" leads nilfs_btree_node_get_key() in that function to overrun. As for intermediate b-tree nodes, this is prevented by a sanity check performed when each node is read from a drive, however, no sanity check has been done for root nodes stored in inodes. 
This patch fixes the issue by adding missing sanity check against b-tree root nodes so that it's called when on-memory inodes are read from ifile, inode metadata file. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/btree.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b2e3ff347620..ecdbae19a766 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -31,6 +31,8 @@ #include "alloc.h" #include "dat.h" +static void __nilfs_btree_init(struct nilfs_bmap *bmap); + static struct nilfs_btree_path *nilfs_btree_alloc_path(void) { struct nilfs_btree_path *path; @@ -368,6 +370,34 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, return ret; } +/** + * nilfs_btree_root_broken - verify consistency of btree root node + * @node: btree root node to be examined + * @ino: inode number + * + * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + */ +static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, + unsigned long ino) +{ + int level, flags, nchildren; + int ret = 0; + + level = nilfs_btree_node_get_level(node); + flags = nilfs_btree_node_get_flags(node); + nchildren = nilfs_btree_node_get_nchildren(node); + + if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || + level > NILFS_BTREE_LEVEL_MAX || + nchildren < 0 || + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", + ino, level, flags, nchildren); + ret = 1; + } + return ret; +} + int nilfs_btree_broken_node_block(struct buffer_head *bh) { int ret; @@ -1713,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, /* convert and insert */ dat = NILFS_BMAP_USE_VBN(btree) ? 
nilfs_bmap_get_dat(btree) : NULL; - nilfs_btree_init(btree); + __nilfs_btree_init(btree); if (nreq != NULL) { nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); @@ -2294,12 +2324,23 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_gather_data = NULL, }; -int nilfs_btree_init(struct nilfs_bmap *bmap) +static void __nilfs_btree_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); - return 0; +} + +int nilfs_btree_init(struct nilfs_bmap *bmap) +{ + int ret = 0; + + __nilfs_btree_init(bmap); + + if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), + bmap->b_inode->i_ino)) + ret = -EIO; + return ret; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) From 28e75102ed1fbc6529751865b6b12070cb161b8d Mon Sep 17 00:00:00 2001 From: Minh Duc Tran Date: Mon, 9 Feb 2015 18:54:09 +0000 Subject: [PATCH 190/204] fixed invalid assignment of 64bit mask to host dma_boundary for scatter gather segment boundary limit. commit f76a610a8b4b6280eaedf48f3af9d5d74e418b66 upstream. In reference to bug https://bugzilla.redhat.com/show_bug.cgi?id=1097141 Assert is seen with AMD cpu whenever calling pci_alloc_consistent. [ 29.406183] ------------[ cut here ]------------ [ 29.410505] kernel BUG at lib/iommu-helper.c:13! 
Signed-off-by: Minh Tran Fixes: 6733b39a1301b0b020bbcbf3295852e93e624cb1 Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/be2iscsi/be_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index d24a2867bc21..1ad39c799c74 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -564,7 +564,6 @@ static struct beiscsi_hba *beiscsi_hba_alloc(struct pci_dev *pcidev) "beiscsi_hba_alloc - iscsi_host_alloc failed\n"); return NULL; } - shost->dma_boundary = pcidev->dma_mask; shost->max_id = BE2_MAX_SESSIONS; shost->max_channel = 0; shost->max_cmd_len = BEISCSI_MAX_CMD_LEN; From 6e6493c26e176987bf215625ebcaea5954a05fa1 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 26 Jun 2014 23:55:41 +0800 Subject: [PATCH 191/204] clk: sunxi: Support factor clocks with N factor starting not from 0 commit 9a5e6c7eb5ccbb5f0d3a1dffce135f0a727f40e1 upstream. The PLLs on newer Allwinner SoC's, such as the A31 and A23, have a N multiplier factor that starts from 1, not 0. This patch adds an option to the factor clk driver's config data structures to specify the base value of N. 
Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- drivers/clk/sunxi/clk-factors.c | 2 +- drivers/clk/sunxi/clk-factors.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sunxi/clk-factors.c b/drivers/clk/sunxi/clk-factors.c index 88523f91d9b7..7555793097f2 100644 --- a/drivers/clk/sunxi/clk-factors.c +++ b/drivers/clk/sunxi/clk-factors.c @@ -70,7 +70,7 @@ static unsigned long clk_factors_recalc_rate(struct clk_hw *hw, p = FACTOR_GET(config->pshift, config->pwidth, reg); /* Calculate the rate */ - rate = (parent_rate * n * (k + 1) >> p) / (m + 1); + rate = (parent_rate * (n + config->n_start) * (k + 1) >> p) / (m + 1); return rate; } diff --git a/drivers/clk/sunxi/clk-factors.h b/drivers/clk/sunxi/clk-factors.h index f49851cc4380..441fdc3f5717 100644 --- a/drivers/clk/sunxi/clk-factors.h +++ b/drivers/clk/sunxi/clk-factors.h @@ -15,6 +15,7 @@ struct clk_factors_config { u8 mwidth; u8 pshift; u8 pwidth; + u8 n_start; }; struct clk *clk_register_factors(struct device *dev, const char *name, From 596c469f8f338819a95531c5bdf970b9d98e4bb9 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 27 Jan 2015 18:16:51 +0000 Subject: [PATCH 192/204] staging: comedi: comedi_compat32.c: fix COMEDI_CMD copy back commit 42b8ce6f55facfa101462e694d33fc6bca471138 upstream. `do_cmd_ioctl()` in "comedi_fops.c" handles the `COMEDI_CMD` ioctl. This returns `-EAGAIN` if it has copied a modified `struct comedi_cmd` back to user-space. (This occurs when the low-level Comedi driver's `do_cmdtest()` handler returns non-zero to indicate a problem with the contents of the `struct comedi_cmd`, or when the `struct comedi_cmd` has the `CMDF_BOGUS` flag set.) `compat_cmd()` in "comedi_compat32.c" handles the 32-bit compatible version of the `COMEDI_CMD` ioctl. 
Currently, it never copies a 32-bit compatible version of `struct comedi_cmd` back to user-space, which is at odds with the way the regular `COMEDI_CMD` ioctl is handled. To fix it, change `compat_cmd()` to copy a 32-bit compatible version of the `struct comedi_cmd` back to user-space when the main ioctl handler returns `-EAGAIN`. Signed-off-by: Ian Abbott Reviewed-by: H Hartley Sweeten Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_compat32.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/comedi_compat32.c b/drivers/staging/comedi/comedi_compat32.c index ad208cdd53d4..83bcf968ac63 100644 --- a/drivers/staging/comedi/comedi_compat32.c +++ b/drivers/staging/comedi/comedi_compat32.c @@ -270,7 +270,7 @@ static int compat_cmd(struct file *file, unsigned long arg) { struct comedi_cmd __user *cmd; struct comedi32_cmd_struct __user *cmd32; - int rc; + int rc, err; cmd32 = compat_ptr(arg); cmd = compat_alloc_user_space(sizeof(*cmd)); @@ -279,7 +279,15 @@ static int compat_cmd(struct file *file, unsigned long arg) if (rc) return rc; - return translated_ioctl(file, COMEDI_CMD, (unsigned long)cmd); + rc = translated_ioctl(file, COMEDI_CMD, (unsigned long)cmd); + if (rc == -EAGAIN) { + /* Special case: copy cmd back to user. */ + err = put_compat_cmd(cmd32, cmd); + if (err) + rc = err; + } + + return rc; } /* Handle 32-bit COMEDI_CMDTEST ioctl. */ From 8843f1a0121580f96e9c8cfdd50bda4906c9e381 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 12 Feb 2015 10:09:20 -0500 Subject: [PATCH 193/204] dm mirror: do not degrade the mirror on discard error commit f2ed51ac64611d717d1917820a01930174c2f236 upstream. It may be possible that a device claims discard support but it rejects discards with -EOPNOTSUPP. It happens when using loopback on ext2/ext3 filesystem driven by the ext4 driver. It may also happen if the underlying devices are moved from one disk on another. 
If discard error happens, we reject the bio with -EOPNOTSUPP, but we do not degrade the array. This patch fixes failed test shell/lvconvert-repair-transient.sh in the lvm2 testsuite if the testsuite is extracted on an ext2 or ext3 filesystem and it is being driven by the ext4 driver. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid1.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 699b5be68d31..678556b8ee4d 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -604,6 +604,15 @@ static void write_callback(unsigned long error, void *context) return; } + /* + * If the bio is discard, return an error, but do not + * degrade the array. + */ + if (bio->bi_rw & REQ_DISCARD) { + bio_endio(bio, -EOPNOTSUPP); + return; + } + for (i = 0; i < ms->nr_mirrors; i++) if (test_bit(i, &error)) fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR); From 0696dbb4ae63bce45c9f59df5f97e7f6d1f99226 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 13 Feb 2015 11:05:37 -0800 Subject: [PATCH 194/204] dm io: reject unsupported DISCARD requests with EOPNOTSUPP commit 37527b869207ad4c208b1e13967d69b8bba1fbf9 upstream. I created a dm-raid1 device backed by a device that supports DISCARD and another device that does NOT support DISCARD with the following dm configuration: # echo '0 2048 mirror core 1 512 2 /dev/sda 0 /dev/sdb 0' | dmsetup create moo # lsblk -D NAME DISC-ALN DISC-GRAN DISC-MAX DISC-ZERO sda 0 4K 1G 0 `-moo (dm-0) 0 4K 1G 0 sdb 0 0B 0B 0 `-moo (dm-0) 0 4K 1G 0 Notice that the mirror device /dev/mapper/moo advertises DISCARD support even though one of the mirror halves doesn't. If I issue a DISCARD request (via fstrim, mount -o discard, or ioctl BLKDISCARD) through the mirror, kmirrord gets stuck in an infinite loop in do_region() when it tries to issue a DISCARD request to sdb. 
The problem is that when we call do_region() against sdb, num_sectors is set to zero because q->limits.max_discard_sectors is zero. Therefore, "remaining" never decreases and the loop never terminates. To fix this: before entering the loop, check for the combination of REQ_DISCARD and no discard and return -EOPNOTSUPP to avoid hanging up the mirror device. This bug was found by the unfortunate coincidence of pvmove and a discard operation in the RHEL 6.5 kernel; upstream is also affected. Signed-off-by: Darrick J. Wong Acked-by: "Martin K. Petersen" Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-io.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index d1de1626a9d2..17cb2170e9d8 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -291,6 +291,12 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, unsigned short logical_block_size = queue_logical_block_size(q); sector_t num_sectors; + /* Reject unsupported discard requests */ + if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) { + dec_count(io, region, -EOPNOTSUPP); + return; + } + /* * where->count may be zero if rw holds a flush and we need to * send a zero-sized flush. From 6bed72e42e3d3b9ce3d34b9f08550d22b2f801f4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 17 Feb 2015 14:30:53 -0500 Subject: [PATCH 195/204] dm: fix a race condition in dm_get_md commit 2bec1f4a8832e74ebbe859f176d8a9cb20dd97f4 upstream. The function dm_get_md finds a device mapper device with a given dev_t, increases the reference count and returns the pointer. dm_get_md calls dm_find_md, dm_find_md takes _minor_lock, finds the device, tests that the device doesn't have DMF_DELETING or DMF_FREEING flag, drops _minor_lock and returns pointer to the device. dm_get_md then calls dm_get. dm_get calls BUG if the device has the DMF_FREEING flag, otherwise it increments the reference count. 
There is a possible race condition - after dm_find_md exits and before dm_get is called, there are no locks held, so the device may disappear or DMF_FREEING flag may be set, which results in BUG. To fix this bug, we need to call dm_get while we hold _minor_lock. This patch renames dm_find_md to dm_get_md and changes it so that it calls dm_get while holding the lock. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 204a59fd872f..a87f0c42cb8b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2270,7 +2270,7 @@ int dm_setup_md_queue(struct mapped_device *md) return 0; } -static struct mapped_device *dm_find_md(dev_t dev) +struct mapped_device *dm_get_md(dev_t dev) { struct mapped_device *md; unsigned minor = MINOR(dev); @@ -2281,12 +2281,15 @@ static struct mapped_device *dm_find_md(dev_t dev) spin_lock(&_minor_lock); md = idr_find(&_minor_idr, minor); - if (md && (md == MINOR_ALLOCED || - (MINOR(disk_devt(dm_disk(md))) != minor) || - dm_deleting_md(md) || - test_bit(DMF_FREEING, &md->flags))) { - md = NULL; - goto out; + if (md) { + if ((md == MINOR_ALLOCED || + (MINOR(disk_devt(dm_disk(md))) != minor) || + dm_deleting_md(md) || + test_bit(DMF_FREEING, &md->flags))) { + md = NULL; + goto out; + } + dm_get(md); } out: @@ -2294,16 +2297,6 @@ static struct mapped_device *dm_find_md(dev_t dev) return md; } - -struct mapped_device *dm_get_md(dev_t dev) -{ - struct mapped_device *md = dm_find_md(dev); - - if (md) - dm_get(md); - - return md; -} EXPORT_SYMBOL_GPL(dm_get_md); void *dm_get_mdptr(struct mapped_device *md) From 840732fdbf11a53ca0cf0893b14d809ae3d1f228 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 17 Feb 2015 14:34:00 -0500 Subject: [PATCH 196/204] dm snapshot: fix a possible invalid memory access on unload commit 
22aa66a3ee5b61e0f4a0bfeabcaa567861109ec3 upstream. When the snapshot target is unloaded, snapshot_dtr() waits until pending_exceptions_count drops to zero. Then, it destroys the snapshot. Therefore, the function that decrements pending_exceptions_count should not touch the snapshot structure after the decrement. pending_complete() calls free_pending_exception(), which decrements pending_exceptions_count, and then it performs up_write(&s->lock) and it calls retry_origin_bios() which dereferences s->origin. These two memory accesses to the fields of the snapshot may touch the dm_snapshot structure after it is freed. This patch moves the call to free_pending_exception() to the end of pending_complete(), so that the snapshot will not be destroyed while pending_complete() is in progress. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-snap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 944690bafd93..d892a05c84f4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1439,8 +1439,6 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) full_bio->bi_end_io = pe->full_bio_end_io; full_bio->bi_private = pe->full_bio_private; } - free_pending_exception(pe); - increment_pending_exceptions_done_count(); up_write(&s->lock); @@ -1457,6 +1455,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) } retry_origin_bios(s, origin_bios); + + free_pending_exception(pe); } static void commit_callback(void *context, int success) From 52857af3bd2a2673d45466f3519304ad6ce1c05e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 19 Jan 2015 14:47:27 +0000 Subject: [PATCH 197/204] staging: comedi: cb_pcidas64: fix incorrect AI range code handling commit be8e89087ec2d2c8a1ad1e3db64bf4efdfc3c298 upstream.
The hardware range code values and list of valid ranges for the AI subdevice is incorrect for several supported boards. The hardware range code values for all boards except PCI-DAS4020/12 is determined by calling `ai_range_bits_6xxx()` based on the maximum voltage of the range and whether it is bipolar or unipolar, however it only returns the correct hardware range code for the PCI-DAS60xx boards. For PCI-DAS6402/16 (and /12) it returns the wrong code for the unipolar ranges. For PCI-DAS64/Mx/16 it returns the wrong code for all the ranges and the comedi range table is incorrect. Change `ai_range_bits_6xxx()` to use a look-up table pointed to by new member `ai_range_codes` of `struct pcidas64_board` to map the comedi range table indices to the hardware range codes. Use a new comedi range table for the PCI-DAS64/Mx/16 boards (and the commented out variants). Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/cb_pcidas64.c | 122 ++++++++++++------- 1 file changed, 75 insertions(+), 47 deletions(-) diff --git a/drivers/staging/comedi/drivers/cb_pcidas64.c b/drivers/staging/comedi/drivers/cb_pcidas64.c index c3e5495b4f06..4220a44186c4 100644 --- a/drivers/staging/comedi/drivers/cb_pcidas64.c +++ b/drivers/staging/comedi/drivers/cb_pcidas64.c @@ -455,6 +455,29 @@ static const struct comedi_lrange ai_ranges_64xx = { } }; +static const uint8_t ai_range_code_64xx[8] = { + 0x0, 0x1, 0x2, 0x3, /* bipolar 10, 5, 2,5, 1.25 */ + 0x8, 0x9, 0xa, 0xb /* unipolar 10, 5, 2.5, 1.25 */ +}; + +/* analog input ranges for 64-Mx boards */ +static const struct comedi_lrange ai_ranges_64_mx = { + 7, { + BIP_RANGE(5), + BIP_RANGE(2.5), + BIP_RANGE(1.25), + BIP_RANGE(0.625), + UNI_RANGE(5), + UNI_RANGE(2.5), + UNI_RANGE(1.25) + } +}; + +static const uint8_t ai_range_code_64_mx[7] = { + 0x0, 0x1, 0x2, 0x3, /* bipolar 5, 2.5, 1.25, 0.625 */ + 0x9, 0xa, 0xb /* unipolar 5, 2.5, 1.25 */ +}; + /* analog input ranges for 60xx boards */ static const struct 
comedi_lrange ai_ranges_60xx = { 4, @@ -466,6 +489,10 @@ static const struct comedi_lrange ai_ranges_60xx = { } }; +static const uint8_t ai_range_code_60xx[4] = { + 0x0, 0x1, 0x4, 0x7 /* bipolar 10, 5, 0.5, 0.05 */ +}; + /* analog input ranges for 6030, etc boards */ static const struct comedi_lrange ai_ranges_6030 = { 14, @@ -487,6 +514,11 @@ static const struct comedi_lrange ai_ranges_6030 = { } }; +static const uint8_t ai_range_code_6030[14] = { + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, /* bip 10, 5, 2, 1, 0.5, 0.2, 0.1 */ + 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf /* uni 10, 5, 2, 1, 0.5, 0.2, 0.1 */ +}; + /* analog input ranges for 6052, etc boards */ static const struct comedi_lrange ai_ranges_6052 = { 15, @@ -509,6 +541,11 @@ static const struct comedi_lrange ai_ranges_6052 = { } }; +static const uint8_t ai_range_code_6052[15] = { + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, /* bipolar 10 ... 0.05 */ + 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf /* unipolar 10 ... 0.1 */ +}; + /* analog input ranges for 4020 board */ static const struct comedi_lrange ai_ranges_4020 = { 2, @@ -616,6 +653,7 @@ struct pcidas64_board { int ai_bits; /* analog input resolution */ int ai_speed; /* fastest conversion period in ns */ const struct comedi_lrange *ai_range_table; + const uint8_t *ai_range_code; int ao_nchan; /* number of analog out channels */ int ao_bits; /* analog output resolution */ int ao_scan_speed; /* analog output scan speed */ @@ -674,6 +712,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 10000, .layout = LAYOUT_64XX, .ai_range_table = &ai_ranges_64xx, + .ai_range_code = ai_range_code_64xx, .ao_range_table = &ao_ranges_64xx, .ao_range_code = ao_range_code_64xx, .ai_fifo = &ai_fifo_64xx, @@ -689,6 +728,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 10000, .layout = LAYOUT_64XX, .ai_range_table = &ai_ranges_64xx, + .ai_range_code = ai_range_code_64xx, .ao_range_table = &ao_ranges_64xx, .ao_range_code = ao_range_code_64xx, 
.ai_fifo = &ai_fifo_64xx, @@ -703,7 +743,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_bits = 16, .ao_scan_speed = 10000, .layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ao_range_table = &ao_ranges_64xx, .ao_range_code = ao_range_code_64xx, .ai_fifo = &ai_fifo_64xx, @@ -718,7 +759,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_bits = 16, .ao_scan_speed = 10000, .layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ao_range_table = &ao_ranges_64xx, .ao_range_code = ao_range_code_64xx, .ai_fifo = &ai_fifo_64xx, @@ -733,7 +775,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_bits = 16, .ao_scan_speed = 10000, .layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ao_range_table = &ao_ranges_64xx, .ao_range_code = ao_range_code_64xx, .ai_fifo = &ai_fifo_64xx, @@ -748,6 +791,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_bits = 16, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_60xx, + .ai_range_code = ai_range_code_60xx, .ao_range_table = &range_bipolar10, .ao_range_code = ao_range_code_60xx, .ai_fifo = &ai_fifo_60xx, @@ -763,6 +807,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 100000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_60xx, + .ai_range_code = ai_range_code_60xx, .ao_range_table = &range_bipolar10, .ao_range_code = ao_range_code_60xx, .ai_fifo = &ai_fifo_60xx, @@ -777,6 +822,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 100000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_60xx, + .ai_range_code = ai_range_code_60xx, .ao_range_table = &range_bipolar10, .ao_range_code = ao_range_code_60xx, .ai_fifo = &ai_fifo_60xx, @@ -792,6 +838,7 @@ static const 
struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 100000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_60xx, + .ai_range_code = ai_range_code_60xx, .ao_range_table = &range_bipolar10, .ao_range_code = ao_range_code_60xx, .ai_fifo = &ai_fifo_60xx, @@ -807,6 +854,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 10000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_6030, + .ai_range_code = ai_range_code_6030, .ao_range_table = &ao_ranges_6030, .ao_range_code = ao_range_code_6030, .ai_fifo = &ai_fifo_60xx, @@ -822,6 +870,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 10000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_6030, + .ai_range_code = ai_range_code_6030, .ao_range_table = &ao_ranges_6030, .ao_range_code = ao_range_code_6030, .ai_fifo = &ai_fifo_60xx, @@ -835,6 +884,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_nchan = 0, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_6030, + .ai_range_code = ai_range_code_6030, .ai_fifo = &ai_fifo_60xx, .has_8255 = 0, }, @@ -846,6 +896,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_nchan = 0, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_6030, + .ai_range_code = ai_range_code_6030, .ai_fifo = &ai_fifo_60xx, .has_8255 = 0, }, @@ -858,6 +909,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 0, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_60xx, + .ai_range_code = ai_range_code_60xx, .ai_fifo = &ai_fifo_60xx, .has_8255 = 0, }, @@ -871,6 +923,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 100000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_60xx, + .ai_range_code = ai_range_code_60xx, .ao_range_table = &range_bipolar10, .ao_range_code = ao_range_code_60xx, .ai_fifo = &ai_fifo_60xx, @@ -886,6 +939,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 100000, .layout = LAYOUT_60XX, .ai_range_table 
= &ai_ranges_60xx, + .ai_range_code = ai_range_code_60xx, .ao_range_table = &range_bipolar10, .ao_range_code = ao_range_code_60xx, .ai_fifo = &ai_fifo_60xx, @@ -901,6 +955,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 1000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_6052, + .ai_range_code = ai_range_code_6052, .ao_range_table = &ao_ranges_6030, .ao_range_code = ao_range_code_6030, .ai_fifo = &ai_fifo_60xx, @@ -916,6 +971,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 3333, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_6052, + .ai_range_code = ai_range_code_6052, .ao_range_table = &ao_ranges_6030, .ao_range_code = ao_range_code_6030, .ai_fifo = &ai_fifo_60xx, @@ -931,6 +987,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 1000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_6052, + .ai_range_code = ai_range_code_6052, .ao_range_table = &ao_ranges_6030, .ao_range_code = ao_range_code_6030, .ai_fifo = &ai_fifo_60xx, @@ -946,6 +1003,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 1000, .layout = LAYOUT_60XX, .ai_range_table = &ai_ranges_6052, + .ai_range_code = ai_range_code_6052, .ao_range_table = &ao_ranges_6030, .ao_range_code = ao_range_code_6030, .ai_fifo = &ai_fifo_60xx, @@ -980,6 +1038,7 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_scan_speed = 10000, .layout = LAYOUT_64XX, .ai_range_table = &ai_ranges_64xx, + .ai_range_code = ai_range_code_64xx, .ai_fifo = ai_fifo_64xx, .has_8255 = 1, }, @@ -991,7 +1050,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_nchan = 0, .ao_scan_speed = 10000, .layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ai_fifo = ai_fifo_64xx, .has_8255 = 1, }, @@ -1003,7 +1063,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_nchan = 0, .ao_scan_speed = 10000, 
.layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ai_fifo = ai_fifo_64xx, .has_8255 = 1, }, @@ -1015,7 +1076,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_nchan = 0, .ao_scan_speed = 10000, .layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ai_fifo = ai_fifo_64xx, .has_8255 = 1, }, @@ -1027,7 +1089,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_nchan = 2, .ao_scan_speed = 10000, .layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ai_fifo = ai_fifo_64xx, .has_8255 = 1, }, @@ -1039,7 +1102,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_nchan = 2, .ao_scan_speed = 10000, .layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ai_fifo = ai_fifo_64xx, .has_8255 = 1, }, @@ -1051,7 +1115,8 @@ static const struct pcidas64_board pcidas64_boards[] = { .ao_nchan = 2, .ao_scan_speed = 10000, .layout = LAYOUT_64XX, - .ai_range_table = &ai_ranges_64xx, + .ai_range_table = &ai_ranges_64_mx, + .ai_range_code = ai_range_code_64_mx, .ai_fifo = ai_fifo_64xx, .has_8255 = 1, }, @@ -1148,45 +1213,8 @@ static unsigned int ai_range_bits_6xxx(const struct comedi_device *dev, unsigned int range_index) { const struct pcidas64_board *thisboard = comedi_board(dev); - const struct comedi_krange *range = - &thisboard->ai_range_table->range[range_index]; - unsigned int bits = 0; - switch (range->max) { - case 10000000: - bits = 0x000; - break; - case 5000000: - bits = 0x100; - break; - case 2000000: - case 2500000: - bits = 0x200; - break; - case 1000000: - case 1250000: - bits = 0x300; - break; - case 500000: - bits = 0x400; - break; - case 200000: - case 250000: - bits = 0x500; - break; - case 
100000: - bits = 0x600; - break; - case 50000: - bits = 0x700; - break; - default: - comedi_error(dev, "bug! in ai_range_bits_6xxx"); - break; - } - if (range->min == 0) - bits += 0x900; - return bits; + return thisboard->ai_range_code[range_index] << 8; } static unsigned int hw_revision(const struct comedi_device *dev, From 3e886ecbf9b9543b5e2e6dac3b808b9a3c1552a8 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 29 Dec 2014 15:21:26 +0100 Subject: [PATCH 198/204] HID: input: fix confusion on conflicting mappings commit 6ce901eb61aa30ba8565c62049ee80c90728ef14 upstream. On an PC-101/103/104 keyboard (American layout) the 'Enter' key and its neighbours look like this: +---+ +---+ +-------+ | 1 | | 2 | | 5 | +---+ +---+ +-------+ +---+ +-----------+ | 3 | | 4 | +---+ +-----------+ On a PC-102/105 keyboard (European layout) it looks like this: +---+ +---+ +-------+ | 1 | | 2 | | | +---+ +---+ +-+ 4 | +---+ +---+ | | | 3 | | 5 | | | +---+ +---+ +-----+ (Note that the number of keys is the same, but key '5' is moved down and the shape of key '4' is changed. Keys '1' to '3' are exactly the same.) The keys 1-4 report the same scan-code in HID in both layouts, even though the keysym they produce is usually different depending on the XKB-keymap used by user-space. However, key '5' (US 'backslash'/'pipe') reports 0x31 for the upper layout and 0x32 for the lower layout, as defined by the HID spec. This is highly confusing as the linux-input API uses a single keycode for both. So far, this was never a problem as there never has been a keyboard with both of those keys present at the same time. It would have to look something like this: +---+ +---+ +-------+ | 1 | | 2 | | x31 | +---+ +---+ +-------+ +---+ +---+ +-----+ | 3 | |x32| | 4 | +---+ +---+ +-----+ HID can represent such a keyboard, but the linux-input API cannot. Furthermore, any user-space mapping would be confused by this and, luckily, no-one ever produced such hardware. 
Now, the HID input layer fixed this mess by mapping both 0x31 and 0x32 to the same keycode (KEY_BACKSLASH==0x2b). As only one of both physical keys is present on a hardware, this works just fine. Let's introduce hardware-vendors into this: ------------------------------------------- Unfortunately, it seems way too expensive to produce a different device for American and European layouts. Therefore, hardware-vendors put both keys, (0x31 and 0x32) on the same keyboard, but only one of them is hooked up to the physical button, the other one is 'dead'. This means, they can use the same hardware, with a different button-layout and automatically produce the correct HID events for American *and* European layouts. This is unproblematic for normal keyboards, as the 'dead' key will never report any KEY-DOWN events. But RollOver keyboards send the whole matrix on each key-event, allowing n-key roll-over mode. This means, we get a 0x31 and 0x32 event on each key-press. One of them will always be 0, the other reports the real state. As we map both to the same keycode, we will get spurious key-events, even though the real key-state never changed. The easiest way would be to blacklist 'dead' keys and never handle those. We could simply read the 'country' tag of USB devices and blacklist either key according to the layout. But... hardware vendors... want the same device for all countries and thus many of them set 'country' to 0 for all devices. Meh.. So we have to deal with this properly. As we cannot know which of the keys is 'dead', we either need a heuristic and track those keys, or we simply make use of our value-tracking for HID fields. We simply ignore HID events for absolute data if the data didn't change. As HID tracks events on the HID level, we haven't done the keycode translation, yet. Therefore, the 'dead' key is tracked independently of the real key, therefore, any events on it will be ignored.
This patch simply discards any HID events for absolute data if it didn't change compared to the last report. We need to ignore relative and buffered-byte reports for obvious reasons. But those cannot be affected by this bug, so we're fine. Preferably, we'd do this filtering on the HID-core level. But this might break a lot of custom drivers, if they do not follow the HID specs. Therefore, we do this late in hid-input just before we inject it into the input layer (which does the exact same filtering, but on the keycode level). If this turns out to break some devices, we might have to limit filtering to EV_KEY events. But lets try to do the Right Thing first, and properly filter any absolute data that didn't change. This patch is tagged for 'stable' as it fixes a lot of n-key RollOver hardware. We might wanna wait with backporting for a while, before we know it doesn't break anything else, though. Reported-by: Adam Goode Reported-by: Fredrik Hallenberg Tested-by: Fredrik Hallenberg Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-input.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 03a6acffed5d..8b264bd919ac 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1066,6 +1066,22 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct return; } + /* + * Ignore reports for absolute data if the data didn't change. This is + * not only an optimization but also fixes 'dead' key reports. Some + * RollOver implementations for localized keys (like BACKSLASH/PIPE; HID + * 0x31 and 0x32) report multiple keys, even though a localized keyboard + * can only have one of them physically available. The 'dead' keys + * report constant 0. As all map to the same keycode, they'd confuse + * the input layer. 
If we filter the 'dead' keys on the HID level, we + * skip the keycode translation and only forward real events. + */ + if (!(field->flags & (HID_MAIN_ITEM_RELATIVE | + HID_MAIN_ITEM_BUFFERED_BYTE)) && + usage->usage_index < field->maxusage && + value == field->value[usage->usage_index]) + return; + /* report the usage code as scancode if the key status has changed */ if (usage->type == EV_KEY && !!test_bit(usage->code, input->key) != value) input_event(input, EV_MSC, MSC_SCAN, usage->hid); From 0ec88c962fa283e437524c070aa135b2d47ae929 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 6 Jan 2015 22:34:19 +0100 Subject: [PATCH 199/204] HID: fixup the conflicting keyboard mappings quirk commit 8e7b341037db1835ee6eea64663013cbfcf33575 upstream. The ignore check that got added in 6ce901eb61 ("HID: input: fix confusion on conflicting mappings") needs to properly check for VARIABLE reports as well (ARRAY reports should be ignored), otherwise legitimate keyboards might break. Fixes: 6ce901eb61 ("HID: input: fix confusion on conflicting mappings") Reported-by: Fredrik Hallenberg Reported-by: David Herrmann Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 8b264bd919ac..a3915d12e746 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1078,6 +1078,7 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct */ if (!(field->flags & (HID_MAIN_ITEM_RELATIVE | HID_MAIN_ITEM_BUFFERED_BYTE)) && + (field->flags & HID_MAIN_ITEM_VARIABLE) && usage->usage_index < field->maxusage && value == field->value[usage->usage_index]) return; From a370f956f5ff526e98a6fc7e71ac4175ad0b2503 Mon Sep 17 00:00:00 2001 From: Fernando Soto Date: Fri, 14 Jun 2013 23:13:35 +0000 Subject: [PATCH 200/204] Drivers: hv: vmbus: incorrect device name is printed when child device is unregistered MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 84672369ffb98a51d4ddf74c20a23636da3ad615 upstream. Whenever a device is unregistered in vmbus_device_unregister (drivers/hv/vmbus_drv.c), the device name in the log message may contain garbage as the memory has already been freed by the time pr_info is called. Log example: [ 3149.170475] hv_vmbus: child device àõsèè0_5 unregistered By logging the message just before calling device_unregister, the correct device name is printed: [ 3145.034652] hv_vmbus: child device vmbus_0_5 unregistered Also changing register & unregister messages to debug to avoid unnecessarily cluttering the kernel log. Signed-off-by: Fernando M Soto Signed-off-by: K. Y. Srinivasan Cc: Joseph Salisbury Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 4004e54ef05d..f445b0840d33 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -686,7 +686,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) if (ret) pr_err("Unable to register child device\n"); else - pr_info("child device %s registered\n", + pr_debug("child device %s registered\n", dev_name(&child_device_obj->device)); return ret; @@ -698,14 +698,14 @@ int vmbus_device_register(struct hv_device *child_device_obj) */ void vmbus_device_unregister(struct hv_device *device_obj) { + pr_debug("child device %s unregistered\n", + dev_name(&device_obj->device)); + /* * Kick off the process of unregistering the device. 
* This will call vmbus_remove() and eventually vmbus_device_release() */ device_unregister(&device_obj->device); - - pr_info("child device %s unregistered\n", - dev_name(&device_obj->device)); } From 7f0240c5736ec77841f3cc3e0a91c2a8a1fa9357 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 19 Feb 2015 16:02:15 -0500 Subject: [PATCH 201/204] drm/radeon: fix 1 RB harvest config setup for TN/RL commit dbfb00c3e7e18439f2ebf67fe99bf7a50b5bae1e upstream. The logic was reversed from what the hw actually exposed. Fixes graphics corruption in certain harvest configurations. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/ni.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 451d7886644c..c254e467ac62 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -930,12 +930,12 @@ static void cayman_gpu_init(struct radeon_device *rdev) if ((rdev->config.cayman.max_backends_per_se == 1) && (rdev->flags & RADEON_IS_IGP)) { - if ((disabled_rb_mask & 3) == 1) { - /* RB0 disabled, RB1 enabled */ - tmp = 0x11111111; - } else { + if ((disabled_rb_mask & 3) == 2) { /* RB1 disabled, RB0 enabled */ tmp = 0x00000000; + } else { + /* RB0 disabled, RB1 enabled */ + tmp = 0x11111111; } } else { tmp = gb_addr_config & NUM_PIPES_MASK; From 8686fc3d2fb81fbcc91f873d7227069d21fc2fcf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 1 Mar 2015 10:41:37 +0000 Subject: [PATCH 202/204] ACPI / video: Load the module even if ACPI is disabled commit 6e17cb12881ba8d5e456b89f072dc6b70048af36 upstream. i915.ko depends upon the acpi/video.ko module and so refuses to load if ACPI is disabled at runtime if for example the BIOS is broken beyond repair. acpi/video provides an optional service for i915.ko and so we should just allow the modules to load, but do nothing in order to let the machines boot correctly.
Reported-by: Bill Augur Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Acked-by: Aaron Lu [ rjw: Fixed up the new comment in acpi_video_init() ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 82a01cc45f9c..0dc9ff61d7c2 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1953,6 +1953,17 @@ EXPORT_SYMBOL(acpi_video_unregister); static int __init acpi_video_init(void) { + /* + * Let the module load even if ACPI is disabled (e.g. due to + * a broken BIOS) so that i915.ko can still be loaded on such + * old systems without an AcpiOpRegion. + * + * acpi_video_register() will report -ENODEV later as well due + * to acpi_disabled when i915.ko tries to register itself afterwards. + */ + if (acpi_disabled) + return 0; + dmi_check_system(video_dmi_table); if (intel_opregion_present()) From 868fd3d3e338c81232050a0519b86e7d6b6462be Mon Sep 17 00:00:00 2001 From: Sergey Ryazanov Date: Wed, 4 Feb 2015 00:21:13 +0300 Subject: [PATCH 203/204] ath5k: fix spontaneus AR5312 freezes commit 8bfae4f9938b6c1f033a5159febe97e441d6d526 upstream. Sometimes, while the CPU has some load and ath5k is doing the wireless interface reset, the whole WiSoC completely freezes. A set of tests shows that using an atomic delay function while we wait for the interface reset helps to avoid such freezes. The easiest way to reproduce this issue: create a station interface, start continuous scan with wpa_supplicant and load CPU by something. Or just create multiple station interfaces and put them all in continuous scan. This patch partially reverts the commit 1846ac3dbec0 ("ath5k: Use usleep_range where possible"), which replaces initial udelay() by usleep_range(). I do not know the actual source of this issue, but it looks like the HW freeze is caused by a transaction on the internal SoC bus while the wireless block is in reset state.
Also I should note that I do not know how many chips are affected, but I did not see this issue with chips, other than AR5312. CC: Jiri Slaby CC: Nick Kossifidis CC: Luis R. Rodriguez Fixes: 1846ac3dbec0 ("ath5k: Use usleep_range where possible") Reported-by: Christophe Prevotaux Tested-by: Christophe Prevotaux Tested-by: Eric Bree Signed-off-by: Sergey Ryazanov Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath5k/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath5k/reset.c b/drivers/net/wireless/ath/ath5k/reset.c index a3399c4f13a9..b9b651ea9851 100644 --- a/drivers/net/wireless/ath/ath5k/reset.c +++ b/drivers/net/wireless/ath/ath5k/reset.c @@ -478,7 +478,7 @@ ath5k_hw_wisoc_reset(struct ath5k_hw *ah, u32 flags) regval = ioread32(reg); iowrite32(regval | val, reg); regval = ioread32(reg); - usleep_range(100, 150); + udelay(100); /* NB: should be atomic */ /* Bring BB/MAC out of reset */ iowrite32(regval & ~val, reg); From 7f4e64246049cef5ae1eca37eec1701a9477799e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 18 Mar 2015 13:22:50 +0100 Subject: [PATCH 204/204] Linux 3.10.72 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d8b42f71ea5a..211bb34102bf 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 71 +SUBLEVEL = 72 EXTRAVERSION = NAME = TOSSUG Baby Fish