From b101957a5871ec616e18a8a6f0330d0e06a05754 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 24 Jun 2013 15:47:22 +1000 Subject: [PATCH 001/102] powerpc/hw_brk: Fix setting of length for exact mode breakpoints commit b0b0aa9c7faf94e92320eabd8a1786c7747e40a8 upstream. The smallest match region for both the DABR and DAWR is 8 bytes, so the kernel needs to filter matches when users want to look at regions smaller than this. Currently we set the length of PPC_BREAKPOINT_MODE_EXACT breakpoints to 8. This is wrong as in exact mode we should only match on 1 address, hence the length should be 1. This ensures that the kernel will filter out any exact mode hardware breakpoint matches on any addresses other than the requested one. Signed-off-by: Michael Neuling Reported-by: Edjunior Barbosa Machado Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 98c2fc198712..64f7bd5b1b0f 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1449,7 +1449,9 @@ static long ppc_set_hwdebug(struct task_struct *child, */ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) { len = bp_info->addr2 - bp_info->addr; - } else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { + } else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + len = 1; + else { ptrace_put_breakpoints(child); return -EINVAL; } From 277b5ae153a96d594ae6d61b244b936d6f77ff56 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 24 Jun 2013 15:47:23 +1000 Subject: [PATCH 002/102] powerpc/hw_brk: Fix clearing of extraneous IRQ commit 540e07c67efe42ef6b6be4f1956931e676d58a15 upstream. 
In 9422de3 "powerpc: Hardware breakpoints rewrite to handle non DABR breakpoint registers" we changed the way we mark extraneous irqs with this: - info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) && - (dar - bp->attr.bp_addr < bp->attr.bp_len)); + if (!((bp->attr.bp_addr <= dar) && + (dar - bp->attr.bp_addr < bp->attr.bp_len))) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; Unfortunately this is bogus as it never clears extraneous IRQ if it's already set. This correctly clears extraneous IRQ before possibly setting it. Signed-off-by: Michael Neuling Reported-by: Edjunior Barbosa Machado Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/hw_breakpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index a949bdfc9623..1150ae7c22c3 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -250,6 +250,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) * we still need to single-step the instruction, but we don't * generate an event. */ + info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; if (!((bp->attr.bp_addr <= dar) && (dar - bp->attr.bp_addr < bp->attr.bp_len))) info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; From 3b743326ed2edd5e118950874f7bdaed5759f977 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 1 Jul 2013 14:19:50 +1000 Subject: [PATCH 003/102] powerpc/hw_brk: Fix off by one error when validating DAWR region end commit e2a800beaca1f580945773e57d1a0e7cd37b1056 upstream. The Data Address Watchpoint Register (DAWR) on POWER8 can take a 512 byte range but this range must not cross a 512 byte boundary. Unfortunately we were off by one when calculating the end of the region, hence we were not allowing some breakpoint regions which were actually valid. This fixes this error. 
Signed-off-by: Michael Neuling Reported-by: Edjunior Barbosa Machado Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1150ae7c22c3..f0b47d1a6b0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -176,7 +176,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ if ((bp->attr.bp_addr >> 10) != - ((bp->attr.bp_addr + bp->attr.bp_len) >> 10)) + ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10)) return -EINVAL; } if (info->len > From e544a74525b9accf4919822196f29cd967e7e5c0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 1 Jul 2013 17:54:09 +1000 Subject: [PATCH 004/102] powerpc/powernv: Fix iommu initialization again commit 74251fe21bfa9310ddba9e0436d1fcf389e602ee upstream. So because those things always end up in trainwrecks... In 7846de406 we moved back the iommu initialization earlier, essentially undoing 37f02195b which was causing us endless trouble... except that in the meantime we had merged 959c9bdd58 (to workaround the original breakage) which is now ... broken :-) This fixes it by doing a partial revert of the latter (we keep the ppc_md. path which will be needed in the hotplug case, which happens also during some EEH error recovery situations). 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci-ioda.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9c9d15e4cdf2..7816beff1db8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -441,6 +441,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev set_iommu_table_base(&pdev->dev, &pe->tce32_table); } +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, &pe->tce32_table); + if (dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, u64 *startp, u64 *endp) { @@ -596,6 +607,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } iommu_init_table(tbl, phb->hose->node); + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); + return; fail: /* XXX Failure: Try to fallback to 64-bit only ? */ @@ -667,6 +683,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } iommu_init_table(tbl, phb->hose->node); + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); + return; fail: if (pe->tce32_seg >= 0) From d6ea4422c89da1b08029d4db10635942a3b29b2a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:15 +1000 Subject: [PATCH 005/102] powerpc/tm: Fix writing top half of MSR on 32 bit signals commit 1d25f11fdbcc5390d68efd98c28900bfd29b264c upstream. The MSR TM controls are in the top 32 bits of the MSR hence on 32 bit signals, we stick the top half of the MSR in the checkpointed signal context so that the user can access it. 
Unfortunately, we don't currently write anything to the checkpointed signal context when coming in from a non transactional process and hence the top MSR bits can contain junk. This updates the 32 bit signal handling code to always write something to the top MSR bits so that users know if the process is transactional or not and the kernel can use it on signal return. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 201385c3a1ae..5bc819f50af6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, * altivec/spe instructions at some point. */ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - int sigret, int ctx_has_vsx_region) + struct mcontext __user *tm_frame, int sigret, + int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -475,6 +476,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (__put_user(msr, &frame->mc_gregs[PT_MSR])) return 1; + /* We need to write 0 the MSR top 32 bits in the tm frame so that we + * can check it on the restore to see if TM is active + */ + if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) + return 1; + if (sigret) { /* Set up the sigreturn trampoline: li r0,sigret; sc */ if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) @@ -952,6 +959,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; + struct mcontext __user *tm_frame = NULL; void __user *addr; unsigned long newsp = 0; int sigret; @@ -985,23 +993,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef
CONFIG_PPC_TRANSACTIONAL_MEM + tm_frame = &rt_sf->uc_transact.uc_mcontext; if (MSR_TM_ACTIVE(regs->msr)) { - if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, - &rt_sf->uc_transact.uc_mcontext, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret)) goto badframe; } else #endif - if (save_user_regs(regs, frame, sigret, 1)) + { + if (save_user_regs(regs, frame, tm_frame, sigret, 1)) goto badframe; + } regs->link = tramp; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) - || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), - &rt_sf->uc_transact.uc_regs)) + || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; } else @@ -1170,7 +1179,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) - || save_user_regs(regs, mctx, 0, ctx_has_vsx_region) + || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked) || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) return -EFAULT; @@ -1392,6 +1401,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, { struct sigcontext __user *sc; struct sigframe __user *frame; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; int sigret; unsigned long tramp; @@ -1425,6 +1435,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; if (MSR_TM_ACTIVE(regs->msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, sigret)) @@ -1432,8 +1443,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } else #endif - if (save_user_regs(regs, &frame->mctx, sigret, 1)) + { + if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) goto badframe; + } regs->link = tramp; From
743834135bc07e3bb9d6166607b874443fe1537c Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:16 +1000 Subject: [PATCH 006/102] powerpc/tm: Fix 32 bit non-rt signals commit fee55450710dff32a13ae30b4129ec7b5a4b44d0 upstream. Currently sys_sigreturn() is TM unaware. Therefore, if we take a 32 bit signal without SIGINFO (non RT) inside a transaction, on signal return we don't restore the signal frame correctly. This checks if the signal frame being restored is an active transaction, and if so, it copies the additional state to ptregs so it can be restored. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 5bc819f50af6..fa81462f6987 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1494,16 +1494,22 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, struct pt_regs *regs) { + struct sigframe __user *sf; struct sigcontext __user *sc; struct sigcontext sigctx; struct mcontext __user *sr; void __user *addr; sigset_t set; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct mcontext __user *mcp, *tm_mcp; + unsigned long msr_hi; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sc = &sf->sctx; addr = sc; if (copy_from_user(&sigctx, sc, sizeof(sigctx))) goto badframe; @@ -1520,11 +1526,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, #endif set_current_blocked(&set); - sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); - addr =
sr; - if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) - || restore_user_regs(regs, sr, 1)) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + mcp = (struct mcontext __user *)&sf->mctx; + tm_mcp = (struct mcontext __user *)&sf->mctx_transact; + if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) goto badframe; + if (MSR_TM_ACTIVE(msr_hi<<32)) { + if (!cpu_has_feature(CPU_FTR_TM)) + goto badframe; + if (restore_tm_user_regs(regs, mcp, tm_mcp)) + goto badframe; + } else +#endif + { + sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); + addr = sr; + if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) + || restore_user_regs(regs, sr, 1)) + goto badframe; + } set_thread_flag(TIF_RESTOREALL); return 0; From bc8ae5222e9e42582bd32028c638a5b4517e69e2 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:17 +1000 Subject: [PATCH 007/102] powerpc/tm: Fix restoration of MSR on 32bit signal return commit 2c27a18f8736da047bef2b997bdd48efc667e3c9 upstream. Currently we clear out the MSR TM bits on signal return assuming that the signal should never return to an active transaction. This is bogus as the user may do this. It's most likely the transaction will be doomed due to a treclaim but that's a problem for the HW not the kernel. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel. That code has now gone but we didn't correctly fix up this part of the signals code which still makes the assumption that it must be returning to a suspended transaction. This pulls out both MSR TM bits from the user supplied context rather than just setting TM suspend. We pull out only the bits needed to ensure the user can't do anything dangerous to the MSR. 
Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index fa81462f6987..364cb1e7300e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -754,7 +754,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { long err; - unsigned long msr; + unsigned long msr, msr_hi; #ifdef CONFIG_VSX int i; #endif @@ -859,8 +859,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; + /* Get the top half of the MSR */ + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + return 1; + /* Pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { From f6ff89fc47b05a061017200128ce154ae7165469 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:18 +1000 Subject: [PATCH 008/102] powerpc/tm: Fix return of 32bit rt signals to active transactions commit 55e4341850ac56e63a3eefe9583a9000042164fa upstream. Currently we only restore signals which are transactionally suspended but it's possible that the transaction can be restored even when it's active. Most likely this will result in a transactional rollback by the hardware as the transaction will have been doomed by an earlier treclaim. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel.
That code has now gone but we didn't correctly fix up this part of the signals code which still makes assumptions based on having software rollback. This changes the signal return code to always restore both contexts on 32 bit rt signal return. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 364cb1e7300e..0f83122e6676 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1245,7 +1245,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) goto bad; - if (MSR_TM_SUSPENDED(msr_hi<<32)) { + if (MSR_TM_ACTIVE(msr_hi<<32)) { /* We only recheckpoint on return if we're * transaction. */ From 81bcd526fea6383be0361b1827fcac656d276d57 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:19 +1000 Subject: [PATCH 009/102] powerpc/tm: Fix return of active 64bit signals commit 87b4e5393af77f5cba124638f19f6c426e210aec upstream. Currently we only restore signals which are transactionally suspended but it's possible that the transaction can be restored even when it's active. Most likely this will result in a transactional rollback by the hardware as the transaction will have been doomed by an earlier treclaim. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel. That code has now gone but we didn't correctly fix up this part of the signals code which still makes assumptions based on having software rollback. This changes the signal return code to always restore both contexts on 64 bit signal return. It also ensures that the MSR TM bits are properly restored from the signal context which they are not currently. 
Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_64.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 345947367ec0..887e99d85bc2 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -410,6 +410,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + /* pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); /* The following non-GPR non-FPR non-VR state is also checkpointed: */ @@ -505,8 +509,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this: */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { @@ -654,7 +656,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; - if (MSR_TM_SUSPENDED(msr)) { + if (MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; if (__get_user(uc_transact, &uc->uc_link)) From 497f0957430403bc4e3a0c776da3907fe769b64f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Jun 2013 17:47:54 +1000 Subject: [PATCH 010/102] powerpc: Remove unreachable relocation on exception handlers commit 1d567cb4bd42d560a7621cac6f6aebe87343689e upstream. We have relocation on exception handlers defined for h_data_storage and h_instr_storage.
However we will never take relocation on exceptions for these because they can only come from a guest, and we never take relocation on exceptions when we transition from guest to host. We also have a handler for hmi_exception (Hypervisor Maintenance) which is defined in the architecture to never be delivered with relocation on, see v2.07 Book III-S section 6.5. So remove the handlers, leaving a branch to self just to be double extra paranoid. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 40e4a17c8ba0..0a9fdea2fc0f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -793,14 +793,10 @@ system_call_relon_pSeries: STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) . = 0x4e00 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_data_storage_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e20 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_instr_storage_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e40 SET_SCRATCH0(r13) @@ -808,9 +804,7 @@ system_call_relon_pSeries: b emulation_assist_relon_hv . = 0x4e60 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ .
= 0x4e80 SET_SCRATCH0(r13) @@ -1180,14 +1174,8 @@ tm_unavailable_common: __end_handlers: /* Equivalents to the above handlers for relocation-on interrupt vectors */ - STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00) - STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20) STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) - STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) From 77d8caacd2924ca7cbb989c30709503e93c7f026 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Jun 2013 17:47:55 +1000 Subject: [PATCH 011/102] powerpc: Remove KVMTEST from RELON exception handlers commit c9f69518e5f08170bc857984a077f693d63171df upstream. KVMTEST is a macro which checks whether we are taking an exception from guest context, if so we branch out of line and eventually call into the KVM code to handle the switch. When running real guests on bare metal (HV KVM) the hardware ensures that we never take a relocation on exception when transitioning from guest to host. For PR KVM we disable relocation on exceptions ourself in kvmppc_core_init_vm(), as of commit a413f47 "Disable relocation on exceptions whenever PR KVM is active". So convert all the RELON macros to use NOTEST, and drop the remaining KVM_HANDLER() definitions we have for 0xe40 and 0xe80. 
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/exception-64s.h | 8 ++++---- arch/powerpc/kernel/exceptions-64s.S | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 46793b58a761..07ca627e52c0 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -358,12 +358,12 @@ label##_relon_pSeries: \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_STD, KVMTEST_PR, vec) + EXC_STD, NOTEST, vec) #define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ .globl label##_relon_pSeries; \ label##_relon_pSeries: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD) #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ @@ -374,12 +374,12 @@ label##_relon_hv: \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_HV, KVMTEST, vec) + EXC_HV, NOTEST, vec) #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ .globl label##_relon_hv; \ label##_relon_hv: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV) /* This associate vector numbers with bits in paca->irq_happened */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0a9fdea2fc0f..6bd676391a6d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1175,9 +1175,7 @@ __end_handlers: /* Equivalents to the above handlers for relocation-on interrupt vectors */ STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) - 
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) From 8e0af91a12c135f6c22e472660f3400d46ed78ac Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Jun 2013 17:47:56 +1000 Subject: [PATCH 012/102] powerpc: Rename and flesh out the facility unavailable exception handler commit 021424a1fce335e05807fd770eb8e1da30a63eea upstream. The exception at 0xf60 is not the TM (Transactional Memory) unavailable exception, it is the "Facility Unavailable Exception", rename it as such. Flesh out the handler to acknowledge the fact that it can be called for many reasons, one of which is TM being unavailable. Use STD_EXCEPTION_COMMON() for the exception body, for some reason we had it open-coded, I've checked the generated code is identical. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 21 ++++++------------ arch/powerpc/kernel/traps.c | 33 +++++++++++++++++++++------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6bd676391a6d..d55a63c3559a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -341,10 +341,11 @@ vsx_unavailable_pSeries_1: EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_pSeries +facility_unavailable_trampoline: . 
= 0xf60 SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b tm_unavailable_pSeries + b facility_unavailable_pSeries #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) @@ -522,7 +523,7 @@ denorm_done: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) - STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) /* @@ -829,11 +830,11 @@ vsx_unavailable_relon_pSeries_1: EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_relon_pSeries -tm_unavailable_relon_pSeries_1: +facility_unavailable_relon_trampoline: . = 0x4f60 SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b tm_unavailable_relon_pSeries + b facility_unavailable_relon_pSeries STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION @@ -1159,15 +1160,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl .vsx_unavailable_exception b .ret_from_except - .align 7 - .globl tm_unavailable_common -tm_unavailable_common: - EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN) - bl .save_nvgprs - DISABLE_INTS - addi r3,r1,STACK_FRAME_OVERHEAD - bl .tm_unavailable_exception - b .ret_from_except + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) .align 7 .globl __end_handlers @@ -1180,7 +1173,7 @@ __end_handlers: STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c0e5caf8ccc7..2053bbd26a06 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1282,25 +1282,42 @@ void 
vsx_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } -void tm_unavailable_exception(struct pt_regs *regs) +void facility_unavailable_exception(struct pt_regs *regs) { + static char *facility_strings[] = { + "FPU", + "VMX/VSX", + "DSCR", + "PMU SPRs", + "BHRB", + "TM", + "AT", + "EBB", + "TAR", + }; + char *facility; + u64 value; + + value = mfspr(SPRN_FSCR) >> 56; + /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - /* Currently we never expect a TMU exception. Catch - * this and kill the process! - */ - printk(KERN_EMERG "Unexpected TM unavailable exception at %lx " - "(msr %lx)\n", - regs->nip, regs->msr); + if (value < ARRAY_SIZE(facility_strings)) + facility = facility_strings[value]; + else + facility = "unknown"; + + pr_err("Facility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", + facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return; } - die("Unexpected TM unavailable exception", regs, SIGABRT); + die("Unexpected facility unavailable exception", regs, SIGABRT); } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM From d24966cf890f3b22e379654ba70a38ad9a67d9db Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Jun 2013 17:47:57 +1000 Subject: [PATCH 013/102] powerpc: Wire up the HV facility unavailable exception commit b14b6260efeee6eb8942c6e6420e31281892acb6 upstream. Similar to the facility unavailable exception, except the facilities are controlled by HFSCR. Adapt the facility_unavailable_exception() so it can be called for either the regular or Hypervisor facility unavailable exceptions.
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 15 +++++++++++++++ arch/powerpc/kernel/traps.c | 16 ++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d55a63c3559a..4e00d223b2e3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -347,6 +347,12 @@ facility_unavailable_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b facility_unavailable_pSeries +hv_facility_unavailable_trampoline: + . = 0xf80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b facility_unavailable_hv + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) @@ -525,6 +531,8 @@ denorm_done: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) + STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -836,6 +844,12 @@ facility_unavailable_relon_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b facility_unavailable_relon_pSeries +hv_facility_unavailable_relon_trampoline: + . = 0x4f80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b facility_unavailable_relon_hv + STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION . 
= 0x5500 @@ -1174,6 +1188,7 @@ __end_handlers: STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) + STD_RELON_EXCEPTION_HV_OOL(0xf80, facility_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2053bbd26a06..e4f205a209d2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1295,10 +1295,18 @@ void facility_unavailable_exception(struct pt_regs *regs) "EBB", "TAR", }; - char *facility; + char *facility, *prefix; u64 value; - value = mfspr(SPRN_FSCR) >> 56; + if (regs->trap == 0xf60) { + value = mfspr(SPRN_FSCR); + prefix = ""; + } else { + value = mfspr(SPRN_HFSCR); + prefix = "Hypervisor "; + } + + value = value >> 56; /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) @@ -1309,8 +1317,8 @@ void facility_unavailable_exception(struct pt_regs *regs) else facility = "unknown"; - pr_err("Facility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", - facility, regs->nip, regs->msr); + pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", + prefix, facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); From 0288917da5e378ea1fc0290f824d85ac9e07570d Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 20 Mar 2013 14:30:12 +0800 Subject: [PATCH 014/102] powerpc/smp: Section mismatch from smp_release_cpus to __initdata spinning_secondaries commit 8246aca7058f3f2c2ae503081777965cd8df7b90 upstream. the smp_release_cpus is a normal function and called in normal environments, but it calls the __initdata spinning_secondaries. We need to modify spinning_secondaries to match smp_release_cpus.
the related warning: (the linker report boot_paca.33377, but it should be spinning_secondaries) ----------------------------------------------------------------------------- WARNING: arch/powerpc/kernel/built-in.o(.text+0x23176): Section mismatch in reference from the function .smp_release_cpus() to the variable .init.data:boot_paca.33377 The function .smp_release_cpus() references the variable __initdata boot_paca.33377. This is often because .smp_release_cpus lacks a __initdata annotation or the annotation of boot_paca.33377 is wrong. WARNING: arch/powerpc/kernel/built-in.o(.text+0x231fe): Section mismatch in reference from the function .smp_release_cpus() to the variable .init.data:boot_paca.33377 The function .smp_release_cpus() references the variable __initdata boot_paca.33377. This is often because .smp_release_cpus lacks a __initdata annotation or the annotation of boot_paca.33377 is wrong. ----------------------------------------------------------------------------- Signed-off-by: Chen Gang Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e379d3fd1694..389fb8077cc9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -76,7 +76,7 @@ #endif int boot_cpuid = 0; -int __initdata spinning_secondaries; +int spinning_secondaries; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions From 910a165889c9e0c79cb96883db3948c5eea204e2 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 24 Jun 2013 22:08:05 -0500 Subject: [PATCH 015/102] powerpc/numa: Do not update sysfs cpu registration from invalid context commit dd023217e17e72b46fb4d49c7734c426938c3dba upstream. The topology update code that updates the cpu node registration in sysfs should not be called while in stop_machine(). 
The register/unregister calls take a lock and may sleep. This patch moves these calls outside of the call to stop_machine(). Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/numa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 88c0425dc0a8..2859a1f52279 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1433,11 +1433,9 @@ static int update_cpu_topology(void *data) if (cpu != update->cpu) continue; - unregister_cpu_under_node(update->cpu, update->old_nid); unmap_cpu_from_node(update->cpu); map_cpu_to_node(update->cpu, update->new_nid); vdso_getcpu_init(); - register_cpu_under_node(update->cpu, update->new_nid); } return 0; @@ -1485,6 +1483,9 @@ int arch_update_cpu_topology(void) stop_machine(update_cpu_topology, &updates[0], &updated_cpus); for (ud = &updates[0]; ud; ud = ud->next) { + unregister_cpu_under_node(ud->cpu, ud->old_nid); + register_cpu_under_node(ud->cpu, ud->new_nid); + dev = get_cpu_device(ud->cpu); if (dev) kobject_uevent(&dev->kobj, KOBJ_CHANGE); From a9514fe520175d13cba23cf3c9dbba4df9691c86 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:10 +1000 Subject: [PATCH 016/102] powerpc/perf: Check that events only include valid bits on Power8 commit d8bec4c9cd58f6d3679e09b7293851fb92ad7557 upstream. A mistake we have made in the past is that we pull out the fields we need from the event code, but don't check that there are no unknown bits set. This means that we can't ever assign meaning to those unknown bits in future. Although we have once again failed to do this at release, it is still early days for Power8 so I think we can still slip this in and get away with it. 
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/power8-pmu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f7d1c4fff303..84cdc6d892e3 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -109,6 +109,16 @@ #define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) #define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ +#define EVENT_VALID_MASK \ + ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_PSEL_MASK) + /* MMCRA IFM bits - POWER8 */ #define POWER8_MMCRA_IFM1 0x0000000040000000UL #define POWER8_MMCRA_IFM2 0x0000000080000000UL @@ -212,6 +222,9 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long mask = value = 0; + if (event & ~EVENT_VALID_MASK) + return -1; + pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; From 8cf3478f19143d4e2ece4947603bff7dbd360a36 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:11 +1000 Subject: [PATCH 017/102] powerpc/perf: Rework disable logic in pmu_disable() commit 378a6ee99e4a431ec84e4e61893445c041c93007 upstream. In pmu_disable() we disable the PMU by setting the FC (Freeze Counters) bit in MMCR0. In order to do this we have to read/modify/write MMCR0. It's possible that we read a value from MMCR0 which has PMAO (PMU Alert Occurred) set. When we write that value back it will cause an interrupt to occur. 
We will then end up in the PMU interrupt handler even though we are supposed to have just disabled the PMU. We can avoid this by making sure we never write PMAO back. We should not lose interrupts because when the PMU is re-enabled the overflowed values will cause another interrupt. We also reorder the clearing of SAMPLE_ENABLE so that is done after the PMU is frozen. Otherwise there is a small window between the clearing of SAMPLE_ENABLE and the setting of FC where we could take an interrupt and incorrectly see SAMPLE_ENABLE not set. This would for example change the logic in perf_read_regs(). Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 29c6482890c8..1ab306815ff3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -75,6 +75,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_FCHV 0 #define MMCR0_PMCjCE MMCR0_PMCnCE +#define MMCR0_PMAO 0 #define SPRN_MMCRA SPRN_MMCR2 #define MMCRA_SAMPLE_ENABLE 0 @@ -852,7 +853,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; - unsigned long flags; + unsigned long flags, val; if (!ppmu) return; @@ -860,9 +861,6 @@ static void power_pmu_disable(struct pmu *pmu) cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) { - cpuhw->disabled = 1; - cpuhw->n_added = 0; - /* * Check if we ever enabled the PMU on this cpu. */ @@ -871,6 +869,21 @@ static void power_pmu_disable(struct pmu *pmu) cpuhw->pmcs_enabled = 1; } + /* + * Set the 'freeze counters' bit, clear PMAO. 
+ */ + val = mfspr(SPRN_MMCR0); + val |= MMCR0_FC; + val &= ~MMCR0_PMAO; + + /* + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the events etc. + * before we return. + */ + write_mmcr0(cpuhw, val); + mb(); + /* * Disable instruction sampling if it was enabled */ @@ -880,14 +893,8 @@ static void power_pmu_disable(struct pmu *pmu) mb(); } - /* - * Set the 'freeze counters' bit. - * The barrier is to make sure the mtspr has been - * executed and the PMU has frozen the events - * before we return. - */ - write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); - mb(); + cpuhw->disabled = 1; + cpuhw->n_added = 0; } local_irq_restore(flags); } From 8f6c5b6c1264c6cec9b04848d0744aac0853d641 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:12 +1000 Subject: [PATCH 018/102] powerpc/perf: Freeze PMC5/6 if we're not using them commit 7a7a41f9d5b28ac3a916b057a7d3cd3f435ee9a6 upstream. On Power8 we can freeze PMC5 and 6 if we're not using them. Normally they run all the time. As noticed by Anshuman, we should unfreeze them when we disable the PMU as there are legacy tools which expect them to run all the time. 
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/perf/core-book3s.c | 5 +++-- arch/powerpc/perf/power8-pmu.c | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 4a9e408644fe..362142b69d5b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -626,6 +626,7 @@ #define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ #define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */ #define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */ +#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ #define MMCR0_FCTI 0x00000008UL /* freeze counters in tags inactive mode */ #define MMCR0_FCTA 0x00000004UL /* freeze counters in tags active mode */ #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 1ab306815ff3..3d566ee896e2 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -75,6 +75,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_FCHV 0 #define MMCR0_PMCjCE MMCR0_PMCnCE +#define MMCR0_FC56 0 #define MMCR0_PMAO 0 #define SPRN_MMCRA SPRN_MMCR2 @@ -870,11 +871,11 @@ static void power_pmu_disable(struct pmu *pmu) } /* - * Set the 'freeze counters' bit, clear PMAO. + * Set the 'freeze counters' bit, clear PMAO/FC56. 
*/ val = mfspr(SPRN_MMCR0); val |= MMCR0_FC; - val &= ~MMCR0_PMAO; + val &= ~(MMCR0_PMAO | MMCR0_FC56); /* * The barrier is to make sure the mtspr has been diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 84cdc6d892e3..d59f5b2d4c2f 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -391,6 +391,10 @@ static int power8_compute_mmcr(u64 event[], int n_ev, if (pmc_inuse & 0x7c) mmcr[0] |= MMCR0_PMCjCE; + /* If we're not using PMC 5 or 6, freeze them */ + if (!(pmc_inuse & 0x60)) + mmcr[0] |= MMCR0_FC56; + mmcr[1] = mmcr1; mmcr[2] = mmcra; From b26eb91187fdfa37b304b30003f799899e6373c9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:13 +1000 Subject: [PATCH 019/102] powerpc/perf: Use existing out label in power_pmu_enable() commit 0a48843d6c5114cfa4a9540ee4d6af87628cec01 upstream. In power_pmu_enable() we can use the existing out label to reduce the number of return paths. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3d566ee896e2..af4b4b1a691f 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -919,12 +919,13 @@ static void power_pmu_enable(struct pmu *pmu) if (!ppmu) return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); - if (!cpuhw->disabled) { - local_irq_restore(flags); - return; - } + if (!cpuhw->disabled) + goto out; + cpuhw->disabled = 0; /* From 382b9efb7be2d07e51f0a491ecd0a985e1ceb86e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:14 +1000 Subject: [PATCH 020/102] powerpc/perf: Don't enable if we have zero events commit 4ea355b5368bde0574c12430df53334c4be3bdcf upstream. In power_pmu_enable() we still enable the PMU even if we have zero events. 
This should have no effect but doesn't make much sense. Instead just return after telling the hypervisor that we are not using the PMCs. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index af4b4b1a691f..d3ee2e50a3a6 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -926,6 +926,11 @@ static void power_pmu_enable(struct pmu *pmu) if (!cpuhw->disabled) goto out; + if (cpuhw->n_events == 0) { + ppc_set_pmu_inuse(0); + goto out; + } + cpuhw->disabled = 0; /* @@ -937,8 +942,6 @@ static void power_pmu_enable(struct pmu *pmu) if (!cpuhw->n_added) { mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - if (cpuhw->n_events == 0) - ppc_set_pmu_inuse(0); goto out_enable; } From 9d3ce4af3be0235d4cf41ea9fd774205a32e58a2 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Fri, 12 Jul 2013 03:45:37 +0530 Subject: [PATCH 021/102] cpufreq: Revert commit a66b2e to fix suspend/resume regression commit aae760ed21cd690fe8a6db9f3a177ad55d7e12ab upstream. commit a66b2e (cpufreq: Preserve sysfs files across suspend/resume) has unfortunately caused several things in the cpufreq subsystem to break subtly after a suspend/resume cycle. The intention of that patch was to retain the file permissions of the cpufreq related sysfs files across suspend/resume. To achieve that, the commit completely removed the calls to cpufreq_add_dev() and __cpufreq_remove_dev() during suspend/resume transitions. But the problem is that those functions do 2 kinds of things: 1. Low-level initialization/tear-down that are critical to the correct functioning of cpufreq-core. 2. Kobject and sysfs related initialization/teardown. 
Ideally we should have reorganized the code to cleanly separate these two responsibilities, and skipped only the sysfs related parts during suspend/resume. Since we skipped the entire callbacks instead (which also included some CPU and cpufreq-specific critical components), cpufreq subsystem started behaving erratically after suspend/resume. So revert the commit to fix the regression. We'll revisit and address the original goal of that commit separately, since it involves quite a bit of careful code reorganization and appears to be non-trivial. (While reverting the commit, note that another commit f51e1eb (cpufreq: Fix cpufreq regression after suspend/resume) already reverted part of the original set of changes. So revert only the remaining ones). Signed-off-by: Srivatsa S. Bhat Acked-by: Viresh Kumar Tested-by: Paul Bolle Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 4 +++- drivers/cpufreq/cpufreq_stats.c | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2d53f47d1747..178fe7a69056 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1837,13 +1837,15 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, if (dev) { switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpufreq_add_dev(dev, NULL); break; case CPU_DOWN_PREPARE: - case CPU_UP_CANCELED_FROZEN: + case CPU_DOWN_PREPARE_FROZEN: __cpufreq_remove_dev(dev, NULL); break; case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: cpufreq_add_dev(dev, NULL); break; } diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 591b6fb641b2..bfd6273fd873 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -353,13 +353,11 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb, cpufreq_update_policy(cpu); break; case CPU_DOWN_PREPARE: + case 
CPU_DOWN_PREPARE_FROZEN: cpufreq_stats_free_sysfs(cpu); break; case CPU_DEAD: - cpufreq_stats_free_table(cpu); - break; - case CPU_UP_CANCELED_FROZEN: - cpufreq_stats_free_sysfs(cpu); + case CPU_DEAD_FROZEN: cpufreq_stats_free_table(cpu); break; } From 916f4dbc2a827677212a3bf3ffcc22745fa6e0b1 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 16 Jul 2013 22:46:48 +0200 Subject: [PATCH 022/102] cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression commit e8d05276f236ee6435e78411f62be9714e0b9377 upstream. commit 2f7021a8 "cpufreq: protect 'policy->cpus' from offlining during __gov_queue_work()" caused a regression in CPU hotplug, because it led to a deadlock between cpufreq governor worker thread and the CPU hotplug writer task. Lockdep splat corresponding to this deadlock is shown below: [ 60.277396] ====================================================== [ 60.277400] [ INFO: possible circular locking dependency detected ] [ 60.277407] 3.10.0-rc7-dbg-01385-g241fd04-dirty #1744 Not tainted [ 60.277411] ------------------------------------------------------- [ 60.277417] bash/2225 is trying to acquire lock: [ 60.277422] ((&(&j_cdbs->work)->work)){+.+...}, at: [] flush_work+0x5/0x280 [ 60.277444] but task is already holding lock: [ 60.277449] (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2b/0x60 [ 60.277465] which lock already depends on the new lock.
[ 60.277472] the existing dependency chain (in reverse order) is: [ 60.277477] -> #2 (cpu_hotplug.lock){+.+.+.}: [ 60.277490] [] lock_acquire+0xa4/0x200 [ 60.277503] [] mutex_lock_nested+0x67/0x410 [ 60.277514] [] get_online_cpus+0x3c/0x60 [ 60.277522] [] gov_queue_work+0x2a/0xb0 [ 60.277532] [] cs_dbs_timer+0xc1/0xe0 [ 60.277543] [] process_one_work+0x1cd/0x6a0 [ 60.277552] [] worker_thread+0x121/0x3a0 [ 60.277560] [] kthread+0xdb/0xe0 [ 60.277569] [] ret_from_fork+0x7c/0xb0 [ 60.277580] -> #1 (&j_cdbs->timer_mutex){+.+...}: [ 60.277592] [] lock_acquire+0xa4/0x200 [ 60.277600] [] mutex_lock_nested+0x67/0x410 [ 60.277608] [] cs_dbs_timer+0x8d/0xe0 [ 60.277616] [] process_one_work+0x1cd/0x6a0 [ 60.277624] [] worker_thread+0x121/0x3a0 [ 60.277633] [] kthread+0xdb/0xe0 [ 60.277640] [] ret_from_fork+0x7c/0xb0 [ 60.277649] -> #0 ((&(&j_cdbs->work)->work)){+.+...}: [ 60.277661] [] __lock_acquire+0x1766/0x1d30 [ 60.277669] [] lock_acquire+0xa4/0x200 [ 60.277677] [] flush_work+0x3d/0x280 [ 60.277685] [] __cancel_work_timer+0x8a/0x120 [ 60.277693] [] cancel_delayed_work_sync+0x13/0x20 [ 60.277701] [] cpufreq_governor_dbs+0x529/0x6f0 [ 60.277709] [] cs_cpufreq_governor_dbs+0x17/0x20 [ 60.277719] [] __cpufreq_governor+0x48/0x100 [ 60.277728] [] __cpufreq_remove_dev.isra.14+0x80/0x3c0 [ 60.277737] [] cpufreq_cpu_callback+0x38/0x4c [ 60.277747] [] notifier_call_chain+0x5d/0x110 [ 60.277759] [] __raw_notifier_call_chain+0xe/0x10 [ 60.277768] [] _cpu_down+0x88/0x330 [ 60.277779] [] cpu_down+0x36/0x50 [ 60.277788] [] store_online+0x98/0xd0 [ 60.277796] [] dev_attr_store+0x18/0x30 [ 60.277806] [] sysfs_write_file+0xdb/0x150 [ 60.277818] [] vfs_write+0xbd/0x1f0 [ 60.277826] [] SyS_write+0x4c/0xa0 [ 60.277834] [] tracesys+0xd0/0xd5 [ 60.277842] other info that might help us debug this: [ 60.277848] Chain exists of: (&(&j_cdbs->work)->work) --> &j_cdbs->timer_mutex --> cpu_hotplug.lock [ 60.277864] Possible unsafe locking scenario: [ 60.277869] CPU0 CPU1 [ 60.277873] ---- ---- [ 
60.277877] lock(cpu_hotplug.lock); [ 60.277885] lock(&j_cdbs->timer_mutex); [ 60.277892] lock(cpu_hotplug.lock); [ 60.277900] lock((&(&j_cdbs->work)->work)); [ 60.277907] *** DEADLOCK *** [ 60.277915] 6 locks held by bash/2225: [ 60.277919] #0: (sb_writers#6){.+.+.+}, at: [] vfs_write+0x1c3/0x1f0 [ 60.277937] #1: (&buffer->mutex){+.+.+.}, at: [] sysfs_write_file+0x3c/0x150 [ 60.277954] #2: (s_active#61){.+.+.+}, at: [] sysfs_write_file+0xc3/0x150 [ 60.277972] #3: (x86_cpu_hotplug_driver_mutex){+.+...}, at: [] cpu_hotplug_driver_lock+0x17/0x20 [ 60.277990] #4: (cpu_add_remove_lock){+.+.+.}, at: [] cpu_down+0x22/0x50 [ 60.278007] #5: (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2b/0x60 [ 60.278023] stack backtrace: [ 60.278031] CPU: 3 PID: 2225 Comm: bash Not tainted 3.10.0-rc7-dbg-01385-g241fd04-dirty #1744 [ 60.278037] Hardware name: Acer Aspire 5741G /Aspire 5741G , BIOS V1.20 02/08/2011 [ 60.278042] ffffffff8204e110 ffff88014df6b9f8 ffffffff815b3d90 ffff88014df6ba38 [ 60.278055] ffffffff815b0a8d ffff880150ed3f60 ffff880150ed4770 3871c4002c8980b2 [ 60.278068] ffff880150ed4748 ffff880150ed4770 ffff880150ed3f60 ffff88014df6bb00 [ 60.278081] Call Trace: [ 60.278091] [] dump_stack+0x19/0x1b [ 60.278101] [] print_circular_bug+0x2b6/0x2c5 [ 60.278111] [] __lock_acquire+0x1766/0x1d30 [ 60.278123] [] ? __kernel_text_address+0x58/0x80 [ 60.278134] [] lock_acquire+0xa4/0x200 [ 60.278142] [] ? flush_work+0x5/0x280 [ 60.278151] [] flush_work+0x3d/0x280 [ 60.278159] [] ? flush_work+0x5/0x280 [ 60.278169] [] ? mark_held_locks+0x94/0x140 [ 60.278178] [] ? __cancel_work_timer+0x77/0x120 [ 60.278188] [] ? 
trace_hardirqs_on_caller+0xfd/0x1c0 [ 60.278196] [] __cancel_work_timer+0x8a/0x120 [ 60.278206] [] cancel_delayed_work_sync+0x13/0x20 [ 60.278214] [] cpufreq_governor_dbs+0x529/0x6f0 [ 60.278225] [] cs_cpufreq_governor_dbs+0x17/0x20 [ 60.278234] [] __cpufreq_governor+0x48/0x100 [ 60.278244] [] __cpufreq_remove_dev.isra.14+0x80/0x3c0 [ 60.278255] [] cpufreq_cpu_callback+0x38/0x4c [ 60.278265] [] notifier_call_chain+0x5d/0x110 [ 60.278275] [] __raw_notifier_call_chain+0xe/0x10 [ 60.278284] [] _cpu_down+0x88/0x330 [ 60.278292] [] ? cpu_hotplug_driver_lock+0x17/0x20 [ 60.278302] [] cpu_down+0x36/0x50 [ 60.278311] [] store_online+0x98/0xd0 [ 60.278320] [] dev_attr_store+0x18/0x30 [ 60.278329] [] sysfs_write_file+0xdb/0x150 [ 60.278337] [] vfs_write+0xbd/0x1f0 [ 60.278347] [] ? fget_light+0x320/0x4b0 [ 60.278355] [] SyS_write+0x4c/0xa0 [ 60.278364] [] tracesys+0xd0/0xd5 [ 60.280582] smpboot: CPU 1 is now offline The intention of that commit was to avoid warnings during CPU hotplug, which indicated that offline CPUs were getting IPIs from the cpufreq governor's work items. But the real root-cause of that problem was commit a66b2e5 (cpufreq: Preserve sysfs files across suspend/resume) because it totally skipped all the cpufreq callbacks during CPU hotplug in the suspend/resume path, and hence it never actually shut down the cpufreq governor's worker threads during CPU offline in the suspend/resume path. Reflecting back, the reason why we never suspected that commit as the root-cause earlier, was that the original issue was reported with just the halt command and nobody had brought in suspend/resume to the equation. The reason for _that_ in turn, as it turns out, is that earlier halt/shutdown was being done by disabling non-boot CPUs while tasks were frozen, just like suspend/resume.... but commit cf7df378a (reboot: migrate shutdown/reboot to boot cpu) which came somewhere along that very same time changed that logic: shutdown/halt no longer takes CPUs offline. 
Thus, the test-cases for reproducing the bug were vastly different and thus we went totally off the trail. Overall, it was one hell of a confusion with so many commits affecting each other and also affecting the symptoms of the problems in subtle ways. Finally, now since the original problematic commit (a66b2e5) has been completely reverted, revert this intermediate fix too (2f7021a8), to fix the CPU hotplug deadlock. Phew! Reported-by: Sergey Senozhatsky Reported-by: Bartlomiej Zolnierkiewicz Signed-off-by: Srivatsa S. Bhat Tested-by: Peter Wu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_governor.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index dc9b72e25c1a..5af40ad82d23 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "cpufreq_governor.h" @@ -181,10 +180,8 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, if (!all_cpus) { __gov_queue_work(smp_processor_id(), dbs_data, delay); } else { - get_online_cpus(); for_each_cpu(i, policy->cpus) __gov_queue_work(i, dbs_data, delay); - put_online_cpus(); } } EXPORT_SYMBOL_GPL(gov_queue_work); From 88c0a794e5d9bcc29926e636cd1d6eb5c9dcb235 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Jul 2013 15:37:12 +0100 Subject: [PATCH 023/102] arm64: mm: don't treat user cache maintenance faults as writes commit db6f41063cbdb58b14846e600e6bc3f4e4c2e888 upstream. On arm64, cache maintenance faults appear as data aborts with the CM bit set in the ESR. The WnR bit, usually used to distinguish between faulting loads and stores, always reads as 1 and (slightly confusingly) the instructions are treated as reads by the architecture. This patch fixes our fault handling code to treat cache maintenance faults in the same way as loads. 
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/fault.c | 46 +++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1426468b77f3..f51d669c8ebd 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -152,25 +152,8 @@ void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) #define ESR_CM (1 << 8) #define ESR_LNX_EXEC (1 << 24) -/* - * Check that the permissions on the VMA allow for the fault which occurred. - * If we encountered a write fault, we must have write permission, otherwise - * we allow any permission. - */ -static inline bool access_error(unsigned int esr, struct vm_area_struct *vma) -{ - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; - - if (esr & ESR_WRITE) - mask = VM_WRITE; - if (esr & ESR_LNX_EXEC) - mask = VM_EXEC; - - return vma->vm_flags & mask ? false : true; -} - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int esr, unsigned int flags, + unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) { struct vm_area_struct *vma; @@ -188,12 +171,17 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, * it. */ good_area: - if (access_error(esr, vma)) { + /* + * Check that the permissions on the VMA allow for the fault which + * occurred. If we encountered a write or exec fault, we must have + * appropriate permissions, otherwise we allow any permission. 
+ */ + if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags); check_stack: if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) @@ -208,9 +196,15 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - bool write = (esr & ESR_WRITE) && !(esr & ESR_CM); - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (esr & ESR_LNX_EXEC) { + vm_flags = VM_EXEC; + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { + vm_flags = VM_WRITE; + mm_flags |= FAULT_FLAG_WRITE; + } tsk = current; mm = tsk->mm; @@ -248,7 +242,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, #endif } - fault = __do_page_fault(mm, addr, esr, flags, tsk); + fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); /* * If we need to retry but a fatal signal is pending, handle the @@ -265,7 +259,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, */ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, @@ -280,7 +274,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of * starvation. 
*/ - flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; goto retry; } } From 8b68eefae0dbd8cdddb54af08c2b424f6da6f6ba Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 1 Jul 2013 15:20:00 +0100 Subject: [PATCH 024/102] iio: Fix iio_channel_has_info commit 1c297a66654a3295ae87e2b7f3724d214eb2b5ec upstream. Since the info_mask split, iio_channel_has_info() is not working correctly. info_mask_separate and info_mask_shared_by_type, it is not possible to compare them directly with the iio_chan_info_enum enum. Correct that bit using the BIT() macro. Signed-off-by: Alexandre Belloni Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 8d171f427632..3d35b7023591 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -211,8 +211,8 @@ struct iio_chan_spec { static inline bool iio_channel_has_info(const struct iio_chan_spec *chan, enum iio_chan_info_enum type) { - return (chan->info_mask_separate & type) | - (chan->info_mask_shared_by_type & type); + return (chan->info_mask_separate & BIT(type)) | + (chan->info_mask_shared_by_type & BIT(type)); } #define IIO_ST(si, rb, sb, sh) \ From 79d295ce5b9c322ebb40b2d7fdb9da4eee83f893 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 1 Jul 2013 17:40:00 +0100 Subject: [PATCH 025/102] iio: inkern: fix iio_convert_raw_to_processed_unlocked commit f91d1b63a4e096d3023aaaafec9d9d3aff25997f upstream. When reading IIO_CHAN_INFO_OFFSET, the return value of iio_channel_read() for success will be IIO_VAL*, checking for 0 is not correct. Without this fix the offset applied by iio drivers will be ignored when converting a raw value to one in appropriate base units (e.g mV) in a IIO client drivers that use iio_convert_raw_to_processed including iio-hwmon. 
Signed-off-by: Alexandre Belloni Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/inkern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 98ddc323add0..0cf5f8e06cfc 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -451,7 +451,7 @@ static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan, int ret; ret = iio_channel_read(chan, &offset, NULL, IIO_CHAN_INFO_OFFSET); - if (ret == 0) + if (ret >= 0) raw64 += offset; scale_type = iio_channel_read(chan, &scale_val, &scale_val2, From c38217e1b806155493c9324345f0f1e6561e55db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Jul 2013 12:54:22 +0200 Subject: [PATCH 026/102] ALSA: hda - Fix EAPD vmaster hook for AD1884 & co commit 8f0b3b7e222383a21f7d58bd97d5552b3a5dbced upstream. ad1884_fixup_hp_eapd() tries to set the NID for controlling the speaker EAPD from the pin configuration. But the current code can't work expectedly since it sets spec->eapd_nid before calling the generic parser where the autocfg pins are set up. This patch changes the function to set spec->eapd_nid after the generic parser call while it sets vmaster hook unconditionally. The spec->eapd_nid check is moved in the hook function itself instead. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_analog.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 977b0d878dae..d97f0d61a15b 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -2112,6 +2112,9 @@ static void ad_vmaster_eapd_hook(void *private_data, int enabled) { struct hda_codec *codec = private_data; struct ad198x_spec *spec = codec->spec; + + if (!spec->eapd_nid) + return; snd_hda_codec_update_cache(codec, spec->eapd_nid, 0, AC_VERB_SET_EAPD_BTLENABLE, enabled ? 
0x02 : 0x00); @@ -3601,13 +3604,16 @@ static void ad1884_fixup_hp_eapd(struct hda_codec *codec, { struct ad198x_spec *spec = codec->spec; - if (action == HDA_FIXUP_ACT_PRE_PROBE) { + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gen.vmaster_mute.hook = ad_vmaster_eapd_hook; + break; + case HDA_FIXUP_ACT_PROBE: if (spec->gen.autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) spec->eapd_nid = spec->gen.autocfg.line_out_pins[0]; else spec->eapd_nid = spec->gen.autocfg.speaker_pins[0]; - if (spec->eapd_nid) - spec->gen.vmaster_mute.hook = ad_vmaster_eapd_hook; + break; } } From 1d33b1e178d4d298150227227e079dd883f06f57 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Jun 2013 07:55:02 +0200 Subject: [PATCH 027/102] ALSA: hda - Fix return value of snd_hda_check_power_state() commit 06ec56d3c60238f27bfa50d245592fccc1b4ef0f upstream. The refactoring by commit 9040d102 introduced the new function snd_hda_check_power_state(). This function is supposed to return true if the state already reached to the target state, but it actually returns false for that. An utterly stupid typo while copy & paste. Fortunately this didn't influence on much behavior because powering up AFG usually powers up the child widgets, too. But the finer power control must have been broken by this bug. 
Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_local.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index e0bf7534fa1f..29ed7d9b27e4 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -667,7 +667,7 @@ snd_hda_check_power_state(struct hda_codec *codec, hda_nid_t nid, if (state & AC_PWRST_ERROR) return true; state = (state >> 4) & 0x0f; - return (state != target_state); + return (state == target_state); } unsigned int snd_hda_codec_eapd_power_filter(struct hda_codec *codec, From 27035caa37556524ef30311438cefaf1a80a5364 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Jun 2013 16:14:22 +0200 Subject: [PATCH 028/102] ALSA: hda - Cache the MUX selection for generic HDMI commit bddee96b5d0db869f47b195fe48c614ca824203c upstream. When a selection to a converter MUX is changed in hdmi_pcm_open(), it should be cached so that the given connection can be restored properly at PM resume. We need just to replace the corresponding snd_hda_codec_write() call with snd_hda_codec_write_cache(). 
Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index e12f7a030c58..c018fb71cafb 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1146,7 +1146,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo, per_cvt->assigned = 1; hinfo->nid = per_cvt->cvt_nid; - snd_hda_codec_write(codec, per_pin->pin_nid, 0, + snd_hda_codec_write_cache(codec, per_pin->pin_nid, 0, AC_VERB_SET_CONNECT_SEL, mux_idx); snd_hda_spdif_ctls_assign(codec, pin_idx, per_cvt->cvt_nid); From 1f563ec40d24e0af579911dfbd6ac8856889bb89 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 19 Jun 2013 07:54:09 +0200 Subject: [PATCH 029/102] ALSA: hda - Fix missing Mic Boost controls for VIA codecs commit d045c5dc43d829df9f067d363c3b42b14dacf434 upstream. Some VIA codecs like VT1708S have Mic boost amps in the mic pins but they aren't exposed in the capability bits. In the past driver code, we override the pin caps and create mic boost controls forcibly. While transition to the generic parser, we lost the mic boost controls although the pin caps are still overridden, because the generic parser code checks the widget caps, too. So this patch adds a new helper function to allow the override of the given widget capability bits, and makes VIA codecs driver to add the missing input-amp capability bit. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59861 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_local.h | 8 ++++++++ sound/pci/hda/patch_via.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 29ed7d9b27e4..2e7493ef8ee0 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -562,6 +562,14 @@ static inline unsigned int get_wcaps_channels(u32 wcaps) return chans; } +static inline void snd_hda_override_wcaps(struct hda_codec *codec, + hda_nid_t nid, u32 val) +{ + if (nid >= codec->start_nid && + nid < codec->start_nid + codec->num_nodes) + codec->wcaps[nid - codec->start_nid] = val; +} + u32 query_amp_caps(struct hda_codec *codec, hda_nid_t nid, int direction); int snd_hda_override_amp_caps(struct hda_codec *codec, hda_nid_t nid, int dir, unsigned int caps); diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index e5245544eb52..aed19c3f8466 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -910,6 +910,8 @@ static const struct hda_verb vt1708S_init_verbs[] = { static void override_mic_boost(struct hda_codec *codec, hda_nid_t pin, int offset, int num_steps, int step_size) { + snd_hda_override_wcaps(codec, pin, + get_wcaps(codec, pin) | AC_WCAP_IN_AMP); snd_hda_override_amp_caps(codec, pin, HDA_INPUT, (offset << AC_AMPCAP_OFFSET_SHIFT) | (num_steps << AC_AMPCAP_NUM_STEPS_SHIFT) | From 3a72cf75a1e32aedfd68b4467e2677bf1c0eb5d4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 28 Jun 2013 11:51:32 +0200 Subject: [PATCH 030/102] ALSA: hda - Fix the max length of control name in generic parser commit 0c055b3413868227f2e85701c4e6938c9581f0e2 upstream. add_control_with_pfx() in hda_generic.c assumes a shorter name string for the control element, and this resulted in the truncation of the long but valid string like "Headphone Surround Switch" in the middle. 
This patch aligns the max size to the actual limit of snd_ctl_elem_id, 44. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 4b1524a861f3..24400cffb8f3 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -840,7 +840,7 @@ static int add_control_with_pfx(struct hda_gen_spec *spec, int type, const char *pfx, const char *dir, const char *sfx, int cidx, unsigned long val) { - char name[32]; + char name[44]; snprintf(name, sizeof(name), "%s %s %s", pfx, dir, sfx); if (!add_control(spec, type, name, cidx, val)) return -ENOMEM; From 782e9cac197a38391820b425a4645e5bda7d121b Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Fri, 12 Jul 2013 11:01:37 -0700 Subject: [PATCH 031/102] ALSA: hda - Add new GPU codec ID to snd-hda commit d52392b1a80458c0510810789c7db4a39b88022a upstream. Vendor ID 0x10de0060 is used by a yet-to-be-named GPU chip. 
Reviewed-by: Andy Ritger Signed-off-by: Aaron Plattner Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index c018fb71cafb..496d7f21d3e5 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2536,6 +2536,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = { { .id = 0x10de0043, .name = "GPU 43 HDMI/DP", .patch = patch_generic_hdmi }, { .id = 0x10de0044, .name = "GPU 44 HDMI/DP", .patch = patch_generic_hdmi }, { .id = 0x10de0051, .name = "GPU 51 HDMI/DP", .patch = patch_generic_hdmi }, +{ .id = 0x10de0060, .name = "GPU 60 HDMI/DP", .patch = patch_generic_hdmi }, { .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x11069f80, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, @@ -2588,6 +2589,7 @@ MODULE_ALIAS("snd-hda-codec-id:10de0042"); MODULE_ALIAS("snd-hda-codec-id:10de0043"); MODULE_ALIAS("snd-hda-codec-id:10de0044"); MODULE_ALIAS("snd-hda-codec-id:10de0051"); +MODULE_ALIAS("snd-hda-codec-id:10de0060"); MODULE_ALIAS("snd-hda-codec-id:10de0067"); MODULE_ALIAS("snd-hda-codec-id:10de8001"); MODULE_ALIAS("snd-hda-codec-id:11069f80"); From 409972c1c8678bb0c28eb0417054e2794dd59ec9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Jul 2013 12:17:49 +0200 Subject: [PATCH 032/102] ALSA: seq-oss: Initialize MIDI clients asynchronously commit 256ca9c3ad5013ff8a8f165e5a82fab437628c8e upstream. We've got bug reports that the module loading stuck on Debian system with 3.10 kernel. The debugging session revealed that the initial registration of OSS sequencer clients stuck at module loading time, which involves again with request_module() at the init phase. This is triggered only by special --install stuff Debian is using, but it's still not good to have such loops. 
As a workaround, call the registration part asynchronously. This is a better approach irrespective of the hang fix, in anyway. Reported-and-tested-by: Philipp Matthias Hahn Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/oss/seq_oss_init.c | 16 +++++++++++++--- sound/core/seq/oss/seq_oss_midi.c | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c index e3cb46fef2c7..b3f39b5ed742 100644 --- a/sound/core/seq/oss/seq_oss_init.c +++ b/sound/core/seq/oss/seq_oss_init.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * common variables @@ -60,6 +61,14 @@ static void free_devinfo(void *private); #define call_ctl(type,rec) snd_seq_kernel_client_ctl(system_client, type, rec) +/* call snd_seq_oss_midi_lookup_ports() asynchronously */ +static void async_call_lookup_ports(struct work_struct *work) +{ + snd_seq_oss_midi_lookup_ports(system_client); +} + +static DECLARE_WORK(async_lookup_work, async_call_lookup_ports); + /* * create sequencer client for OSS sequencer */ @@ -85,9 +94,6 @@ snd_seq_oss_create_client(void) system_client = rc; debug_printk(("new client = %d\n", rc)); - /* look up midi devices */ - snd_seq_oss_midi_lookup_ports(system_client); - /* create annoucement receiver port */ memset(port, 0, sizeof(*port)); strcpy(port->name, "Receiver"); @@ -115,6 +121,9 @@ snd_seq_oss_create_client(void) } rc = 0; + /* look up midi devices */ + schedule_work(&async_lookup_work); + __error: kfree(port); return rc; @@ -160,6 +169,7 @@ receive_announce(struct snd_seq_event *ev, int direct, void *private, int atomic int snd_seq_oss_delete_client(void) { + cancel_work_sync(&async_lookup_work); if (system_client >= 0) snd_seq_delete_kernel_client(system_client); diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index 677dc84590c7..862d84893ee8 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ 
b/sound/core/seq/oss/seq_oss_midi.c @@ -72,7 +72,7 @@ static int send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, * look up the existing ports * this looks a very exhausting job. */ -int __init +int snd_seq_oss_midi_lookup_ports(int client) { struct snd_seq_client_info *clinfo; From 2bc2f7d622af45d07980a83586c95c7530e4f6cc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:57:55 +0200 Subject: [PATCH 033/102] ALSA: 6fire: Fix unlocked snd_pcm_stop() call commit 5b9ab3f7324a1b94a5a5a76d44cf92dfeb3b5e80 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/6fire/pcm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c index 40dd50a80f55..8221ff2f209f 100644 --- a/sound/usb/6fire/pcm.c +++ b/sound/usb/6fire/pcm.c @@ -641,17 +641,25 @@ int usb6fire_pcm_init(struct sfire_chip *chip) void usb6fire_pcm_abort(struct sfire_chip *chip) { struct pcm_runtime *rt = chip->pcm; + unsigned long flags; int i; if (rt) { rt->panic = true; - if (rt->playback.instance) + if (rt->playback.instance) { + snd_pcm_stream_lock_irqsave(rt->playback.instance, flags); snd_pcm_stop(rt->playback.instance, SNDRV_PCM_STATE_XRUN); - if (rt->capture.instance) + snd_pcm_stream_unlock_irqrestore(rt->playback.instance, flags); + } + + if (rt->capture.instance) { + snd_pcm_stream_lock_irqsave(rt->capture.instance, flags); snd_pcm_stop(rt->capture.instance, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(rt->capture.instance, flags); + } for (i = 0; i < PCM_N_URBS; i++) { usb_poison_urb(&rt->in_urbs[i].instance); From c9c8dd7d3cacecd3a3f0e6e49c0a0f8f93d3d04e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:58:25 +0200 Subject: [PATCH 034/102] ALSA: ua101: Fix unlocked snd_pcm_stop() call commit 9538aa46c2427d6782aa10036c4da4c541605e0e upstream. 
snd_pcm_stop() must be called in the PCM substream lock context. Acked-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/misc/ua101.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index 6ad617b94732..76d832908fe0 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -613,14 +613,24 @@ static int start_usb_playback(struct ua101 *ua) static void abort_alsa_capture(struct ua101 *ua) { - if (test_bit(ALSA_CAPTURE_RUNNING, &ua->states)) + unsigned long flags; + + if (test_bit(ALSA_CAPTURE_RUNNING, &ua->states)) { + snd_pcm_stream_lock_irqsave(ua->capture.substream, flags); snd_pcm_stop(ua->capture.substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(ua->capture.substream, flags); + } } static void abort_alsa_playback(struct ua101 *ua) { - if (test_bit(ALSA_PLAYBACK_RUNNING, &ua->states)) + unsigned long flags; + + if (test_bit(ALSA_PLAYBACK_RUNNING, &ua->states)) { + snd_pcm_stream_lock_irqsave(ua->playback.substream, flags); snd_pcm_stop(ua->playback.substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(ua->playback.substream, flags); + } } static int set_stream_hw(struct ua101 *ua, struct snd_pcm_substream *substream, From 2306396c952129dcf127d47c0383ae28c7288ed0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:59:33 +0200 Subject: [PATCH 035/102] ALSA: pxa2xx: Fix unlocked snd_pcm_stop() call commit 46f6c1aaf790be9ea3c8ddfc8f235a5f677d08e2 upstream. snd_pcm_stop() must be called in the PCM substream lock context. 
Acked-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/arm/pxa2xx-pcm-lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/arm/pxa2xx-pcm-lib.c b/sound/arm/pxa2xx-pcm-lib.c index 76e0d5695075..823359ed95e1 100644 --- a/sound/arm/pxa2xx-pcm-lib.c +++ b/sound/arm/pxa2xx-pcm-lib.c @@ -166,7 +166,9 @@ void pxa2xx_pcm_dma_irq(int dma_ch, void *dev_id) } else { printk(KERN_ERR "%s: DMA error on channel %d (DCSR=%#x)\n", rtd->params->name, dma_ch, dcsr); + snd_pcm_stream_lock(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(substream); } } EXPORT_SYMBOL(pxa2xx_pcm_dma_irq); From 8268d1c754169393118ae847cf6741d777e976a4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:56:56 +0200 Subject: [PATCH 036/102] ALSA: atiixp: Fix unlocked snd_pcm_stop() call commit cc7282b8d5abbd48c81d1465925d464d9e3eaa8f upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/atiixp.c | 2 ++ sound/pci/atiixp_modem.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c index 6e78c6789858..819430ac6b3b 100644 --- a/sound/pci/atiixp.c +++ b/sound/pci/atiixp.c @@ -689,7 +689,9 @@ static void snd_atiixp_xrun_dma(struct atiixp *chip, struct atiixp_dma *dma) if (! dma->substream || ! dma->running) return; snd_printdd("atiixp: XRUN detected (DMA %d)\n", dma->ops->type); + snd_pcm_stream_lock(dma->substream); snd_pcm_stop(dma->substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(dma->substream); } /* diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c index d0bec7ba3b0d..57f41820263f 100644 --- a/sound/pci/atiixp_modem.c +++ b/sound/pci/atiixp_modem.c @@ -638,7 +638,9 @@ static void snd_atiixp_xrun_dma(struct atiixp_modem *chip, if (! dma->substream || ! 
dma->running) return; snd_printdd("atiixp-modem: XRUN detected (DMA %d)\n", dma->ops->type); + snd_pcm_stream_lock(dma->substream); snd_pcm_stop(dma->substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(dma->substream); } /* From 4b06cd616f646a4257b39a1cc9a10410e7c90088 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:55:57 +0200 Subject: [PATCH 037/102] ALSA: asihpi: Fix unlocked snd_pcm_stop() call commit 60478295d6876619f8f47f6d1a5c25eaade69ee3 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/asihpi/asihpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c index fbc17203613c..a471d821c608 100644 --- a/sound/pci/asihpi/asihpi.c +++ b/sound/pci/asihpi/asihpi.c @@ -769,7 +769,10 @@ static void snd_card_asihpi_timer_function(unsigned long data) s->number); ds->drained_count++; if (ds->drained_count > 20) { + unsigned long flags; + snd_pcm_stream_lock_irqsave(s, flags); snd_pcm_stop(s, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(s, flags); continue; } } else { From 1ef08eb2a9a50d9b4dd119bda4f747e1b922a4fc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:58:47 +0200 Subject: [PATCH 038/102] ALSA: usx2y: Fix unlocked snd_pcm_stop() call commit 5be1efb4c2ed79c3d7c0cbcbecae768377666e84 upstream. snd_pcm_stop() must be called in the PCM substream lock context. 
Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/usx2y/usbusx2yaudio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index b37653247ef4..0ce903375eaf 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -273,7 +273,11 @@ static void usX2Y_clients_stop(struct usX2Ydev *usX2Y) struct snd_usX2Y_substream *subs = usX2Y->subs[s]; if (subs) { if (atomic_read(&subs->state) >= state_PRERUNNING) { + unsigned long flags; + + snd_pcm_stream_lock_irqsave(subs->pcm_substream, flags); snd_pcm_stop(subs->pcm_substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(subs->pcm_substream, flags); } for (u = 0; u < NRURBS; u++) { struct urb *urb = subs->urb[u]; From e00354825e75a338ced4f4eb16cf08182c116984 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 22 Jun 2013 16:15:31 -0700 Subject: [PATCH 039/102] hwmon: (nct6775) Fix temperature alarm attributes commit b1d2bff6a61140454b9d203519cc686a2e9ef32f upstream. Driver displays wrong alarms for temperature attributes. Turns out that temperature alarm bits are not fixed, but determined by temperature source mapping. To fix the problem, walk through the temperature sources to determine the correct alarm bit associated with a given attribute. 
Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct6775.c | 80 +++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 04638aee9039..2405ab439e80 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -625,6 +625,7 @@ struct nct6775_data { u8 has_fan_min; /* some fans don't have min register */ bool has_fan_div; + u8 num_temp_alarms; /* 2 or 3 */ u8 temp_fixed_num; /* 3 or 6 */ u8 temp_type[NUM_TEMP_FIXED]; s8 temp_offset[NUM_TEMP_FIXED]; @@ -1193,6 +1194,42 @@ show_alarm(struct device *dev, struct device_attribute *attr, char *buf) (unsigned int)((data->alarms >> nr) & 0x01)); } +static int find_temp_source(struct nct6775_data *data, int index, int count) +{ + int source = data->temp_src[index]; + int nr; + + for (nr = 0; nr < count; nr++) { + int src; + + src = nct6775_read_value(data, + data->REG_TEMP_SOURCE[nr]) & 0x1f; + if (src == source) + return nr; + } + return -1; +} + +static ssize_t +show_temp_alarm(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); + struct nct6775_data *data = nct6775_update_device(dev); + unsigned int alarm = 0; + int nr; + + /* + * For temperatures, there is no fixed mapping from registers to alarm + * bits. Alarm bits are determined by the temperature source mapping. 
+ */ + nr = find_temp_source(data, sattr->index, data->num_temp_alarms); + if (nr >= 0) { + int bit = data->ALARM_BITS[nr + TEMP_ALARM_BASE]; + alarm = (data->alarms >> bit) & 0x01; + } + return sprintf(buf, "%u\n", alarm); +} + static SENSOR_DEVICE_ATTR_2(in0_input, S_IRUGO, show_in_reg, NULL, 0, 0); static SENSOR_DEVICE_ATTR_2(in1_input, S_IRUGO, show_in_reg, NULL, 1, 0); static SENSOR_DEVICE_ATTR_2(in2_input, S_IRUGO, show_in_reg, NULL, 2, 0); @@ -1874,22 +1911,18 @@ static struct sensor_device_attribute sda_temp_type[] = { }; static struct sensor_device_attribute sda_temp_alarm[] = { - SENSOR_ATTR(temp1_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE), - SENSOR_ATTR(temp2_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 1), - SENSOR_ATTR(temp3_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 2), - SENSOR_ATTR(temp4_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 3), - SENSOR_ATTR(temp5_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 4), - SENSOR_ATTR(temp6_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 5), + SENSOR_ATTR(temp1_alarm, S_IRUGO, show_temp_alarm, NULL, 0), + SENSOR_ATTR(temp2_alarm, S_IRUGO, show_temp_alarm, NULL, 1), + SENSOR_ATTR(temp3_alarm, S_IRUGO, show_temp_alarm, NULL, 2), + SENSOR_ATTR(temp4_alarm, S_IRUGO, show_temp_alarm, NULL, 3), + SENSOR_ATTR(temp5_alarm, S_IRUGO, show_temp_alarm, NULL, 4), + SENSOR_ATTR(temp6_alarm, S_IRUGO, show_temp_alarm, NULL, 5), + SENSOR_ATTR(temp7_alarm, S_IRUGO, show_temp_alarm, NULL, 6), + SENSOR_ATTR(temp8_alarm, S_IRUGO, show_temp_alarm, NULL, 7), + SENSOR_ATTR(temp9_alarm, S_IRUGO, show_temp_alarm, NULL, 8), + SENSOR_ATTR(temp10_alarm, S_IRUGO, show_temp_alarm, NULL, 9), }; -#define NUM_TEMP_ALARM ARRAY_SIZE(sda_temp_alarm) - static ssize_t show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf) { @@ -3215,13 +3248,11 @@ static void nct6775_device_remove_files(struct device *dev) device_remove_file(dev, &sda_temp_max[i].dev_attr); device_remove_file(dev, 
&sda_temp_max_hyst[i].dev_attr); device_remove_file(dev, &sda_temp_crit[i].dev_attr); + device_remove_file(dev, &sda_temp_alarm[i].dev_attr); if (!(data->have_temp_fixed & (1 << i))) continue; device_remove_file(dev, &sda_temp_type[i].dev_attr); device_remove_file(dev, &sda_temp_offset[i].dev_attr); - if (i >= NUM_TEMP_ALARM) - continue; - device_remove_file(dev, &sda_temp_alarm[i].dev_attr); } device_remove_file(dev, &sda_caseopen[0].dev_attr); @@ -3419,6 +3450,7 @@ static int nct6775_probe(struct platform_device *pdev) data->auto_pwm_num = 6; data->has_fan_div = true; data->temp_fixed_num = 3; + data->num_temp_alarms = 3; data->ALARM_BITS = NCT6775_ALARM_BITS; @@ -3483,6 +3515,7 @@ static int nct6775_probe(struct platform_device *pdev) data->auto_pwm_num = 4; data->has_fan_div = false; data->temp_fixed_num = 3; + data->num_temp_alarms = 3; data->ALARM_BITS = NCT6776_ALARM_BITS; @@ -3547,6 +3580,7 @@ static int nct6775_probe(struct platform_device *pdev) data->auto_pwm_num = 4; data->has_fan_div = false; data->temp_fixed_num = 6; + data->num_temp_alarms = 2; data->ALARM_BITS = NCT6779_ALARM_BITS; @@ -3897,6 +3931,12 @@ static int nct6775_probe(struct platform_device *pdev) if (err) goto exit_remove; } + if (find_temp_source(data, i, data->num_temp_alarms) >= 0) { + err = device_create_file(dev, + &sda_temp_alarm[i].dev_attr); + if (err) + goto exit_remove; + } if (!(data->have_temp_fixed & (1 << i))) continue; err = device_create_file(dev, &sda_temp_type[i].dev_attr); @@ -3905,12 +3945,6 @@ static int nct6775_probe(struct platform_device *pdev) err = device_create_file(dev, &sda_temp_offset[i].dev_attr); if (err) goto exit_remove; - if (i >= NUM_TEMP_ALARM || - data->ALARM_BITS[TEMP_ALARM_BASE + i] < 0) - continue; - err = device_create_file(dev, &sda_temp_alarm[i].dev_attr); - if (err) - goto exit_remove; } for (i = 0; i < ARRAY_SIZE(sda_caseopen); i++) { From bde3f4bb512c34609becce11f23ed6f89e3f488f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 23 Jun 
2013 13:04:04 -0700 Subject: [PATCH 040/102] hwmon: (nct6775) Drop unsupported fan alarm attributes for NCT6775 commit 41fa9a944fce1d7efd5ee3d50ac85b92f42dcc3d upstream. NCT6775 does not support alarms for fans 4 and 5. Drop the attributes. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct6775.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 2405ab439e80..99cec1825420 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -199,7 +199,7 @@ static const s8 NCT6775_ALARM_BITS[] = { 0, 1, 2, 3, 8, 21, 20, 16, /* in0.. in7 */ 17, -1, -1, -1, -1, -1, -1, /* in8..in14 */ -1, /* unused */ - 6, 7, 11, 10, 23, /* fan1..fan5 */ + 6, 7, 11, -1, -1, /* fan1..fan5 */ -1, -1, -1, /* unused */ 4, 5, 13, -1, -1, -1, /* temp1..temp6 */ 12, -1 }; /* intrusion0, intrusion1 */ @@ -3877,10 +3877,12 @@ static int nct6775_probe(struct platform_device *pdev) &sda_fan_input[i].dev_attr); if (err) goto exit_remove; - err = device_create_file(dev, - &sda_fan_alarm[i].dev_attr); - if (err) - goto exit_remove; + if (data->ALARM_BITS[FAN_ALARM_BASE + i] >= 0) { + err = device_create_file(dev, + &sda_fan_alarm[i].dev_attr); + if (err) + goto exit_remove; + } if (data->kind != nct6776 && data->kind != nct6779) { err = device_create_file(dev, From fe1ebd057ddfb37a23cc37090d427fb0a68d3000 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 23 Jun 2013 23:25:04 +0400 Subject: [PATCH 041/102] libata-zpodd: must use ata_tf_init() commit d0887c43f51c308b01605346e55d906ba858a6f9 upstream. There are some SATA controllers which have both devices 0 and 1 but this module just zeroes out taskfile and sets then ATA_TFLAG_DEVICE (not sure that's needed) which could lead to a wrong device being selected just before issuing command. Thus we should call ata_tf_init() which sets up the device register value properly, like all other users of ata_exec_internal() do... 
Signed-off-by: Sergei Shtylyov Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-zpodd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index 90b159b740b3..cd8daf47188b 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -32,13 +32,14 @@ struct zpodd { static int eject_tray(struct ata_device *dev) { - struct ata_taskfile tf = {}; + struct ata_taskfile tf; const char cdb[] = { GPCMD_START_STOP_UNIT, 0, 0, 0, 0x02, /* LoEj */ 0, 0, 0, 0, 0, 0, 0, }; + ata_tf_init(dev, &tf); tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; tf.protocol = ATAPI_PROT_NODATA; @@ -52,8 +53,7 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) char buf[16]; unsigned int ret; struct rm_feature_desc *desc = (void *)(buf + 8); - struct ata_taskfile tf = {}; - + struct ata_taskfile tf; char cdb[] = { GPCMD_GET_CONFIGURATION, 2, /* only 1 feature descriptor requested */ 0, 3, /* 3, removable medium feature */ @@ -62,6 +62,7 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) 0, 0, 0, }; + ata_tf_init(dev, &tf); tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; tf.protocol = ATAPI_PROT_PIO; From 64a03b5c4206bb104ea69efed3c8189427fcedfe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 11 Jun 2013 00:11:36 -0700 Subject: [PATCH 042/102] libata: skip SRST for all SIMG [34]7x port-multipliers commit 7a87718d92760fc688628ad6a430643dafa16f1f upstream. For some reason, a lot of port-multipliers have issues with softreset. SIMG [34]7x series port-multipliers have been quite erratic in this regard. I recall that it was better with some firmware revisions and the current list of quirks worked fine for a while. I think it got worse with later firmwares or maybe my test coverage wasn't good enough. 
Anyways, HPA is reporting that his 3726 setup suffers SRST failures and then the PMP gets confused and fails to probe the last port. The hope was that we try to stick to the standard as much as possible and soonish the PMPs and their firmwares will improve in quality, so the quirk list was kept to minimum. Well, it seems like that's never gonna happen. Let's set NO_SRST for all [34]7x PMPs so that whatever remaining userbase of the device suffer the least. Maybe we should do the same for 57xx's but unfortunately I don't have any device left to test and I'm not even sure 57xx's have ever been made widely available, so let's leave those alone for now. Signed-off-by: Tejun Heo Reported-by: "H. Peter Anvin" Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-pmp.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 61c59ee45ce9..1c41722bb7e2 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -389,9 +389,13 @@ static void sata_pmp_quirks(struct ata_port *ap) /* link reports offline after LPM */ link->flags |= ATA_LFLAG_NO_LPM; - /* Class code report is unreliable. */ + /* + * Class code report is unreliable and SRST times + * out under certain configurations. 
+ */ if (link->pmp < 5) - link->flags |= ATA_LFLAG_ASSUME_ATA; + link->flags |= ATA_LFLAG_NO_SRST | + ATA_LFLAG_ASSUME_ATA; /* port 5 is for SEMB device and it doesn't like SRST */ if (link->pmp == 5) @@ -399,20 +403,17 @@ static void sata_pmp_quirks(struct ata_port *ap) ATA_LFLAG_ASSUME_SEMB; } } else if (vendor == 0x1095 && devid == 0x4723) { - /* sil4723 quirks */ - ata_for_each_link(link, ap, EDGE) { - /* link reports offline after LPM */ - link->flags |= ATA_LFLAG_NO_LPM; - - /* class code report is unreliable */ - if (link->pmp < 2) - link->flags |= ATA_LFLAG_ASSUME_ATA; - - /* the config device at port 2 locks up on SRST */ - if (link->pmp == 2) - link->flags |= ATA_LFLAG_NO_SRST | - ATA_LFLAG_ASSUME_ATA; - } + /* + * sil4723 quirks + * + * Link reports offline after LPM. Class code report is + * unreliable. SIMG PMPs never got SRST reliable and the + * config device at port 2 locks up on SRST. + */ + ata_for_each_link(link, ap, EDGE) + link->flags |= ATA_LFLAG_NO_LPM | + ATA_LFLAG_NO_SRST | + ATA_LFLAG_ASSUME_ATA; } else if (vendor == 0x1095 && devid == 0x4726) { /* sil4726 quirks */ ata_for_each_link(link, ap, EDGE) { From b568411d97ec8c91eef5e512b5b2ec5df050c9c1 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Wed, 19 Jun 2013 16:25:37 -0700 Subject: [PATCH 043/102] ata_piix: IDE-mode SATA patch for Intel Coleto Creek DeviceIDs commit c7e8695bfa0611b39493a9dfe8bab9f63f9809bd upstream. This patch adds the IDE-mode SATA DeviceIDs for the Intel Coleto Creek PCH. 
Signed-off-by: Seth Heasley Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ata_piix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 9a8a674e8fac..8eae65905750 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -338,6 +338,8 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller IDE (BayTrail) */ { 0x8086, 0x0F20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_byt }, { 0x8086, 0x0F21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_byt }, + /* SATA Controller IDE (Coleto Creek) */ + { 0x8086, 0x23a6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, { } /* terminate list */ }; From 31c1a76deb262a074b209ad14a35bef7f5b9ab81 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Mon, 3 Jun 2013 08:22:54 -0500 Subject: [PATCH 044/102] sata_highbank: increase retry count but shorten duration for Calxeda controller commit ddfef5de3d716f77bad32dbbba6b280158dfd721 upstream. Increase the retry count for the hard reset function to 100 but shorten the time out period to 500 ms. See the comment for ahci_highbank_hardreset for the reasons why those values were chosen. Signed-off-by: Mark Langsdorf Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/sata_highbank.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index b20aa96b958d..c846fd3c5c09 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -196,10 +196,26 @@ static int highbank_initialize_phys(struct device *dev, void __iomem *addr) return 0; } +/* + * The Calxeda SATA phy intermittently fails to bring up a link with Gen3 + * Retrying the phy hard reset can work around the issue, but the drive + * may fail again. In less than 150 out of 15000 test runs, it took more + * than 10 tries for the link to be established (but never more than 35). 
+ * Triple the maximum observed retry count to provide plenty of margin for + * rare events and to guarantee that the link is established. + * + * Also, the default 2 second time-out on a failed drive is too long in + * this situation. The uboot implementation of the same driver function + * uses a much shorter time-out period and never experiences a time out + * issue. Reducing the time-out to 500ms improves the responsiveness. + * The other timing constants were kept the same as the stock AHCI driver. + * This change was also tested 15000 times on 24 drives and none of them + * experienced a time out. + */ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { - const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + static const unsigned long timing[] = { 5, 100, 500}; struct ata_port *ap = link->ap; struct ahci_port_priv *pp = ap->private_data; u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG; @@ -207,7 +223,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, bool online; u32 sstatus; int rc; - int retry = 10; + int retry = 100; ahci_stop_engine(ap); From 5ebc73095cf30bca82ef2983a46756ff26b1a810 Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Mon, 3 Jun 2013 18:24:55 +0800 Subject: [PATCH 045/102] i2c-piix4: Add AMD CZ SMBus device ID commit b996ac90f595dda271cbd858b136b45557fc1a57 upstream. To add AMD CZ SMBus controller device ID. 
[bhelgaas: drop pci_ids.h update] Signed-off-by: Shane Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Tejun Heo Reviewed-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- Documentation/i2c/busses/i2c-piix4 | 2 +- drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-piix4.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4 index 1e6634f54c50..a370b2047cf3 100644 --- a/Documentation/i2c/busses/i2c-piix4 +++ b/Documentation/i2c/busses/i2c-piix4 @@ -13,7 +13,7 @@ Supported adapters: * AMD SP5100 (SB700 derivative found on some server mainboards) Datasheet: Publicly available at the AMD website http://support.amd.com/us/Embedded_TechDocs/44413.pdf - * AMD Hudson-2 + * AMD Hudson-2, CZ Datasheet: Not publicly available * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge Datasheet: Publicly available at the SMSC website http://www.smsc.com diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 631736e2e7ed..4faf02b3657d 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -150,6 +150,7 @@ config I2C_PIIX4 ATI SB700/SP5100 ATI SB800 AMD Hudson-2 + AMD CZ Serverworks OSB4 Serverworks CSB5 Serverworks CSB6 diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 39ab78c1a02c..d05ad590af29 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -22,7 +22,7 @@ Intel PIIX4, 440MX Serverworks OSB4, CSB5, CSB6, HT-1000, HT-1100 ATI IXP200, IXP300, IXP400, SB600, SB700/SP5100, SB800 - AMD Hudson-2 + AMD Hudson-2, CZ SMSC Victory66 Note: we assume there can only be one device, with one or more @@ -522,6 +522,7 @@ static DEFINE_PCI_DEVICE_TABLE(piix4_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SMBUS) }, + { 
PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x790b) }, { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4) }, { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, From d23067e924e410875602b3427251b88cd18da73f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 18:00:25 +0200 Subject: [PATCH 046/102] ASoC: s6000: Fix unlocked snd_pcm_stop() call commit 61be2b9a18ec70f3cbe3deef7a5f77869c71b5ae upstream. snd_pcm_stop() must be called in the PCM substream lock context. Acked-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/soc/s6000/s6000-pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c index 1358c7de2521..d0740a762963 100644 --- a/sound/soc/s6000/s6000-pcm.c +++ b/sound/soc/s6000/s6000-pcm.c @@ -128,7 +128,9 @@ static irqreturn_t s6000_pcm_irq(int irq, void *data) substream->runtime && snd_pcm_running(substream)) { dev_dbg(pcm->dev, "xrun\n"); + snd_pcm_stream_lock(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(substream); ret = IRQ_HANDLED; } From b5aa3fc5730c605f4f6e1dbc45db459cfd59d1ed Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 18:00:01 +0200 Subject: [PATCH 047/102] ASoC: atmel: Fix unlocked snd_pcm_stop() call commit 571185717f8d7f2a088a7ac38d94a9ad5fd9da5c upstream. snd_pcm_stop() must be called in the PCM substream lock context. 
Acked-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/soc/atmel/atmel-pcm-dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/atmel/atmel-pcm-dma.c b/sound/soc/atmel/atmel-pcm-dma.c index 1d38fd0bc4e2..d12826526798 100644 --- a/sound/soc/atmel/atmel-pcm-dma.c +++ b/sound/soc/atmel/atmel-pcm-dma.c @@ -81,7 +81,9 @@ static void atmel_pcm_dma_irq(u32 ssc_sr, /* stop RX and capture: will be enabled again at restart */ ssc_writex(prtd->ssc->regs, SSC_CR, prtd->mask->ssc_disable); + snd_pcm_stream_lock(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(substream); /* now drain RHR and read status to remove xrun condition */ ssc_readx(prtd->ssc->regs, SSC_RHR); From 3bd92a9d1f82e41d781f082d710b16036a0ea25b Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Jul 2013 20:01:03 -0300 Subject: [PATCH 048/102] ASoC: sglt5000: Fix SGTL5000_PLL_FRAC_DIV_MASK commit 5c78dfe87ea04b501ee000a7f03b9432ac9d008c upstream. SGTL5000_PLL_FRAC_DIV_MASK is used to mask bits 0-10 (11 bits in total) of register CHIP_PLL_CTRL, so fix the mask to accommodate this entire bit range. 
Reported-by: Oskar Schirmer Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/sgtl5000.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 8a9f43534b79..d3a68bbfea00 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -347,7 +347,7 @@ #define SGTL5000_PLL_INT_DIV_MASK 0xf800 #define SGTL5000_PLL_INT_DIV_SHIFT 11 #define SGTL5000_PLL_INT_DIV_WIDTH 5 -#define SGTL5000_PLL_FRAC_DIV_MASK 0x0700 +#define SGTL5000_PLL_FRAC_DIV_MASK 0x07ff #define SGTL5000_PLL_FRAC_DIV_SHIFT 0 #define SGTL5000_PLL_FRAC_DIV_WIDTH 11 From 509c317d092a551f5c26de05445fc8d5b81cc23d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Jul 2013 16:41:53 +1000 Subject: [PATCH 049/102] md/raid10: fix bug which causes all RAID10 reshapes to move no data. commit 1376512065b23f39d5f9a160948f313397dde972 upstream. The recent commit: commit 7e83ccbecd608b971f340e951c9e84cd0343002f md/raid10: Allow skipping recovery when clean arrays are assembled Causes raid10 to skip a recovery in certain cases where it is safe to do so. Unfortunately it also causes a reshape to be skipped which is never safe. The result is that an attempt to reshape a RAID10 will appear to complete instantly, but no data will have been moved so the array will now contain garbage. (If nothing is written, you can recover by simply performing the reverse reshape which will also complete instantly). Bug was introduced in 3.10, so this is suitable for 3.10-stable. 
Signed-off-by: NeilBrown Cc: Martin Wilck Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6ddae2501b9a..f2f4a6e4f4f4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2909,14 +2909,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, */ if (mddev->bitmap == NULL && mddev->recovery_cp == MaxSector && + mddev->reshape_position == MaxSector && + !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && + !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && conf->fullsync == 0) { *skipped = 1; - max_sector = mddev->dev_sectors; - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || - test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - max_sector = mddev->resync_max_sectors; - return max_sector - sector_nr; + return mddev->dev_sectors - sector_nr; } skipped: From 4b5c14511c401edf5ea78e2592c7043456deb595 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Jul 2013 15:58:05 +1000 Subject: [PATCH 050/102] md/raid10: fix two bugs affecting RAID10 reshape. commit 78eaa0d4cbcdb345992fa3dd22b3bcbb473cc064 upstream. 1/ If a RAID10 is being reshaped to a smaller number of devices and is stopped while this is ongoing, then when the array is reassembled the 'mirrors' array will be allocated too small. This will lead to an access error or memory corruption. 2/ A sanity test performed when a reshaping RAID10 array is restarted is slightly incorrect. Due to the first bug, this is suitable for any -stable kernel since 3.5 where this code was introduced. 
Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f2f4a6e4f4f4..d7bad6bc46d8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3531,7 +3531,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) /* FIXME calc properly */ conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks + - max(0,mddev->delta_disks)), + max(0,-mddev->delta_disks)), GFP_KERNEL); if (!conf->mirrors) goto out; @@ -3690,7 +3690,7 @@ static int run(struct mddev *mddev) conf->geo.far_offset == 0) goto out_free_conf; if (conf->prev.far_copies != 1 && - conf->geo.far_offset == 0) + conf->prev.far_offset == 0) goto out_free_conf; } From 2dc04d3333049098691eb652e06d52fbd80771d2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jul 2013 16:50:47 +1000 Subject: [PATCH 051/102] md/raid10: fix two problems with RAID10 resync. commit 7bb23c4934059c64cbee2e41d5d24ce122285176 upstream. 1/ When a difference between blocks is found, data is copied from one bio to the other. However bv_len is used as the length to copy and this could be zero. So use r10_bio->sectors to calculate length instead. Using bv_len was probably always a bit dubious, but the introduction of bio_advance made it much more likely to be a problem. 2/ When preparing some blocks for sync, we don't set BIO_UPTODATE except on bios that we schedule for a read. This ensures that missing/failed devices don't confuse the loop at the top of sync_request_write. Commit 8be185f2c9d54d6 "raid10: Use bio_reset()" removed a loop which set BIO_UPTODATE on all appropriate bios. So we need to re-add that flag. These bugs were introduced in 3.10, so this patch is suitable for 3.10-stable, and can remove a potential for data corruption. 
Reported-by: Brassow Jonathan Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d7bad6bc46d8..d61eb7ea0d81 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2075,11 +2075,17 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * both 'first' and 'i', so we just compare them. * All vec entries are PAGE_SIZE; */ - for (j = 0; j < vcnt; j++) + int sectors = r10_bio->sectors; + for (j = 0; j < vcnt; j++) { + int len = PAGE_SIZE; + if (sectors < (len / 512)) + len = sectors * 512; if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), page_address(tbio->bi_io_vec[j].bv_page), - fbio->bi_io_vec[j].bv_len)) + len)) break; + sectors -= len/512; + } if (j == vcnt) continue; atomic64_add(r10_bio->sectors, &mddev->resync_mismatches); @@ -3385,6 +3391,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (bio->bi_end_io == end_sync_read) { md_sync_acct(bio->bi_bdev, nr_sectors); + set_bit(BIO_UPTODATE, &bio->bi_flags); generic_make_request(bio); } } From 1c0d08e652c18e3f3198969435fef31941b2eec3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 1 Jul 2013 22:14:10 +0200 Subject: [PATCH 052/102] tick: Sanitize broadcast control logic commit 07bd1172902e782f288e4d44b1fde7dec0f08b6f upstream. The recent implementation of a generic dummy timer resulted in a different registration order of per cpu local timers which made the broadcast control logic go belly up. If the dummy timer is the first clock event device which is registered for a CPU, then it is installed, the broadcast timer is initialized and the CPU is marked as broadcast target. If a real clock event device is installed after that, we can fail to take the CPU out of the broadcast mask. In the worst case we end up with two periodic timer events firing for the same CPU. 
One from the per cpu hardware device and one from the broadcast. Now the problem is that we have no way to distinguish whether the system is in a state which makes broadcasting necessary or the broadcast bit was set due to the nonfunctional dummy timer installment. To solve this we need to keep track of the system state separately and provide a more detailed decision logic whether we keep the CPU in broadcast mode or not. The old decision logic only clears the broadcast mode, if the newly installed clock event device is not affected by power states. The new logic clears the broadcast mode if one of the following is true: - The new device is not affected by power states. - The system is not in a power state affected mode. - The system has switched to oneshot mode. The oneshot broadcast is controlled from the deep idle state. The CPU is not in idle at this point, so it's safe to remove it from the mask. If we clear the broadcast bit for the CPU when a new device is installed, we also shut down the broadcast device when this was the last CPU in the broadcast mask. If the broadcast bit is kept, then we leave the new device in shutdown state and rely on the broadcast to deliver the timer interrupts via the broadcast IPIs. 
Reported-and-tested-by: Stehle Vincent-B46079 Reviewed-by: Stephen Boyd Cc: John Stultz , Cc: Mark Rutland Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1307012153060.4013@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-broadcast.c | 70 ++++++++++++++++++++++++++++++------ kernel/time/tick-common.c | 3 +- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 20d6fba70652..c389f068aca2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -29,6 +29,7 @@ static struct tick_device tick_broadcast_device; static cpumask_var_t tick_broadcast_mask; +static cpumask_var_t tick_broadcast_on; static cpumask_var_t tmpmask; static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); static int tick_broadcast_force; @@ -123,8 +124,9 @@ static void tick_device_setup_broadcast_func(struct clock_event_device *dev) */ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { + struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; - int ret = 0; + int ret; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -138,20 +140,59 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) dev->event_handler = tick_handle_periodic; tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_broadcast_mask); - tick_broadcast_start_periodic(tick_broadcast_device.evtdev); + tick_broadcast_start_periodic(bc); ret = 1; } else { /* - * When the new device is not affected by the stop - * feature and the cpu is marked in the broadcast mask - * then clear the broadcast bit. + * Clear the broadcast bit for this cpu if the + * device is not power state affected. 
*/ - if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { - int cpu = smp_processor_id(); + if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) cpumask_clear_cpu(cpu, tick_broadcast_mask); - tick_broadcast_clear_oneshot(cpu); - } else { + else tick_device_setup_broadcast_func(dev); + + /* + * Clear the broadcast bit if the CPU is not in + * periodic broadcast on state. + */ + if (!cpumask_test_cpu(cpu, tick_broadcast_on)) + cpumask_clear_cpu(cpu, tick_broadcast_mask); + + switch (tick_broadcast_device.mode) { + case TICKDEV_MODE_ONESHOT: + /* + * If the system is in oneshot mode we can + * unconditionally clear the oneshot mask bit, + * because the CPU is running and therefore + * not in an idle state which causes the power + * state affected device to stop. Let the + * caller initialize the device. + */ + tick_broadcast_clear_oneshot(cpu); + ret = 0; + break; + + case TICKDEV_MODE_PERIODIC: + /* + * If the system is in periodic mode, check + * whether the broadcast device can be + * switched off now. + */ + if (cpumask_empty(tick_broadcast_mask) && bc) + clockevents_shutdown(bc); + /* + * If we kept the cpu in the broadcast mask, + * tell the caller to leave the per cpu device + * in shutdown state. The periodic interrupt + * is delivered by the broadcast device. 
+ */ + ret = cpumask_test_cpu(cpu, tick_broadcast_mask); + break; + default: + /* Nothing to do */ + ret = 0; + break; } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); @@ -281,6 +322,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason) switch (*reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ON: case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) @@ -290,8 +332,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason) tick_broadcast_force = 1; break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (!tick_broadcast_force && - cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { + if (tick_broadcast_force) + break; + cpumask_clear_cpu(cpu, tick_broadcast_on); + if (!tick_device_is_functional(dev)) + break; + if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); @@ -349,6 +395,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) bc = tick_broadcast_device.evtdev; cpumask_clear_cpu(cpu, tick_broadcast_mask); + cpumask_clear_cpu(cpu, tick_broadcast_on); if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { if (bc && cpumask_empty(tick_broadcast_mask)) @@ -792,6 +839,7 @@ bool tick_broadcast_oneshot_available(void) void __init tick_broadcast_init(void) { zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); + zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT); zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); #ifdef CONFIG_TICK_ONESHOT zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5d3fb100bc06..7ce5e5a4a4c5 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -194,7 +194,8 @@ static void tick_setup_device(struct tick_device *td, * When global broadcasting is active, check if the current * device is registered 
as a placeholder for broadcast mode. * This allows us to handle this x86 misfeature in a generic - * way. + * way. This function also returns !=0 when we keep the + * current active broadcast state for this CPU. */ if (tick_device_uses_broadcast(newdev, cpu)) return; From 084c895d3c9fecf80fe02ba442b90818116b30d2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 1 Jul 2013 22:14:10 +0200 Subject: [PATCH 053/102] tick: Prevent uncontrolled switch to oneshot mode commit 1f73a9806bdd07a5106409bbcab3884078bd34fe upstream. When the system switches from periodic to oneshot mode, the broadcast logic causes a possibility that a CPU which has not yet switched to oneshot mode puts its own clock event device into oneshot mode without updating the state and the timer handler. CPU0 CPU1 per cpu tickdev is in periodic mode and switched to broadcast Switch to oneshot mode tick_broadcast_switch_to_oneshot() cpumask_copy(tick_broadcast_oneshot_mask, tick_broadcast_mask); broadcast device mode = oneshot Timer interrupt irq_enter() tick_check_oneshot_broadcast() dev->set_mode(ONESHOT); tick_handle_periodic() if (dev->mode == ONESHOT) dev->next_event += period; FAIL. We fail, because dev->next_event contains KTIME_MAX, if the device was in periodic mode before the uncontrolled switch to oneshot happened. We must copy the broadcast bits over to the oneshot mask, because otherwise a CPU which relies on the broadcast would not be woken up anymore after the broadcast device switched to oneshot mode. So we need to verify in tick_check_oneshot_broadcast() whether the CPU has already switched to oneshot mode. If not, leave the device untouched and let the CPU switch controlled into oneshot mode. This is a long standing bug, which was never noticed, because the main user of the broadcast, x86, cannot run into that scenario, AFAICT. 
The nonarchitected timer mess of ARM creates a gazillion of differently broken abominations which trigger the shortcomings of that broadcast code, which better had never been necessary in the first place. Reported-and-tested-by: Stehle Vincent-B46079 Reviewed-by: Stephen Boyd Cc: John Stultz , Cc: Mark Rutland Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1307012153060.4013@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-broadcast.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c389f068aca2..297b90b5277e 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -522,7 +522,15 @@ void tick_check_oneshot_broadcast(int cpu) if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { struct tick_device *td = &per_cpu(tick_cpu_device, cpu); - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); + /* + * We might be in the middle of switching over from + * periodic to oneshot. If the CPU has not yet + * switched over, leave the device alone. + */ + if (td->mode == TICKDEV_MODE_ONESHOT) { + clockevents_set_mode(td->evtdev, + CLOCK_EVT_MODE_ONESHOT); + } } } From f707f7ae37c514a3871741747bfd8c09d7badb05 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 29 May 2013 10:11:17 +0200 Subject: [PATCH 054/102] clocksource: dw_apb: Fix error check commit 1a33bd2be705cbb3f57d7223b60baea441039307 upstream. irq_of_parse_and_map() returns 0 on error, while the code checks for NO_IRQ. This breaks on platforms that have NO_IRQ != 0. 
Signed-off-by: Baruch Siach Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/dw_apb_timer_of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index ab09ed3742ee..6b02eddc5f50 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -44,7 +44,7 @@ static void add_clockevent(struct device_node *event_timer) u32 irq, rate; irq = irq_of_parse_and_map(event_timer, 0); - if (irq == NO_IRQ) + if (irq == 0) panic("No IRQ for clock event timer"); timer_get_base_and_rate(event_timer, &iobase, &rate); From 24e095cdc2d7781e756b052291ef0ca7c9edac93 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Sat, 22 Jun 2013 13:13:25 +0200 Subject: [PATCH 055/102] rt2x00: read 5GHz TX power values from the correct offset commit 0a6f3a8ebaf13407523c2c7d575b4ca2debd23ba upstream. The current code uses the same index value both for the channel information array and for the TX power table. The index starts from 14, however the index of the TX power table must start from zero. Fix it, in order to get the correct TX power value for a given channel. The changes in rt61pci.c and rt73usb.c are compile tested only. Signed-off-by: Gabor Juhos Acked-by: Stanislaw Gruszka Acked-by: Gertjan van Wingerde Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rt2x00/rt2800lib.c | 4 ++-- drivers/net/wireless/rt2x00/rt61pci.c | 3 ++- drivers/net/wireless/rt2x00/rt73usb.c | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 72f32e5caa4d..519914f33b43 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -6056,8 +6056,8 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) default_power2 = rt2x00_eeprom_addr(rt2x00dev, EEPROM_TXPOWER_A2); for (i = 14; i < spec->num_channels; i++) { - info[i].default_power1 = default_power1[i]; - info[i].default_power2 = default_power2[i]; + info[i].default_power1 = default_power1[i - 14]; + info[i].default_power2 = default_power2[i - 14]; } } diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 0dc8180e251b..883a54c8c5bc 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -2825,7 +2825,8 @@ static int rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) tx_power = rt2x00_eeprom_addr(rt2x00dev, EEPROM_TXPOWER_A_START); for (i = 14; i < spec->num_channels; i++) { info[i].max_power = MAX_TXPOWER; - info[i].default_power1 = TXPOWER_FROM_DEV(tx_power[i]); + info[i].default_power1 = + TXPOWER_FROM_DEV(tx_power[i - 14]); } } diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 377e09bb0b81..2bbca183f94a 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -2167,7 +2167,8 @@ static int rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) tx_power = rt2x00_eeprom_addr(rt2x00dev, EEPROM_TXPOWER_A_START); for (i = 14; i < spec->num_channels; i++) { info[i].max_power = MAX_TXPOWER; - info[i].default_power1 = TXPOWER_FROM_DEV(tx_power[i]); + info[i].default_power1 = + TXPOWER_FROM_DEV(tx_power[i - 
14]); } } From 0530bd4a6cc3c04d1e621299e05d2da2c99736c4 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Tue, 25 Jun 2013 22:57:29 +0200 Subject: [PATCH 056/102] rt2x00: rt2800lib: fix default TX power check for RT55xx commit 0847beb2865f5ef1c8626ec1a37def18f3d6c41a upstream. The code writes the default_power2 value into the TX field of the RFCSR50 register, however the condition in the if statement uses default_power1. Due to this, wrong TX power value might be written into the register. Use the correct value in the condition to fix the issue. Compile tested only. Signed-off-by: Gabor Juhos Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rt2x00/rt2800lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 519914f33b43..705aa3386ee0 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -2392,7 +2392,7 @@ static void rt2800_config_channel_rf55xx(struct rt2x00_dev *rt2x00dev, rt2800_rfcsr_write(rt2x00dev, 49, rfcsr); rt2800_rfcsr_read(rt2x00dev, 50, &rfcsr); - if (info->default_power1 > power_bound) + if (info->default_power2 > power_bound) rt2x00_set_field8(&rfcsr, RFCSR50_TX, power_bound); else rt2x00_set_field8(&rfcsr, RFCSR50_TX, info->default_power2); From e9c25a407d5c75646eb258de1363dccc1fd3ab47 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Mon, 10 Jun 2013 13:49:38 +0530 Subject: [PATCH 057/102] ath9k_hw: Assign default xlna config for AR9485 commit 30d5b709da23f4ab9836c7f66d2d2e780a69cf12 upstream. For AR9485 boards with XLNA, the default gpio config is not set correctly, fix this. Signed-off-by: Sujith Manoharan Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 8 ++++++-- drivers/net/wireless/ath/ath9k/ar9003_phy.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index e6b92ff265fd..25b8bbbe74fe 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -3563,14 +3563,18 @@ static void ar9003_hw_ant_ctrl_apply(struct ath_hw *ah, bool is2ghz) { struct ath9k_hw_capabilities *pCap = &ah->caps; int chain; - u32 regval; + u32 regval, value; static const u32 switch_chain_reg[AR9300_MAX_CHAINS] = { AR_PHY_SWITCH_CHAIN_0, AR_PHY_SWITCH_CHAIN_1, AR_PHY_SWITCH_CHAIN_2, }; - u32 value = ar9003_hw_ant_ctrl_common_get(ah, is2ghz); + if (AR_SREV_9485(ah) && (ar9003_hw_get_rx_gain_idx(ah) == 0)) + ath9k_hw_cfg_output(ah, AR9300_EXT_LNA_CTL_GPIO_AR9485, + AR_GPIO_OUTPUT_MUX_AS_PCIE_ATTENTION_LED); + + value = ar9003_hw_ant_ctrl_common_get(ah, is2ghz); if (AR_SREV_9462(ah) || AR_SREV_9565(ah)) { REG_RMW_FIELD(ah, AR_PHY_SWITCH_COM, diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h index e71774196c01..5013c731f9f6 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h @@ -351,6 +351,8 @@ #define AR_PHY_CCA_NOM_VAL_9330_2GHZ -118 +#define AR9300_EXT_LNA_CTL_GPIO_AR9485 9 + /* * AGC Field Definitions */ From 13311514a74f520989e304b746dc7478335877f9 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Mon, 10 Jun 2013 13:49:39 +0530 Subject: [PATCH 058/102] ath9k: Fix noisefloor calibration commit 696df78509d1f81b651dd98ecdc1aecab616db6b upstream. 
The commits, "ath9k: Fix regression in channelwidth switch at the same channel" "ath9k: Fix invalid noisefloor reading due to channel update" attempted to fix noisefloor calibration when a channel switch happens due to HT20/HT40 bandwidth change. This is causing invalid readings resulting in messages like: "ath: phy16: NF[0] (-45) > MAX (-95), correcting to MAX". This results in an incorrect noise being used initially for reporting the signal level of received packets, until NF calibration is done and the history buffer is updated via the ANI timer, which happens much later. When a bandwidth change happens, it is appropriate to reset the internal history data for the channel. Do this correctly in the reset() routine by checking the "chanmode" variable. Signed-off-by: Sujith Manoharan Cc: Rajkumar Manoharan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hw.c | 3 ++- drivers/net/wireless/ath/ath9k/main.c | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 15dfefcf2d0f..b1d5037bff7f 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1872,7 +1872,8 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, ah->caldata = caldata; if (caldata && (chan->channel != caldata->channel || - chan->channelFlags != caldata->channelFlags)) { + chan->channelFlags != caldata->channelFlags || + chan->chanmode != caldata->chanmode)) { /* Operating channel changed, reset channel calibration data */ memset(caldata, 0, sizeof(*caldata)); ath9k_init_nfcal_hist_buffer(ah, chan); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 5092ecae7706..35ced100c183 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1211,13 +1211,6 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) 
ath_update_survey_stats(sc); spin_unlock_irqrestore(&common->cc_lock, flags); - /* - * Preserve the current channel values, before updating - * the same channel - */ - if (ah->curchan && (old_pos == pos)) - ath9k_hw_getnf(ah, ah->curchan); - ath9k_cmn_update_ichannel(&sc->sc_ah->channels[pos], curchan, channel_type); From 64ea2992a29aa0bdc4eaf457351b063caafb6655 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Mon, 10 Jun 2013 13:49:40 +0530 Subject: [PATCH 059/102] ath9k: Do not assign noise for NULL caldata commit d3bcb7b24bbf09fde8405770e676fe0c11c79662 upstream. ah->noise is maintained globally and not per-channel. This is updated in the reset() routine after the NF history has been filled for the *current channel*, just before switching to the new channel. There is no need to do it inside getnf(), since ah->noise must contain a value for the new channel. Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/calib.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c index 7304e7585009..5e8219a91e25 100644 --- a/drivers/net/wireless/ath/ath9k/calib.c +++ b/drivers/net/wireless/ath/ath9k/calib.c @@ -387,7 +387,6 @@ bool ath9k_hw_getnf(struct ath_hw *ah, struct ath9k_channel *chan) if (!caldata) { chan->noisefloor = nf; - ah->noise = ath9k_hw_getchan_noise(ah, chan); return false; } From 98dcc2946adbe4349ef1ef9b99873b912831edd4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 6 Jun 2013 22:15:55 -0400 Subject: [PATCH 060/102] SCSI: sd: Update WRITE SAME heuristics commit 66c28f97120e8a621afd5aa7a31c4b85c547d33d upstream. SATA drives located behind a SAS controller would incorrectly receive WRITE SAME commands. Tweak the heuristics so that: - If REPORT SUPPORTED OPERATION CODES is provided we will use that to choose between WRITE SAME(16), WRITE SAME(10) and disabled. 
This also fixes an issue with the old code which would issue WRITE SAME(10) despite the command not being whitelisted in REPORT SUPPORTED OPERATION CODES. - If REPORT SUPPORTED OPERATION CODES is not provided we will fall back to WRITE SAME(10) unless the device has an ATA Information VPD page. The assumption is that a SATL which is smart enough to implement WRITE SAME would also provide REPORT SUPPORTED OPERATION CODES. To facilitate the new heuristics scsi_report_opcode() has been modified so that we can distinguish between "operation not supported" and "RSOC not supported". Reported-by: H. Peter Anvin Tested-by: Bernd Schubert Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi.c | 8 ++++---- drivers/scsi/sd.c | 46 +++++++++++++++++++++++++++++++-------------- drivers/scsi/sd.h | 1 + 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 2c0d0ec8150b..3b1ea34e1f5a 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1070,8 +1070,8 @@ EXPORT_SYMBOL_GPL(scsi_get_vpd_page); * @opcode: opcode for command to look up * * Uses the REPORT SUPPORTED OPERATION CODES to look up the given - * opcode. Returns 0 if RSOC fails or if the command opcode is - * unsupported. Returns 1 if the device claims to support the command. + * opcode. Returns -EINVAL if RSOC fails, 0 if the command opcode is + * unsupported and 1 if the device claims to support the command. 
*/ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, unsigned int len, unsigned char opcode) @@ -1081,7 +1081,7 @@ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, int result; if (sdev->no_report_opcodes || sdev->scsi_level < SCSI_SPC_3) - return 0; + return -EINVAL; memset(cmd, 0, 16); cmd[0] = MAINTENANCE_IN; @@ -1097,7 +1097,7 @@ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, if (result && scsi_sense_valid(&sshdr) && sshdr.sense_key == ILLEGAL_REQUEST && (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00) - return 0; + return -EINVAL; if ((buffer[1] & 3) == 3) /* Command supported */ return 1; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6f6a1b48f998..1b1125e67f1e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -442,8 +442,10 @@ sd_store_write_same_blocks(struct device *dev, struct device_attribute *attr, if (max == 0) sdp->no_write_same = 1; - else if (max <= SD_MAX_WS16_BLOCKS) + else if (max <= SD_MAX_WS16_BLOCKS) { + sdp->no_write_same = 0; sdkp->max_ws_blocks = max; + } sd_config_write_same(sdkp); @@ -740,7 +742,6 @@ static void sd_config_write_same(struct scsi_disk *sdkp) { struct request_queue *q = sdkp->disk->queue; unsigned int logical_block_size = sdkp->device->sector_size; - unsigned int blocks = 0; if (sdkp->device->no_write_same) { sdkp->max_ws_blocks = 0; @@ -752,18 +753,20 @@ static void sd_config_write_same(struct scsi_disk *sdkp) * blocks per I/O unless the device explicitly advertises a * bigger limit. 
*/ - if (sdkp->max_ws_blocks == 0) - sdkp->max_ws_blocks = SD_MAX_WS10_BLOCKS; - - if (sdkp->ws16 || sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS) - blocks = min_not_zero(sdkp->max_ws_blocks, - (u32)SD_MAX_WS16_BLOCKS); - else - blocks = min_not_zero(sdkp->max_ws_blocks, - (u32)SD_MAX_WS10_BLOCKS); + if (sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS) + sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS16_BLOCKS); + else if (sdkp->ws16 || sdkp->ws10 || sdkp->device->no_report_opcodes) + sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS10_BLOCKS); + else { + sdkp->device->no_write_same = 1; + sdkp->max_ws_blocks = 0; + } out: - blk_queue_max_write_same_sectors(q, blocks * (logical_block_size >> 9)); + blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks * + (logical_block_size >> 9)); } /** @@ -2635,9 +2638,24 @@ static void sd_read_block_provisioning(struct scsi_disk *sdkp) static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) { - if (scsi_report_opcode(sdkp->device, buffer, SD_BUF_SIZE, - WRITE_SAME_16)) + struct scsi_device *sdev = sdkp->device; + + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY) < 0) { + sdev->no_report_opcodes = 1; + + /* Disable WRITE SAME if REPORT SUPPORTED OPERATION + * CODES is unsupported and the device has an ATA + * Information VPD page (SAT). 
+ */ + if (!scsi_get_vpd_page(sdev, 0x89, buffer, SD_BUF_SIZE)) + sdev->no_write_same = 1; + } + + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME_16) == 1) sdkp->ws16 = 1; + + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME) == 1) + sdkp->ws10 = 1; } static int sd_try_extended_inquiry(struct scsi_device *sdp) diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 2386aeb41fe8..7a049de22051 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -84,6 +84,7 @@ struct scsi_disk { unsigned lbpws : 1; unsigned lbpws10 : 1; unsigned lbpvpd : 1; + unsigned ws10 : 1; unsigned ws16 : 1; }; #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev) From 4e6b18250651a14b053508e30e731247e28e3f2a Mon Sep 17 00:00:00 2001 From: Mahesh Rajashekhara Date: Tue, 18 Jun 2013 17:02:07 +0530 Subject: [PATCH 061/102] SCSI: aacraid: Fix for arrays are going offline in the system. System hangs commit c5bebd829dd95602c15f8da8cc50fa938b5e0254 upstream. One of the customer had reported that the set of raid logical arrays will become unavailable (I/O offline) after a long hours of IO stress test. The OS wouldn`t be accessible afterwards and require a hard reset. This driver patch has a fix for race condition between the doorbell and the circular buffer. The driver is modified to do an extra read after clearing the doorbell in case there had been a completion posted during the small timing window. With this fix, we ran IO stress for ~13 days. There were no IO failures. 
Signed-off-by: Mahesh Rajashekhara Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/src.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 0f56d8d7524f..7e17107643d4 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -93,6 +93,9 @@ static irqreturn_t aac_src_intr_message(int irq, void *dev_id) int send_it = 0; extern int aac_sync_mode; + src_writel(dev, MUnit.ODR_C, bellbits); + src_readl(dev, MUnit.ODR_C); + if (!aac_sync_mode) { src_writel(dev, MUnit.ODR_C, bellbits); src_readl(dev, MUnit.ODR_C); From 88bc5928044239f6902e89798ae4ef75d23ccb3c Mon Sep 17 00:00:00 2001 From: Daniel Hansel Date: Fri, 26 Apr 2013 17:32:14 +0200 Subject: [PATCH 062/102] SCSI: zfcp: fix adapter (re)open recovery while link to SAN is down commit f76ccaac4f82c463a037aa4a1e4ccb85c7011814 upstream. FCP device remains in status ERP_FAILED when device is switched online or adapter recovery is triggered while link to SAN is down. When Exchange Configuration Data command returns the FSF status FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE it aborts the exchange process. The only retries are done during the common error recovery procedure (i.e. max. 3 retries with 8sec sleep between) and remains in status ERP_FAILED with QDIO down. This commit reverts the commit 0df138476c8306478d6e726f044868b4bccf411c (zfcp: Fix adapter activation on link down). When FSF status FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE is received the adapter recovery will be finished without any retries. QDIO will be up now and status changes such as LINK UP will be received now. 
Signed-off-by: Daniel Hansel Signed-off-by: Steffen Maier Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_fsf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c7e148f33b2a..06760e435259 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -563,6 +563,10 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req) fc_host_port_type(shost) = FC_PORTTYPE_UNKNOWN; adapter->hydra_version = 0; + /* avoids adapter shutdown to be able to recognize + * events such as LINK UP */ + atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, + &adapter->status); zfcp_fsf_link_down_info_eval(req, &qtcb->header.fsf_status_qual.link_down_info); break; From b22b8386992d886aa487b57e000b18512e639710 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 26 Apr 2013 17:33:45 +0200 Subject: [PATCH 063/102] SCSI: zfcp: block queue limits with data router commit 5fea4291deacd80188b996d2f555fc6a1940e5d4 upstream. Commit 86a9668a8d29ea711613e1cb37efa68e7c4db564 "[SCSI] zfcp: support for hardware data router" reduced the initial block queue limits in the scsi_host_template to the absolute minimum and adjusted them later on. However, the adjustment was too late for the BSG devices of Scsi_Host and fc_host. Therefore, ioctl(..., SG_IO, ...) with request or response size > 4kB to a BSG device of an fc_host or a Scsi_Host fails with EINVAL. As a result, users of such ioctl such as HBA_SendCTPassThru() in libzfcphbaapi return with error HBA_STATUS_ERROR. Initialize the block queue limits in zfcp_scsi_host_template to the greatest common denominator (GCD). While we cannot exploit the slightly enlarged maximum request size with data router, this should be negligible. Doing so also avoids running into trouble after live guest relocation (LGR) / migration from a data router FCP device to an FCP device that does not support data router. 
In that case, zfcp would figure out the new limits on adapter recovery, but the fc_host and Scsi_Host (plus in fact all sdevs) still exist with the old and now too large queue limits. It should also be OK not to use half the size as in the DIX case, because fc_host and Scsi_Host do not transport FCP requests including SCSI commands using protection data. Signed-off-by: Steffen Maier Reviewed-by: Martin Peschke Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_scsi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 7b31e3f403f9..7b353647cb90 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -3,7 +3,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2010 + * Copyright IBM Corp. 2002, 2013 */ #define KMSG_COMPONENT "zfcp" @@ -311,8 +311,12 @@ static struct scsi_host_template zfcp_scsi_host_template = { .proc_name = "zfcp", .can_queue = 4096, .this_id = -1, - .sg_tablesize = 1, /* adjusted later */ - .max_sectors = 8, /* adjusted later */ + .sg_tablesize = (((QDIO_MAX_ELEMENTS_PER_BUFFER - 1) + * ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2), + /* GCD, adjusted later */ + .max_sectors = (((QDIO_MAX_ELEMENTS_PER_BUFFER - 1) + * ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2) * 8, + /* GCD, adjusted later */ .dma_boundary = ZFCP_QDIO_SBALE_LEN - 1, .cmd_per_lun = 1, .use_clustering = 1, From 8f425c63ecda1bab9982892e7890041292e2d5ec Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 26 Apr 2013 17:34:54 +0200 Subject: [PATCH 064/102] SCSI: zfcp: status read buffers on first adapter open with link down commit 9edf7d75ee5f21663a0183d21f702682d0ef132f upstream. Commit 64deb6efdc5504ce97b5c1c6f281fffbc150bd93 "[SCSI] zfcp: Use status_read_buf_num provided by FCP channel" started using a value returned by the channel but only evaluated the value if the fabric link is up. 
Commit 8d88cf3f3b9af4713642caeb221b6d6a42019001 "[SCSI] zfcp: Update status read mempool" introduced mempool resizings based on the above value. On setting an FCP device online for the very first time since boot, a new zeroed adapter object is allocated. If the link is down, the number of status read requests remains zero. Since just the config data exchange is incomplete, we proceed with adapter open recovery. However, we unconditionally call mempool_resize with adapter->stat_read_buf_num == 0 in this case. This causes a kernel message "kernel BUG at mm/mempool.c:131!" in process "zfcperp" with last function mempool_resize in Krnl PSW and zfcp_erp_thread in the Call Trace. Don't evaluate channel values which are invalid on link down. The number of status read requests is always valid, evaluated, and set to a positive minimum greater than zero. The adapter open recovery can proceed and the channel has status read buffers to inform us on a future link up event. While we are not aware of any other code path that could result in mempool resize attempts of size zero, we still also initialize the number of status read buffers to be posted to a static minimum number on adapter object allocation. Signed-off-by: Steffen Maier Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_aux.c | 5 ++++- drivers/s390/scsi/zfcp_fsf.c | 23 ++++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index f6adde44f226..3743ac931231 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -3,7 +3,7 @@ * * Module interface and handling of zfcp data structures. * - * Copyright IBM Corp. 2002, 2010 + * Copyright IBM Corp. 
2002, 2013 */ /* @@ -23,6 +23,7 @@ * Christof Schmitt * Martin Petermann * Sven Schuetz + * Steffen Maier */ #define KMSG_COMPONENT "zfcp" @@ -415,6 +416,8 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device) adapter->dma_parms.max_segment_size = ZFCP_QDIO_SBALE_LEN; adapter->ccw_device->dev.dma_parms = &adapter->dma_parms; + adapter->stat_read_buf_num = FSF_STATUS_READS_RECOM; + if (!zfcp_scsi_adapter_register(adapter)) return adapter; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 06760e435259..9152999a0707 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -3,7 +3,7 @@ * * Implementation of FSF commands. * - * Copyright IBM Corp. 2002, 2010 + * Copyright IBM Corp. 2002, 2013 */ #define KMSG_COMPONENT "zfcp" @@ -483,12 +483,8 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) fc_host_port_name(shost) = nsp->fl_wwpn; fc_host_node_name(shost) = nsp->fl_wwnn; - fc_host_port_id(shost) = ntoh24(bottom->s_id); - fc_host_speed(shost) = - zfcp_fsf_convert_portspeed(bottom->fc_link_speed); fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3; - adapter->hydra_version = bottom->adapter_type; adapter->timer_ticks = bottom->timer_interval & ZFCP_FSF_TIMER_INT_MASK; adapter->stat_read_buf_num = max(bottom->status_read_buf_num, (u16)FSF_STATUS_READS_RECOM); @@ -496,6 +492,19 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) if (fc_host_permanent_port_name(shost) == -1) fc_host_permanent_port_name(shost) = fc_host_port_name(shost); + zfcp_scsi_set_prot(adapter); + + /* no error return above here, otherwise must fix call chains */ + /* do not evaluate invalid fields */ + if (req->qtcb->header.fsf_status == FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE) + return 0; + + fc_host_port_id(shost) = ntoh24(bottom->s_id); + fc_host_speed(shost) = + zfcp_fsf_convert_portspeed(bottom->fc_link_speed); + + adapter->hydra_version = bottom->adapter_type; + 
switch (bottom->fc_topology) { case FSF_TOPO_P2P: adapter->peer_d_id = ntoh24(bottom->peer_d_id); @@ -517,8 +526,6 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) return -EIO; } - zfcp_scsi_set_prot(adapter); - return 0; } @@ -569,6 +576,8 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req) &adapter->status); zfcp_fsf_link_down_info_eval(req, &qtcb->header.fsf_status_qual.link_down_info); + if (zfcp_fsf_exchange_config_evaluate(req)) + return; break; default: zfcp_erp_adapter_shutdown(adapter, 0, "fsecdh3"); From 47c00d01fb222baaf8d248f855061da168b29c5d Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Sat, 2 Feb 2013 00:58:20 +0530 Subject: [PATCH 065/102] SCSI: mpt2sas: fix firmware failure with wrong task attribute commit 48ba2efc382f94fae16ca8ca011e5961a81ad1ea upstream. When SCSI command is received with task attribute not set, set it to SIMPLE. Previously it is set to untagged. This causes the firmware to fail the commands. Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index c6bdc9267229..a49159561669 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -3994,11 +3994,7 @@ _scsih_qcmd_lck(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *)) else mpi_control |= MPI2_SCSIIO_CONTROL_SIMPLEQ; } else -/* MPI Revision I (UNIT = 0xA) - removed MPI2_SCSIIO_CONTROL_UNTAGGED */ -/* mpi_control |= MPI2_SCSIIO_CONTROL_UNTAGGED; - */ - mpi_control |= (0x500); - + mpi_control |= MPI2_SCSIIO_CONTROL_SIMPLEQ; } else mpi_control |= MPI2_SCSIIO_CONTROL_SIMPLEQ; /* Make sure Device is not raid volume. 
From 0531603e3a0dbb7c36f2b37833add0e73b8c0c5f Mon Sep 17 00:00:00 2001 From: "Reddy, Sreekanth" Date: Tue, 26 Feb 2013 16:59:59 +0530 Subject: [PATCH 066/102] SCSI: mpt2sas: Fix for issue Missing delay not getting set during system bootup commit b0df96a0068daee4f9c2189c29b9053eb6e46b17 upstream. Missing delay is not getting set properly. The reason is that it is not defined in the same file from where it is being invoked. The fix is to move the missing delay module parameter from mpt2sas_base.c to mpt2sas_scsih.c. Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt2sas/mpt2sas_base.c | 13 +++---------- drivers/scsi/mpt2sas/mpt2sas_base.h | 3 +++ drivers/scsi/mpt2sas/mpt2sas_scsih.c | 8 +++++++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index bcb23d28b3e8..c76b18bbacb8 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -80,10 +80,6 @@ static int msix_disable = -1; module_param(msix_disable, int, 0); MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)"); -static int missing_delay[2] = {-1, -1}; -module_param_array(missing_delay, int, NULL, 0); -MODULE_PARM_DESC(missing_delay, " device missing delay , io missing delay"); - static int mpt2sas_fwfault_debug; MODULE_PARM_DESC(mpt2sas_fwfault_debug, " enable detection of firmware fault " "and halt firmware - (default=0)"); @@ -2199,7 +2195,7 @@ _base_display_ioc_capabilities(struct MPT2SAS_ADAPTER *ioc) } /** - * _base_update_missing_delay - change the missing delay timers + * mpt2sas_base_update_missing_delay - change the missing delay timers * @ioc: per adapter object * @device_missing_delay: amount of time till device is reported missing * @io_missing_delay: interval IO is returned when there is a missing device @@ -2210,8 +2206,8 @@ _base_display_ioc_capabilities(struct MPT2SAS_ADAPTER *ioc) * 
delay, as well as the io missing delay. This should be called at driver * load time. */ -static void -_base_update_missing_delay(struct MPT2SAS_ADAPTER *ioc, +void +mpt2sas_base_update_missing_delay(struct MPT2SAS_ADAPTER *ioc, u16 device_missing_delay, u8 io_missing_delay) { u16 dmd, dmd_new, dmd_orignal; @@ -4407,9 +4403,6 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc) if (r) goto out_free_resources; - if (missing_delay[0] != -1 && missing_delay[1] != -1) - _base_update_missing_delay(ioc, missing_delay[0], - missing_delay[1]); ioc->non_operational_loop = 0; return 0; diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 4caaac13682f..11301974628b 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -1055,6 +1055,9 @@ void mpt2sas_base_validate_event_type(struct MPT2SAS_ADAPTER *ioc, u32 *event_ty void mpt2sas_halt_firmware(struct MPT2SAS_ADAPTER *ioc); +void mpt2sas_base_update_missing_delay(struct MPT2SAS_ADAPTER *ioc, + u16 device_missing_delay, u8 io_missing_delay); + int mpt2sas_port_enable(struct MPT2SAS_ADAPTER *ioc); /* scsih shared API */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index a49159561669..8dbe500c935d 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -101,6 +101,10 @@ static ushort max_sectors = 0xFFFF; module_param(max_sectors, ushort, 0); MODULE_PARM_DESC(max_sectors, "max sectors, range 64 to 32767 default=32767"); +static int missing_delay[2] = {-1, -1}; +module_param_array(missing_delay, int, NULL, 0); +MODULE_PARM_DESC(missing_delay, " device missing delay , io missing delay"); + /* scsi-mid layer global parmeter is max_report_luns, which is 511 */ #define MPT2SAS_MAX_LUN (16895) static int max_lun = MPT2SAS_MAX_LUN; @@ -7299,7 +7303,9 @@ _firmware_event_work(struct work_struct *work) case MPT2SAS_PORT_ENABLE_COMPLETE: ioc->start_scan = 0; - + if (missing_delay[0] 
!= -1 && missing_delay[1] != -1) + mpt2sas_base_update_missing_delay(ioc, missing_delay[0], + missing_delay[1]); dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "port enable: complete " "from worker thread\n", ioc->name)); From ebfc049ce8e32b5ff6bf6fec74775ac021769f37 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Tue, 2 Jul 2013 15:35:13 +0930 Subject: [PATCH 067/102] virtio_balloon: leak_balloon(): only tell host if we got pages deflated commit 8c6bab4f3874d31804a00782c48a8f244a0d3cc0 upstream. balloon_page_dequeue() can return NULL. If it does for the first page being freed then leak_balloon() will create a scatter list with len=0. Which in turn seems to generate an invalid virtio request. I didn't get this in practice, I found it by code review. On the other hand, such an invalid virtio request will cause errors in QEMU and fill_balloon() also performs the same check implemented by this commit. This bug was introduced in e2250429. Signed-off-by: Luiz Capitulino Acked-by: Rafael Aquini Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index bd3ae324a1a2..71af7b5abe01 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -191,7 +191,8 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); * is true, we *have* to do it in this order */ - tell_host(vb, vb->deflate_vq); + if (vb->num_pfns != 0) + tell_host(vb, vb->deflate_vq); mutex_unlock(&vb->balloon_lock); release_pages_by_pfn(vb->pfns, vb->num_pfns); } From b84f7b3f934ea3ff0240e866f2e73b34b42c2811 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 9 Jun 2013 18:53:58 +0200 Subject: [PATCH 068/102] b43: ensue that BCMA is "y" when B43 is "y" commit 693026ef2e751fd94d2e6c71028e68343cc875d5 upstream. 
When b43 gets built into the kernel and it should use bcma we have to ensure that bcma was also built into the kernel and not as a module. In this patch this is also done for SSB, although you can not build b43 without ssb support for now. This fixes a build problem reported by Randy Dunlap in 5187EB95.2060605@infradead.org Reported-By: Randy Dunlap Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/b43/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig index 078e6f3477a9..13f91ac9499e 100644 --- a/drivers/net/wireless/b43/Kconfig +++ b/drivers/net/wireless/b43/Kconfig @@ -28,7 +28,7 @@ config B43 config B43_BCMA bool "Support for BCMA bus" - depends on B43 && BCMA + depends on B43 && (BCMA = y || BCMA = B43) default y config B43_BCMA_EXTRA @@ -39,7 +39,7 @@ config B43_BCMA_EXTRA config B43_SSB bool - depends on B43 && SSB + depends on B43 && (SSB = y || SSB = B43) default y # Auto-select SSB PCI-HOST support, if possible From bcaef288b2a3187de17220e86e15b762b3b0808e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 May 2013 01:06:09 +0200 Subject: [PATCH 069/102] mac80211: close AP_VLAN interfaces before unregistering all commit 4c8a9d4bfaf7dbc7d2168494904d79d22cc01db7 upstream. Since Eric's commit efe117ab8 ("Speedup ieee80211_remove_interfaces") there's a bug in mac80211 when it unregisters with AP_VLAN interfaces up. If the AP_VLAN interface was registered after the AP it belongs to (which is the typical case) and then we get into this code path, unregister_netdevice_many() will crash because it isn't prepared to deal with interfaces being closed in the middle of it. Exactly this happens though, because we iterate the list, find the AP master this AP_VLAN belongs to and dev_close() the dependent VLANs. 
After this, unregister_netdevice_many() won't pick up the fact that the AP_VLAN is already down and will do it again, causing a crash. Signed-off-by: Johannes Berg Cc: Eric Dumazet Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 98d20c0f6fed..514e90f470bf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1717,6 +1717,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); + /* + * Close all AP_VLAN interfaces first, as otherwise they + * might be closed while the AP interface they belong to + * is closed, causing unregister_netdevice_many() to crash. + */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + dev_close(sdata->dev); + /* * Close all AP_VLAN interfaces first, as otherwise they * might be closed while the AP interface they belong to From 5df5df87f2b5f5d75825207d61a272b168d8cab6 Mon Sep 17 00:00:00 2001 From: "Bu, Yitian" Date: Mon, 18 Feb 2013 12:53:37 +0000 Subject: [PATCH 070/102] printk: Fix rq->lock vs logbuf_lock unlock lock inversion commit dbda92d16f8655044e082930e4e9d244b87fde77 upstream. commit 07354eb1a74d1 ("locking printk: Annotate logbuf_lock as raw") reintroduced a lock inversion problem which was fixed in commit 0b5e1c5255 ("printk: Release console_sem after logbuf_lock"). This happened probably when fixing up patch rejects. Restore the ordering and unlock logbuf_lock before releasing console_sem. 
Signed-off-by: ybu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/E807E903FE6CBE4D95E420FBFCC273B827413C@nasanexd01h.na.qualcomm.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk.c b/kernel/printk.c index 8212c1aef125..d37d45c90ae6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1369,9 +1369,9 @@ static int console_trylock_for_printk(unsigned int cpu) } } logbuf_cpu = UINT_MAX; + raw_spin_unlock(&logbuf_lock); if (wake) up(&console_sem); - raw_spin_unlock(&logbuf_lock); return retval; } From 3873153a61235a63517547e347db7ab90788486f Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 13 Jun 2013 14:21:51 +0800 Subject: [PATCH 071/102] uprobes: Fix return value in error handling path commit fa44063f9ef163c3a4c8d8c0465bb8a056b42035 upstream. When wrong argument is passed into uprobe_events it does not return an error: [root@jovi tracing]# echo 'p:myprobe /bin/bash' > uprobe_events [root@jovi tracing]# The proper response is: [root@jovi tracing]# echo 'p:myprobe /bin/bash' > uprobe_events -bash: echo: write error: Invalid argument Link: http://lkml.kernel.org/r/51B964FF.5000106@huawei.com Signed-off-by: zhangwei(Jovi) Cc: Frederic Weisbecker Cc: Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 32494fb0ee64..d5d0cd368a56 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv) return -EINVAL; } arg = strchr(argv[1], ':'); - if (!arg) + if (!arg) { + ret = -EINVAL; goto fail_address_parse; + } *arg++ = '\0'; filename = argv[1]; From c814208bb3542333629cc3c9e79666e94425899a Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Mon, 8 Jul 2013 13:44:45 -0400 Subject: [PATCH 072/102] svcrpc: fix failures to handle -1 uid's commit 0979292bfa301cb87d936b69af428090d2feea1b upstream. As of f025adf191924e3a75ce80e130afcd2485b53bb8 "sunrpc: Properly decode kuids and kgids in RPC_AUTH_UNIX credentials" any rpc containing a -1 (0xffff) uid or gid would fail with a badcred error. Commit afe3c3fd5392b2f0066930abc5dbd3f4b14a0f13 "svcrpc: fix failures to handle -1 uid's and gid's" fixed part of the problem, but overlooked the gid upcall--the kernel can request supplementary gid's for the -1 uid, but mountd's attempt to write a response will get -EINVAL. Symptoms were nfsd failing to reply to the first attempt to use a newly negotiated krb5 context. Reported-by: Sven Geggus Tested-by: Sven Geggus Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcauth_unix.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 06bdf5a1082c..1583c8a4eb7f 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd, if (rv) return -EINVAL; uid = make_kuid(&init_user_ns, id); - if (!uid_valid(uid)) - return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); From 57370589c166cc8747f5e09365407d9dc04d7425 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 11:09:06 -0400 Subject: [PATCH 073/102] svcrpc: fix handling of too-short rpc's commit cf3aa02cb4a0c5af5557dd47f15a08a7df33182a upstream. If we detect that an rpc is too short, we abort and close the connection. Except, there's a bug here: we're leaving sk_datalen nonzero without leaving any pages in the sk_pages array. The most likely result of the inconsistency is a subsequent crash in svc_tcp_clear_pages. Also demote the BUG_ON in svc_tcp_clear_pages to a WARN. Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0f679df7d072..df74919c81c0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { - BUG_ON(svsk->sk_pages[i] == NULL); + if (svsk->sk_pages[i] == NULL) { + WARN_ON_ONCE(1); + continue; + } put_page(svsk->sk_pages[i]); svsk->sk_pages[i] = NULL; } @@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) + if (svc_sock_reclen(svsk) < 8) { + svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ + } rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; From 369cf4be7588876d071c86d0efb7908e9bba7f6f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 10:55:40 -0400 Subject: [PATCH 074/102] svcrpc: don't error out on small tcp fragment commit 1f691b07c5dc51b2055834f58c0f351defd97f27 upstream. Though clients we care about mostly don't do this, it is possible for rpc requests to be sent in multiple fragments. Here we have a sanity check to ensure that the final received rpc isn't too small--except that the number we're actually checking is the length of just the final fragment, not of the whole rpc. So a perfectly legal rpc that's unluckily fragmented could cause the server to close the connection here. Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index df74919c81c0..305374d4fb98 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1095,7 +1095,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) { + if (svsk->sk_datalen < 8) { svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ } From a5ed6be766e6ca8f83ef0afea688ef9c760916d3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 3 Jul 2013 16:01:10 +1000 Subject: [PATCH 075/102] of: Fix address decoding on Bimini and js2x machines commit 6dd18e4684f3d188277bbbc27545248487472108 upstream. Commit: e38c0a1fbc5803cbacdaac0557c70ac8ca5152e7 of/address: Handle #address-cells > 2 specially broke real time clock access on Bimini, js2x, and similar powerpc machines using the "maple" platform. That code was indirectly relying on the old (broken) behaviour of the translation for the hypertransport to ISA bridge. 
This fixes it by treating hypertransport as a PCI bus Signed-off-by: Benjamin Herrenschmidt Acked-by: Rob Herring Signed-off-by: Grant Likely Cc: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 04da786c84d2..7c8221d36329 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -106,8 +106,12 @@ static unsigned int of_bus_default_get_flags(const __be32 *addr) static int of_bus_pci_match(struct device_node *np) { - /* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */ - return !strcmp(np->type, "pci") || !strcmp(np->type, "vci"); + /* + * "vci" is for the /chaos bridge on 1st-gen PCI powermacs + * "ht" is hypertransport + */ + return !strcmp(np->type, "pci") || !strcmp(np->type, "vci") || + !strcmp(np->type, "ht"); } static void of_bus_pci_count_cells(struct device_node *np, From f8ee0e2e9db4d11d3b6593d9e2fba464caa0ff0c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 24 Jun 2013 21:33:28 +0200 Subject: [PATCH 076/102] drm/i915: Fix up sdvo hpd pins for i965g/gm commit 4f7fd7095d85cd31c86cb9ba87bc301319630ccc upstream. Bspec seems to be full of lies, at least it disagress with reality: Two systems corrobated that SDVO hpd bits are the same as on gen3. v2: Update comment a bit. 
Tested-by: Chris Wilson Cc: Arthur Ranyan Reported-and-tested-by: Alex Fiestas Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58405 Acked-by: Chris Wilson Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_irq.c | 13 ++----------- drivers/gpu/drm/i915/i915_reg.h | 13 ++++++------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0aa2ef0d2ae0..e5e328691e3a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -70,15 +70,6 @@ static const u32 hpd_status_gen4[] = { [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS }; -static const u32 hpd_status_i965[] = { - [HPD_CRT] = CRT_HOTPLUG_INT_STATUS, - [HPD_SDVO_B] = SDVOB_HOTPLUG_INT_STATUS_I965, - [HPD_SDVO_C] = SDVOC_HOTPLUG_INT_STATUS_I965, - [HPD_PORT_B] = PORTB_HOTPLUG_INT_STATUS, - [HPD_PORT_C] = PORTC_HOTPLUG_INT_STATUS, - [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS -}; - static const u32 hpd_status_i915[] = { /* i915 and valleyview are the same */ [HPD_CRT] = CRT_HOTPLUG_INT_STATUS, [HPD_SDVO_B] = SDVOB_HOTPLUG_INT_STATUS_I915, @@ -2952,13 +2943,13 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT); u32 hotplug_trigger = hotplug_status & (IS_G4X(dev) ? HOTPLUG_INT_STATUS_G4X : - HOTPLUG_INT_STATUS_I965); + HOTPLUG_INT_STATUS_I915); DRM_DEBUG_DRIVER("hotplug event received, stat 0x%08x\n", hotplug_status); if (hotplug_trigger) { if (hotplug_irq_storm_detect(dev, hotplug_trigger, - IS_G4X(dev) ? hpd_status_gen4 : hpd_status_i965)) + IS_G4X(dev) ? 
hpd_status_gen4 : hpd_status_i915)) i915_hpd_irq_setup(dev); queue_work(dev_priv->wq, &dev_priv->hotplug_work); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2d6b62e42daf..e4e0466aa685 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1691,6 +1691,12 @@ /* SDVO is different across gen3/4 */ #define SDVOC_HOTPLUG_INT_STATUS_G4X (1 << 3) #define SDVOB_HOTPLUG_INT_STATUS_G4X (1 << 2) +/* + * Bspec seems to be seriously misleaded about the SDVO hpd bits on i965g/gm, + * since reality corrobates that they're the same as on gen3. But keep these + * bits here (and the comment!) to help any other lost wanderers back onto the + * right tracks. + */ #define SDVOC_HOTPLUG_INT_STATUS_I965 (3 << 4) #define SDVOB_HOTPLUG_INT_STATUS_I965 (3 << 2) #define SDVOC_HOTPLUG_INT_STATUS_I915 (1 << 7) @@ -1702,13 +1708,6 @@ PORTC_HOTPLUG_INT_STATUS | \ PORTD_HOTPLUG_INT_STATUS) -#define HOTPLUG_INT_STATUS_I965 (CRT_HOTPLUG_INT_STATUS | \ - SDVOB_HOTPLUG_INT_STATUS_I965 | \ - SDVOC_HOTPLUG_INT_STATUS_I965 | \ - PORTB_HOTPLUG_INT_STATUS | \ - PORTC_HOTPLUG_INT_STATUS | \ - PORTD_HOTPLUG_INT_STATUS) - #define HOTPLUG_INT_STATUS_I915 (CRT_HOTPLUG_INT_STATUS | \ SDVOB_HOTPLUG_INT_STATUS_I915 | \ SDVOC_HOTPLUG_INT_STATUS_I915 | \ From 915bec00d92bdceefeff77ab67d32865900b1efe Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 25 Jun 2013 21:53:40 -0700 Subject: [PATCH 077/102] drm/i915: Fix context sizes on HSW commit a0de80a0e07032a111230ec92eca563f9d93648d upstream. With updates to the spec, we can actually see the context layout, and how many dwords are allocated. That table suggests we need 70720 bytes per HW context. Rounded up, this is 18 pages. Looking at what lives after the current 4 pages we use, I can't see too much important (mostly it's d3d related), but there are a couple of things which look scary. I am hopeful this can explain some of our odd HSW failures. v2: Make the context only 17 pages. 
The power context space isn't used ever, and execlists aren't used in our driver, making the actual total 66944 bytes. v3: Add a comment to the code. (Jesse & Paulo) Reported-by: "Azad, Vinit" Reviewed-by: Jesse Barnes Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a1e8ecb6adf6..3bc8a58a8d5f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -113,7 +113,7 @@ static int get_context_size(struct drm_device *dev) case 7: reg = I915_READ(GEN7_CXT_SIZE); if (IS_HASWELL(dev)) - ret = HSW_CXT_TOTAL_SIZE(reg) * 64; + ret = HSW_CXT_TOTAL_SIZE; else ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; break; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e4e0466aa685..80b0a6626a23 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1535,14 +1535,13 @@ GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_GT1_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) -#define HSW_CXT_POWER_SIZE(ctx_reg) ((ctx_reg >> 26) & 0x3f) -#define HSW_CXT_RING_SIZE(ctx_reg) ((ctx_reg >> 23) & 0x7) -#define HSW_CXT_RENDER_SIZE(ctx_reg) ((ctx_reg >> 15) & 0xff) -#define HSW_CXT_TOTAL_SIZE(ctx_reg) (HSW_CXT_POWER_SIZE(ctx_reg) + \ - HSW_CXT_RING_SIZE(ctx_reg) + \ - HSW_CXT_RENDER_SIZE(ctx_reg) + \ - GEN7_CXT_VFSTATE_SIZE(ctx_reg)) - +/* Haswell does have the CXT_SIZE register however it does not appear to be + * valid. Now, docs explain in dwords what is in the context object. 
The full + * size is 70720 bytes, however, the power context and execlist context will + * never be saved (power context is stored elsewhere, and execlists don't work + * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages. + */ +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) /* * Overlay regs From e69a7ee45662a1b1d698c5d758613ffbe77fd2a9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Jun 2013 16:54:08 +0100 Subject: [PATCH 078/102] drm/i915: Only clear write-domains after a successful wait-seqno commit daa13e1ca587bc773c1aae415ed1af6554117bd4 upstream. In the introduction of the non-blocking wait, I cut'n'pasted the wait completion code from normal locked path. Unfortunately, this neglected that the normal path returned early if the wait returned early. The result is that read-only waits may return whilst the GPU is still writing to the bo. Fixes regression from commit 3236f57a0162391f84b93f39fc1882c49a8998c7 [v3.7] Author: Chris Wilson Date: Fri Aug 24 09:35:09 2012 +0100 drm/i915: Use a non-blocking wait for set-to-domain ioctl Signed-off-by: Chris Wilson Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66163 Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9e35dafc5807..34118b0c02d1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1160,7 +1160,8 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, /* Manually manage the write flush as we may have not yet * retired the buffer. 
*/ - if (obj->last_write_seqno && + if (ret == 0 && + obj->last_write_seqno && i915_seqno_passed(seqno, obj->last_write_seqno)) { obj->last_write_seqno = 0; obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; From 657c9009705f8d22faefdecac258312c5621ea51 Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Thu, 27 Jun 2013 08:58:33 +0900 Subject: [PATCH 079/102] drm/gem: fix not to assign error value to gem name commit 2e07fb229396f99fc173d8612f0f83ea9de0341b upstream. If idr_alloc() is failed, obj->name can be error value. Also it cleans up duplicated flink processing code. This regression has been introduced in commit 2e928815c1886fe628ed54623aa98d0889cf5509 Author: Tejun Heo Date: Wed Feb 27 17:04:08 2013 -0800 drm: convert to idr_alloc() Signed-off-by: YoungJun Cho Signed-off-by: Seung-Woo Kim Signed-off-by: Kyungmin Park Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_gem.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index cf919e36e8ae..239ef30f4a62 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -453,25 +453,21 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data, spin_lock(&dev->object_name_lock); if (!obj->name) { ret = idr_alloc(&dev->object_name_idr, obj, 1, 0, GFP_NOWAIT); - obj->name = ret; - args->name = (uint64_t) obj->name; - spin_unlock(&dev->object_name_lock); - idr_preload_end(); - if (ret < 0) goto err; - ret = 0; + + obj->name = ret; /* Allocate a reference for the name table. 
*/ drm_gem_object_reference(obj); - } else { - args->name = (uint64_t) obj->name; - spin_unlock(&dev->object_name_lock); - idr_preload_end(); - ret = 0; } + args->name = (uint64_t) obj->name; + ret = 0; + err: + spin_unlock(&dev->object_name_lock); + idr_preload_end(); drm_gem_object_unreference_unlocked(obj); return ret; } From cd2c367cf10eab382c53bb9f39e01726c6f10253 Mon Sep 17 00:00:00 2001 From: Julia Lemire Date: Thu, 27 Jun 2013 13:38:59 -0400 Subject: [PATCH 080/102] drm/mgag200: Added resolution and bandwidth limits for various G200e products. commit abbee6238775c6633a3779962e9e5b5cb9823749 upstream. At the larger resolutions, the g200e series sometimes struggles with maintaining a proper output. Problems like flickering or black bands appearing on screen can occur. In order to avoid this, limitations regarding resolutions and bandwidth have been added for the different variations of the g200e series. This code was ported from the old xorg mga driver. Signed-off-by: Julia Lemire Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/mgag200/mgag200_drv.h | 3 +- drivers/gpu/drm/mgag200/mgag200_main.c | 2 +- drivers/gpu/drm/mgag200/mgag200_mode.c | 70 ++++++++++++++++++++++++-- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index bf29b2f4d68d..988911afcc8b 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -198,7 +198,8 @@ struct mga_device { struct ttm_bo_device bdev; } ttm; - u32 reg_1e24; /* SE model number */ + /* SE model number stored in reg 0x1e24 */ + u32 unique_rev_id; }; diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c index 99059237da38..dafe049fb1ae 100644 --- a/drivers/gpu/drm/mgag200/mgag200_main.c +++ b/drivers/gpu/drm/mgag200/mgag200_main.c @@ -176,7 +176,7 @@ static int mgag200_device_init(struct drm_device *dev, /* stash G200 SE model 
number for later use */ if (IS_G200_SE(mdev)) - mdev->reg_1e24 = RREG32(0x1e24); + mdev->unique_rev_id = RREG32(0x1e24); ret = mga_vram_init(mdev); if (ret) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index ee66badc8bb6..99e07b688ea8 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1008,7 +1008,7 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, if (IS_G200_SE(mdev)) { - if (mdev->reg_1e24 >= 0x02) { + if (mdev->unique_rev_id >= 0x02) { u8 hi_pri_lvl; u32 bpp; u32 mb; @@ -1038,7 +1038,7 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, WREG8(MGAREG_CRTCEXT_DATA, hi_pri_lvl); } else { WREG8(MGAREG_CRTCEXT_INDEX, 0x06); - if (mdev->reg_1e24 >= 0x01) + if (mdev->unique_rev_id >= 0x01) WREG8(MGAREG_CRTCEXT_DATA, 0x03); else WREG8(MGAREG_CRTCEXT_DATA, 0x04); @@ -1410,6 +1410,32 @@ static int mga_vga_get_modes(struct drm_connector *connector) return ret; } +static uint32_t mga_vga_calculate_mode_bandwidth(struct drm_display_mode *mode, + int bits_per_pixel) +{ + uint32_t total_area, divisor; + int64_t active_area, pixels_per_second, bandwidth; + uint64_t bytes_per_pixel = (bits_per_pixel + 7) / 8; + + divisor = 1024; + + if (!mode->htotal || !mode->vtotal || !mode->clock) + return 0; + + active_area = mode->hdisplay * mode->vdisplay; + total_area = mode->htotal * mode->vtotal; + + pixels_per_second = active_area * mode->clock * 1000; + do_div(pixels_per_second, total_area); + + bandwidth = pixels_per_second * bytes_per_pixel * 100; + do_div(bandwidth, divisor); + + return (uint32_t)(bandwidth); +} + +#define MODE_BANDWIDTH MODE_BAD + static int mga_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { @@ -1421,7 +1447,45 @@ static int mga_vga_mode_valid(struct drm_connector *connector, int bpp = 32; int i = 0; - /* FIXME: Add bandwidth and g200se limitations */ + if (IS_G200_SE(mdev)) { + if (mdev->unique_rev_id == 0x01) { + if 
(mode->hdisplay > 1600) + return MODE_VIRTUAL_X; + if (mode->vdisplay > 1200) + return MODE_VIRTUAL_Y; + if (mga_vga_calculate_mode_bandwidth(mode, bpp) + > (24400 * 1024)) + return MODE_BANDWIDTH; + } else if (mdev->unique_rev_id >= 0x02) { + if (mode->hdisplay > 1920) + return MODE_VIRTUAL_X; + if (mode->vdisplay > 1200) + return MODE_VIRTUAL_Y; + if (mga_vga_calculate_mode_bandwidth(mode, bpp) + > (30100 * 1024)) + return MODE_BANDWIDTH; + } + } else if (mdev->type == G200_WB) { + if (mode->hdisplay > 1280) + return MODE_VIRTUAL_X; + if (mode->vdisplay > 1024) + return MODE_VIRTUAL_Y; + if (mga_vga_calculate_mode_bandwidth(mode, + bpp > (31877 * 1024))) + return MODE_BANDWIDTH; + } else if (mdev->type == G200_EV && + (mga_vga_calculate_mode_bandwidth(mode, bpp) + > (32700 * 1024))) { + return MODE_BANDWIDTH; + } else if (mode->type == G200_EH && + (mga_vga_calculate_mode_bandwidth(mode, bpp) + > (37500 * 1024))) { + return MODE_BANDWIDTH; + } else if (mode->type == G200_ER && + (mga_vga_calculate_mode_bandwidth(mode, + bpp) > (55000 * 1024))) { + return MODE_BANDWIDTH; + } if (mode->crtc_hdisplay > 2048 || mode->crtc_hsync_start > 4096 || mode->crtc_hsync_end > 4096 || mode->crtc_htotal > 4096 || From ef0cfe2f8ee92be33f891c238d310abfaca7e0e3 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 11 Jun 2013 10:50:30 +0200 Subject: [PATCH 081/102] drm/nouveau: use vmalloc for pgt allocation commit d005f51eb93d71cd40ebd11dd377453fa8c8a42a upstream. Page tables on nv50 take 48kB, which can be hard to allocate in one piece. Let's use vmalloc. 
Signed-off-by: Marcin Slusarz Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/core/subdev/vm/base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c index 77c67fc970e6..e66fb77131bc 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c @@ -362,7 +362,7 @@ nouveau_vm_create(struct nouveau_vmmgr *vmm, u64 offset, u64 length, vm->fpde = offset >> (vmm->pgt_bits + 12); vm->lpde = (offset + length - 1) >> (vmm->pgt_bits + 12); - vm->pgt = kcalloc(vm->lpde - vm->fpde + 1, sizeof(*vm->pgt), GFP_KERNEL); + vm->pgt = vzalloc((vm->lpde - vm->fpde + 1) * sizeof(*vm->pgt)); if (!vm->pgt) { kfree(vm); return -ENOMEM; @@ -371,7 +371,7 @@ nouveau_vm_create(struct nouveau_vmmgr *vmm, u64 offset, u64 length, ret = nouveau_mm_init(&vm->mm, mm_offset >> 12, mm_length >> 12, block >> 12); if (ret) { - kfree(vm->pgt); + vfree(vm->pgt); kfree(vm); return ret; } @@ -446,7 +446,7 @@ nouveau_vm_del(struct nouveau_vm *vm) } nouveau_mm_fini(&vm->mm); - kfree(vm->pgt); + vfree(vm->pgt); kfree(vm); } From 9f7fbcd61507b130afb4ec6418ce8513295d4df4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Jun 2013 10:41:03 -0400 Subject: [PATCH 082/102] drm/radeon: fix AVI infoframe generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f100380ecd8287b0909d3c5694784adc46e78a4a upstream. - remove adding 2 to checksum, this is incorrect. This was incorrectly introduced in: 92db7f6c860b8190571a9dc1fcbc16d003422fe8 http://lists.freedesktop.org/archives/dri-devel/2011-December/017717.html However, the off by 2 was due to adding the version twice. 
From the examples in the URL above: [Rafał Miłecki][RV620] fglrx: 0x7454: 00 A8 5E 79 R600_HDMI_VIDEOINFOFRAME_0 0x7458: 00 28 00 10 R600_HDMI_VIDEOINFOFRAME_1 0x745C: 00 48 00 28 R600_HDMI_VIDEOINFOFRAME_2 0x7460: 02 00 00 48 R600_HDMI_VIDEOINFOFRAME_3 =================== (0x82 + 0x2 + 0xD) + 0x1F8 = 0x289 -0x289 = 0x77 However, the payload sum is not 0x1f8, it's 0x1f6. 00 + A8 + 5E + 00 + 00 + 28 + 00 + 10 + 00 + 48 + 00 + 28 + 00 + 48 = 0x1f6 Bits 25:24 of HDMI_VIDEOINFOFRAME_3 are the packet version, not part of the payload. So the total would be: (0x82 + 0x2 + 0xD) + 0x1f6 = 0x287 -0x287 = 0x79 - properly emit the AVI infoframe version. This was not being emitted previous which is probably what caused the issue above. This should fix blank screen when HDMI audio is enabled on certain monitors. Signed-off-by: Alex Deucher Cc: Rafał Miłecki Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen_hdmi.c | 11 ++--------- drivers/gpu/drm/radeon/r600_hdmi.c | 11 ++--------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index ed7c8a768092..b9c6f7675e59 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -128,14 +128,7 @@ static void evergreen_hdmi_update_avi_infoframe(struct drm_encoder *encoder, struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; uint32_t offset = dig->afmt->offset; uint8_t *frame = buffer + 3; - - /* Our header values (type, version, length) should be alright, Intel - * is using the same. Checksum function also seems to be OK, it works - * fine for audio infoframe. However calculated value is always lower - * by 2 in comparison to fglrx. It breaks displaying anything in case - * of TVs that strictly check the checksum. Hack it manually here to - * workaround this issue.
*/ - frame[0x0] += 2; + uint8_t *header = buffer; WREG32(AFMT_AVI_INFO0 + offset, frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24)); @@ -144,7 +137,7 @@ static void evergreen_hdmi_update_avi_infoframe(struct drm_encoder *encoder, WREG32(AFMT_AVI_INFO2 + offset, frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24)); WREG32(AFMT_AVI_INFO3 + offset, - frame[0xC] | (frame[0xD] << 8)); + frame[0xC] | (frame[0xD] << 8) | (header[1] << 24)); } static void evergreen_audio_set_dto(struct drm_encoder *encoder, u32 clock) diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 456750a0daa5..e73b2a73494a 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -133,14 +133,7 @@ static void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder, struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; uint32_t offset = dig->afmt->offset; uint8_t *frame = buffer + 3; - - /* Our header values (type, version, length) should be alright, Intel - * is using the same. Checksum function also seems to be OK, it works - * fine for audio infoframe. However calculated value is always lower - * by 2 in comparison to fglrx. It breaks displaying anything in case - * of TVs that strictly check the checksum. Hack it manually here to - * workaround this issue. 
*/ - frame[0x0] += 2; + uint8_t *header = buffer; WREG32(HDMI0_AVI_INFO0 + offset, frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24)); @@ -149,7 +142,7 @@ static void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder, WREG32(HDMI0_AVI_INFO2 + offset, frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24)); WREG32(HDMI0_AVI_INFO3 + offset, - frame[0xC] | (frame[0xD] << 8)); + frame[0xC] | (frame[0xD] << 8) | (header[1] << 24)); } /* From 5c49b1cfa41e42003b481326af66806e5f1a46b9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Jun 2013 09:57:07 -0400 Subject: [PATCH 083/102] drm/radeon: add backlight quirk for hybrid mac commit 80101790670385a85aca35ecae4b89e3f2fceecc upstream. Mac laptops with multiple GPUs apparently use the gmux driver for backlight control. Don't register a radeon backlight interface. We may need to add other pci ids for other hybrid mac laptops. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=65377 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 8406c8251fbf..4120d355cadd 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -186,6 +186,13 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, u8 backlight_level; char bl_name[16]; + /* Mac laptops with multiple GPUs use the gmux driver for backlight + * so don't register a backlight device + */ + if ((rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) && + (rdev->pdev->device == 0x6741)) + return; + if (!radeon_encoder->enc_priv) return; From 3913338881a1e4898176da0008c3df80aabdc3dd Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 3 Jul 2013 03:06:02 -0400 Subject: [PATCH 084/102] drm/nva3/disp: Fix HDMI audio regression commit 
bf03d1b293cc556df53545e318110505014d805e upstream. This is the nva3 counterpart to commit beba44b17 (drm/nv84/disp: Fix HDMI audio regression). The regression happened as a result of refactoring in commit 8e9e3d2de (drm/nv84/disp: move hdmi control into core). Reported-and-tested-by: Max Baldwin Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c index f065fc248adf..db8c6fd46278 100644 --- a/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c +++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c @@ -55,6 +55,10 @@ nva3_hdmi_ctrl(struct nv50_disp_priv *priv, int head, int or, u32 data) nv_wr32(priv, 0x61c510 + soff, 0x00000000); nv_mask(priv, 0x61c500 + soff, 0x00000001, 0x00000001); + nv_mask(priv, 0x61c5d0 + soff, 0x00070001, 0x00010001); /* SPARE, HW_CTS */ + nv_mask(priv, 0x61c568 + soff, 0x00010101, 0x00000000); /* ACR_CTRL, ?? */ + nv_mask(priv, 0x61c578 + soff, 0x80000000, 0x80000000); /* ACR_0441_ENABLE */ + /* ??? */ nv_mask(priv, 0x61733c, 0x00100000, 0x00100000); /* RESETF */ nv_mask(priv, 0x61733c, 0x10000000, 0x10000000); /* LOOKUP_EN */ From 64dd8fc12c34e39e82b77109bb5b328ad5b24268 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 2 Jul 2013 14:44:12 +0100 Subject: [PATCH 085/102] drm/nv50-/disp: Use output specific mask in interrupt commit 378f2bcdf7c971453d11580936dc0ffe845f5880 upstream. The commit commit 476e84e126171d809f9c0b5d97137f5055f95ca8 Author: Ben Skeggs Date: Mon Feb 11 09:24:23 2013 +1000 drm/nv50-/disp: initial supervisor support for off-chip encoders changed the write mask in one of the interrupt functions for on-chip encoders, causing a regression in certain VGA dual-head setups. 
This commit reintroduces the mask thus resolving the regression Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66129 Reported-and-Tested-by: Yves-Alexis CC: Ben Skeggs Signed-off-by: Emil Velikov Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/core/engine/disp/nv50.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c index 6a38402fa56c..5680d3eb11ca 100644 --- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c @@ -1107,6 +1107,7 @@ nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head) u32 pclk = nv_rd32(priv, 0x610ad0 + (head * 0x540)) & 0x3fffff; u32 hval, hreg = 0x614200 + (head * 0x800); u32 oval, oreg; + u32 mask; u32 conf = exec_clkcmp(priv, head, 0xff, pclk, &outp); if (conf != ~0) { if (outp.location == 0 && outp.type == DCB_OUTPUT_DP) { @@ -1133,6 +1134,7 @@ nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head) oreg = 0x614280 + (ffs(outp.or) - 1) * 0x800; oval = 0x00000000; hval = 0x00000000; + mask = 0xffffffff; } else if (!outp.location) { if (outp.type == DCB_OUTPUT_DP) @@ -1140,14 +1142,16 @@ nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head) oreg = 0x614300 + (ffs(outp.or) - 1) * 0x800; oval = (conf & 0x0100) ? 0x00000101 : 0x00000000; hval = 0x00000000; + mask = 0x00000707; } else { oreg = 0x614380 + (ffs(outp.or) - 1) * 0x800; oval = 0x00000001; hval = 0x00000001; + mask = 0x00000707; } nv_mask(priv, hreg, 0x0000000f, hval); - nv_mask(priv, oreg, 0x00000707, oval); + nv_mask(priv, oreg, mask, oval); } } From ed2f079599f3d9e5e27a516771d3d53afa7e7773 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Jun 2013 14:33:19 -0600 Subject: [PATCH 086/102] iommu/amd: Only unmap large pages from the first pte commit 60d0ca3cfd199b6612bbbbf4999a3470dad38bb1 upstream. 
If we use a large mapping, the expectation is that only unmaps from the first pte in the superpage are supported. Unmaps from offsets into the superpage should fail (ie. return zero sized unmap). In the current code, unmapping from an offset clears the size of the full mapping starting from an offset. For instance, if we map a 16k physically contiguous range at IOVA 0x0 with a large page, then attempt to unmap 4k at offset 12k, 4 ptes are cleared (12k - 28k) and the unmap returns 16k unmapped. This potentially incorrectly clears valid mappings and confuses drivers like VFIO that use the unmap size to release pinned pages. Fix by refusing to unmap from offsets into the page. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 21d02b0d907c..a3c338942f10 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1484,6 +1484,10 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, /* Large PTE found which maps this address */ unmap_size = PTE_PAGE_SIZE(*pte); + + /* Only unmap from the first pte in the page */ + if ((unmap_size - 1) & bus_addr) + break; count = PAGE_SIZE_PTE_COUNT(unmap_size); for (i = 0; i < count; i++) pte[i] = 0ULL; @@ -1493,7 +1497,7 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped += unmap_size; } - BUG_ON(!is_power_of_2(unmapped)); + BUG_ON(unmapped && !is_power_of_2(unmapped)); return unmapped; } From 3ea8ad44cea35afc010942b448472fc993f24722 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 9 Jun 2013 04:52:11 +0400 Subject: [PATCH 087/102] xtensa: adjust boot parameters address when INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is selected commit c5a771d0678f9613e9f89cf1a5bdcfa5b08b225b upstream. The virtual address of boot parameters chain is passed to the kernel via a2 register. 
Adjust it in case it is remapped during MMUv3 -> MMUv2 mapping change, i.e. when it is in the first 128M. Also fix interpretation of initrd and FDT addresses passed in the boot parameters: these are physical addresses. Reported-by: Baruch Siach Signed-off-by: Max Filippov Signed-off-by: Chris Zankel Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/head.S | 9 +++++++++ arch/xtensa/kernel/setup.c | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index ef12c0e6fa25..7d740ebbe198 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -68,6 +68,15 @@ _SetupMMU: #ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX initialize_mmu +#if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY + rsr a2, excsave1 + movi a3, 0x08000000 + bgeu a2, a3, 1f + movi a3, 0xd0000000 + add a2, a2, a3 + wsr a2, excsave1 +1: +#endif #endif .end no-absolute-literals diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 6dd25ecde3f5..14c6c3a6f04b 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -152,8 +152,8 @@ static int __init parse_tag_initrd(const bp_tag_t* tag) { meminfo_t* mi; mi = (meminfo_t*)(tag->data); - initrd_start = (void*)(mi->start); - initrd_end = (void*)(mi->end); + initrd_start = __va(mi->start); + initrd_end = __va(mi->end); return 0; } @@ -164,7 +164,7 @@ __tagtable(BP_TAG_INITRD, parse_tag_initrd); static int __init parse_tag_fdt(const bp_tag_t *tag) { - dtb_start = (void *)(tag->data[0]); + dtb_start = __va(tag->data[0]); return 0; } From d8724a91bc631690af241588936971214e55927e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Jul 2013 17:40:13 +0200 Subject: [PATCH 088/102] thermal: cpu_cooling: fix stub function commit e8d39240d635ed9bcaddbec898b1c9f063c5dbb2 upstream. 
The function stub for cpufreq_cooling_get_level introduced in 57df81069 "Thermal: exynos: fix cooling state translation" is not syntactically correct C and needs to be fixed to avoid this error: In file included from drivers/thermal/db8500_thermal.c:20:0: include/linux/cpu_cooling.h: In function 'cpufreq_cooling_get_level': include/linux/cpu_cooling.h:57:1: error: parameter name omitted unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int) ^ include/linux/cpu_cooling.h:57:1: error: parameter name omitted Signed-off-by: Arnd Bergmann Acked-by: Eduardo Valentin Cc: Zhang Rui Cc: Amit Daniel kachhap Signed-off-by: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu_cooling.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 282e27028418..a5d52eea8232 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -41,7 +41,7 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus); */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); -unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int); +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus) @@ -54,7 +54,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) return; } static inline -unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int) +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) { return THERMAL_CSTATE_INVALID; } From d6d10b1dceba6ed2b3d74207b2a454c2f4427785 Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 12 Jun 2013 17:28:33 +0000 Subject: [PATCH 089/102] MIPS: Octeon: Don't clobber bootloader data structures. commit d949b4fe6d23dd92b5fa48cbf7af90ca32beed2e upstream. 
Commit abe77f90dc (MIPS: Octeon: Add kexec and kdump support) added a bootmem region for the kernel image itself. The problem is that this is rounded up to a 0x100000 boundary, which is memory that may not be owned by the kernel. Depending on the kernel's configuration based size, this 'extra' memory may contain data passed from the bootloader to the kernel itself, which if clobbered makes the kernel crash in various ways. The fix: Quit rounding the size up, so that we only use memory assigned to the kernel. Signed-off-by: David Daney Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5449/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 01b1b3f94feb..1e1e18c5a534 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -996,7 +996,7 @@ void __init plat_mem_setup(void) cvmx_bootmem_unlock(); /* Add the memory region for the kernel. */ kernel_start = (unsigned long) _text; - kernel_size = ALIGN(_end - _text, 0x100000); + kernel_size = _end - _text; /* Adjust for physical offset. */ kernel_start &= ~0xffffffff80000000ULL; From df3b055c92883b04459c54888b4bbb169ccb1b62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 18:02:38 +0200 Subject: [PATCH 090/102] staging: line6: Fix unlocked snd_pcm_stop() call commit 86f0b5b86d142b9323432fef078a6cf0fb5dda74 upstream. snd_pcm_stop() must be called in the PCM substream lock context. 
Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- drivers/staging/line6/pcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/line6/pcm.c b/drivers/staging/line6/pcm.c index 02f77d74809f..a7856bad3cc6 100644 --- a/drivers/staging/line6/pcm.c +++ b/drivers/staging/line6/pcm.c @@ -385,8 +385,11 @@ static int snd_line6_pcm_free(struct snd_device *device) */ static void pcm_disconnect_substream(struct snd_pcm_substream *substream) { - if (substream->runtime && snd_pcm_running(substream)) + if (substream->runtime && snd_pcm_running(substream)) { + snd_pcm_stream_lock_irq(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED); + snd_pcm_stream_unlock_irq(substream); + } } /* From f38bac3d6d1fc1a726e4381f0423dcd85885b3d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jul 2013 17:44:10 +0200 Subject: [PATCH 091/102] perf: Clone child context from parent context pmu commit 734df5ab549ca44f40de0f07af1c8803856dfb18 upstream. Currently when the child context for inherited events is created, it's based on the pmu object of the first event of the parent context. This is wrong for the following scenario: - HW context having HW and SW event - HW event got removed (closed) - SW event stays in HW context as the only event and its pmu is used to clone the child context The issue starts when the cpu context object is touched based on the pmu context object (__get_cpu_context). In this case the HW context will work with SW cpu context ending up with the WARN below. Fix this by using the parent context's pmu object to clone the child context.
Addresses the following warning reported by Vince Weaver: [ 2716.472065] ------------[ cut here ]------------ [ 2716.476035] WARNING: at kernel/events/core.c:2122 task_ctx_sched_out+0x3c/0x) [ 2716.476035] Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs locn [ 2716.476035] CPU: 0 PID: 3164 Comm: perf_fuzzer Not tainted 3.10.0-rc4 #2 [ 2716.476035] Hardware name: AOpen DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, BI2 [ 2716.476035] 0000000000000000 ffffffff8102e215 0000000000000000 ffff88011fc18 [ 2716.476035] ffff8801175557f0 0000000000000000 ffff880119fda88c ffffffff810ad [ 2716.476035] ffff880119fda880 ffffffff810af02a 0000000000000009 ffff880117550 [ 2716.476035] Call Trace: [ 2716.476035] [] ? warn_slowpath_common+0x5b/0x70 [ 2716.476035] [] ? task_ctx_sched_out+0x3c/0x5f [ 2716.476035] [] ? perf_event_exit_task+0xbf/0x194 [ 2716.476035] [] ? do_exit+0x3e7/0x90c [ 2716.476035] [] ? __do_fault+0x359/0x394 [ 2716.476035] [] ? do_group_exit+0x66/0x98 [ 2716.476035] [] ? get_signal_to_deliver+0x479/0x4ad [ 2716.476035] [] ? __perf_event_task_sched_out+0x230/0x2d1 [ 2716.476035] [] ? do_signal+0x3c/0x432 [ 2716.476035] [] ? ctx_sched_in+0x43/0x141 [ 2716.476035] [] ? perf_event_context_sched_in+0x7a/0x90 [ 2716.476035] [] ? __perf_event_task_sched_in+0x31/0x118 [ 2716.476035] [] ? mmdrop+0xd/0x1c [ 2716.476035] [] ? finish_task_switch+0x7d/0xa6 [ 2716.476035] [] ? do_notify_resume+0x20/0x5d [ 2716.476035] [] ? 
retint_signal+0x3d/0x78 [ 2716.476035] ---[ end trace 827178d8a5966c3d ]--- Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1373384651-6109-1-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b391907d5352..01d4fe68e57b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7228,7 +7228,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * child. */ - child_ctx = alloc_perf_context(event->pmu, child); + child_ctx = alloc_perf_context(parent_ctx->pmu, child); if (!child_ctx) return -ENOMEM; From b2412679ab3e923437e2ee109560c151c9b0cedc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jul 2013 17:44:11 +0200 Subject: [PATCH 092/102] perf: Remove WARN_ON_ONCE() check in __perf_event_enable() for valid scenario commit 06f417968beac6e6b614e17b37d347aa6a6b1d30 upstream. The '!ctx->is_active' check has a valid scenario, so there's no need for the warning. The reason is that there's a time window between the 'ctx->is_active' check in the perf_event_enable() function and the __perf_event_enable() function having: - IRQs on - ctx->lock unlocked where the task could be killed and 'ctx' deactivated by perf_event_exit_task(), ending up with the warning below. So remove the WARN_ON_ONCE() check and add comments to explain it all. 
This addresses the following warning reported by Vince Weaver: [ 324.983534] ------------[ cut here ]------------ [ 324.984420] WARNING: at kernel/events/core.c:1953 __perf_event_enable+0x187/0x190() [ 324.984420] Modules linked in: [ 324.984420] CPU: 19 PID: 2715 Comm: nmi_bug_snb Not tainted 3.10.0+ #246 [ 324.984420] Hardware name: Supermicro X8DTN/X8DTN, BIOS 4.6.3 01/08/2010 [ 324.984420] 0000000000000009 ffff88043fce3ec8 ffffffff8160ea0b ffff88043fce3f00 [ 324.984420] ffffffff81080ff0 ffff8802314fdc00 ffff880231a8f800 ffff88043fcf7860 [ 324.984420] 0000000000000286 ffff880231a8f800 ffff88043fce3f10 ffffffff8108103a [ 324.984420] Call Trace: [ 324.984420] [] dump_stack+0x19/0x1b [ 324.984420] [] warn_slowpath_common+0x70/0xa0 [ 324.984420] [] warn_slowpath_null+0x1a/0x20 [ 324.984420] [] __perf_event_enable+0x187/0x190 [ 324.984420] [] remote_function+0x40/0x50 [ 324.984420] [] generic_smp_call_function_single_interrupt+0xbe/0x130 [ 324.984420] [] smp_call_function_single_interrupt+0x27/0x40 [ 324.984420] [] call_function_single_interrupt+0x6f/0x80 [ 324.984420] [] ? _raw_spin_unlock_irqrestore+0x41/0x70 [ 324.984420] [] perf_event_exit_task+0x14d/0x210 [ 324.984420] [] ? switch_task_namespaces+0x24/0x60 [ 324.984420] [] do_exit+0x2b6/0xa40 [ 324.984420] [] ? _raw_spin_unlock_irq+0x2c/0x30 [ 324.984420] [] do_group_exit+0x49/0xc0 [ 324.984420] [] get_signal_to_deliver+0x254/0x620 [ 324.984420] [] do_signal+0x57/0x5a0 [ 324.984420] [] ? __do_page_fault+0x2a4/0x4e0 [ 324.984420] [] ? retint_restore_args+0xe/0xe [ 324.984420] [] ? 
retint_signal+0x11/0x84 [ 324.984420] [] do_notify_resume+0x65/0x80 [ 324.984420] [] retint_signal+0x46/0x84 [ 324.984420] ---[ end trace 442ec2f04db3771a ]--- Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Suggested-by: Peter Zijlstra Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1373384651-6109-2-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 01d4fe68e57b..cb8744a1b120 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1761,7 +1761,16 @@ static int __perf_event_enable(void *info) struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; - if (WARN_ON_ONCE(!ctx->is_active)) + /* + * There's a time window between 'ctx->is_active' check + * in perf_event_enable function and this place having: + * - IRQs on + * - ctx->lock unlocked + * + * where the task could be killed and 'ctx' deactivated + * by perf_event_exit_task. + */ + if (!ctx->is_active) return -EINVAL; raw_spin_lock(&ctx->lock); From 65e303d786e20460c3d67d362f989f59944fb744 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Jul 2013 11:08:33 +0200 Subject: [PATCH 093/102] perf: Fix perf_lock_task_context() vs RCU commit 058ebd0eba3aff16b144eabf4510ed9510e1416e upstream. 
Jiri managed to trigger this warning: [] ====================================================== [] [ INFO: possible circular locking dependency detected ] [] 3.10.0+ #228 Tainted: G W [] ------------------------------------------------------- [] p/6613 is trying to acquire lock: [] (rcu_node_0){..-...}, at: [] rcu_read_unlock_special+0xa7/0x250 [] [] but task is already holding lock: [] (&ctx->lock){-.-...}, at: [] perf_lock_task_context+0xd9/0x2c0 [] [] which lock already depends on the new lock. [] [] the existing dependency chain (in reverse order) is: [] [] -> #4 (&ctx->lock){-.-...}: [] -> #3 (&rq->lock){-.-.-.}: [] -> #2 (&p->pi_lock){-.-.-.}: [] -> #1 (&rnp->nocb_gp_wq[1]){......}: [] -> #0 (rcu_node_0){..-...}: Paul was quick to explain that due to preemptible RCU we cannot call rcu_read_unlock() while holding scheduler (or nested) locks when part of the read side critical section was preemptible. Therefore solve it by making the entire RCU read side non-preemptible. Also pull out the retry from under the non-preempt to play nice with RT. Reported-by: Jiri Olsa Helped-out-by: Paul E. McKenney Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index cb8744a1b120..e76e4959908c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -761,8 +761,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; - rcu_read_lock(); retry: + /* + * One of the few rules of preemptible RCU is that one cannot do + * rcu_read_unlock() while holding a scheduler (or nested) lock when + * part of the read side critical section was preemptible -- see + * rcu_read_unlock_special(). + * + * Since ctx->lock nests under rq->lock we must ensure the entire read + * side critical section is non-preemptible. 
+ */ + preempt_disable(); + rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* @@ -778,6 +788,8 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) raw_spin_lock_irqsave(&ctx->lock, *flags); if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); + rcu_read_unlock(); + preempt_enable(); goto retry; } @@ -787,6 +799,7 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) } } rcu_read_unlock(); + preempt_enable(); return ctx; } From e6929efa3320454d9a572300a4eb97a576c7d556 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jun 2013 10:58:31 -0400 Subject: [PATCH 094/102] tracing: Failed to create system directory commit 6e94a780374ed31b280f939d4757e8d7858dff16 upstream. Running the following: # cd /sys/kernel/debug/tracing # echo p:i do_sys_open > kprobe_events # echo p:j schedule >> kprobe_events # cat kprobe_events p:kprobes/i do_sys_open p:kprobes/j schedule # echo p:i do_sys_open >> kprobe_events # cat kprobe_events p:kprobes/j schedule p:kprobes/i do_sys_open # ls /sys/kernel/debug/tracing/events/kprobes/ enable filter j Notice that the 'i' is missing from the kprobes directory. The console produces: "Failed to create system directory kprobes" This is because kprobes passes in an allocated name for the system and the ftrace event subsystem saves off that name instead of creating a duplicate for it. But the kprobes may free the system name making the pointer to it invalid. This bug was introduced by 92edca073c37 "tracing: Use direct field, type and system names" which switched from using kstrdup() on the system name in favor of just keeping a pointer to it, as the internal ftrace event system names are static and exist for the life of the computer being booted. Instead of reverting back to duplicating system names again, we can use core_kernel_data() to determine if the passed in name was allocated or static.
Then use the MSB of the ref_count to be a flag to keep track if the name was allocated or not. Then we can still save from having to duplicate strings that will always exist, but still copy the ones that may be freed. Reported-by: "zhangwei(Jovi)" Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 41 +++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 27963e2bf4bf..bf56c1d07df3 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields); static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; +#define SYSTEM_FL_FREE_NAME (1 << 31) + +static inline int system_refcount(struct event_subsystem *system) +{ + return system->ref_count & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_inc(struct event_subsystem *system) +{ + return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_dec(struct event_subsystem *system) +{ + return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; +} + /* Double loops, do not use break, only goto's work */ #define do_for_each_event_file(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ @@ -349,8 +366,8 @@ static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; - WARN_ON_ONCE(system->ref_count == 0); - if (--system->ref_count) + WARN_ON_ONCE(system_refcount(system) == 0); + if (system_refcount_dec(system)) return; list_del(&system->list); @@ -359,13 +376,15 @@ static void __put_system(struct event_subsystem *system) kfree(filter->filter_string); kfree(filter); } + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); } static void __get_system(struct event_subsystem *system) { - WARN_ON_ONCE(system->ref_count == 
0); - system->ref_count++; + WARN_ON_ONCE(system_refcount(system) == 0); + system_refcount_inc(system); } static void __get_system_dir(struct ftrace_subsystem_dir *dir) @@ -379,7 +398,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ - WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); + WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); __put_system(dir->subsystem); if (!--dir->ref_count) @@ -1279,7 +1298,15 @@ create_new_subsystem(const char *name) return NULL; system->ref_count = 1; - system->name = name; + + /* Only allocate if dynamic (kprobes and modules) */ + if (!core_kernel_data((unsigned long)name)) { + system->ref_count |= SYSTEM_FL_FREE_NAME; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) + goto out_free; + } else + system->name = name; system->filter = NULL; @@ -1292,6 +1319,8 @@ create_new_subsystem(const char *name) return system; out_free: + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); return NULL; } From 86515381237a7f3c88a4a36e3e4d32d58635b972 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Wed, 10 Apr 2013 11:26:23 +0800 Subject: [PATCH 095/102] tracing: Fix irqs-off tag display in syscall tracing commit 11034ae9c20f4057a6127fc965906417978e69b2 upstream. All syscall tracing irqs-off tags are wrong, the syscall enter entry doesn't disable irqs. [root@jovi tracing]#echo "syscalls:sys_enter_open" > set_event [root@jovi tracing]# cat trace # tracer: nop # # entries-in-buffer/entries-written: 13/13 #P:2 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | irqbalance-513 [000] d... 56115.496766: sys_open(filename: 804e1a6, flags: 0, mode: 1b6) irqbalance-513 [000] d... 
56115.497008: sys_open(filename: 804e1bb, flags: 0, mode: 1b6) sendmail-771 [000] d... 56115.827982: sys_open(filename: b770e6d1, flags: 0, mode: 1b6) The reason is syscall tracing doesn't record irq_flags into buffer. The proper display is: [root@jovi tracing]#echo "syscalls:sys_enter_open" > set_event [root@jovi tracing]# cat trace # tracer: nop # # entries-in-buffer/entries-written: 14/14 #P:2 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | irqbalance-514 [001] .... 46.213921: sys_open(filename: 804e1a6, flags: 0, mode: 1b6) irqbalance-514 [001] .... 46.214160: sys_open(filename: 804e1bb, flags: 0, mode: 1b6) <...>-920 [001] .... 47.307260: sys_open(filename: 4e82a0c5, flags: 80000, mode: 0) Link: http://lkml.kernel.org/r/1365564393-10972-3-git-send-email-jovi.zhangwei@huawei.com Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_syscalls.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8f2ac73c7a5f..322e16461072 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; int size; @@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, 0, 0); + sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) return; @@ 
-333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) if (!filter_current_check_discard(buffer, sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->exit_event->event.type, sizeof(*entry), 0, 0); + sys_data->exit_event->event.type, sizeof(*entry), + irq_flags, pc); if (!event) return; @@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!filter_current_check_discard(buffer, sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static int reg_event_syscall_enter(struct ftrace_event_file *file, From b7f15519edb2e3c3d7d07d6a0780a4386ef23085 Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Mon, 1 Jul 2013 15:31:24 -0700 Subject: [PATCH 096/102] tracing: Make trace_marker use the correct per-instance buffer commit 2d71619c59fac95a5415a326162fa046161b938c upstream. The trace_marker file was present for each new instance created, but it added the trace mark to the global trace buffer instead of to the instance's buffer. 
Link: http://lkml.kernel.org/r/1372717885-4543-2-git-send-email-azl@google.com Signed-off-by: Alexander Z Lam Cc: David Sharp Cc: Vaibhav Nagarnaik Cc: Alexander Z Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e71a8be4a6ee..b90c993462be 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4328,6 +4328,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { unsigned long addr = (unsigned long)ubuf; + struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; struct ring_buffer *buffer; struct print_entry *entry; @@ -4387,7 +4388,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* possible \n added */ - buffer = global_trace.trace_buffer.buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (!event) { From 9713f78568d0053621530fb9cf06756394b4403c Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Mon, 1 Jul 2013 19:37:54 -0700 Subject: [PATCH 097/102] tracing: Protect ftrace_trace_arrays list in trace_events.c commit a82274151af2b075163e3c42c828529dee311487 upstream. There are multiple places where the ftrace_trace_arrays list is accessed in trace_events.c without the trace_types_lock held. 
Link: http://lkml.kernel.org/r/1372732674-22726-1-git-send-email-azl@google.com Signed-off-by: Alexander Z Lam Cc: Vaibhav Nagarnaik Cc: David Sharp Cc: Alexander Z Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 2 ++ kernel/trace/trace_events.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b90c993462be..10d3f0871b48 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -240,7 +240,7 @@ static struct tracer *trace_types __read_mostly; /* * trace_types_lock is used to protect the trace_types list. */ -static DEFINE_MUTEX(trace_types_lock); +DEFINE_MUTEX(trace_types_lock); /* * serialize the access of the ring buffer diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 20572ed88c5c..7944b9294599 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -224,6 +224,8 @@ enum { extern struct list_head ftrace_trace_arrays; +extern struct mutex trace_types_lock; + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index bf56c1d07df3..f82d92dbd614 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1011,6 +1011,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) int ret; /* Make sure the system still exists */ + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { @@ -1026,6 +1027,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) } exit_loop: mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); if (!system) return -ENODEV; @@ -1620,6 +1622,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call, int trace_add_event_call(struct ftrace_event_call *call) { int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); ret = __register_event(call, NULL); @@ -1627,11 +1630,13 @@ int trace_add_event_call(struct ftrace_event_call *call) __add_event_to_tracers(call, NULL); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return ret; } /* - * Must be called under locking both of event_mutex and trace_event_sem. + * Must be called under locking of trace_types_lock, event_mutex and + * trace_event_sem. 
*/ static void __trace_remove_event_call(struct ftrace_event_call *call) { @@ -1643,11 +1648,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) /* Remove an event_call */ void trace_remove_event_call(struct ftrace_event_call *call) { + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); __trace_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); } #define for_each_event(event, start, end) \ @@ -1791,6 +1798,7 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); switch (val) { case MODULE_STATE_COMING: @@ -1801,6 +1809,7 @@ static int trace_module_notify(struct notifier_block *self, break; } mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return 0; } From 59d8f48855856c5e2e112bab78f1b1e6a14c216b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Jul 2013 22:50:29 -0400 Subject: [PATCH 098/102] tracing: Add trace_array_get/put() to handle instance refs better commit ff451961a8b2a17667a7bfa39c86fb9b351445db upstream. Commit a695cb58162 "tracing: Prevent deleting instances when they are being read" tried to fix a race between deleting a trace instance and reading contents of a trace file. But it wasn't good enough. The following could crash the kernel: # cd /sys/kernel/debug/tracing/instances # ( while :; do mkdir foo; rmdir foo; done ) & # ( while :; do cat foo/trace &> /dev/null; done ) & Luckily this can only be done by root user, but it should be fixed regardless. The problem is that a delete of the file can happen after the reader starts to open the file but before it grabs the trace_types_lock. The solution is to validate the trace array before using it. If the trace array does not exist in the list of trace arrays, then it returns -ENODEV.
There's a possibility that a trace_array could be deleted and a new one created and the open would open its file instead. But that is very minor as it will just return the data of the new trace array, it may confuse the user but it will not crash the system. As this can only be done by root anyway, the race will only occur if root is deleting what it's trying to read at the same time. Reported-by: Alexander Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 83 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10d3f0871b48..9d076a1ffa0b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -193,6 +193,37 @@ static struct trace_array global_trace; LIST_HEAD(ftrace_trace_arrays); +int trace_array_get(struct trace_array *this_tr) +{ + struct trace_array *tr; + int ret = -ENODEV; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr == this_tr) { + tr->ref++; + ret = 0; + break; + } + } + mutex_unlock(&trace_types_lock); + + return ret; +} + +static void __trace_array_put(struct trace_array *this_tr) +{ + WARN_ON(!this_tr->ref); + this_tr->ref--; +} + +void trace_array_put(struct trace_array *this_tr) +{ + mutex_lock(&trace_types_lock); + __trace_array_put(this_tr); + mutex_unlock(&trace_types_lock); +} + int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event) @@ -2768,10 +2799,9 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file, bool snapshot) +__tracing_open(struct trace_array *tr, struct trace_cpu *tc, + struct inode *inode, struct file *file, bool snapshot) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; struct trace_iterator *iter; int cpu; @@ -2850,8
+2880,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) tracing_iter_reset(iter, cpu); } - tr->ref++; - mutex_unlock(&trace_types_lock); return iter; @@ -2881,17 +2909,20 @@ static int tracing_release(struct inode *inode, struct file *file) struct trace_array *tr; int cpu; - if (!(file->f_mode & FMODE_READ)) + /* Writes do not use seq_file, need to grab tr from inode */ + if (!(file->f_mode & FMODE_READ)) { + struct trace_cpu *tc = inode->i_private; + + trace_array_put(tc->tr); return 0; + } iter = m->private; tr = iter->tr; + trace_array_put(tr); mutex_lock(&trace_types_lock); - WARN_ON(!tr->ref); - tr->ref--; - for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); @@ -2910,20 +2941,23 @@ static int tracing_release(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter->buffer_iter); seq_release_private(inode, file); + return 0; } static int tracing_open(struct inode *inode, struct file *file) { + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - if (tc->cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->trace_buffer); else @@ -2931,12 +2965,16 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, false); + iter = __tracing_open(tr, tc, inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; } + + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -4512,12 +4550,16 @@ struct ftrace_buffer_info { static int tracing_snapshot_open(struct inode *inode, struct file *file) { 
struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; struct seq_file *m; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, true); + iter = __tracing_open(tr, tc, inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { @@ -4530,13 +4572,16 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) kfree(m); return -ENOMEM; } - iter->tr = tc->tr; + iter->tr = tr; iter->trace_buffer = &tc->tr->max_buffer; iter->cpu_file = tc->cpu; m->private = iter; file->private_data = m; } + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -4617,9 +4662,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, static int tracing_snapshot_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; + int ret; + + ret = tracing_release(inode, file); if (file->f_mode & FMODE_READ) - return tracing_release(inode, file); + return ret; /* If write only, the seq_file is just a stub */ if (m) @@ -4864,8 +4912,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); - WARN_ON(!iter->tr->ref); - iter->tr->ref--; + __trace_array_put(iter->tr); if (info->spare) ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); From 6492334c86dfb441af456337dc3217c2a430f141 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Jul 2013 23:34:22 -0400 Subject: [PATCH 099/102] tracing: Get trace_array ref counts when accessing trace files commit 7b85af63034818e43aee6c1d7bf1c7c6796a9073 upstream. When a trace file is opened that may access a trace array, it must increment its ref count to prevent it from being deleted. 
Reported-by: Alexander Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 121 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 112 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9d076a1ffa0b..0b936d806659 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2902,6 +2902,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } +/* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +int tracing_open_generic_tr(struct inode *inode, struct file *filp) +{ + struct trace_array *tr = inode->i_private; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + +int tracing_open_generic_tc(struct inode *inode, struct file *filp) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + static int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; @@ -2945,6 +2982,32 @@ static int tracing_release(struct inode *inode, struct file *file) return 0; } +static int tracing_release_generic_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + return 0; +} + +static int tracing_release_generic_tc(struct inode *inode, struct file *file) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + trace_array_put(tr); + return 0; +} + +static int tracing_single_release_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return single_release(inode, file); +} + static int tracing_open(struct inode *inode, struct file *file) 
{ struct trace_cpu *tc = inode->i_private; @@ -3331,9 +3394,14 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static int tracing_trace_options_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + return single_open(file, tracing_trace_options_show, inode->i_private); } @@ -3341,7 +3409,7 @@ static const struct file_operations tracing_iter_fops = { .open = tracing_trace_options_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_trace_options_write, }; @@ -3829,6 +3897,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + mutex_lock(&trace_types_lock); /* create a buffer to store the information to pass to userspace */ @@ -3881,6 +3952,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) fail: kfree(iter->trace); kfree(iter); + __trace_array_put(tr); mutex_unlock(&trace_types_lock); return ret; } @@ -3888,6 +3960,8 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; mutex_lock(&trace_types_lock); @@ -3901,6 +3975,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter); + trace_array_put(tr); + return 0; } @@ -4358,6 +4434,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); + trace_array_put(tr); + return 0; } @@ -4534,10 +4612,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, static int 
tracing_clock_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + int ret; + if (tracing_disabled) return -ENODEV; - return single_open(file, tracing_clock_show, inode->i_private); + if (trace_array_get(tr)) + return -ENODEV; + + ret = single_open(file, tracing_clock_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } struct ftrace_buffer_info { @@ -4733,34 +4821,38 @@ static const struct file_operations tracing_pipe_fops = { }; static const struct file_operations tracing_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tc, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tc, }; static const struct file_operations tracing_total_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_total_entries_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_free_buffer_fops = { + .open = tracing_open_generic_tr, .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .write = tracing_mark_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_clock_write, }; @@ -4788,13 +4880,19 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) struct trace_cpu *tc = inode->i_private; struct trace_array *tr = tc->tr; struct ftrace_buffer_info *info; + int ret; if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + info = kzalloc(sizeof(*info), GFP_KERNEL); - if 
(!info) + if (!info) { + trace_array_put(tr); return -ENOMEM; + } mutex_lock(&trace_types_lock); @@ -4812,7 +4910,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_unlock(&trace_types_lock); - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret < 0) + trace_array_put(tr); + + return ret; } static unsigned int @@ -5707,9 +5809,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, } static const struct file_operations rb_simple_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = rb_simple_read, .write = rb_simple_write, + .release = tracing_release_generic_tr, .llseek = default_llseek, }; From 68cebd265c91873277cf100e7ac1d047c6598ddf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Jul 2013 14:48:23 -0400 Subject: [PATCH 100/102] tracing: Fix race between deleting buffer and setting events commit 2a6c24afab70dbcfee49f4c76e1511eec1a3298b upstream. While analyzing the code, I discovered that there's a potential race between deleting a trace instance and setting events. There are a few races that can occur if events are being traced as the buffer is being deleted. Mostly the problem comes with freeing the descriptor used by the trace event callback. To prevent problems like this, the events are disabled before the buffer is deleted. The problem with the current solution is that the event_mutex is let go between disabling the events and freeing the files, which means that the events could be enabled again while the freeing takes place. 
Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f82d92dbd614..32b9895af239 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -415,14 +415,14 @@ static void put_system(struct ftrace_subsystem_dir *dir) /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ -static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, - const char *sub, const char *event, int set) +static int +__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) { struct ftrace_event_file *file; struct ftrace_event_call *call; int ret = -EINVAL; - mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; @@ -448,6 +448,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, ret = 0; } + + return ret; +} + +static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) +{ + int ret; + + mutex_lock(&event_mutex); + ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); mutex_unlock(&event_mutex); return ret; @@ -2367,11 +2378,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) int event_trace_del_tracer(struct trace_array *tr) { - /* Disable any running events */ - __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); - mutex_lock(&event_mutex); + /* Disable any running events */ + __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); + down_write(&trace_event_sem); __trace_remove_event_dirs(tr); debugfs_remove_recursive(tr->event_dir); From fc82a11a9ce5ddc1cc49ac7bb2a099b9b18b85c0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Jul 2013 15:30:53 -0400 Subject: [PATCH 101/102] tracing: Add 
trace_array_get/put() to event handling commit 8e2e2fa47129532a30cff6c25a47078dc97d9260 upstream. Commit a695cb58162 "tracing: Prevent deleting instances when they are being read" tried to fix a race between deleting a trace instance and reading contents of a trace file. But it wasn't good enough. The following could crash the kernel: # cd /sys/kernel/debug/tracing/instances # ( while :; do mkdir foo; rmdir foo; done ) & # ( while :; do echo 1 > foo/events/sched/sched_switch 2> /dev/null; done ) & Luckily this can only be done by the root user, but it should be fixed regardless. The problem is that a delete of the file can happen after the write to the event is opened, but before the enabling happens. The solution is to make sure the trace_array is available before succeeding in opening for write, and increment the ref counter while opened. Now the instance can be deleted when the events are writing to the buffer, but the deletion of the instance will disable all events before the instance is actually deleted. Reported-by: Alexander Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 3 ++ kernel/trace/trace_events.c | 57 +++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7944b9294599..51b44483eb78 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -226,6 +226,9 @@ extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; +extern int trace_array_get(struct trace_array *tr); +extern void trace_array_put(struct trace_array *tr); + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway.
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 32b9895af239..6dfd48b5d1c0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -412,6 +412,35 @@ static void put_system(struct ftrace_subsystem_dir *dir) mutex_unlock(&event_mutex); } +/* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +static int tracing_open_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + ret = tracing_open_generic(inode, filp); + if (ret < 0) + trace_array_put(tr); + return ret; +} + +static int tracing_release_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + + trace_array_put(tr); + + return 0; +} + /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. 
*/ @@ -1046,9 +1075,17 @@ static int subsystem_open(struct inode *inode, struct file *filp) /* Some versions of gcc think dir can be uninitialized here */ WARN_ON(!dir); - ret = tracing_open_generic(inode, filp); - if (ret < 0) + /* Still need to increment the ref count of the system */ + if (trace_array_get(tr) < 0) { put_system(dir); + return -ENODEV; + } + + ret = tracing_open_generic(inode, filp); + if (ret < 0) { + trace_array_put(tr); + put_system(dir); + } return ret; } @@ -1059,16 +1096,23 @@ static int system_tr_open(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; int ret; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); - if (!dir) + if (!dir) { + trace_array_put(tr); return -ENOMEM; + } dir->tr = tr; ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); kfree(dir); + } filp->private_data = dir; @@ -1079,6 +1123,8 @@ static int subsystem_release(struct inode *inode, struct file *file) { struct ftrace_subsystem_dir *dir = file->private_data; + trace_array_put(dir->tr); + /* * If dir->subsystem is NULL, then this is a temporary * descriptor that was made for a trace_array to enable @@ -1206,9 +1252,10 @@ static const struct file_operations ftrace_set_event_fops = { }; static const struct file_operations ftrace_enable_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_file, .read = event_enable_read, .write = event_enable_write, + .release = tracing_release_generic_file, .llseek = default_llseek, }; From 81a464838675307c9fc7909fc840f248ed2caed5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 25 Jul 2013 15:16:45 -0700 Subject: [PATCH 102/102] Linux 3.10.3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43367309fc78..b5485529b6bf 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION 
= 3 PATCHLEVEL = 10 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Unicycling Gorilla