From ce1c89d450af950db7d381df8ba8025736a44c8b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 21 Jul 2014 14:21:18 -0300 Subject: [PATCH 01/74] media: xc5000: Fix get_frequency() commit a3eec916cbc17dc1aaa3ddf120836cd5200eb4ef upstream. The programmed frequency on xc5000 is not the middle frequency, but the initial frequency on the bandwidth range. However, the DVB API works with the middle frequency. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/tuners/xc5000.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/media/tuners/xc5000.c b/drivers/media/tuners/xc5000.c index 5cd09a681b6a..b2d9e9cb97f7 100644 --- a/drivers/media/tuners/xc5000.c +++ b/drivers/media/tuners/xc5000.c @@ -55,7 +55,7 @@ struct xc5000_priv { u32 if_khz; u16 xtal_khz; - u32 freq_hz; + u32 freq_hz, freq_offset; u32 bandwidth; u8 video_standard; u8 rf_mode; @@ -755,13 +755,13 @@ static int xc5000_set_params(struct dvb_frontend *fe) case SYS_ATSC: dprintk(1, "%s() VSB modulation\n", __func__); priv->rf_mode = XC_RF_MODE_AIR; - priv->freq_hz = freq - 1750000; + priv->freq_offset = 1750000; priv->video_standard = DTV6; break; case SYS_DVBC_ANNEX_B: dprintk(1, "%s() QAM modulation\n", __func__); priv->rf_mode = XC_RF_MODE_CABLE; - priv->freq_hz = freq - 1750000; + priv->freq_offset = 1750000; priv->video_standard = DTV6; break; case SYS_ISDBT: @@ -776,15 +776,15 @@ static int xc5000_set_params(struct dvb_frontend *fe) switch (bw) { case 6000000: priv->video_standard = DTV6; - priv->freq_hz = freq - 1750000; + priv->freq_offset = 1750000; break; case 7000000: priv->video_standard = DTV7; - priv->freq_hz = freq - 2250000; + priv->freq_offset = 2250000; break; case 8000000: priv->video_standard = DTV8; - priv->freq_hz = freq - 2750000; + priv->freq_offset = 2750000; break; default: printk(KERN_ERR "xc5000 bandwidth not set!\n"); @@ -798,15 +798,15 @@ static int xc5000_set_params(struct dvb_frontend *fe) priv->rf_mode = XC_RF_MODE_CABLE; if (bw <= 6000000) { priv->video_standard = DTV6; - priv->freq_hz = freq - 1750000; + priv->freq_offset = 1750000; b = 6; } else if (bw <= 7000000) { priv->video_standard = DTV7; - priv->freq_hz = freq - 2250000; + priv->freq_offset = 2250000; b = 7; } else { priv->video_standard = DTV7_8; - priv->freq_hz = freq - 2750000; + priv->freq_offset = 2750000; b = 8; } dprintk(1, "%s() Bandwidth %dMHz (%d)\n", __func__, @@ -817,6 +817,8 @@ static int xc5000_set_params(struct dvb_frontend *fe) return -EINVAL; } + priv->freq_hz = freq - priv->freq_offset; + dprintk(1, "%s() frequency=%d (compensated to %d)\n", __func__, freq, priv->freq_hz); @@ -1067,7 +1069,7 @@ static int xc5000_get_frequency(struct dvb_frontend *fe, u32 *freq) { struct xc5000_priv *priv = fe->tuner_priv; dprintk(1, "%s()\n", __func__); - *freq = priv->freq_hz; + *freq = priv->freq_hz + priv->freq_offset; return 0; } From d2b8c8c9e5ea79818a1496f8838ea1c7020ec34f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 21 Jul 2014 13:28:15 -0300 Subject: [PATCH 02/74] media: xc4000: Fix get_frequency() commit 4c07e32884ab69574cfd9eb4de3334233c938071 upstream. The programmed frequency on xc4000 is not the middle frequency, but the initial frequency on the bandwidth range. However, the DVB API works with the middle frequency. This works fine on set_frontend, as the device calculates the needed offset. However, at get_frequency(), the returned value is the initial frequency. That's generally not a big problem on most drivers, however, starting with changeset 6fe1099c7aec, the frequency drift is taken into account at dib7000p driver. This broke support for PCTV 340e, with uses dib7000p demod and xc4000 tuner. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/tuners/xc4000.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c index 2018befabb5a..e71decbfd0af 100644 --- a/drivers/media/tuners/xc4000.c +++ b/drivers/media/tuners/xc4000.c @@ -93,7 +93,7 @@ struct xc4000_priv { struct firmware_description *firm; int firm_size; u32 if_khz; - u32 freq_hz; + u32 freq_hz, freq_offset; u32 bandwidth; u8 video_standard; u8 rf_mode; @@ -1157,14 +1157,14 @@ static int xc4000_set_params(struct dvb_frontend *fe) case SYS_ATSC: dprintk(1, "%s() VSB modulation\n", __func__); priv->rf_mode = XC_RF_MODE_AIR; - priv->freq_hz = c->frequency - 1750000; + priv->freq_offset = 1750000; priv->video_standard = XC4000_DTV6; type = DTV6; break; case SYS_DVBC_ANNEX_B: dprintk(1, "%s() QAM modulation\n", __func__); priv->rf_mode = XC_RF_MODE_CABLE; - priv->freq_hz = c->frequency - 1750000; + priv->freq_offset = 1750000; priv->video_standard = XC4000_DTV6; type = DTV6; break; @@ -1173,23 +1173,23 @@ static int xc4000_set_params(struct dvb_frontend *fe) dprintk(1, "%s() OFDM\n", __func__); if (bw == 0) { if (c->frequency < 400000000) { - priv->freq_hz = c->frequency - 2250000; + priv->freq_offset = 2250000; } else { - priv->freq_hz = c->frequency - 2750000; + priv->freq_offset = 2750000; } priv->video_standard = XC4000_DTV7_8; type = DTV78; } else if (bw <= 6000000) { priv->video_standard = XC4000_DTV6; - priv->freq_hz = c->frequency - 1750000; + priv->freq_offset = 1750000; type = DTV6; } else if (bw <= 7000000) { priv->video_standard = XC4000_DTV7; - priv->freq_hz = c->frequency - 2250000; + priv->freq_offset = 2250000; type = DTV7; } else { priv->video_standard = XC4000_DTV8; - priv->freq_hz = c->frequency - 2750000; + priv->freq_offset = 2750000; type = DTV8; } priv->rf_mode = XC_RF_MODE_AIR; @@ -1200,6 +1200,8 @@ static int xc4000_set_params(struct dvb_frontend *fe) goto fail; } + priv->freq_hz = c->frequency - priv->freq_offset; + dprintk(1, "%s() frequency=%d (compensated)\n", __func__, priv->freq_hz); @@ -1520,7 +1522,7 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) { struct xc4000_priv *priv = fe->tuner_priv; - *freq = priv->freq_hz; + *freq = priv->freq_hz + priv->freq_offset; if (debug) { mutex_lock(&priv->lock); From f3e8f2718d3fe6bf8dd845ce09e581d5e9772158 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 8 Jun 2014 13:54:57 -0300 Subject: [PATCH 03/74] media: au0828: Only alt setting logic when needed commit 64ea37bbd8a5815522706f0099ad3f11c7537e15 upstream. It seems that there's a bug at au0828 hardware/firmware related to alternate setting: when the device is already at alt 5, a further call causes the URBs to receive -ESHUTDOWN. I found two different encarnations of this issue: 1) at qv4l2, it fails the second time we try to open the video screen; 2) at xawtv, when audio underrun occurs, with is very frequent, at least on my test machine. The fix is simple: just check if alt=5 before calling set_usb_interface(). Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/au0828/au0828-video.c | 34 ++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c index 75ac9947cdac..98e1b937b500 100644 --- a/drivers/media/usb/au0828/au0828-video.c +++ b/drivers/media/usb/au0828/au0828-video.c @@ -788,11 +788,27 @@ static int au0828_i2s_init(struct au0828_dev *dev) /* * Auvitek au0828 analog stream enable - * Please set interface0 to AS5 before enable the stream */ static int au0828_analog_stream_enable(struct au0828_dev *d) { + struct usb_interface *iface; + int ret; + dprintk(1, "au0828_analog_stream_enable called\n"); + + iface = usb_ifnum_to_if(d->usbdev, 0); + if (iface && iface->cur_altsetting->desc.bAlternateSetting != 5) { + dprintk(1, "Changing intf#0 to alt 5\n"); + /* set au0828 interface0 to AS5 here again */ + ret = usb_set_interface(d->usbdev, 0, 5); + if (ret < 0) { + printk(KERN_INFO "Au0828 can't set alt setting to 5!\n"); + return -EBUSY; + } + } + + /* FIXME: size should be calculated using d->width, d->height */ + au0828_writereg(d, AU0828_SENSORCTRL_VBI_103, 0x00); au0828_writereg(d, 0x106, 0x00); /* set x position */ @@ -1003,15 +1019,6 @@ static int au0828_v4l2_open(struct file *filp) return -ERESTARTSYS; } if (dev->users == 0) { - /* set au0828 interface0 to AS5 here again */ - ret = usb_set_interface(dev->usbdev, 0, 5); - if (ret < 0) { - mutex_unlock(&dev->lock); - printk(KERN_INFO "Au0828 can't set alternate to 5!\n"); - kfree(fh); - return -EBUSY; - } - au0828_analog_stream_enable(dev); au0828_analog_stream_reset(dev); @@ -1253,13 +1260,6 @@ static int au0828_set_format(struct au0828_dev *dev, unsigned int cmd, } } - /* set au0828 interface0 to AS5 here again */ - ret = usb_set_interface(dev->usbdev, 0, 5); - if (ret < 0) { - printk(KERN_INFO "Au0828 can't set alt setting to 5!\n"); - return -EBUSY; - } - au0828_analog_stream_enable(dev); return 0; From 01d29ff71d3b123c443f8426d5540462e9d04b19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Salva=20Peir=C3=B3?= Date: Sat, 7 Jun 2014 11:41:44 -0300 Subject: [PATCH 04/74] media: media-device: Remove duplicated memset() in media_enum_entities() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f8ca6ac00d2ba24c5557f08f81439cd3432f0802 upstream. After the zeroing the whole struct struct media_entity_desc u_ent, it is no longer necessary to memset(0) its u_ent.name field. Signed-off-by: Salva Peiró Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/media-device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c index 79715f9feb0a..fdb5840f034b 100644 --- a/drivers/media/media-device.c +++ b/drivers/media/media-device.c @@ -106,8 +106,6 @@ static long media_device_enum_entities(struct media_device *mdev, if (ent->name) { strncpy(u_ent.name, ent->name, sizeof(u_ent.name)); u_ent.name[sizeof(u_ent.name) - 1] = '\0'; - } else { - memset(u_ent.name, 0, sizeof(u_ent.name)); } u_ent.type = ent->type; u_ent.revision = ent->revision; From b15dba9397aae1e091b047c9e3214bfbd1d17e04 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Aug 2014 17:50:15 +0200 Subject: [PATCH 05/74] iommu/amd: Fix cleanup_domain for mass device removal commit 9b29d3c6510407d91786c1cf9183ff4debb3473a upstream. When multiple devices are detached in __detach_device, they are also removed from the domains dev_list. This makes it unsafe to use list_for_each_entry_safe, as the next pointer might also not be in the list anymore after __detach_device returns. So just repeatedly remove the first element of the list until it is empty. Tested-by: Marti Raudsepp Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6f849cbcac6f..dfb401cba733 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3187,14 +3187,16 @@ int __init amd_iommu_init_dma_ops(void) static void cleanup_domain(struct protection_domain *domain) { - struct iommu_dev_data *dev_data, *next; + struct iommu_dev_data *entry; unsigned long flags; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { - __detach_device(dev_data); - atomic_set(&dev_data->bind, 0); + while (!list_empty(&domain->dev_list)) { + entry = list_first_entry(&domain->dev_list, + struct iommu_dev_data, list); + __detach_device(entry); + atomic_set(&entry->bind, 0); } write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); From bd09037436bbd28f75e21bc7e630725f4902c084 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 27 Jul 2014 23:53:19 +0200 Subject: [PATCH 06/74] spi: orion: fix incorrect handling of cell-index DT property commit e06871cd2c92e5c65d7ca1d32866b4ca5dd4ac30 upstream. In commit f814f9ac5a81 ("spi/orion: add device tree binding"), Device Tree support was added to the spi-orion driver. However, this commit reads the "cell-index" property, without taking into account the fact that DT properties are big-endian encoded. Since most of the platforms using spi-orion with DT have apparently not used anything but cell-index = <0>, the problem was not visible. But as soon as one starts using cell-index = <1>, the problem becomes clearly visible, as the master->bus_num gets a wrong value (actually it gets the value 0, which conflicts with the first bus that has cell-index = <0>). This commit fixes that by using of_property_read_u32() to read the property value, which does the appropriate endianness conversion when needed. Fixes: f814f9ac5a81 ("spi/orion: add device tree binding") Signed-off-by: Thomas Petazzoni Acked-by: Sebastian Hesselbarth Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-orion.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index 66a5f82cf138..183aa80c9017 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -403,8 +403,6 @@ static int orion_spi_probe(struct platform_device *pdev) struct resource *r; unsigned long tclk_hz; int status = 0; - const u32 *iprop; - int size; master = spi_alloc_master(&pdev->dev, sizeof *spi); if (master == NULL) { @@ -415,10 +413,10 @@ static int orion_spi_probe(struct platform_device *pdev) if (pdev->id != -1) master->bus_num = pdev->id; if (pdev->dev.of_node) { - iprop = of_get_property(pdev->dev.of_node, "cell-index", - &size); - if (iprop && size == sizeof(*iprop)) - master->bus_num = *iprop; + u32 cell_index; + if (!of_property_read_u32(pdev->dev.of_node, "cell-index", + &cell_index)) + master->bus_num = cell_index; } /* we support only mode 0, and no options */ From abcc94f8395a956d8aa9188aa5ffb66cba90738c Mon Sep 17 00:00:00 2001 From: "Mark A. Greer" Date: Tue, 1 Jul 2014 20:28:32 -0700 Subject: [PATCH 07/74] spi: omap2-mcspi: Configure hardware when slave driver changes mode commit 97ca0d6cc118716840ea443e010cb3d5f2d25eaf upstream. Commit id 2bd16e3e23d9df41592c6b257c59b6860a9cc3ea (spi: omap2-mcspi: Do not configure the controller on each transfer unless needed) does its job too well so omap2_mcspi_setup_transfer() isn't called even when an SPI slave driver changes 'spi->mode'. The result is that the mode requested by the SPI slave driver never takes effect. Fix this by adding the 'mode' member to the omap2_mcspi_cs structure which holds the mode value that the hardware is configured for. When the SPI slave driver changes 'spi->mode' it will be different than the value of this new member and the SPI master driver will know that the hardware must be reconfigured (by calling omap2_mcspi_setup_transfer()). Fixes: 2bd16e3e23 (spi: omap2-mcspi: Do not configure the controller on each transfer unless needed) Signed-off-by: Mark A. Greer Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-omap2-mcspi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 86d2158946bb..798729eb6689 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -136,6 +136,7 @@ struct omap2_mcspi_cs { void __iomem *base; unsigned long phys; int word_len; + u16 mode; struct list_head node; /* Context save and restore shadow register */ u32 chconf0; @@ -801,6 +802,8 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi, mcspi_write_chconf0(spi, l); + cs->mode = spi->mode; + dev_dbg(&spi->dev, "setup: speed %d, sample %s edge, clk %s\n", OMAP2_MCSPI_MAX_FREQ >> div, (spi->mode & SPI_CPHA) ? "trailing" : "leading", @@ -871,6 +874,7 @@ static int omap2_mcspi_setup(struct spi_device *spi) return -ENOMEM; cs->base = mcspi->base + spi->chip_select * 0x14; cs->phys = mcspi->phys + spi->chip_select * 0x14; + cs->mode = 0; cs->chconf0 = 0; spi->controller_state = cs; /* Link this to context save list */ @@ -1043,6 +1047,16 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m) mcspi_read_cs_reg(spi, OMAP2_MCSPI_MODULCTRL); } + /* + * The slave driver could have changed spi->mode in which case + * it will be different from cs->mode (the current hardware setup). + * If so, set par_override (even though its not a parity issue) so + * omap2_mcspi_setup_transfer will be called to configure the hardware + * with the correct mode on the first iteration of the loop below. + */ + if (spi->mode != cs->mode) + par_override = 1; + omap2_mcspi_set_enable(spi, 0); m->status = status; From 79943632c58a32b9faf8513d40a8120946cdf35c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 13 Aug 2014 11:21:34 -0700 Subject: [PATCH 08/74] firmware: Do not use WARN_ON(!spin_is_locked()) commit aee530cfecf4f3ec83b78406bac618cec35853f8 upstream. spin_is_locked() always returns false for uniprocessor configurations in several architectures, so do not use WARN_ON with it. Use lockdep_assert_held() instead to also reduce overhead in non-debug kernels. Signed-off-by: Guenter Roeck Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/vars.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 391c67b182d9..7dbc319e1cf5 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -481,7 +481,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_remove); */ static void efivar_entry_list_del_unlock(struct efivar_entry *entry) { - WARN_ON(!spin_is_locked(&__efivars->lock)); + lockdep_assert_held(&__efivars->lock); list_del(&entry->list); spin_unlock_irq(&__efivars->lock); @@ -507,7 +507,7 @@ int __efivar_entry_delete(struct efivar_entry *entry) const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - WARN_ON(!spin_is_locked(&__efivars->lock)); + lockdep_assert_held(&__efivars->lock); status = ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, @@ -667,7 +667,7 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, int strsize1, strsize2; bool found = false; - WARN_ON(!spin_is_locked(&__efivars->lock)); + lockdep_assert_held(&__efivars->lock); list_for_each_entry_safe(entry, n, head, list) { strsize1 = ucs2_strsize(name, 1024); @@ -731,7 +731,7 @@ int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes, const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - WARN_ON(!spin_is_locked(&__efivars->lock)); + lockdep_assert_held(&__efivars->lock); status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, From d4281c33c1086d80b2a5e3cd6081752e75795833 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 9 May 2014 14:23:10 +0300 Subject: [PATCH 09/74] tpm: missing tpm_chip_put in tpm_get_random() commit 3e14d83ef94a5806a865b85b513b4e891923c19b upstream. Regression in 41ab999c. Call to tpm_chip_put is missing. This will cause TPM device driver not to unload if tmp_get_random() is called. Signed-off-by: Jarkko Sakkinen Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 7c3b3dcbfbc8..01d6968a9e47 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -1423,13 +1423,13 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) int err, total = 0, retries = 5; u8 *dest = out; + if (!out || !num_bytes || max > TPM_MAX_RNG_DATA) + return -EINVAL; + chip = tpm_chip_find_get(chip_num); if (chip == NULL) return -ENODEV; - if (!out || !num_bytes || max > TPM_MAX_RNG_DATA) - return -EINVAL; - do { tpm_cmd.header.in = tpm_getrandom_header; tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes); @@ -1448,6 +1448,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) num_bytes -= recd; } while (retries-- && total < max); + tpm_chip_put(chip); return total ? total : -EIO; } EXPORT_SYMBOL_GPL(tpm_get_random); From 76f01555c78e496203105bd29b878db3431a2260 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 23 Jul 2014 15:36:26 -0400 Subject: [PATCH 10/74] CAPABILITIES: remove undefined caps from all processes commit 7d8b6c63751cfbbe5eef81a48c22978b3407a3ad upstream. This is effectively a revert of 7b9a7ec565505699f503b4fcf61500dceb36e744 plus fixing it a different way... We found, when trying to run an application from an application which had dropped privs that the kernel does security checks on undefined capability bits. This was ESPECIALLY difficult to debug as those undefined bits are hidden from /proc/$PID/status. Consider a root application which drops all capabilities from ALL 4 capability sets. We assume, since the application is going to set eff/perm/inh from an array that it will clear not only the defined caps less than CAP_LAST_CAP, but also the higher 28ish bits which are undefined future capabilities. The BSET gets cleared differently. Instead it is cleared one bit at a time. The problem here is that in security/commoncap.c::cap_task_prctl() we actually check the validity of a capability being read. So any task which attempts to 'read all things set in bset' followed by 'unset all things set in bset' will not even attempt to unset the undefined bits higher than CAP_LAST_CAP. So the 'parent' will look something like: CapInh: 0000000000000000 CapPrm: 0000000000000000 CapEff: 0000000000000000 CapBnd: ffffffc000000000 All of this 'should' be fine. Given that these are undefined bits that aren't supposed to have anything to do with permissions. But they do... So lets now consider a task which cleared the eff/perm/inh completely and cleared all of the valid caps in the bset (but not the invalid caps it couldn't read out of the kernel). We know that this is exactly what the libcap-ng library does and what the go capabilities library does. They both leave you in that above situation if you try to clear all of you capapabilities from all 4 sets. If that root task calls execve() the child task will pick up all caps not blocked by the bset. The bset however does not block bits higher than CAP_LAST_CAP. So now the child task has bits in eff which are not in the parent. These are 'meaningless' undefined bits, but still bits which the parent doesn't have. The problem is now in cred_cap_issubset() (or any operation which does a subset test) as the child, while a subset for valid cap bits, is not a subset for invalid cap bits! So now we set durring commit creds that the child is not dumpable. Given it is 'more priv' than its parent. It also means the parent cannot ptrace the child and other stupidity. The solution here: 1) stop hiding capability bits in status This makes debugging easier! 2) stop giving any task undefined capability bits. it's simple, it you don't put those invalid bits in CAP_FULL_SET you won't get them in init and you won't get them in any other task either. This fixes the cap_issubset() tests and resulting fallout (which made the init task in a docker container untraceable among other things) 3) mask out undefined bits when sys_capset() is called as it might use ~0, ~0 to denote 'all capabilities' for backward/forward compatibility. This lets 'capsh --caps="all=eip" -- -c /bin/bash' run. 4) mask out undefined bit when we read a file capability off of disk as again likely all bits are set in the xattr for forward/backward compatibility. This lets 'setcap all+pe /bin/bash; /bin/bash' run Signed-off-by: Eric Paris Reviewed-by: Kees Cook Cc: Andrew Vagin Cc: Andrew G. Morgan Cc: Serge E. Hallyn Cc: Kees Cook Cc: Steve Grubb Cc: Dan Walsh Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 11 +---------- include/linux/capability.h | 5 ++++- kernel/audit.c | 2 +- kernel/capability.c | 4 ++++ security/commoncap.c | 3 +++ 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index cbd0f1b324b9..09f0d9c374a3 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -304,15 +304,11 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_puts(m, header); CAP_FOR_EACH_U32(__capi) { seq_printf(m, "%08x", - a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); + a->cap[CAP_LAST_U32 - __capi]); } seq_putc(m, '\n'); } -/* Remove non-existent capabilities */ -#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \ - CAP_TO_MASK(CAP_LAST_CAP + 1) - 1) - static inline void task_cap(struct seq_file *m, struct task_struct *p) { const struct cred *cred; @@ -326,11 +322,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) cap_bset = cred->cap_bset; rcu_read_unlock(); - NORM_CAPS(cap_inheritable); - NORM_CAPS(cap_permitted); - NORM_CAPS(cap_effective); - NORM_CAPS(cap_bset); - render_cap_t(m, "CapInh:\t", &cap_inheritable); render_cap_t(m, "CapPrm:\t", &cap_permitted); render_cap_t(m, "CapEff:\t", &cap_effective); diff --git a/include/linux/capability.h b/include/linux/capability.h index 15f90929fb51..9b4378af414c 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -78,8 +78,11 @@ extern const kernel_cap_t __cap_init_eff_set; # error Fix up hand-coded capability macro initializers #else /* HAND-CODED capability initializers */ +#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) +#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) + # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) -# define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) +# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) diff --git a/kernel/audit.c b/kernel/audit.c index a6c632757e57..4dd7529b0845 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1412,7 +1412,7 @@ void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) audit_log_format(ab, " %s=", prefix); CAP_FOR_EACH_U32(i) { audit_log_format(ab, "%08x", - cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]); + cap->cap[CAP_LAST_U32 - i]); } } diff --git a/kernel/capability.c b/kernel/capability.c index d52eecc0942b..1339806a8731 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -268,6 +268,10 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) i++; } + effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + new = prepare_creds(); if (!new) return -ENOMEM; diff --git a/security/commoncap.c b/security/commoncap.c index c44b6fe6648e..c9219a66b7c6 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -421,6 +421,9 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); } + cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + return 0; } From a8fd51944ec90a41b37afe0aee25cf9ec17be8fe Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 6 Aug 2014 16:08:14 -0700 Subject: [PATCH 11/74] kernel/smp.c:on_each_cpu_cond(): fix warning in fallback path commit 618fde872163e782183ce574c77f1123e2be8887 upstream. The rarely-executed memry-allocation-failed callback path generates a WARN_ON_ONCE() when smp_call_function_single() succeeds. Presumably it's supposed to warn on failures. Signed-off-by: Sasha Levin Cc: Christoph Lameter Cc: Gilad Ben-Yossef Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smp.c b/kernel/smp.c index 4dba0f7b72ad..88797cb0d23a 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -658,7 +658,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), if (cond_func(cpu, info)) { ret = smp_call_function_single(cpu, func, info, wait); - WARN_ON_ONCE(!ret); + WARN_ON_ONCE(ret); } preempt_enable(); } From 2c34d0d0fe11694fa617f7fca64d7e7fed59b3ea Mon Sep 17 00:00:00 2001 From: Michael Welling Date: Mon, 28 Jul 2014 18:01:04 -0500 Subject: [PATCH 12/74] mfd: omap-usb-host: Fix improper mask use. commit 46de8ff8e80a6546aa3d2fdf58c6776666301a0c upstream. single-ulpi-bypass is a flag used for older OMAP3 silicon. The flag when set, can excite code that improperly uses the OMAP_UHH_HOSTCONFIG_UPLI_BYPASS define to clear the corresponding bit. Instead it clears all of the other bits disabling all of the ports in the process. Signed-off-by: Michael Welling Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/omap-usb-host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 759fae3ca7fb..a36f3f282ae7 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -445,7 +445,7 @@ static unsigned omap_usbhs_rev1_hostconfig(struct usbhs_hcd_omap *omap, for (i = 0; i < omap->nports; i++) { if (is_ehci_phy_mode(pdata->port_mode[i])) { - reg &= OMAP_UHH_HOSTCONFIG_ULPI_BYPASS; + reg &= ~OMAP_UHH_HOSTCONFIG_ULPI_BYPASS; break; } } From b2102aa9271a4ac6c20b516304c2c88ee1d1cc7b Mon Sep 17 00:00:00 2001 From: Nikesh Oswal Date: Fri, 4 Jul 2014 09:55:16 +0100 Subject: [PATCH 13/74] regulator: arizona-ldo1: remove bypass functionality commit 5b919f3ebb533cbe400664837e24f66a0836b907 upstream. WM5110/8280 devices do not support bypass mode for LDO1 so remove the bypass callbacks registered with regulator core. Signed-off-by: Nikesh Oswal Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/arizona-ldo1.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/regulator/arizona-ldo1.c b/drivers/regulator/arizona-ldo1.c index 81d8681c3195..b1b35f38d11d 100644 --- a/drivers/regulator/arizona-ldo1.c +++ b/drivers/regulator/arizona-ldo1.c @@ -141,8 +141,6 @@ static struct regulator_ops arizona_ldo1_ops = { .map_voltage = regulator_map_voltage_linear, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, - .get_bypass = regulator_get_bypass_regmap, - .set_bypass = regulator_set_bypass_regmap, }; static const struct regulator_desc arizona_ldo1 = { From 5bad07d5f8a43a8ebc04840f525c9e8fce5d6f2c Mon Sep 17 00:00:00 2001 From: Andrey Utkin Date: Mon, 4 Aug 2014 23:13:10 +0300 Subject: [PATCH 14/74] powerpc/mm/numa: Fix break placement commit b00fc6ec1f24f9d7af9b8988b6a198186eb3408c upstream. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=81631 Reported-by: David Binderman Signed-off-by: Andrey Utkin Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b7293bba0062..08c6f3185d45 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -586,8 +586,8 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); - break; ret = NOTIFY_OK; + break; #endif } return ret; From 6136852d18b23a3a6d1db0b669233aba7f05e46c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Aug 2014 12:32:03 +0530 Subject: [PATCH 15/74] powerpc/mm: Use read barrier when creating real_pte commit 85c1fafd7262e68ad821ee1808686b1392b1167d upstream. On ppc64 we support 4K hash pte with 64K page size. That requires us to track the hash pte slot information on a per 4k basis. We do that by storing the slot details in the second half of pte page. The pte bit _PAGE_COMBO is used to indicate whether the second half need to be looked while building real_pte. We need to use read memory barrier while doing that so that load of hidx is not reordered w.r.t _PAGE_COMBO check. On the store side we already do a lwsync in __hash_page_4K Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/pte-hash64-64k.h | 32 +++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index d836d945068d..063fcadd1a00 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -40,17 +40,39 @@ #ifndef __ASSEMBLY__ +#include /* for smp_rmb() */ + /* * With 64K pages on hash table, we have a special PTE format that * uses a second "half" of the page table to encode sub-page information * in order to deal with 64K made of 4K HW pages. Thus we override the * generic accessors and iterators here */ -#define __real_pte(e,p) ((real_pte_t) { \ - (e), (pte_val(e) & _PAGE_COMBO) ? \ - (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) -#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ - (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) +#define __real_pte __real_pte +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +{ + real_pte_t rpte; + + rpte.pte = pte; + rpte.hidx = 0; + if (pte_val(pte) & _PAGE_COMBO) { + /* + * Make sure we order the hidx load against the _PAGE_COMBO + * check. The store side ordering is done in __hash_page_4K + */ + smp_rmb(); + rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE)); + } + return rpte; +} + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + if ((pte_val(rpte.pte) & _PAGE_COMBO)) + return (rpte.hidx >> (index<<2)) & 0xf; + return (pte_val(rpte.pte) >> 12) & 0xf; +} + #define __rpte_to_pte(r) ((r).pte) #define __rpte_sub_valid(rpte, index) \ (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) From d08ed7a370531ca47694498de8b5e32966545884 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 11 Aug 2014 19:16:19 +1000 Subject: [PATCH 16/74] powerpc/pseries: Failure on removing device node commit f1b3929c232784580e5d8ee324b6bc634e709575 upstream. While running command "drmgr -c phb -r -s 'PHB 528'", following backtrace jumped out because the target device node isn't marked with OF_DETACHED by of_detach_node(), which caused by error returned from memory hotplug related reconfig notifier when disabling CONFIG_MEMORY_HOTREMOVE. The patch fixes it. ERROR: Bad of_node_put() on /pci@800000020000210/ethernet@0 CPU: 14 PID: 2252 Comm: drmgr Tainted: G W 3.16.0+ #427 Call Trace: [c000000012a776a0] [c000000000013d9c] .show_stack+0x88/0x148 (unreliable) [c000000012a77750] [c00000000083cd34] .dump_stack+0x7c/0x9c [c000000012a777d0] [c0000000006807c4] .of_node_release+0x58/0xe0 [c000000012a77860] [c00000000038a7d0] .kobject_release+0x174/0x1b8 [c000000012a77900] [c00000000038a884] .kobject_put+0x70/0x78 [c000000012a77980] [c000000000681680] .of_node_put+0x28/0x34 [c000000012a77a00] [c000000000681ea8] .__of_get_next_child+0x64/0x70 [c000000012a77a90] [c000000000682138] .of_find_node_by_path+0x1b8/0x20c [c000000012a77b40] [c000000000051840] .ofdt_write+0x308/0x688 [c000000012a77c20] [c000000000238430] .proc_reg_write+0xb8/0xd4 [c000000012a77cd0] [c0000000001cbeac] .vfs_write+0xec/0x1f8 [c000000012a77d70] [c0000000001cc3b0] .SyS_write+0x58/0xa0 [c000000012a77e30] [c00000000000a064] syscall_exit+0x0/0x98 Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9a432de363b8..bebe64ed5dc3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -158,7 +158,7 @@ static int pseries_remove_memory(struct device_node *np) static inline int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { - return -EOPNOTSUPP; + return 0; } static inline int pseries_remove_memory(struct device_node *np) { From 7afc3ac1263be1a6f2ef76d76ff41615a0c795d3 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 12 Jul 2014 09:48:30 -0700 Subject: [PATCH 17/74] Drivers: scsi: storvsc: Implement a eh_timed_out handler commit 56b26e69c8283121febedd12b3cc193384af46b9 upstream. On Azure, we have seen instances of unbounded I/O latencies. To deal with this issue, implement handler that can reset the timeout. Note that the host gaurantees that it will respond to each command that has been issued. Signed-off-by: K. Y. Srinivasan Reviewed-by: Hannes Reinecke [hch: added a better comment explaining the issue] Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 91b76cea3e3c..34a8f526908f 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1285,6 +1286,16 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) return SUCCESS; } +/* + * The host guarantees to respond to each command, although I/O latencies might + * be unbounded on Azure. Reset the timer unconditionally to give the host a + * chance to perform EH. + */ +static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd) +{ + return BLK_EH_RESET_TIMER; +} + static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd) { bool allowed = true; @@ -1444,6 +1455,7 @@ static struct scsi_host_template scsi_driver = { .bios_param = storvsc_get_chs, .queuecommand = storvsc_queuecommand, .eh_host_reset_handler = storvsc_host_reset_handler, + .eh_timed_out = storvsc_eh_timed_out, .slave_alloc = storvsc_device_alloc, .slave_destroy = storvsc_device_destroy, .slave_configure = storvsc_device_configure, From 8ce6d81a2d174a0cecc1efbaf2d218c90d2e2fbd Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 12 Jul 2014 09:48:32 -0700 Subject: [PATCH 18/74] drivers: scsi: storvsc: Correctly handle TEST_UNIT_READY failure commit 3533f8603d28b77c62d75ec899449a99bc6b77a1 upstream. On some Windows hosts on FC SANs, TEST_UNIT_READY can return SRB_STATUS_ERROR. Correctly handle this. Note that there is sufficient sense information to support scsi error handling even in this case. Signed-off-by: K. Y. Srinivasan Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 34a8f526908f..87ca72d36d5b 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -804,6 +804,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, case ATA_12: set_host_byte(scmnd, DID_PASSTHROUGH); break; + /* + * On Some Windows hosts TEST_UNIT_READY command can return + * SRB_STATUS_ERROR, let the upper level code deal with it + * based on the sense information. + */ + case TEST_UNIT_READY: + break; default: set_host_byte(scmnd, DID_TARGET_FAILURE); } From f52337e5d5de58e2c5be00389d1250ae537cb98b Mon Sep 17 00:00:00 2001 From: Jeffrey Deans Date: Thu, 17 Jul 2014 09:20:56 +0100 Subject: [PATCH 19/74] MIPS: GIC: Prevent array overrun commit ffc8415afab20bd97754efae6aad1f67b531132b upstream. A GIC interrupt which is declared as having a GIC_MAP_TO_NMI_MSK mapping causes the cpu parameter to gic_setup_intr() to be increased to 32, causing memory corruption when pcpu_masks[] is written to again later in the function. Signed-off-by: Jeffrey Deans Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/7375/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/irq-gic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index c01b307317a9..bffbbc557879 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -256,11 +256,13 @@ static void __init gic_setup_intr(unsigned int intr, unsigned int cpu, /* Setup Intr to Pin mapping */ if (pin & GIC_MAP_TO_NMI_MSK) { + int i; + GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)), pin); /* FIXME: hack to route NMI to all cpu's */ - for (cpu = 0; cpu < NR_CPUS; cpu += 32) { + for (i = 0; i < NR_CPUS; i += 32) { GICWRITE(GIC_REG_ADDR(SHARED, - GIC_SH_MAP_TO_VPE_REG_OFF(intr, cpu)), + GIC_SH_MAP_TO_VPE_REG_OFF(intr, i)), 0xffffffff); } } else { From fbd9df2eb92caee0ca82bd1b73e49e673cfc1ab5 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 22 Jul 2014 14:21:21 +0100 Subject: [PATCH 20/74] MIPS: Prevent user from setting FCSR cause bits commit b1442d39fac2fcfbe6a4814979020e993ca59c9e upstream. If one or more matching FCSR cause & enable bits are set in saved thread context then when that context is restored the kernel will take an FP exception. This is of course undesirable and considered an oops, leading to the kernel writing a backtrace to the console and potentially rebooting depending upon the configuration. Thus the kernel avoids this situation by clearing the cause bits of the FCSR register when handling FP exceptions and after emulating FP instructions. However the kernel does not prevent userland from setting arbitrary FCSR cause & enable bits via ptrace, using either the PTRACE_POKEUSR or PTRACE_SETFPREGS requests. This means userland can trivially cause the kernel to oops on any system with an FPU. Prevent this from happening by clearing the cause bits when writing to the saved FCSR context via ptrace. This problem appears to exist at least back to the beginning of the git era in the PTRACE_POKEUSR case. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Paul Burton Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/7438/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 9c6299c733a3..1b95b2443221 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -161,6 +161,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data) __get_user(fregs[i], i + (__u64 __user *) data); __get_user(child->thread.fpu.fcr31, data + 64); + child->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; /* FIR may not be written. */ @@ -451,7 +452,7 @@ long arch_ptrace(struct task_struct *child, long request, break; #endif case FPC_CSR: - child->thread.fpu.fcr31 = data; + child->thread.fpu.fcr31 = data & ~FPU_CSR_ALL_X; break; case DSP_BASE ... DSP_BASE + 5: { dspreg_t *dregs; From 33103cff2cb6a6f2753676ed9f82c4e3e7fe4e42 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 29 Jul 2014 14:54:40 +0800 Subject: [PATCH 21/74] MIPS: tlbex: Fix a missing statement for HUGETLB commit 8393c524a25609a30129e4a8975cf3b91f6c16a5 upstream. In commit 2c8c53e28f1 (MIPS: Optimize TLB handlers for Octeon CPUs) build_r4000_tlb_refill_handler() is modified. But it doesn't compatible with the original code in HUGETLB case. Because there is a copy & paste error and one line of code is missing. It is very easy to produce a bug with LTP's hugemmap05 test. Signed-off-by: Huacai Chen Signed-off-by: Binbin Zhou Cc: John Crispin Cc: Steven J. Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Patchwork: https://patchwork.linux-mips.org/patch/7496/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/tlbex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index afeef93f81a7..0e17e1352718 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1329,6 +1329,7 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) } #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT uasm_l_tlb_huge_update(&l, p); + UASM_i_LW(&p, K0, 0, K1); build_huge_update_entries(&p, htlb_info.huge_pte, K1); build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, htlb_info.restore_scratch); From f020cedd010180e759cb6cce817b8a60ea6311a1 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 16 Jul 2014 09:19:16 +0800 Subject: [PATCH 22/74] MIPS: Remove BUG_ON(!is_fpu_owner()) in do_ade() commit 2e5767a27337812f6850b3fa362419e2f085e5c3 upstream. In do_ade(), is_fpu_owner() isn't preempt-safe. For example, when an unaligned ldc1 is executed, do_cpu() is called and then FPU will be enabled (and TIF_USEDFPU will be set for the current process). Then, do_ade() is called because the access is unaligned. If the current process is preempted at this time, TIF_USEDFPU will be cleard. So when the process is scheduled again, BUG_ON(!is_fpu_owner()) is triggered. This small program can trigger this BUG in a preemptible kernel: int main (int argc, char *argv[]) { double u64[2]; while (1) { asm volatile ( ".set push \n\t" ".set noreorder \n\t" "ldc1 $f3, 4(%0) \n\t" ".set pop \n\t" ::"r"(u64): ); } return 0; } V2: Remove the BUG_ON() unconditionally due to Paul's suggestion. Signed-off-by: Huacai Chen Signed-off-by: Jie Chen Signed-off-by: Rui Wang Cc: John Crispin Cc: Steven J. Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/unaligned.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 203d8857070d..2c81265bcf46 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -604,7 +604,6 @@ static void emulate_load_store_insn(struct pt_regs *regs, case sdc1_op: die_if_kernel("Unaligned FP access in kernel code", regs); BUG_ON(!used_math()); - BUG_ON(!is_fpu_owner()); lose_fpu(1); /* Save FPU state for the emulator. */ res = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, From 887c148946c4cf2259355197672eeeb070cf8284 Mon Sep 17 00:00:00 2001 From: Alex Smith Date: Wed, 23 Jul 2014 14:40:08 +0100 Subject: [PATCH 23/74] MIPS: asm/reg.h: Make 32- and 64-bit definitions available at the same time commit bcec7c8da6b092b1ff3327fd83c2193adb12f684 upstream. Get rid of the WANT_COMPAT_REG_H test and instead define both the 32- and 64-bit register offset definitions at the same time with MIPS{32,64}_ prefixes, then define the existing EF_* names to the correct definitions for the kernel's bitness. This patch is a prerequisite of the following bug fix patch. Signed-off-by: Alex Smith Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/7451/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/reg.h | 250 ++++++++++++++++++++----------- arch/mips/kernel/binfmt_elfo32.c | 32 ++-- 2 files changed, 177 insertions(+), 105 deletions(-) diff --git a/arch/mips/include/asm/reg.h b/arch/mips/include/asm/reg.h index 910e71a12466..b8343ccbc989 100644 --- a/arch/mips/include/asm/reg.h +++ b/arch/mips/include/asm/reg.h @@ -12,116 +12,194 @@ #ifndef __ASM_MIPS_REG_H #define __ASM_MIPS_REG_H - -#if defined(CONFIG_32BIT) || defined(WANT_COMPAT_REG_H) - -#define EF_R0 6 -#define EF_R1 7 -#define EF_R2 8 -#define EF_R3 9 -#define EF_R4 10 -#define EF_R5 11 -#define EF_R6 12 -#define EF_R7 13 -#define EF_R8 14 -#define EF_R9 15 -#define EF_R10 16 -#define EF_R11 17 -#define EF_R12 18 -#define EF_R13 19 -#define EF_R14 20 -#define EF_R15 21 -#define EF_R16 22 -#define EF_R17 23 -#define EF_R18 24 -#define EF_R19 25 -#define EF_R20 26 -#define EF_R21 27 -#define EF_R22 28 -#define EF_R23 29 -#define EF_R24 30 -#define EF_R25 31 +#define MIPS32_EF_R0 6 +#define MIPS32_EF_R1 7 +#define MIPS32_EF_R2 8 +#define MIPS32_EF_R3 9 +#define MIPS32_EF_R4 10 +#define MIPS32_EF_R5 11 +#define MIPS32_EF_R6 12 +#define MIPS32_EF_R7 13 +#define MIPS32_EF_R8 14 +#define MIPS32_EF_R9 15 +#define MIPS32_EF_R10 16 +#define MIPS32_EF_R11 17 +#define MIPS32_EF_R12 18 +#define MIPS32_EF_R13 19 +#define MIPS32_EF_R14 20 +#define MIPS32_EF_R15 21 +#define MIPS32_EF_R16 22 +#define MIPS32_EF_R17 23 +#define MIPS32_EF_R18 24 +#define MIPS32_EF_R19 25 +#define MIPS32_EF_R20 26 +#define MIPS32_EF_R21 27 +#define MIPS32_EF_R22 28 +#define MIPS32_EF_R23 29 +#define MIPS32_EF_R24 30 +#define MIPS32_EF_R25 31 /* * k0/k1 unsaved */ -#define EF_R26 32 -#define EF_R27 33 +#define MIPS32_EF_R26 32 +#define MIPS32_EF_R27 33 -#define EF_R28 34 -#define EF_R29 35 -#define EF_R30 36 -#define EF_R31 37 +#define MIPS32_EF_R28 34 +#define MIPS32_EF_R29 35 +#define MIPS32_EF_R30 36 +#define MIPS32_EF_R31 37 /* * Saved special registers */ -#define EF_LO 38 -#define EF_HI 39 +#define MIPS32_EF_LO 38 +#define MIPS32_EF_HI 39 -#define EF_CP0_EPC 40 -#define EF_CP0_BADVADDR 41 -#define EF_CP0_STATUS 42 -#define EF_CP0_CAUSE 43 -#define EF_UNUSED0 44 +#define MIPS32_EF_CP0_EPC 40 +#define MIPS32_EF_CP0_BADVADDR 41 +#define MIPS32_EF_CP0_STATUS 42 +#define MIPS32_EF_CP0_CAUSE 43 +#define MIPS32_EF_UNUSED0 44 -#define EF_SIZE 180 +#define MIPS32_EF_SIZE 180 -#endif - -#if defined(CONFIG_64BIT) && !defined(WANT_COMPAT_REG_H) - -#define EF_R0 0 -#define EF_R1 1 -#define EF_R2 2 -#define EF_R3 3 -#define EF_R4 4 -#define EF_R5 5 -#define EF_R6 6 -#define EF_R7 7 -#define EF_R8 8 -#define EF_R9 9 -#define EF_R10 10 -#define EF_R11 11 -#define EF_R12 12 -#define EF_R13 13 -#define EF_R14 14 -#define EF_R15 15 -#define EF_R16 16 -#define EF_R17 17 -#define EF_R18 18 -#define EF_R19 19 -#define EF_R20 20 -#define EF_R21 21 -#define EF_R22 22 -#define EF_R23 23 -#define EF_R24 24 -#define EF_R25 25 +#define MIPS64_EF_R0 0 +#define MIPS64_EF_R1 1 +#define MIPS64_EF_R2 2 +#define MIPS64_EF_R3 3 +#define MIPS64_EF_R4 4 +#define MIPS64_EF_R5 5 +#define MIPS64_EF_R6 6 +#define MIPS64_EF_R7 7 +#define MIPS64_EF_R8 8 +#define MIPS64_EF_R9 9 +#define MIPS64_EF_R10 10 +#define MIPS64_EF_R11 11 +#define MIPS64_EF_R12 12 +#define MIPS64_EF_R13 13 +#define MIPS64_EF_R14 14 +#define MIPS64_EF_R15 15 +#define MIPS64_EF_R16 16 +#define MIPS64_EF_R17 17 +#define MIPS64_EF_R18 18 +#define MIPS64_EF_R19 19 +#define MIPS64_EF_R20 20 +#define MIPS64_EF_R21 21 +#define MIPS64_EF_R22 22 +#define MIPS64_EF_R23 23 +#define MIPS64_EF_R24 24 +#define MIPS64_EF_R25 25 /* * k0/k1 unsaved */ -#define EF_R26 26 -#define EF_R27 27 +#define MIPS64_EF_R26 26 +#define MIPS64_EF_R27 27 -#define EF_R28 28 -#define EF_R29 29 -#define EF_R30 30 -#define EF_R31 31 +#define MIPS64_EF_R28 28 +#define MIPS64_EF_R29 29 +#define MIPS64_EF_R30 30 +#define MIPS64_EF_R31 31 /* * Saved special registers */ -#define EF_LO 32 -#define EF_HI 33 +#define MIPS64_EF_LO 32 +#define MIPS64_EF_HI 33 -#define EF_CP0_EPC 34 -#define EF_CP0_BADVADDR 35 -#define EF_CP0_STATUS 36 -#define EF_CP0_CAUSE 37 +#define MIPS64_EF_CP0_EPC 34 +#define MIPS64_EF_CP0_BADVADDR 35 +#define MIPS64_EF_CP0_STATUS 36 +#define MIPS64_EF_CP0_CAUSE 37 -#define EF_SIZE 304 /* size in bytes */ +#define MIPS64_EF_SIZE 304 /* size in bytes */ + +#if defined(CONFIG_32BIT) + +#define EF_R0 MIPS32_EF_R0 +#define EF_R1 MIPS32_EF_R1 +#define EF_R2 MIPS32_EF_R2 +#define EF_R3 MIPS32_EF_R3 +#define EF_R4 MIPS32_EF_R4 +#define EF_R5 MIPS32_EF_R5 +#define EF_R6 MIPS32_EF_R6 +#define EF_R7 MIPS32_EF_R7 +#define EF_R8 MIPS32_EF_R8 +#define EF_R9 MIPS32_EF_R9 +#define EF_R10 MIPS32_EF_R10 +#define EF_R11 MIPS32_EF_R11 +#define EF_R12 MIPS32_EF_R12 +#define EF_R13 MIPS32_EF_R13 +#define EF_R14 MIPS32_EF_R14 +#define EF_R15 MIPS32_EF_R15 +#define EF_R16 MIPS32_EF_R16 +#define EF_R17 MIPS32_EF_R17 +#define EF_R18 MIPS32_EF_R18 +#define EF_R19 MIPS32_EF_R19 +#define EF_R20 MIPS32_EF_R20 +#define EF_R21 MIPS32_EF_R21 +#define EF_R22 MIPS32_EF_R22 +#define EF_R23 MIPS32_EF_R23 +#define EF_R24 MIPS32_EF_R24 +#define EF_R25 MIPS32_EF_R25 +#define EF_R26 MIPS32_EF_R26 +#define EF_R27 MIPS32_EF_R27 +#define EF_R28 MIPS32_EF_R28 +#define EF_R29 MIPS32_EF_R29 +#define EF_R30 MIPS32_EF_R30 +#define EF_R31 MIPS32_EF_R31 +#define EF_LO MIPS32_EF_LO +#define EF_HI MIPS32_EF_HI +#define EF_CP0_EPC MIPS32_EF_CP0_EPC +#define EF_CP0_BADVADDR MIPS32_EF_CP0_BADVADDR +#define EF_CP0_STATUS MIPS32_EF_CP0_STATUS +#define EF_CP0_CAUSE MIPS32_EF_CP0_CAUSE +#define EF_UNUSED0 MIPS32_EF_UNUSED0 +#define EF_SIZE MIPS32_EF_SIZE + +#elif defined(CONFIG_64BIT) + +#define EF_R0 MIPS64_EF_R0 +#define EF_R1 MIPS64_EF_R1 +#define EF_R2 MIPS64_EF_R2 +#define EF_R3 MIPS64_EF_R3 +#define EF_R4 MIPS64_EF_R4 +#define EF_R5 MIPS64_EF_R5 +#define EF_R6 MIPS64_EF_R6 +#define EF_R7 MIPS64_EF_R7 +#define EF_R8 MIPS64_EF_R8 +#define EF_R9 MIPS64_EF_R9 +#define EF_R10 MIPS64_EF_R10 +#define EF_R11 MIPS64_EF_R11 +#define EF_R12 MIPS64_EF_R12 +#define EF_R13 MIPS64_EF_R13 +#define EF_R14 MIPS64_EF_R14 +#define EF_R15 MIPS64_EF_R15 +#define EF_R16 MIPS64_EF_R16 +#define EF_R17 MIPS64_EF_R17 +#define EF_R18 MIPS64_EF_R18 +#define EF_R19 MIPS64_EF_R19 +#define EF_R20 MIPS64_EF_R20 +#define EF_R21 MIPS64_EF_R21 +#define EF_R22 MIPS64_EF_R22 +#define EF_R23 MIPS64_EF_R23 +#define EF_R24 MIPS64_EF_R24 +#define EF_R25 MIPS64_EF_R25 +#define EF_R26 MIPS64_EF_R26 +#define EF_R27 MIPS64_EF_R27 +#define EF_R28 MIPS64_EF_R28 +#define EF_R29 MIPS64_EF_R29 +#define EF_R30 MIPS64_EF_R30 +#define EF_R31 MIPS64_EF_R31 +#define EF_LO MIPS64_EF_LO +#define EF_HI MIPS64_EF_HI +#define EF_CP0_EPC MIPS64_EF_CP0_EPC +#define EF_CP0_BADVADDR MIPS64_EF_CP0_BADVADDR +#define EF_CP0_STATUS MIPS64_EF_CP0_STATUS +#define EF_CP0_CAUSE MIPS64_EF_CP0_CAUSE +#define EF_SIZE MIPS64_EF_SIZE #endif /* CONFIG_64BIT */ diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index 202e581e6096..7fdf1de0447f 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -58,12 +58,6 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #include -/* - * When this file is selected, we are definitely running a 64bit kernel. - * So using the right regs define in asm/reg.h - */ -#define WANT_COMPAT_REG_H - /* These MUST be defined before elf.h gets included */ extern void elf32_core_copy_regs(elf_gregset_t grp, struct pt_regs *regs); #define ELF_CORE_COPY_REGS(_dest, _regs) elf32_core_copy_regs(_dest, _regs); @@ -135,21 +129,21 @@ void elf32_core_copy_regs(elf_gregset_t grp, struct pt_regs *regs) { int i; - for (i = 0; i < EF_R0; i++) + for (i = 0; i < MIPS32_EF_R0; i++) grp[i] = 0; - grp[EF_R0] = 0; + grp[MIPS32_EF_R0] = 0; for (i = 1; i <= 31; i++) - grp[EF_R0 + i] = (elf_greg_t) regs->regs[i]; - grp[EF_R26] = 0; - grp[EF_R27] = 0; - grp[EF_LO] = (elf_greg_t) regs->lo; - grp[EF_HI] = (elf_greg_t) regs->hi; - grp[EF_CP0_EPC] = (elf_greg_t) regs->cp0_epc; - grp[EF_CP0_BADVADDR] = (elf_greg_t) regs->cp0_badvaddr; - grp[EF_CP0_STATUS] = (elf_greg_t) regs->cp0_status; - grp[EF_CP0_CAUSE] = (elf_greg_t) regs->cp0_cause; -#ifdef EF_UNUSED0 - grp[EF_UNUSED0] = 0; + grp[MIPS32_EF_R0 + i] = (elf_greg_t) regs->regs[i]; + grp[MIPS32_EF_R26] = 0; + grp[MIPS32_EF_R27] = 0; + grp[MIPS32_EF_LO] = (elf_greg_t) regs->lo; + grp[MIPS32_EF_HI] = (elf_greg_t) regs->hi; + grp[MIPS32_EF_CP0_EPC] = (elf_greg_t) regs->cp0_epc; + grp[MIPS32_EF_CP0_BADVADDR] = (elf_greg_t) regs->cp0_badvaddr; + grp[MIPS32_EF_CP0_STATUS] = (elf_greg_t) regs->cp0_status; + grp[MIPS32_EF_CP0_CAUSE] = (elf_greg_t) regs->cp0_cause; +#ifdef MIPS32_EF_UNUSED0 + grp[MIPS32_EF_UNUSED0] = 0; #endif } From 1b91a02f6af6554850568fb1d2716c1b27d1c503 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 29 May 2013 01:02:18 +0200 Subject: [PATCH 24/74] MIPS: Cleanup flags in syscall flags handlers. commit e7f3b48af7be9f8007a224663a5b91340626fed5 upstream. This will simplify further modifications. Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/thread_info.h | 2 ++ arch/mips/kernel/scall32-o32.S | 2 +- arch/mips/kernel/scall64-64.S | 2 +- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 895320e25662..cdea4f65b944 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -131,6 +131,8 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_FPUBOUND (1< yes diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 97a5909a61cf..be6627ead619 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -54,7 +54,7 @@ NESTED(handle_sys64, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting - li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 bnez t0, syscall_trace_entry diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index edcb6594e7b5..cab150789c8d 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -47,7 +47,7 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting - li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 bnez t0, n32_syscall_trace_entry diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 74f485d3c0ef..37605dc8eef7 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -81,7 +81,7 @@ NESTED(handle_sys, PT_SIZE, sp) PTR 4b, bad_stack .previous - li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 bnez t0, trace_a_syscall From 4fc5ea5e141b9874db74af31befd6a953972b7c0 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 22 Jan 2014 14:40:00 +0000 Subject: [PATCH 25/74] MIPS: asm: thread_info: Add _TIF_SECCOMP flag commit 137f7df8cead00688524c82360930845396b8a21 upstream. Add _TIF_SECCOMP flag to _TIF_WORK_SYSCALL_ENTRY to indicate that the system call needs to be checked against a seccomp filter. Signed-off-by: Markos Chandras Reviewed-by: Paul Burton Reviewed-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6405/ Signed-off-by: Ralf Baechle [bwh: Backported to 3.2: various other flags are not included in _TIF_WORK_SYSCALL_ENTRY] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index cdea4f65b944..e6e5d9162213 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -131,7 +131,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_FPUBOUND (1< Date: Tue, 22 Jul 2014 14:51:08 +0300 Subject: [PATCH 26/74] MIPS: OCTEON: make get_system_type() thread-safe commit 608308682addfdc7b8e2aee88f0e028331d88e4d upstream. get_system_type() is not thread-safe on OCTEON. It uses static data, also more dangerous issue is that it's calling cvmx_fuse_read_byte() every time without any synchronization. Currently it's possible to get processes stuck looping forever in kernel simply by launching multiple readers of /proc/cpuinfo: (while true; do cat /proc/cpuinfo > /dev/null; done) & (while true; do cat /proc/cpuinfo > /dev/null; done) & ... Fix by initializing the system type string only once during the early boot. Signed-off-by: Aaro Koskinen Reviewed-by: Markos Chandras Patchwork: http://patchwork.linux-mips.org/patch/7437/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/setup.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 2a75ff249e71..6430e7acb1eb 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -463,6 +463,18 @@ static void octeon_halt(void) octeon_kill_core(NULL); } +static char __read_mostly octeon_system_type[80]; + +static int __init init_octeon_system_type(void) +{ + snprintf(octeon_system_type, sizeof(octeon_system_type), "%s (%s)", + cvmx_board_type_to_string(octeon_bootinfo->board_type), + octeon_model_get_string(read_c0_prid())); + + return 0; +} +early_initcall(init_octeon_system_type); + /** * Handle all the error condition interrupts that might occur. * @@ -482,11 +494,7 @@ static irqreturn_t octeon_rlm_interrupt(int cpl, void *dev_id) */ const char *octeon_board_type_string(void) { - static char name[80]; - sprintf(name, "%s (%s)", - cvmx_board_type_to_string(octeon_bootinfo->board_type), - octeon_model_get_string(read_c0_prid())); - return name; + return octeon_system_type; } const char *get_system_type(void) From 4f91cb537d2f7fa700a2b6d86a2cc77d20ee2616 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 17 Sep 2013 12:44:31 +0200 Subject: [PATCH 27/74] MIPS: Fix accessing to per-cpu data when flushing the cache commit ff522058bd717506b2fa066fa564657f2b86477e upstream. This fixes the following issue BUG: using smp_processor_id() in preemptible [00000000] code: kjournald/1761 caller is blast_dcache32+0x30/0x254 Call Trace: [<8047f02c>] dump_stack+0x8/0x34 [<802e7e40>] debug_smp_processor_id+0xe0/0xf0 [<80114d94>] blast_dcache32+0x30/0x254 [<80118484>] r4k_dma_cache_wback_inv+0x200/0x288 [<80110ff0>] mips_dma_map_sg+0x108/0x180 [<80355098>] ide_dma_prepare+0xf0/0x1b8 [<8034eaa4>] do_rw_taskfile+0x1e8/0x33c [<8035951c>] ide_do_rw_disk+0x298/0x3e4 [<8034a3c4>] do_ide_request+0x2e0/0x704 [<802bb0dc>] __blk_run_queue+0x44/0x64 [<802be000>] queue_unplugged.isra.36+0x1c/0x54 [<802beb94>] blk_flush_plug_list+0x18c/0x24c [<802bec6c>] blk_finish_plug+0x18/0x48 [<8026554c>] journal_commit_transaction+0x3b8/0x151c [<80269648>] kjournald+0xec/0x238 [<8014ac00>] kthread+0xb8/0xc0 [<8010268c>] ret_from_kernel_thread+0x14/0x1c Caches in most systems are identical - but not always, so we can't avoid the use of smp_call_function() by just looking at the boot CPU's data, have to fiddle with preemption instead. Signed-off-by: Ralf Baechle Cc: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5835 Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/c-r4k.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 21813beec7a5..5495101d32c8 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -601,6 +602,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -621,6 +623,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_dcache_range(addr, addr + size); } + preempt_enable(); bc_wback_inv(addr, size); __sync(); @@ -631,6 +634,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -655,6 +659,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_inv_dcache_range(addr, addr + size); } + preempt_enable(); bc_inv(addr, size); __sync(); From af7b15c9d60584ad34b2ac1641953229ac6d1ba8 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Sun, 19 Feb 2012 17:36:53 +0100 Subject: [PATCH 28/74] openrisc: Rework signal handling commit 10f67dbf6add97751050f294d4c8e0cc1e5c2c23 upstream. The mainline signal handling code for OpenRISC has been buggy since day one with respect to syscall restart. This patch significantly reworks the signal handling code: i) Move the "work pending" loop to C code (borrowed from ARM arch) ii) Allow a tracer to muck about with the IP and skip syscall restart in that case (again, borrowed from ARM) iii) Make signal handling WRT syscall restart actually work v) Make the signal handling code look more like that of other architectures so that it's easier for others to follow Reported-by: Anders Nystrom Signed-off-by: Jonas Bonn Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/kernel/entry.S | 63 ++++++----- arch/openrisc/kernel/signal.c | 202 ++++++++++++++++++---------------- 2 files changed, 143 insertions(+), 122 deletions(-) diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index d8a455ede5a7..fec8bf97d806 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -853,38 +853,45 @@ UNHANDLED_EXCEPTION(_vector_0x1f00,0x1f00) /* ========================================================[ return ] === */ -_work_pending: - /* - * if (current_thread_info->flags & _TIF_NEED_RESCHED) - * schedule(); - */ - l.lwz r5,TI_FLAGS(r10) - l.andi r3,r5,_TIF_NEED_RESCHED - l.sfnei r3,0 - l.bnf _work_notifysig - l.nop - l.jal schedule - l.nop - l.j _resume_userspace - l.nop - -/* Handle pending signals and notify-resume requests. - * do_notify_resume must be passed the latest pushed pt_regs, not - * necessarily the "userspace" ones. Also, pt_regs->syscallno - * must be set so that the syscall restart functionality works. - */ -_work_notifysig: - l.jal do_notify_resume - l.ori r3,r1,0 /* pt_regs */ - _resume_userspace: DISABLE_INTERRUPTS(r3,r4) - l.lwz r3,TI_FLAGS(r10) - l.andi r3,r3,_TIF_WORK_MASK - l.sfnei r3,0 - l.bf _work_pending + l.lwz r4,TI_FLAGS(r10) + l.andi r13,r4,_TIF_WORK_MASK + l.sfeqi r13,0 + l.bf _restore_all l.nop +_work_pending: + l.lwz r5,PT_ORIG_GPR11(r1) + l.sfltsi r5,0 + l.bnf 1f + l.nop + l.andi r5,r5,0 +1: + l.jal do_work_pending + l.ori r3,r1,0 /* pt_regs */ + + l.sfeqi r11,0 + l.bf _restore_all + l.nop + l.sfltsi r11,0 + l.bnf 1f + l.nop + l.and r11,r11,r0 + l.ori r11,r11,__NR_restart_syscall + l.j _syscall_check_trace_enter + l.nop +1: + l.lwz r11,PT_ORIG_GPR11(r1) + /* Restore arg registers */ + l.lwz r3,PT_GPR3(r1) + l.lwz r4,PT_GPR4(r1) + l.lwz r5,PT_GPR5(r1) + l.lwz r6,PT_GPR6(r1) + l.lwz r7,PT_GPR7(r1) + l.j _syscall_check_trace_enter + l.lwz r8,PT_GPR8(r1) + _restore_all: RESTORE_ALL /* This returns to userspace code */ diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index ae167f7e081a..c277ec82783d 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -28,24 +28,24 @@ #include #include +#include #include #include #define DEBUG_SIG 0 struct rt_sigframe { - struct siginfo *pinfo; - void *puc; struct siginfo info; struct ucontext uc; unsigned char retcode[16]; /* trampoline code */ }; -static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc) { - unsigned int err = 0; + int err = 0; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; /* @@ -53,25 +53,21 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) * (sc is already checked for VERIFY_READ since the sigframe was * checked in sys_sigreturn previously) */ - if (__copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long))) - goto badframe; - if (__copy_from_user(®s->pc, &sc->regs.pc, sizeof(unsigned long))) - goto badframe; - if (__copy_from_user(®s->sr, &sc->regs.sr, sizeof(unsigned long))) - goto badframe; + err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long)); + err |= __copy_from_user(®s->pc, &sc->regs.pc, sizeof(unsigned long)); + err |= __copy_from_user(®s->sr, &sc->regs.sr, sizeof(unsigned long)); /* make sure the SM-bit is cleared so user-mode cannot fool us */ regs->sr &= ~SPR_SR_SM; + regs->orig_gpr11 = -1; /* Avoid syscall restart checks */ + /* TODO: the other ports use regs->orig_XX to disable syscall checks * after this completes, but we don't use that mechanism. maybe we can * use it now ? */ return err; - -badframe: - return 1; } asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) @@ -111,21 +107,18 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) * Set up a signal frame. */ -static int setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, - unsigned long mask) +static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { int err = 0; /* copy the regs */ - + /* There should be no need to save callee-saved registers here... + * ...but we save them anyway. Revisit this + */ err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long)); err |= __copy_to_user(&sc->regs.pc, ®s->pc, sizeof(unsigned long)); err |= __copy_to_user(&sc->regs.sr, ®s->sr, sizeof(unsigned long)); - /* then some other stuff */ - - err |= __put_user(mask, &sc->oldmask); - return err; } @@ -181,24 +174,18 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int err = 0; frame = get_sigframe(ka, regs, sizeof(*frame)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - + /* Create siginfo. */ if (ka->sa.sa_flags & SA_SIGINFO) err |= copy_siginfo_to_user(&frame->info, info); - if (err) - goto give_sigsegv; - /* Clear all the bits of the ucontext we don't use. */ - err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext)); + /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(NULL, &frame->uc.uc_link); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]); + err |= setup_sigcontext(regs, &frame->uc.uc_mcontext); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -207,9 +194,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* trampoline - the desired return ip is the retcode itself */ return_ip = (unsigned long)&frame->retcode; - /* This is l.ori r11,r0,__NR_sigreturn, l.sys 1 */ - err |= __put_user(0xa960, (short *)(frame->retcode + 0)); - err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); + /* This is: + l.ori r11,r0,__NR_sigreturn + l.sys 1 + */ + err |= __put_user(0xa960, (short *)(frame->retcode + 0)); + err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4)); err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8)); @@ -262,82 +252,106 @@ handle_signal(unsigned long sig, * mode below. */ -void do_signal(struct pt_regs *regs) +int do_signal(struct pt_regs *regs, int syscall) { siginfo_t info; int signr; struct k_sigaction ka; + unsigned long continue_addr = 0; + unsigned long restart_addr = 0; + unsigned long retval = 0; + int restart = 0; + + if (syscall) { + continue_addr = regs->pc; + restart_addr = continue_addr - 4; + retval = regs->gpr[11]; + + /* + * Setup syscall restart here so that a debugger will + * see the already changed PC. + */ + switch (retval) { + case -ERESTART_RESTARTBLOCK: + restart = -2; + /* Fall through */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + restart++; + regs->gpr[11] = regs->orig_gpr11; + regs->pc = restart_addr; + break; + } + } /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. + * Get the signal to deliver. When running under ptrace, at this + * point the debugger may change all our registers ... */ - if (!user_mode(regs)) - return; - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - - /* If we are coming out of a syscall then we need - * to check if the syscall was interrupted and wants to be - * restarted after handling the signal. If so, the original - * syscall number is put back into r11 and the PC rewound to - * point at the l.sys instruction that resulted in the - * original syscall. Syscall results other than the four - * below mean that the syscall executed to completion and no - * restart is necessary. + /* + * Depending on the signal settings we may need to revert the + * decision to restart the system call. But skip this if a + * debugger has chosen to restart at a different PC. */ - if (regs->orig_gpr11) { - int restart = 0; - - switch (regs->gpr[11]) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - /* Restart if there is no signal handler */ - restart = (signr <= 0); - break; - case -ERESTARTSYS: - /* Restart if there no signal handler or - * SA_RESTART flag is set */ - restart = (signr <= 0 || (ka.sa.sa_flags & SA_RESTART)); - break; - case -ERESTARTNOINTR: - /* Always restart */ - restart = 1; - break; + if (signr > 0) { + if (unlikely(restart) && regs->pc == restart_addr) { + if (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK + || (retval == -ERESTARTSYS + && !(ka.sa.sa_flags & SA_RESTART))) { + /* No automatic restart */ + regs->gpr[11] = -EINTR; + regs->pc = continue_addr; + } } - if (restart) { - if (regs->gpr[11] == -ERESTART_RESTARTBLOCK) - regs->gpr[11] = __NR_restart_syscall; - else - regs->gpr[11] = regs->orig_gpr11; - regs->pc -= 4; - } else { - regs->gpr[11] = -EINTR; - } - } - - if (signr <= 0) { - /* no signal to deliver so we just put the saved sigmask - * back */ - restore_saved_sigmask(); - } else { /* signr > 0 */ - /* Whee! Actually deliver the signal. */ handle_signal(signr, &info, &ka, regs); + } else { + /* no handler */ + restore_saved_sigmask(); + /* + * Restore pt_regs PC as syscall restart will be handled by + * kernel without return to userspace + */ + if (unlikely(restart) && regs->pc == restart_addr) { + regs->pc = continue_addr; + return restart; + } } - return; + return 0; } -asmlinkage void do_notify_resume(struct pt_regs *regs) +asmlinkage int +do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { - if (current_thread_info()->flags & _TIF_SIGPENDING) - do_signal(regs); - - if (current_thread_info()->flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } + do { + if (likely(thread_flags & _TIF_NEED_RESCHED)) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) + return 0; + local_irq_enable(); + if (thread_flags & _TIF_SIGPENDING) { + int restart = do_signal(regs, syscall); + if (unlikely(restart)) { + /* + * Restart without handlers. + * Deal with it without leaving + * the kernel space. + */ + return restart; + } + syscall = 0; + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + } + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); + return 0; } From 24be9aa61fa39fb973d5f7079a382524e459972c Mon Sep 17 00:00:00 2001 From: Qiao Zhou Date: Wed, 4 Jun 2014 19:42:06 +0800 Subject: [PATCH 29/74] ASoC: pcm: fix dpcm_path_put in dpcm runtime update commit 7ed9de76ff342cbd717a9cf897044b99272cb8f8 upstream. we need to release dapm widget list after dpcm_path_get in soc_dpcm_runtime_update. otherwise, there will be potential memory leak. add dpcm_path_put to fix it. Signed-off-by: Qiao Zhou Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index ccb6be4d658d..02d26915b61d 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1886,6 +1886,7 @@ int soc_dpcm_runtime_update(struct snd_soc_dapm_widget *widget) dpcm_be_disconnect(fe, SNDRV_PCM_STREAM_PLAYBACK); } + dpcm_path_put(&list); capture: /* skip if FE doesn't have capture capability */ if (!fe->cpu_dai->driver->capture.channels_min) From f3327152958aeab0c3dc9ae115e171141eabe2ab Mon Sep 17 00:00:00 2001 From: Praveen Diwakar Date: Fri, 4 Jul 2014 11:17:41 +0530 Subject: [PATCH 30/74] ASoC: wm_adsp: Add missing MODULE_LICENSE commit 0a37c6efec4a2fdc2563c5a8faa472b814deee80 upstream. Since MODULE_LICENSE is missing the module load fails, so add this for module. Signed-off-by: Praveen Diwakar Signed-off-by: Vinod Koul Reviewed-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm_adsp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 6dbb17d050c9..ca1e999026e5 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1284,3 +1284,5 @@ int wm_adsp2_init(struct wm_adsp *adsp, bool dvfs) return 0; } EXPORT_SYMBOL_GPL(wm_adsp2_init); + +MODULE_LICENSE("GPL v2"); From ef49cea359559aa9065226ca331c11ddc6a5327f Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 4 Jul 2014 16:05:45 +0200 Subject: [PATCH 31/74] ASoC: samsung: Correct I2S DAI suspend/resume ops commit d3d4e5247b013008a39e4d5f69ce4c60ed57f997 upstream. We should save/restore relevant I2S registers regardless of the dai->active flag, otherwise some settings are being lost after system suspend/resume cycle. E.g. I2S slave mode set only during dai initialization is not preserved and the device ends up in master mode after system resume. Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/samsung/i2s.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 82ebb1a51479..5c9b5e4f94c3 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -853,11 +853,9 @@ static int i2s_suspend(struct snd_soc_dai *dai) { struct i2s_dai *i2s = to_info(dai); - if (dai->active) { - i2s->suspend_i2smod = readl(i2s->addr + I2SMOD); - i2s->suspend_i2scon = readl(i2s->addr + I2SCON); - i2s->suspend_i2spsr = readl(i2s->addr + I2SPSR); - } + i2s->suspend_i2smod = readl(i2s->addr + I2SMOD); + i2s->suspend_i2scon = readl(i2s->addr + I2SCON); + i2s->suspend_i2spsr = readl(i2s->addr + I2SPSR); return 0; } @@ -866,11 +864,9 @@ static int i2s_resume(struct snd_soc_dai *dai) { struct i2s_dai *i2s = to_info(dai); - if (dai->active) { - writel(i2s->suspend_i2scon, i2s->addr + I2SCON); - writel(i2s->suspend_i2smod, i2s->addr + I2SMOD); - writel(i2s->suspend_i2spsr, i2s->addr + I2SPSR); - } + writel(i2s->suspend_i2scon, i2s->addr + I2SCON); + writel(i2s->suspend_i2smod, i2s->addr + I2SMOD); + writel(i2s->suspend_i2spsr, i2s->addr + I2SPSR); return 0; } From f084c9428b1fa9a7df201a312fe708a286252240 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 19 Jun 2014 09:32:05 +0300 Subject: [PATCH 32/74] ASoC: max98090: Fix missing free_irq commit 4adeb0ccf86a5af1825bbfe290dee9e60a5ab870 upstream. max98090.c doesn't free the threaded interrupt it requests. This causes an oops when doing "cat /proc/interrupts" after snd-soc-max98090.ko is unloaded. Fix this by requesting the interrupt by using devm_request_threaded_irq(). Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/max98090.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 9b7746c9546f..76bfeb3c3e30 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -2234,7 +2234,7 @@ static int max98090_probe(struct snd_soc_codec *codec) /* Register for interrupts */ dev_dbg(codec->dev, "irq = %d\n", max98090->irq); - ret = request_threaded_irq(max98090->irq, NULL, + ret = devm_request_threaded_irq(codec->dev, max98090->irq, NULL, max98090_interrupt, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "max98090_interrupt", codec); if (ret < 0) { From f4d475ab09e057063a8a399b6ebb4c42bc22dd07 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 13 Aug 2014 21:51:06 +0200 Subject: [PATCH 33/74] ASoC: pxa-ssp: drop SNDRV_PCM_FMTBIT_S24_LE commit 9301503af016eb537ccce76adec0c1bb5c84871e upstream. This mode is unsupported, as the DMA controller can't do zero-padding of samples. Signed-off-by: Daniel Mack Reported-by: Johannes Stezenbach Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/pxa/pxa-ssp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index 6f4dd7543e82..95a9b07bbe96 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -757,9 +757,7 @@ static int pxa_ssp_remove(struct snd_soc_dai *dai) SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_64000 | \ SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000) -#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE | \ - SNDRV_PCM_FMTBIT_S32_LE) +#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE) static const struct snd_soc_dai_ops pxa_ssp_dai_ops = { .startup = pxa_ssp_startup, From db065663add6a78c8054b11e41c30cd045316437 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 8 Jun 2014 23:33:25 +0100 Subject: [PATCH 34/74] bfa: Fix undefined bit shift on big-endian architectures with 32-bit DMA address commit 03a6c3ff3282ee9fa893089304d951e0be93a144 upstream. bfa_swap_words() shifts its argument (assumed to be 64-bit) by 32 bits each way. In two places the argument type is dma_addr_t, which may be 32-bit, in which case the effect of the bit shift is undefined: drivers/scsi/bfa/bfa_fcpim.c: In function 'bfa_ioim_send_ioreq': drivers/scsi/bfa/bfa_fcpim.c:2497:4: warning: left shift count >= width of type [enabled by default] addr = bfa_sgaddr_le(sg_dma_address(sg)); ^ drivers/scsi/bfa/bfa_fcpim.c:2497:4: warning: right shift count >= width of type [enabled by default] drivers/scsi/bfa/bfa_fcpim.c:2509:4: warning: left shift count >= width of type [enabled by default] addr = bfa_sgaddr_le(sg_dma_address(sg)); ^ drivers/scsi/bfa/bfa_fcpim.c:2509:4: warning: right shift count >= width of type [enabled by default] Avoid this by adding casts to u64 in bfa_swap_words(). Compile-tested only. Signed-off-by: Ben Hutchings Reviewed-by: Martin K. Petersen Acked-by: Anil Gurumurthy Fixes: f16a17507b09 ('[SCSI] bfa: remove all OS wrappers') Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bfa/bfa_ioc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bfa/bfa_ioc.h b/drivers/scsi/bfa/bfa_ioc.h index 23a90e7b7107..a119421cb324 100644 --- a/drivers/scsi/bfa/bfa_ioc.h +++ b/drivers/scsi/bfa/bfa_ioc.h @@ -72,7 +72,7 @@ struct bfa_sge_s { } while (0) #define bfa_swap_words(_x) ( \ - ((_x) << 32) | ((_x) >> 32)) + ((u64)(_x) << 32) | ((u64)(_x) >> 32)) #ifdef __BIG_ENDIAN #define bfa_sge_to_be(_x) From b3e98f0c4f996cd53b80bad71f0d7e4a2cf3a4e8 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 8 Jul 2014 10:05:52 +0800 Subject: [PATCH 35/74] ACPICA: Utilities: Fix memory leak in acpi_ut_copy_iobject_to_iobject commit 8aa5e56eeb61a099ea6519eb30ee399e1bc043ce upstream. Adds return status check on copy routines to delete the allocated destination object if either copy fails. Reported by Colin Ian King on bugs.acpica.org, Bug 1087. The last applicable commit: Commit: 3371c19c294a4cb3649aa4e84606be8a1d999e61 Subject: ACPICA: Remove ACPI_GET_OBJECT_TYPE macro Link: https://bugs.acpica.org/show_bug.cgi?id=1087 Reported-by: Colin Ian King Signed-off-by: David E. Box Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/utcopy.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c index e4c9291fc0a3..a63a4cdd2ce8 100644 --- a/drivers/acpi/acpica/utcopy.c +++ b/drivers/acpi/acpica/utcopy.c @@ -998,5 +998,11 @@ acpi_ut_copy_iobject_to_iobject(union acpi_operand_object *source_desc, status = acpi_ut_copy_simple_object(source_desc, *dest_desc); } + /* Delete the allocated object if copy failed */ + + if (ACPI_FAILURE(status)) { + acpi_ut_remove_reference(*dest_desc); + } + return_ACPI_STATUS(status); } From c55d35d2b8fc84218785cf12dd3e895bc83af66d Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 26 Aug 2014 01:29:24 +0200 Subject: [PATCH 36/74] ACPI: Run fixed event device notifications in process context commit 236105db632c6279a020f78c83e22eaef746006b upstream. Currently, notify callbacks for fixed button events are run from interrupt context. That is not necessary and after commit 0bf6368ee8f2 (ACPI / button: Add ACPI Button event via netlink routine) it causes netlink routines to be called from interrupt context which is not correct. Also, that is different from non-fixed device events (including non-fixed button events) whose notify callbacks are all executed from process context. For the above reasons, make fixed button device notify callbacks run in process context which will avoid the deadlock when using netlink to report button events to user space. Fixes: 0bf6368ee8f2 (ACPI / button: Add ACPI Button event via netlink routine) Link: https://lkml.org/lkml/2014/8/21/606 Reported-by: Benjamin Block Reported-by: Knut Petersen Signed-off-by: Lan Tianyu [rjw: Function names, subject and changelog.] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/scan.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index cca761e80d89..091682fb1617 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -769,12 +769,17 @@ static void acpi_device_notify(acpi_handle handle, u32 event, void *data) device->driver->ops.notify(device, event); } -static acpi_status acpi_device_notify_fixed(void *data) +static void acpi_device_notify_fixed(void *data) { struct acpi_device *device = data; /* Fixed hardware devices have no handles */ acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device); +} + +static acpi_status acpi_device_fixed_event(void *data) +{ + acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_device_notify_fixed, data); return AE_OK; } @@ -785,12 +790,12 @@ static int acpi_device_install_notify_handler(struct acpi_device *device) if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) status = acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, - acpi_device_notify_fixed, + acpi_device_fixed_event, device); else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) status = acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, - acpi_device_notify_fixed, + acpi_device_fixed_event, device); else status = acpi_install_notify_handler(device->handle, @@ -807,10 +812,10 @@ static void acpi_device_remove_notify_handler(struct acpi_device *device) { if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, - acpi_device_notify_fixed); + acpi_device_fixed_event); else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, - acpi_device_notify_fixed); + acpi_device_fixed_event); else acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, acpi_device_notify); From 4f6a1e6210f5aeed2832d69103fc6511c0ca7c2d Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 3 Sep 2014 15:04:28 +0200 Subject: [PATCH 37/74] ACPI / cpuidle: fix deadlock between cpuidle_lock and cpu_hotplug.lock commit 6726655dfdd2dc60c035c690d9f10cb69d7ea075 upstream. There is a following AB-BA dependency between cpu_hotplug.lock and cpuidle_lock: 1) cpu_hotplug.lock -> cpuidle_lock enable_nonboot_cpus() _cpu_up() cpu_hotplug_begin() LOCK(cpu_hotplug.lock) cpu_notify() ... acpi_processor_hotplug() cpuidle_pause_and_lock() LOCK(cpuidle_lock) 2) cpuidle_lock -> cpu_hotplug.lock acpi_os_execute_deferred() workqueue ... acpi_processor_cst_has_changed() cpuidle_pause_and_lock() LOCK(cpuidle_lock) get_online_cpus() LOCK(cpu_hotplug.lock) Fix this by reversing the order acpi_processor_cst_has_changed() does thigs -- let it first execute the protection against CPU hotplug by calling get_online_cpus() and obtain the cpuidle lock only after that (and perform the symmentric change when allowing CPUs hotplug again and dropping cpuidle lock). Spotted by lockdep. Signed-off-by: Jiri Kosina Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/processor_idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 4056d3175178..a88894190e41 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1101,9 +1101,9 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) { - cpuidle_pause_and_lock(); /* Protect against cpu-hotplug */ get_online_cpus(); + cpuidle_pause_and_lock(); /* Disable all cpuidle devices */ for_each_online_cpu(cpu) { @@ -1130,8 +1130,8 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) cpuidle_enable_device(dev); } } - put_online_cpus(); cpuidle_resume_and_unlock(); + put_online_cpus(); } return 0; From 814aa5addf612498365a99dc844265eed232a700 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 6 Aug 2014 14:11:33 -0400 Subject: [PATCH 38/74] ring-buffer: Always reset iterator to reader page commit 651e22f2701b4113989237c3048d17337dd2185c upstream. When performing a consuming read, the ring buffer swaps out a page from the ring buffer with a empty page and this page that was swapped out becomes the new reader page. The reader page is owned by the reader and since it was swapped out of the ring buffer, writers do not have access to it (there's an exception to that rule, but it's out of scope for this commit). When reading the "trace" file, it is a non consuming read, which means that the data in the ring buffer will not be modified. When the trace file is opened, a ring buffer iterator is allocated and writes to the ring buffer are disabled, such that the iterator will not have issues iterating over the data. Although the ring buffer disabled writes, it does not disable other reads, or even consuming reads. If a consuming read happens, then the iterator is reset and starts reading from the beginning again. My tests would sometimes trigger this bug on my i386 box: WARNING: CPU: 0 PID: 5175 at kernel/trace/trace.c:1527 __trace_find_cmdline+0x66/0xaa() Modules linked in: CPU: 0 PID: 5175 Comm: grep Not tainted 3.16.0-rc3-test+ #8 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 00000000 00000000 f09c9e1c c18796b3 c1b5d74c f09c9e4c c103a0e3 c1b5154b f09c9e78 00001437 c1b5d74c 000005f7 c10bd85a c10bd85a c1cac57c f09c9eb0 ed0e0000 f09c9e64 c103a185 00000009 f09c9e5c c1b5154b f09c9e78 f09c9e80^M Call Trace: [] dump_stack+0x4b/0x75 [] warn_slowpath_common+0x7e/0x95 [] ? __trace_find_cmdline+0x66/0xaa [] ? __trace_find_cmdline+0x66/0xaa [] warn_slowpath_fmt+0x33/0x35 [] __trace_find_cmdline+0x66/0xaa^M [] trace_find_cmdline+0x40/0x64 [] trace_print_context+0x27/0xec [] ? trace_seq_printf+0x37/0x5b [] print_trace_line+0x319/0x39b [] ? ring_buffer_read+0x47/0x50 [] s_show+0x192/0x1ab [] ? s_next+0x5a/0x7c [] seq_read+0x267/0x34c [] vfs_read+0x8c/0xef [] ? seq_lseek+0x154/0x154 [] SyS_read+0x54/0x7f [] syscall_call+0x7/0xb ---[ end trace 3f507febd6b4cc83 ]--- >>>> ##### CPU 1 buffer started #### Which was the __trace_find_cmdline() function complaining about the pid in the event record being negative. After adding more test cases, this would trigger more often. Strangely enough, it would never trigger on a single test, but instead would trigger only when running all the tests. I believe that was the case because it required one of the tests to be shutting down via delayed instances while a new test started up. After spending several days debugging this, I found that it was caused by the iterator becoming corrupted. Debugging further, I found out why the iterator became corrupted. It happened with the rb_iter_reset(). As consuming reads may not read the full reader page, and only part of it, there's a "read" field to know where the last read took place. The iterator, must also start at the read position. In the rb_iter_reset() code, if the reader page was disconnected from the ring buffer, the iterator would start at the head page within the ring buffer (where writes still happen). But the mistake there was that it still used the "read" field to start the iterator on the head page, where it should always start at zero because readers never read from within the ring buffer where writes occur. I originally wrote a patch to have it set the iter->head to 0 instead of iter->head_page->read, but then I questioned why it wasn't always setting the iter to point to the reader page, as the reader page is still valid. The list_empty(reader_page->list) just means that it was successful in swapping out. But the reader_page may still have data. There was a bug report a long time ago that was not reproducible that had something about trace_pipe (consuming read) not matching trace (iterator read). This may explain why that happened. Anyway, the correct answer to this bug is to always use the reader page an not reset the iterator to inside the writable ring buffer. Fixes: d769041f8653 "ring_buffer: implement new locking" Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 4063d5fe5e44..933d1838488b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3353,21 +3353,16 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; /* Iterator usage is expected to have record disabled */ - if (list_empty(&cpu_buffer->reader_page->list)) { - iter->head_page = rb_set_head_page(cpu_buffer); - if (unlikely(!iter->head_page)) - return; - iter->head = iter->head_page->read; - } else { - iter->head_page = cpu_buffer->reader_page; - iter->head = cpu_buffer->reader_page->read; - } + iter->head_page = cpu_buffer->reader_page; + iter->head = cpu_buffer->reader_page->read; + + iter->cache_reader_page = iter->head_page; + iter->cache_read = iter->head; + if (iter->head) iter->read_stamp = cpu_buffer->read_stamp; else iter->read_stamp = iter->head_page->page->time_stamp; - iter->cache_reader_page = cpu_buffer->reader_page; - iter->cache_read = cpu_buffer->read; } /** From 7f70b62ea0f72edae160ddf540723991b84c8279 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 6 Aug 2014 15:36:31 -0400 Subject: [PATCH 39/74] ring-buffer: Up rb_iter_peek() loop count to 3 commit 021de3d904b88b1771a3a2cfc5b75023c391e646 upstream. After writting a test to try to trigger the bug that caused the ring buffer iterator to become corrupted, I hit another bug: WARNING: CPU: 1 PID: 5281 at kernel/trace/ring_buffer.c:3766 rb_iter_peek+0x113/0x238() Modules linked in: ipt_MASQUERADE sunrpc [...] CPU: 1 PID: 5281 Comm: grep Tainted: G W 3.16.0-rc3-test+ #143 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 0000000000000000 ffffffff81809a80 ffffffff81503fb0 0000000000000000 ffffffff81040ca1 ffff8800796d6010 ffffffff810c138d ffff8800796d6010 ffff880077438c80 ffff8800796d6010 ffff88007abbe600 0000000000000003 Call Trace: [] ? dump_stack+0x4a/0x75 [] ? warn_slowpath_common+0x7e/0x97 [] ? rb_iter_peek+0x113/0x238 [] ? rb_iter_peek+0x113/0x238 [] ? ring_buffer_iter_peek+0x2d/0x5c [] ? tracing_iter_reset+0x6e/0x96 [] ? s_start+0xd7/0x17b [] ? kmem_cache_alloc_trace+0xda/0xea [] ? seq_read+0x148/0x361 [] ? vfs_read+0x93/0xf1 [] ? SyS_read+0x60/0x8e [] ? tracesys+0xdd/0xe2 Debugging this bug, which triggers when the rb_iter_peek() loops too many times (more than 2 times), I discovered there's a case that can cause that function to legitimately loop 3 times! rb_iter_peek() is different than rb_buffer_peek() as the rb_buffer_peek() only deals with the reader page (it's for consuming reads). The rb_iter_peek() is for traversing the buffer without consuming it, and as such, it can loop for one more reason. That is, if we hit the end of the reader page or any page, it will go to the next page and try again. That is, we have this: 1. iter->head > iter->head_page->page->commit (rb_inc_iter() which moves the iter to the next page) try again 2. event = rb_iter_head_event() event->type_len == RINGBUF_TYPE_TIME_EXTEND rb_advance_iter() try again 3. read the event. But we never get to 3, because the count is greater than 2 and we cause the WARNING and return NULL. Up the counter to 3. Fixes: 69d1b839f7ee "ring-buffer: Bind time extend and data events together" Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 933d1838488b..5efbc122e5ce 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1980,7 +1980,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) /** * rb_update_event - update event type and data - * @event: the even to update + * @event: the event to update * @type: the type of event * @length: the size of the event field in the ring buffer * @@ -3755,12 +3755,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) return NULL; /* - * We repeat when a time extend is encountered. - * Since the time extend is always attached to a data event, - * we should never loop more than once. - * (We never hit the following condition more than twice). + * We repeat when a time extend is encountered or we hit + * the end of the page. Since the time extend is always attached + * to a data event, we should never loop more than three times. + * Once for going to next page, once on time extend, and + * finally once to get the event. + * (We never hit the following condition more than thrice). */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) return NULL; if (rb_per_cpu_empty(cpu_buffer)) From 8c30f22757c97041750fddce8ea11c6d7231574a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jul 2014 16:26:53 -0700 Subject: [PATCH 40/74] mnt: Only change user settable mount flags in remount commit a6138db815df5ee542d848318e5dae681590fccd upstream. Kenton Varda discovered that by remounting a read-only bind mount read-only in a user namespace the MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user to the remount a read-only mount read-write. Correct this by replacing the mask of mount flags to preserve with a mask of mount flags that may be changed, and preserve all others. This ensures that any future bugs with this mask and remount will fail in an easy to detect way where new mount flags simply won't change. Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 2 +- include/linux/mount.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index a45ba4f267fe..a438e4c81b0b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1805,7 +1805,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags, err = do_remount_sb(sb, flags, data, 0); if (!err) { br_write_lock(&vfsmount_lock); - mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; + mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; mnt->mnt.mnt_flags = mnt_flags; br_write_unlock(&vfsmount_lock); } diff --git a/include/linux/mount.h b/include/linux/mount.h index 73005f9957ea..16fc05d816d4 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -42,7 +42,9 @@ struct mnt_namespace; * flag, consider how it interacts with shared mounts. */ #define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ + | MNT_READONLY) #define MNT_INTERNAL 0x4000 From 81d4c13ebbdcb69b2d56b3bc5e626b1a881421cf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jul 2014 17:10:56 -0700 Subject: [PATCH 41/74] mnt: Move the test for MNT_LOCK_READONLY from change_mount_flags into do_remount commit 07b645589dcda8b7a5249e096fece2a67556f0f4 upstream. There are no races as locked mount flags are guaranteed to never change. Moving the test into do_remount makes it more visible, and ensures all filesystem remounts pass the MNT_LOCK_READONLY permission check. This second case is not an issue today as filesystem remounts are guarded by capable(CAP_DAC_ADMIN) and thus will always fail in less privileged mount namespaces, but it could become an issue in the future. Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index a438e4c81b0b..515cbff64c93 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1764,9 +1764,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) if (readonly_request == __mnt_is_readonly(mnt)) return 0; - if (mnt->mnt_flags & MNT_LOCK_READONLY) - return -EPERM; - if (readonly_request) error = mnt_make_readonly(real_mount(mnt)); else @@ -1792,6 +1789,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, if (path->dentry != path->mnt->mnt_root) return -EINVAL; + /* Don't allow changing of locked mnt flags. + * + * No locks need to be held here while testing the various + * MNT_LOCK flags because those flags can never be cleared + * once they are set. + */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(mnt_flags & MNT_READONLY)) { + return -EPERM; + } err = security_sb_remount(sb, data); if (err) return err; From 187985d9395c7c093e9a565c87c6547c16009ddf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jul 2014 17:26:07 -0700 Subject: [PATCH 42/74] mnt: Correct permission checks in do_remount commit 9566d6742852c527bf5af38af5cbb878dad75705 upstream. While invesgiating the issue where in "mount --bind -oremount,ro ..." would result in later "mount --bind -oremount,rw" succeeding even if the mount started off locked I realized that there are several additional mount flags that should be locked and are not. In particular MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, and the atime flags in addition to MNT_READONLY should all be locked. These flags are all per superblock, can all be changed with MS_BIND, and should not be changable if set by a more privileged user. The following additions to the current logic are added in this patch. - nosuid may not be clearable by a less privileged user. - nodev may not be clearable by a less privielged user. - noexec may not be clearable by a less privileged user. - atime flags may not be changeable by a less privileged user. The logic with atime is that always setting atime on access is a global policy and backup software and auditing software could break if atime bits are not updated (when they are configured to be updated), and serious performance degradation could result (DOS attack) if atime updates happen when they have been explicitly disabled. Therefore an unprivileged user should not be able to mess with the atime bits set by a more privileged user. The additional restrictions are implemented with the addition of MNT_LOCK_NOSUID, MNT_LOCK_NODEV, MNT_LOCK_NOEXEC, and MNT_LOCK_ATIME mnt flags. Taken together these changes and the fixes for MNT_LOCK_READONLY should make it safe for an unprivileged user to create a user namespace and to call "mount --bind -o remount,... ..." without the danger of mount flags being changed maliciously. Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 36 +++++++++++++++++++++++++++++++++--- include/linux/mount.h | 5 +++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 515cbff64c93..99748ff4065f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -828,8 +828,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; /* Don't allow unprivileged users to change mount flags */ - if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) - mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + if (flag & CL_UNPRIVILEGED) { + mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; + + if (mnt->mnt.mnt_flags & MNT_READONLY) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + + if (mnt->mnt.mnt_flags & MNT_NODEV) + mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; + + if (mnt->mnt.mnt_flags & MNT_NOSUID) + mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; + + if (mnt->mnt.mnt_flags & MNT_NOEXEC) + mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; + } atomic_inc(&sb->s_active); mnt->mnt.mnt_sb = sb; @@ -1799,6 +1812,23 @@ static int do_remount(struct path *path, int flags, int mnt_flags, !(mnt_flags & MNT_READONLY)) { return -EPERM; } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(mnt_flags & MNT_NODEV)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && + !(mnt_flags & MNT_NOSUID)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && + !(mnt_flags & MNT_NOEXEC)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { + return -EPERM; + } + err = security_sb_remount(sb, data); if (err) return err; @@ -1998,7 +2028,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, */ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { flags |= MS_NODEV; - mnt_flags |= MNT_NODEV; + mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } } diff --git a/include/linux/mount.h b/include/linux/mount.h index 16fc05d816d4..8eeb8f6ab110 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -46,9 +46,14 @@ struct mnt_namespace; | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ | MNT_READONLY) +#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL 0x4000 +#define MNT_LOCK_ATIME 0x040000 +#define MNT_LOCK_NOEXEC 0x080000 +#define MNT_LOCK_NOSUID 0x100000 +#define MNT_LOCK_NODEV 0x200000 #define MNT_LOCK_READONLY 0x400000 struct vfsmount { From 99dd97b843562853e01a134e8d5c13a87d156795 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jul 2014 17:36:04 -0700 Subject: [PATCH 43/74] mnt: Change the default remount atime from relatime to the existing value commit ffbc6f0ead47fa5a1dc9642b0331cb75c20a640e upstream. Since March 2009 the kernel has treated the state that if no MS_..ATIME flags are passed then the kernel defaults to relatime. Defaulting to relatime instead of the existing atime state during a remount is silly, and causes problems in practice for people who don't specify any MS_...ATIME flags and to get the default filesystem atime setting. Those users may encounter a permission error because the default atime setting does not work. A default that does not work and causes permission problems is ridiculous, so preserve the existing value to have a default atime setting that is always guaranteed to work. Using the default atime setting in this way is particularly interesting for applications built to run in restricted userspace environments without /proc mounted, as the existing atime mount options of a filesystem can not be read from /proc/mounts. In practice this fixes user space that uses the default atime setting on remount that are broken by the permission checks keeping less privileged users from changing more privileged users atime settings. Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 99748ff4065f..00409add4d96 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2346,6 +2346,14 @@ long do_mount(const char *dev_name, const char *dir_name, if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; + /* The default atime for remount is preservation */ + if ((flags & MS_REMOUNT) && + ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | + MS_STRICTATIME)) == 0)) { + mnt_flags &= ~MNT_ATIME_MASK; + mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; + } + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); From bbeed681a5d5f845fad2c097920ca8493f2419f6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 29 Jul 2014 15:50:44 -0700 Subject: [PATCH 44/74] mnt: Add tests for unprivileged remount cases that have found to be faulty commit db181ce011e3c033328608299cd6fac06ea50130 upstream. Kenton Varda discovered that by remounting a read-only bind mount read-only in a user namespace the MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user to the remount a read-only mount read-write. Upon review of the code in remount it was discovered that the code allowed nosuid, noexec, and nodev to be cleared. It was also discovered that the code was allowing the per mount atime flags to be changed. The first naive patch to fix these issues contained the flaw that using default atime settings when remounting a filesystem could be disallowed. To avoid this problems in the future add tests to ensure unprivileged remounts are succeeding and failing at the appropriate times. Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/mount/Makefile | 17 ++ .../mount/unprivileged-remount-test.c | 242 ++++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 tools/testing/selftests/mount/Makefile create mode 100644 tools/testing/selftests/mount/unprivileged-remount-test.c diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 0a63658065f0..2cee2b79b4de 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -4,6 +4,7 @@ TARGETS += efivarfs TARGETS += kcmp TARGETS += memory-hotplug TARGETS += mqueue +TARGETS += mount TARGETS += net TARGETS += ptrace TARGETS += vm diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile new file mode 100644 index 000000000000..337d853c2b72 --- /dev/null +++ b/tools/testing/selftests/mount/Makefile @@ -0,0 +1,17 @@ +# Makefile for mount selftests. + +all: unprivileged-remount-test + +unprivileged-remount-test: unprivileged-remount-test.c + gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test + +# Allow specific tests to be selected. +test_unprivileged_remount: unprivileged-remount-test + @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi + +run_tests: all test_unprivileged_remount + +clean: + rm -f unprivileged-remount-test + +.PHONY: all test_unprivileged_remount diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c new file mode 100644 index 000000000000..1b3ff2fda4d0 --- /dev/null +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -0,0 +1,242 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CLONE_NEWNS +# define CLONE_NEWNS 0x00020000 +#endif +#ifndef CLONE_NEWUTS +# define CLONE_NEWUTS 0x04000000 +#endif +#ifndef CLONE_NEWIPC +# define CLONE_NEWIPC 0x08000000 +#endif +#ifndef CLONE_NEWNET +# define CLONE_NEWNET 0x40000000 +#endif +#ifndef CLONE_NEWUSER +# define CLONE_NEWUSER 0x10000000 +#endif +#ifndef CLONE_NEWPID +# define CLONE_NEWPID 0x20000000 +#endif + +#ifndef MS_RELATIME +#define MS_RELATIME (1 << 21) +#endif +#ifndef MS_STRICTATIME +#define MS_STRICTATIME (1 << 24) +#endif + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void write_file(char *filename, char *fmt, ...) +{ + char buf[4096]; + int fd; + ssize_t written; + int buf_len; + va_list ap; + + va_start(ap, fmt); + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + if (buf_len < 0) { + die("vsnprintf failed: %s\n", + strerror(errno)); + } + if (buf_len >= sizeof(buf)) { + die("vsnprintf output truncated\n"); + } + + fd = open(filename, O_WRONLY); + if (fd < 0) { + die("open of %s failed: %s\n", + filename, strerror(errno)); + } + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + die("short write to %s\n", filename); + } else { + die("write to %s failed: %s\n", + filename, strerror(errno)); + } + } + if (close(fd) != 0) { + die("close of %s failed: %s\n", + filename, strerror(errno)); + } +} + +static void create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER) !=0) { + die("unshare(CLONE_NEWUSER) failed: %s\n", + strerror(errno)); + } + + write_file("/proc/self/uid_map", "0 %d 1", uid); + write_file("/proc/self/gid_map", "0 %d 1", gid); + + if (setgroups(0, NULL) != 0) { + die("setgroups failed: %s\n", + strerror(errno)); + } + if (setgid(0) != 0) { + die ("setgid(0) failed %s\n", + strerror(errno)); + } + if (setuid(0) != 0) { + die("setuid(0) failed %s\n", + strerror(errno)); + } +} + +static +bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) +{ + pid_t child; + + child = fork(); + if (child == -1) { + die("fork failed: %s\n", + strerror(errno)); + } + if (child != 0) { /* parent */ + pid_t pid; + int status; + pid = waitpid(child, &status, 0); + if (pid == -1) { + die("waitpid failed: %s\n", + strerror(errno)); + } + if (pid != child) { + die("waited for %d got %d\n", + child, pid); + } + if (!WIFEXITED(status)) { + die("child did not terminate cleanly\n"); + } + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + } + + create_and_enter_userns(); + if (unshare(CLONE_NEWNS) != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) { + die("mount of /tmp failed: %s\n", + strerror(errno)); + } + + create_and_enter_userns(); + + if (unshare(CLONE_NEWNS) != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + if (mount("/tmp", "/tmp", "none", + MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp failed: %s\n", + strerror(errno)); + } + + if (mount("/tmp", "/tmp", "none", + MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp with invalid flags " + "succeeded unexpectedly\n"); + } + exit(EXIT_SUCCESS); +} + +static bool test_unpriv_remount_simple(int mount_flags) +{ + return test_unpriv_remount(mount_flags, mount_flags, 0); +} + +static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags) +{ + return test_unpriv_remount(mount_flags, mount_flags, invalid_flags); +} + +int main(int argc, char **argv) +{ + if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) { + die("MS_RDONLY malfunctions\n"); + } + if (!test_unpriv_remount_simple(MS_NODEV)) { + die("MS_NODEV malfunctions\n"); + } + if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) { + die("MS_NOSUID malfunctions\n"); + } + if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) { + die("MS_NOEXEC malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV, + MS_NOATIME|MS_NODEV)) + { + die("MS_RELATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV, + MS_NOATIME|MS_NODEV)) + { + die("MS_STRICTATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV, + MS_STRICTATIME|MS_NODEV)) + { + die("MS_RELATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV, + MS_NOATIME|MS_NODEV)) + { + die("MS_RELATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV, + MS_NOATIME|MS_NODEV)) + { + die("MS_RELATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV, + MS_STRICTATIME|MS_NODEV)) + { + die("MS_RELATIME malfunctions\n"); + } + if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV, + MS_NOATIME|MS_NODEV)) + { + die("Default atime malfunctions\n"); + } + return EXIT_SUCCESS; +} From 819f3e7ae9c1ad79b7f9583ea687cdcebe7a0fe1 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 15 Jul 2014 12:25:28 +0400 Subject: [PATCH 45/74] Bluetooth: never linger on process exit commit 093facf3634da1b0c2cc7ed106f1983da901bbab upstream. If the current process is exiting, lingering on socket close will make it unkillable, so we should avoid it. Reproducer: #include #include #define BTPROTO_L2CAP 0 #define BTPROTO_SCO 2 #define BTPROTO_RFCOMM 3 int main() { int fd; struct linger ling; fd = socket(PF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM); //or: fd = socket(PF_BLUETOOTH, SOCK_DGRAM, BTPROTO_L2CAP); //or: fd = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_SCO); ling.l_onoff = 1; ling.l_linger = 1000000000; setsockopt(fd, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling)); return 0; } Signed-off-by: Vladimir Davydov Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_sock.c | 3 ++- net/bluetooth/rfcomm/sock.c | 3 ++- net/bluetooth/sco.c | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 302d29b3744d..5f36f70ce44d 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -887,7 +887,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) l2cap_chan_close(chan, 0); lock_sock(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c1c6028e389a..7ca014daa5ab 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -887,7 +887,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how) sk->sk_shutdown = SHUTDOWN_MASK; __rfcomm_sock_close(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } release_sock(sk); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 2bb1d3a5e76b..c9ae6b703c13 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -858,7 +858,8 @@ static int sco_sock_shutdown(struct socket *sock, int how) sco_sock_clear_timer(sk); __sco_sock_close(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } @@ -878,7 +879,8 @@ static int sco_sock_release(struct socket *sock) sco_sock_close(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) { + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) { lock_sock(sk); err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); release_sock(sk); From 07b41b34490bb0286a1537afe0190bb197f973eb Mon Sep 17 00:00:00 2001 From: Vignesh Raman Date: Tue, 22 Jul 2014 19:24:25 +0530 Subject: [PATCH 46/74] Bluetooth: Avoid use of session socket after the session gets freed commit 32333edb82fb2009980eefc5518100068147ab82 upstream. The commits 08c30aca9e698faddebd34f81e1196295f9dc063 "Bluetooth: Remove RFCOMM session refcnt" and 8ff52f7d04d9cc31f1e81dcf9a2ba6335ed34905 "Bluetooth: Return RFCOMM session ptrs to avoid freed session" allow rfcomm_recv_ua and rfcomm_session_close to delete the session (and free the corresponding socket) and propagate NULL session pointer to the upper callers. Additional fix is required to terminate the loop in rfcomm_process_rx function to avoid use of freed 'sk' memory. The issue is only reproducible with kernel option CONFIG_PAGE_POISONING enabled making freed memory being changed and filled up with fixed char value used to unmask use-after-free issues. Signed-off-by: Vignesh Raman Signed-off-by: Vitaly Kuzmichev Acked-by: Dean Jenkins Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ca957d34b0c8..19ba192e9dbf 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1857,10 +1857,13 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - if (!skb_linearize(skb)) + if (!skb_linearize(skb)) { s = rfcomm_recv_frame(s, skb); - else + if (!s) + break; + } else { kfree_skb(skb); + } } if (s && (sk->sk_state == BT_CLOSED)) From 318a3d59cea8f6790a364f4f8ab24a7aa9514ada Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 13 Aug 2014 09:57:07 +1000 Subject: [PATCH 47/74] md/raid6: avoid data corruption during recovery of double-degraded RAID6 commit 9c4bdf697c39805078392d5ddbbba5ae5680e0dd upstream. During recovery of a double-degraded RAID6 it is possible for some blocks not to be recovered properly, leading to corruption. If a write happens to one block in a stripe that would be written to a missing device, and at the same time that stripe is recovering data to the other missing device, then that recovered data may not be written. This patch skips, in the double-degraded case, an optimisation that is only safe for single-degraded arrays. Bug was introduced in 2.6.32 and fix is suitable for any kernel since then. In an older kernel with separate handle_stripe5() and handle_stripe6() functions the patch must change handle_stripe6(). Fixes: 6c0069c0ae9659e3a91b68eaed06a5c6c37f45c8 Cc: Yuri Tikhonov Cc: Dan Williams Reported-by: "Manibalan P" Tested-by: "Manibalan P" Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1090423 Signed-off-by: NeilBrown Acked-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5e3c25d4562c..774f81423d78 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3561,6 +3561,8 @@ static void handle_stripe(struct stripe_head *sh) set_bit(R5_Wantwrite, &dev->flags); if (prexor) continue; + if (s.failed > 1) + continue; if (!test_bit(R5_Insync, &dev->flags) || ((i == sh->pd_idx || i == sh->qd_idx) && s.failed == 0)) From 1075d2bdf6334b8373c00978403a69a4d9fdb883 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Aug 2014 13:56:38 +1000 Subject: [PATCH 48/74] md/raid10: fix memory leak when reshaping a RAID10. commit ce0b0a46955d1bb389684a2605dbcaa990ba0154 upstream. raid10 reshape clears unwanted bits from a bio->bi_flags using a method which, while clumsy, worked until 3.10 when BIO_OWNS_VEC was added. Since then it clears that bit but shouldn't. This results in a memory leak. So change to used the approved method of clearing unwanted bits. As this causes a memory leak which can consume all of memory the fix is suitable for -stable. Fixes: a38352e0ac02dbbd4fa464dc22d1352b5fbd06fd Reported-by: mdraid.pkoch@dfgh.net (Peter Koch) Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d2f8cd332b4a..973e26fc18c1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4398,7 +4398,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, read_bio->bi_private = r10_bio; read_bio->bi_end_io = end_sync_read; read_bio->bi_rw = READ; - read_bio->bi_flags &= ~(BIO_POOL_MASK - 1); + read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); read_bio->bi_flags |= 1 << BIO_UPTODATE; read_bio->bi_vcnt = 0; read_bio->bi_size = 0; From 26584e18c463bcf54fbd97e7c76f6f00ba13b1c9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Aug 2014 13:59:50 +1000 Subject: [PATCH 49/74] md/raid10: Fix memory leak when raid10 reshape completes. commit b39685526f46976bcd13aa08c82480092befa46c upstream. When a raid10 commences a resync/recovery/reshape it allocates some buffer space. When a resync/recovery completes the buffer space is freed. But not when the reshape completes. This can result in a small memory leak. There is a subtle side-effect of this bug. When a RAID10 is reshaped to a larger array (more devices), the reshape is immediately followed by a "resync" of the new space. This "resync" will use the buffer space which was allocated for "reshape". This can cause problems including a "BUG" in the SCSI layer. So this is suitable for -stable. Fixes: 3ea7daa5d7fde47cd41f4d56c2deb949114da9d6 Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 973e26fc18c1..353c2b392652 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2948,6 +2948,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, */ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { end_reshape(conf); + close_sync(conf); return 0; } From 433d80d62562bfbc30cbf426206f29c9153bbc15 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 25 Jul 2014 09:11:33 -0500 Subject: [PATCH 50/74] RDMA/iwcm: Use a default listen backlog if needed commit 2f0304d21867476394cd51a54e97f7273d112261 upstream. If the user creates a listening cm_id with backlog of 0 the IWCM ends up not allowing any connection requests at all. The correct behavior is for the IWCM to pick a default value if the user backlog parameter is zero. Lustre from version 1.8.8 onward uses a backlog of 0, which breaks iwarp support without this fix. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/iwcm.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index c47c2034ca71..4293e89bbbdd 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -65,6 +66,20 @@ struct iwcm_work { struct list_head free_list; }; +static unsigned int default_backlog = 256; + +static struct ctl_table_header *iwcm_ctl_table_hdr; +static struct ctl_table iwcm_ctl_table[] = { + { + .procname = "default_backlog", + .data = &default_backlog, + .maxlen = sizeof(default_backlog), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: @@ -419,6 +434,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + if (!backlog) + backlog = default_backlog; + ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; @@ -1024,11 +1042,20 @@ static int __init iw_cm_init(void) if (!iwcm_wq) return -ENOMEM; + iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", + iwcm_ctl_table); + if (!iwcm_ctl_table_hdr) { + pr_err("iw_cm: couldn't register sysctl paths\n"); + destroy_workqueue(iwcm_wq); + return -ENOMEM; + } + return 0; } static void __exit iw_cm_cleanup(void) { + unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); } From 9a9237c9e897fd73ee74689fc319a6412f4377f9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 4 Aug 2014 12:43:26 +1000 Subject: [PATCH 51/74] xfs: quotacheck leaves dquot buffers without verifiers commit 5fd364fee81a7888af806e42ed8a91c845894f2d upstream. When running xfs/305, I noticed that quotacheck was flushing dquot buffers that did not have the xfs_dquot_buf_ops verifiers attached: XFS (vdb): _xfs_buf_ioapply: no ops on block 0x1dc8/0x1dc8 ffff880052489000: 44 51 01 04 00 00 65 b8 00 00 00 00 00 00 00 00 DQ....e......... ffff880052489010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880052489020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ffff880052489030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ CPU: 1 PID: 2376 Comm: mount Not tainted 3.16.0-rc2-dgc+ #306 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006fe38000 ffff88004a0ffae8 ffffffff81cf1cca 0000000000000001 ffff88004a0ffb88 ffffffff814d50ca 000010004a0ffc70 0000000000000000 ffff88006be56dc4 0000000000000021 0000000000001dc8 ffff88007c773d80 Call Trace: [] dump_stack+0x45/0x56 [] _xfs_buf_ioapply+0x3ca/0x3d0 [] ? wake_up_state+0x20/0x20 [] ? xfs_bdstrat_cb+0x55/0xb0 [] xfs_buf_iorequest+0x6b/0xd0 [] xfs_bdstrat_cb+0x55/0xb0 [] __xfs_buf_delwri_submit+0x15b/0x220 [] ? xfs_buf_delwri_submit+0x30/0x90 [] xfs_buf_delwri_submit+0x30/0x90 [] xfs_qm_quotacheck+0x17d/0x3c0 [] xfs_qm_mount_quotas+0x151/0x1e0 [] xfs_mountfs+0x56c/0x7d0 [] xfs_fs_fill_super+0x2c2/0x340 [] mount_bdev+0x194/0x1d0 [] ? xfs_finish_flags+0x170/0x170 [] xfs_fs_mount+0x15/0x20 [] mount_fs+0x39/0x1b0 [] vfs_kern_mount+0x67/0x120 [] do_mount+0x23e/0xad0 [] ? __get_free_pages+0xe/0x50 [] ? copy_mount_options+0x36/0x150 [] SyS_mount+0x83/0xc0 [] tracesys+0xdd/0xe2 This was caused by dquot buffer readahead not attaching a verifier structure to the buffer when readahead was issued, resulting in the followup read of the buffer finding a valid buffer and so not attaching new verifiers to the buffer as part of the read. Also, when a verifier failure occurs, we then read the buffer without verifiers. Attach the verifiers manually after this read so that if the buffer is then written it will be verified that the corruption has been repaired. Further, when flushing a dquot we don't ask for a verifier when reading in the dquot buffer the dquot belongs to. Most of the time this isn't an issue because the buffer is still cached, but when it is not cached it will result in writing the dquot buffer without having the verfier attached. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dquot.c | 3 ++- fs/xfs/xfs_qm.c | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 044e97a33c8d..bac3e1635b7d 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1104,7 +1104,8 @@ xfs_qm_dqflush( * Get the buffer containing the on-disk dquot */ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); + mp->m_quotainfo->qi_dqchunklen, 0, &bp, + &xfs_dquot_buf_ops); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b75c9bb6e71e..29d1ca567ed3 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -935,6 +935,12 @@ xfs_qm_dqiter_bufs( if (error) break; + /* + * A corrupt buffer might not have a verifier attached, so + * make sure we have the correct one attached before writeback + * occurs. + */ + bp->b_ops = &xfs_dquot_buf_ops; xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); @@ -1018,7 +1024,7 @@ xfs_qm_dqiterate( xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, rablkno), mp->m_quotainfo->qi_dqchunklen, - NULL); + &xfs_dquot_buf_ops); rablkno++; } } From 3430681f33b23ae206fca816ab8abf82c7a0f0fc Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 2 Sep 2014 12:12:51 +1000 Subject: [PATCH 52/74] xfs: don't dirty buffers beyond EOF commit 22e757a49cf010703fcb9c9b4ef793248c39b0c2 upstream. generic/263 is failing fsx at this point with a page spanning EOF that cannot be invalidated. The operations are: 1190 mapwrite 0x52c00 thru 0x5e569 (0xb96a bytes) 1191 mapread 0x5c000 thru 0x5d636 (0x1637 bytes) 1192 write 0x5b600 thru 0x771ff (0x1bc00 bytes) where 1190 extents EOF from 0x54000 to 0x5e569. When the direct IO write attempts to invalidate the cached page over this range, it fails with -EBUSY and so any attempt to do page invalidation fails. The real question is this: Why can't that page be invalidated after it has been written to disk and cleaned? Well, there's data on the first two buffers in the page (1k block size, 4k page), but the third buffer on the page (i.e. beyond EOF) is failing drop_buffers because it's bh->b_state == 0x3, which is BH_Uptodate | BH_Dirty. IOWs, there's dirty buffers beyond EOF. Say what? OK, set_buffer_dirty() is called on all buffers from __set_page_buffers_dirty(), regardless of whether the buffer is beyond EOF or not, which means that when we get to ->writepage, we have buffers marked dirty beyond EOF that we need to clean. So, we need to implement our own .set_page_dirty method that doesn't dirty buffers beyond EOF. This is messy because the buffer code is not meant to be shared and it has interesting locking issues on the buffer dirty bits. So just copy and paste it and then modify it to suit what we need. Note: the solutions the other filesystems and generic block code use of marking the buffers clean in ->writepage does not work for XFS. It still leaves dirty buffers beyond EOF and invalidations still fail. Hence rather than play whack-a-mole, this patch simply prevents those buffers from being dirtied in the first place. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 41a695048be7..cfbb4c1b2f17 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1661,11 +1661,72 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } +/* + * This is basically a copy of __set_page_dirty_buffers() with one + * small tweak: buffers beyond EOF do not get marked dirty. If we mark them + * dirty, we'll never be able to clean them because we don't write buffers + * beyond EOF, and that means we can't invalidate pages that span EOF + * that have been marked dirty. Further, the dirty state can leak into + * the file interior if the file is extended, resulting in all sorts of + * bad things happening as the state does not match the underlying data. + * + * XXX: this really indicates that bufferheads in XFS need to die. Warts like + * this only exist because of bufferheads and how the generic code manages them. + */ +STATIC int +xfs_vm_set_page_dirty( + struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + loff_t end_offset; + loff_t offset; + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + end_offset = i_size_read(inode); + offset = page_offset(page); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (offset < end_offset) + set_buffer_dirty(bh); + bh = bh->b_this_page; + offset += 1 << inode->i_blkbits; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) { + /* sigh - __set_page_dirty() is static, so copy it here, too */ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return newly_dirty; +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, + .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, From 1025b461b3bb78860e071f7c0bad301c104ac94a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 2 Sep 2014 12:12:52 +1000 Subject: [PATCH 53/74] xfs: don't zero partial page cache pages during O_DIRECT writes commit 834ffca6f7e345a79f6f2e2d131b0dfba8a4b67a upstream. Similar to direct IO reads, direct IO writes are using truncate_pagecache_range to invalidate the page cache. This is incorrect due to the sub-block zeroing in the page cache that truncate_pagecache_range() triggers. This patch fixes things by using invalidate_inode_pages2_range instead. It preserves the page cache invalidation, but won't zero any pages. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a5f2042aec8b..1028717d95f8 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -677,7 +677,15 @@ xfs_file_dio_aio_write( pos, -1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, -1); + WARN_ON_ONCE(ret); + ret = 0; } /* From d96dbb069199aa4e5bf7406e67fa5d38cebdf2fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 2 Sep 2014 12:12:52 +1000 Subject: [PATCH 54/74] xfs: don't zero partial page cache pages during O_DIRECT writes commit 85e584da3212140ee80fd047f9058bbee0bc00d5 upstream. xfs is using truncate_pagecache_range to invalidate the page cache during DIO reads. This is different from the other filesystems who only invalidate pages during DIO writes. truncate_pagecache_range is meant to be used when we are freeing the underlying data structs from disk, so it will zero any partial ranges in the page. This means a DIO read can zero out part of the page cache page, and it is possible the page will stay in cache. buffered reads will find an up to date page with zeros instead of the data actually on disk. This patch fixes things by using invalidate_inode_pages2_range instead. It preserves the page cache invalidation, but won't zero any pages. [dchinner: catch error and warn if it fails. Comment.] Signed-off-by: Chris Mason Reviewed-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1028717d95f8..9f457fedbcfc 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -298,7 +298,16 @@ xfs_file_aio_read( xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, -1); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } From b08633de6ba926a3412292b3239bf41a3f00912b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 Jul 2014 10:16:29 +1000 Subject: [PATCH 55/74] md/raid1,raid10: always abort recover on write error. commit 2446dba03f9dabe0b477a126cbeb377854785b47 upstream. Currently we don't abort recovery on a write error if the write error to the recovering device was triggerd by normal IO (as opposed to recovery IO). This means that for one bitmap region, the recovery might write to the recovering device for a few sectors, then not bother for subsequent sectors (as it never writes to failed devices). In this case the bitmap bit will be cleared, but it really shouldn't. The result is that if the recovering device fails and is then re-added (after fixing whatever hardware problem triggerred the failure), the second recovery won't redo the region it was in the middle of, so some of the device will not be recovered properly. If we abort the recovery, the region being processes will be cancelled (bit not cleared) and the whole region will be retried. As the bug can result in data corruption the patch is suitable for -stable. For kernels prior to 3.11 there is a conflict in raid10.c which will require care. Original-from: jiao hui Reported-and-tested-by: jiao hui Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 8 ++++---- drivers/md/raid10.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 75771b2077c0..a176791509f6 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1406,12 +1406,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) mddev->degraded++; set_bit(Faulty, &rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); } else set_bit(Faulty, &rdev->flags); + /* + * if recovery is running, make sure it aborts. + */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 353c2b392652..a1ea2a753912 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1681,11 +1681,11 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + /* + * If recovery is running, make sure it aborts. + */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); From a64897279757e1e98b6b1d98e0bba561b0f88331 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 8 Aug 2014 12:43:39 +0400 Subject: [PATCH 56/74] libceph: set last_piece in ceph_msg_data_pages_cursor_init() correctly commit 5f740d7e1531099b888410e6bab13f68da9b1a4d upstream. Determining ->last_piece based on the value of ->page_offset + length is incorrect because length here is the length of the entire message. ->last_piece set to false even if page array data item length is <= PAGE_SIZE, which results in invalid length passed to ceph_tcp_{send,recv}page() and causes various asserts to fire. # cat pages-cursor-init.sh #!/bin/bash rbd create --size 10 --image-format 2 foo FOO_DEV=$(rbd map foo) dd if=/dev/urandom of=$FOO_DEV bs=1M &>/dev/null rbd snap create foo@snap rbd snap protect foo@snap rbd clone foo@snap bar # rbd_resize calls librbd rbd_resize(), size is in bytes ./rbd_resize bar $(((4 << 20) + 512)) rbd resize --size 10 bar BAR_DEV=$(rbd map bar) # trigger a 512-byte copyup -- 512-byte page array data item dd if=/dev/urandom of=$BAR_DEV bs=1M count=1 seek=5 The problem exists only in ceph_msg_data_pages_cursor_init(), ceph_msg_data_pages_advance() does the right thing. The size_t cast is unnecessary. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b9d7df175700..6ff7d9dc240f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -904,7 +904,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor, BUG_ON(page_count > (int)USHRT_MAX); cursor->page_count = (unsigned short)page_count; BUG_ON(length > SIZE_MAX - cursor->page_offset); - cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE; + cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE; } static struct page * From 2e1dbf27a941085ba21c23355006f10d297faec9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 8 Sep 2014 17:25:34 +0400 Subject: [PATCH 57/74] libceph: add process_one_ticket() helper commit 597cda357716a3cf8d994cb11927af917c8d71fa upstream. Add a helper for processing individual cephx auth tickets. Needed for the next commit, which deals with allocating ticket buffers. (Most of the diff here is whitespace - view with git diff -b). Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 228 +++++++++++++++++++++++++--------------------- 1 file changed, 124 insertions(+), 104 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 96238ba95f2b..0eb146dce1aa 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -129,17 +129,131 @@ static void remove_ticket_handler(struct ceph_auth_client *ac, kfree(th); } +static int process_one_ticket(struct ceph_auth_client *ac, + struct ceph_crypto_key *secret, + void **p, void *end, + void *dbuf, void *ticket_buf) +{ + struct ceph_x_info *xi = ac->private; + int type; + u8 tkt_struct_v, blob_struct_v; + struct ceph_x_ticket_handler *th; + void *dp, *dend; + int dlen; + char is_enc; + struct timespec validity; + struct ceph_crypto_key old_key; + void *tp, *tpend; + struct ceph_timespec new_validity; + struct ceph_crypto_key new_session_key; + struct ceph_buffer *new_ticket_blob; + unsigned long new_expires, new_renew_after; + u64 new_secret_id; + int ret; + + ceph_decode_need(p, end, sizeof(u32) + 1, bad); + + type = ceph_decode_32(p); + dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); + + tkt_struct_v = ceph_decode_8(p); + if (tkt_struct_v != 1) + goto bad; + + th = get_ticket_handler(ac, type); + if (IS_ERR(th)) { + ret = PTR_ERR(th); + goto out; + } + + /* blob for me */ + dlen = ceph_x_decrypt(secret, p, end, dbuf, + TEMP_TICKET_BUF_LEN); + if (dlen <= 0) { + ret = dlen; + goto out; + } + dout(" decrypted %d bytes\n", dlen); + dp = dbuf; + dend = dp + dlen; + + tkt_struct_v = ceph_decode_8(&dp); + if (tkt_struct_v != 1) + goto bad; + + memcpy(&old_key, &th->session_key, sizeof(old_key)); + ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); + if (ret) + goto out; + + ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); + ceph_decode_timespec(&validity, &new_validity); + new_expires = get_seconds() + validity.tv_sec; + new_renew_after = new_expires - (validity.tv_sec / 4); + dout(" expires=%lu renew_after=%lu\n", new_expires, + new_renew_after); + + /* ticket blob for service */ + ceph_decode_8_safe(p, end, is_enc, bad); + tp = ticket_buf; + if (is_enc) { + /* encrypted */ + dout(" encrypted ticket\n"); + dlen = ceph_x_decrypt(&old_key, p, end, ticket_buf, + TEMP_TICKET_BUF_LEN); + if (dlen < 0) { + ret = dlen; + goto out; + } + dlen = ceph_decode_32(&tp); + } else { + /* unencrypted */ + ceph_decode_32_safe(p, end, dlen, bad); + ceph_decode_need(p, end, dlen, bad); + ceph_decode_copy(p, ticket_buf, dlen); + } + tpend = tp + dlen; + dout(" ticket blob is %d bytes\n", dlen); + ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); + blob_struct_v = ceph_decode_8(&tp); + new_secret_id = ceph_decode_64(&tp); + ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); + if (ret) + goto out; + + /* all is well, update our ticket */ + ceph_crypto_key_destroy(&th->session_key); + if (th->ticket_blob) + ceph_buffer_put(th->ticket_blob); + th->session_key = new_session_key; + th->ticket_blob = new_ticket_blob; + th->validity = new_validity; + th->secret_id = new_secret_id; + th->expires = new_expires; + th->renew_after = new_renew_after; + dout(" got ticket service %d (%s) secret_id %lld len %d\n", + type, ceph_entity_type_name(type), th->secret_id, + (int)th->ticket_blob->vec.iov_len); + xi->have_keys |= th->service; + +out: + return ret; + +bad: + ret = -EINVAL; + goto out; +} + static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, struct ceph_crypto_key *secret, void *buf, void *end) { - struct ceph_x_info *xi = ac->private; - int num; void *p = buf; - int ret; char *dbuf; char *ticket_buf; u8 reply_struct_v; + u32 num; + int ret; dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); if (!dbuf) @@ -150,112 +264,18 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, if (!ticket_buf) goto out_dbuf; - ceph_decode_need(&p, end, 1 + sizeof(u32), bad); - reply_struct_v = ceph_decode_8(&p); + ceph_decode_8_safe(&p, end, reply_struct_v, bad); if (reply_struct_v != 1) - goto bad; - num = ceph_decode_32(&p); + return -EINVAL; + + ceph_decode_32_safe(&p, end, num, bad); dout("%d tickets\n", num); + while (num--) { - int type; - u8 tkt_struct_v, blob_struct_v; - struct ceph_x_ticket_handler *th; - void *dp, *dend; - int dlen; - char is_enc; - struct timespec validity; - struct ceph_crypto_key old_key; - void *tp, *tpend; - struct ceph_timespec new_validity; - struct ceph_crypto_key new_session_key; - struct ceph_buffer *new_ticket_blob; - unsigned long new_expires, new_renew_after; - u64 new_secret_id; - - ceph_decode_need(&p, end, sizeof(u32) + 1, bad); - - type = ceph_decode_32(&p); - dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); - - tkt_struct_v = ceph_decode_8(&p); - if (tkt_struct_v != 1) - goto bad; - - th = get_ticket_handler(ac, type); - if (IS_ERR(th)) { - ret = PTR_ERR(th); - goto out; - } - - /* blob for me */ - dlen = ceph_x_decrypt(secret, &p, end, dbuf, - TEMP_TICKET_BUF_LEN); - if (dlen <= 0) { - ret = dlen; - goto out; - } - dout(" decrypted %d bytes\n", dlen); - dend = dbuf + dlen; - dp = dbuf; - - tkt_struct_v = ceph_decode_8(&dp); - if (tkt_struct_v != 1) - goto bad; - - memcpy(&old_key, &th->session_key, sizeof(old_key)); - ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); + ret = process_one_ticket(ac, secret, &p, end, + dbuf, ticket_buf); if (ret) goto out; - - ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); - ceph_decode_timespec(&validity, &new_validity); - new_expires = get_seconds() + validity.tv_sec; - new_renew_after = new_expires - (validity.tv_sec / 4); - dout(" expires=%lu renew_after=%lu\n", new_expires, - new_renew_after); - - /* ticket blob for service */ - ceph_decode_8_safe(&p, end, is_enc, bad); - tp = ticket_buf; - if (is_enc) { - /* encrypted */ - dout(" encrypted ticket\n"); - dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf, - TEMP_TICKET_BUF_LEN); - if (dlen < 0) { - ret = dlen; - goto out; - } - dlen = ceph_decode_32(&tp); - } else { - /* unencrypted */ - ceph_decode_32_safe(&p, end, dlen, bad); - ceph_decode_need(&p, end, dlen, bad); - ceph_decode_copy(&p, ticket_buf, dlen); - } - tpend = tp + dlen; - dout(" ticket blob is %d bytes\n", dlen); - ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); - blob_struct_v = ceph_decode_8(&tp); - new_secret_id = ceph_decode_64(&tp); - ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); - if (ret) - goto out; - - /* all is well, update our ticket */ - ceph_crypto_key_destroy(&th->session_key); - if (th->ticket_blob) - ceph_buffer_put(th->ticket_blob); - th->session_key = new_session_key; - th->ticket_blob = new_ticket_blob; - th->validity = new_validity; - th->secret_id = new_secret_id; - th->expires = new_expires; - th->renew_after = new_renew_after; - dout(" got ticket service %d (%s) secret_id %lld len %d\n", - type, ceph_entity_type_name(type), th->secret_id, - (int)th->ticket_blob->vec.iov_len); - xi->have_keys |= th->service; } ret = 0; From 9c38ff707bbe0635121f8fb6f108ee376cff90fe Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 9 Sep 2014 19:39:15 +0400 Subject: [PATCH 58/74] libceph: do not hard code max auth ticket len commit c27a3e4d667fdcad3db7b104f75659478e0c68d8 upstream. We hard code cephx auth ticket buffer size to 256 bytes. This isn't enough for any moderate setups and, in case tickets themselves are not encrypted, leads to buffer overflows (ceph_x_decrypt() errors out, but ceph_decode_copy() doesn't - it's just a memcpy() wrapper). Since the buffer is allocated dynamically anyway, allocated it a bit later, at the point where we know how much is going to be needed. Fixes: http://tracker.ceph.com/issues/8979 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 64 +++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 0eb146dce1aa..de6662b14e1f 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -13,8 +13,6 @@ #include "auth_x.h" #include "auth_x_protocol.h" -#define TEMP_TICKET_BUF_LEN 256 - static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); static int ceph_x_is_authenticated(struct ceph_auth_client *ac) @@ -64,7 +62,7 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, } static int ceph_x_decrypt(struct ceph_crypto_key *secret, - void **p, void *end, void *obuf, size_t olen) + void **p, void *end, void **obuf, size_t olen) { struct ceph_x_encrypt_header head; size_t head_len = sizeof(head); @@ -75,8 +73,14 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret, return -EINVAL; dout("ceph_x_decrypt len %d\n", len); - ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen, - *p, len); + if (*obuf == NULL) { + *obuf = kmalloc(len, GFP_NOFS); + if (!*obuf) + return -ENOMEM; + olen = len; + } + + ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len); if (ret) return ret; if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) @@ -131,18 +135,19 @@ static void remove_ticket_handler(struct ceph_auth_client *ac, static int process_one_ticket(struct ceph_auth_client *ac, struct ceph_crypto_key *secret, - void **p, void *end, - void *dbuf, void *ticket_buf) + void **p, void *end) { struct ceph_x_info *xi = ac->private; int type; u8 tkt_struct_v, blob_struct_v; struct ceph_x_ticket_handler *th; + void *dbuf = NULL; void *dp, *dend; int dlen; char is_enc; struct timespec validity; struct ceph_crypto_key old_key; + void *ticket_buf = NULL; void *tp, *tpend; struct ceph_timespec new_validity; struct ceph_crypto_key new_session_key; @@ -167,8 +172,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, } /* blob for me */ - dlen = ceph_x_decrypt(secret, p, end, dbuf, - TEMP_TICKET_BUF_LEN); + dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0); if (dlen <= 0) { ret = dlen; goto out; @@ -195,20 +199,25 @@ static int process_one_ticket(struct ceph_auth_client *ac, /* ticket blob for service */ ceph_decode_8_safe(p, end, is_enc, bad); - tp = ticket_buf; if (is_enc) { /* encrypted */ dout(" encrypted ticket\n"); - dlen = ceph_x_decrypt(&old_key, p, end, ticket_buf, - TEMP_TICKET_BUF_LEN); + dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0); if (dlen < 0) { ret = dlen; goto out; } + tp = ticket_buf; dlen = ceph_decode_32(&tp); } else { /* unencrypted */ ceph_decode_32_safe(p, end, dlen, bad); + ticket_buf = kmalloc(dlen, GFP_NOFS); + if (!ticket_buf) { + ret = -ENOMEM; + goto out; + } + tp = ticket_buf; ceph_decode_need(p, end, dlen, bad); ceph_decode_copy(p, ticket_buf, dlen); } @@ -237,6 +246,8 @@ static int process_one_ticket(struct ceph_auth_client *ac, xi->have_keys |= th->service; out: + kfree(ticket_buf); + kfree(dbuf); return ret; bad: @@ -249,21 +260,10 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, void *buf, void *end) { void *p = buf; - char *dbuf; - char *ticket_buf; u8 reply_struct_v; u32 num; int ret; - dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); - if (!dbuf) - return -ENOMEM; - - ret = -ENOMEM; - ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); - if (!ticket_buf) - goto out_dbuf; - ceph_decode_8_safe(&p, end, reply_struct_v, bad); if (reply_struct_v != 1) return -EINVAL; @@ -272,22 +272,15 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, dout("%d tickets\n", num); while (num--) { - ret = process_one_ticket(ac, secret, &p, end, - dbuf, ticket_buf); + ret = process_one_ticket(ac, secret, &p, end); if (ret) - goto out; + return ret; } - ret = 0; -out: - kfree(ticket_buf); -out_dbuf: - kfree(dbuf); - return ret; + return 0; bad: - ret = -EINVAL; - goto out; + return -EINVAL; } static int ceph_x_build_authorizer(struct ceph_auth_client *ac, @@ -603,13 +596,14 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_x_ticket_handler *th; int ret = 0; struct ceph_x_authorize_reply reply; + void *preply = &reply; void *p = au->reply_buf; void *end = p + sizeof(au->reply_buf); th = get_ticket_handler(ac, au->service); if (IS_ERR(th)) return PTR_ERR(th); - ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); + ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply)); if (ret < 0) return ret; if (ret != sizeof(reply)) From 9b1eceeba2e3aeefa4725f51ca617fe683ddfa65 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 18 Jul 2014 18:25:52 +0400 Subject: [PATCH 59/74] CIFS: Fix STATUS_CANNOT_DELETE error mapping for SMB2 commit 21496687a79424572f46a84c690d331055f4866f upstream. The existing mapping causes unlink() call to return error after delete operation. Changing the mapping to -EACCES makes the client process the call like CIFS protocol does - reset dos attributes with ATTR_READONLY flag masked off and retry the operation. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2maperror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 7c2f45c06fc2..824696fb24db 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"}, {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"}, {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"}, - {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"}, + {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"}, {STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"}, {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"}, {STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"}, From 8f516091b6c747c2db53b33c0eb955c5b3975792 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 27 Jun 2014 10:33:11 +0400 Subject: [PATCH 60/74] CIFS: Fix async reading on reconnects commit 038bc961c31b070269ecd07349a7ee2e839d4fec upstream. If we get into read_into_pages() from cifs_readv_receive() and then loose a network, we issue cifs_reconnect that moves all mids to a private list and issue their callbacks. The callback of the async read request sets a mid to retry, frees it and wakes up a process that waits on the rdata completion. After the connection is established we return from read_into_pages() with a short read, use the mid that was freed before and try to read the remaining data from the a newly created socket. Both actions are not what we want to do. In reconnect cases (-EAGAIN) we should not mask off the error with a short read but should return the error code instead. Acked-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8b0c656f2ab2..97b03895ac8c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2809,7 +2809,7 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, total_read += result; } - return total_read > 0 ? total_read : result; + return total_read > 0 && result != -EAGAIN ? total_read : result; } static ssize_t @@ -3232,7 +3232,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, total_read += result; } - return total_read > 0 ? total_read : result; + return total_read > 0 && result != -EAGAIN ? total_read : result; } static int cifs_readpages(struct file *file, struct address_space *mapping, From c6bef3b64c1e605f4059189153de3251855846b9 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 17 Aug 2014 00:22:24 -0500 Subject: [PATCH 61/74] CIFS: Possible null ptr deref in SMB2_tcon commit 18f39e7be0121317550d03e267e3ebd4dbfbb3ce upstream. As Raphael Geissert pointed out, tcon_error_exit can dereference tcon and there is one path in which tcon can be null. Signed-off-by: Steve French Reported-by: Raphael Geissert Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c7a6fd87bb6e..184c55820d1f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -809,7 +809,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, tcon_error_exit: if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); - tcon->bad_network_name = true; + if (tcon) + tcon->bad_network_name = true; } goto tcon_exit; } From 4cf2ef68d23fcdea34b72325af7b9ac5f35f52ef Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 18 Aug 2014 20:49:58 +0400 Subject: [PATCH 62/74] CIFS: Fix wrong directory attributes after rename commit b46799a8f28c43c5264ac8d8ffa28b311b557e03 upstream. When we requests rename we also need to update attributes of both source and target parent directories. Not doing it causes generic/309 xfstest to fail on SMB2 mounts. Fix this by marking these directories for force revalidating. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9d463501348f..c9bce9b43855 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1647,6 +1647,12 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry, target_dentry, to_name); } + /* force revalidate to go get info when needed */ + CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; + + source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = + target_dir->i_mtime = current_fs_time(source_dir->i_sb); + cifs_rename_exit: kfree(info_buf_source); kfree(from_name); From 0c17ceb6f88f685e8a23019052891a198434aba8 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 22 Aug 2014 13:32:11 +0400 Subject: [PATCH 63/74] CIFS: Fix wrong filename length for SMB2 commit 1bbe4997b13de903c421c1cc78440e544b5f9064 upstream. The existing code uses the old MAX_NAME constant. This causes XFS test generic/013 to fail. Fix it by replacing MAX_NAME with PATH_MAX that SMB1 uses. Also remove an unused MAX_NAME constant definition. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsglob.h | 5 ----- fs/cifs/smb2file.c | 2 +- fs/cifs/smb2inode.c | 2 +- fs/cifs/smb2ops.c | 2 +- fs/cifs/smb2pdu.c | 2 +- 5 files changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e2c2d96491fa..52480240168e 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -74,11 +74,6 @@ #define SERVER_NAME_LENGTH 40 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) -/* used to define string lengths for reversing unicode strings */ -/* (256+1)*2 = 514 */ -/* (max path length + 1 for null) * 2 for unicode */ -#define MAX_NAME 514 - /* SMB echo "timeout" -- FIXME: tunable? */ #define SMB_ECHO_INTERVAL (60 * HZ) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 5da1b55a2258..d801f63cddd0 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -73,7 +73,7 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, goto out; } - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) { rc = -ENOMEM; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index fff6dfba6204..6d535797ec76 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -123,7 +123,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, *adjust_tz = false; - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e2756bb40b4d..fe7ac989c6c4 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -243,7 +243,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_file_all_info *smb2_data; - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 184c55820d1f..e37790841446 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1204,7 +1204,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, { return query_info(xid, tcon, persistent_fid, volatile_fid, FILE_ALL_INFORMATION, - sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + sizeof(struct smb2_file_all_info) + PATH_MAX * 2, sizeof(struct smb2_file_all_info), data); } From 659c639916aabca0165fbd9f8a966055a519e44b Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 26 Aug 2014 19:04:44 +0400 Subject: [PATCH 64/74] CIFS: Fix wrong restart readdir for SMB1 commit f736906a7669a77cf8cabdcbcf1dc8cb694e12ef upstream. The existing code calls server->ops->close() that is not right. This causes XFS test generic/310 to fail. Fix this by using server->ops->closedir() function. Signed-off-by: Dan Carpenter Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/readdir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 036279c064ff..87d125f682cd 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -585,8 +585,8 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); - if (server->ops->close) - server->ops->close(xid, tcon, &cfile->fid); + if (server->ops->close_dir) + server->ops->close_dir(xid, tcon, &cfile->fid); } else spin_unlock(&cifs_file_list_lock); if (cfile->srch_inf.ntwrk_buf_start) { From a9d28db622269c29825f13c28bae0f587f77aada Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 3 Jul 2014 10:35:26 +0800 Subject: [PATCH 65/74] mtd/ftl: fix the double free of the buffers allocated in build_maps() commit a152056c912db82860a8b4c23d0bd3a5aa89e363 upstream. I got the following panic on my fsl p5020ds board. Unable to handle kernel paging request for data at address 0x7375627379737465 Faulting instruction address: 0xc000000000100778 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=24 CoreNet Generic Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.15.0-next-20140613 #145 task: c0000000fe080000 ti: c0000000fe088000 task.ti: c0000000fe088000 NIP: c000000000100778 LR: c00000000010073c CTR: 0000000000000000 REGS: c0000000fe08aa00 TRAP: 0300 Not tainted (3.15.0-next-20140613) MSR: 0000000080029000 CR: 24ad2e24 XER: 00000000 DEAR: 7375627379737465 ESR: 0000000000000000 SOFTE: 1 GPR00: c0000000000c99b0 c0000000fe08ac80 c0000000009598e0 c0000000fe001d80 GPR04: 00000000000000d0 0000000000000913 c000000007902b20 0000000000000000 GPR08: c0000000feaae888 0000000000000000 0000000007091000 0000000000200200 GPR12: 0000000028ad2e28 c00000000fff4000 c0000000007abe08 0000000000000000 GPR16: c0000000007ab160 c0000000007aaf98 c00000000060ba68 c0000000007abda8 GPR20: c0000000007abde8 c0000000feaea6f8 c0000000feaea708 c0000000007abd10 GPR24: c000000000989370 c0000000008c6228 00000000000041ed c0000000fe00a400 GPR28: c00000000017c1cc 00000000000000d0 7375627379737465 c0000000fe001d80 NIP [c000000000100778] .__kmalloc_track_caller+0x70/0x168 LR [c00000000010073c] .__kmalloc_track_caller+0x34/0x168 Call Trace: [c0000000fe08ac80] [c00000000087e6b8] uevent_sock_list+0x0/0x10 (unreliable) [c0000000fe08ad20] [c0000000000c99b0] .kstrdup+0x44/0x90 [c0000000fe08adc0] [c00000000017c1cc] .__kernfs_new_node+0x4c/0x130 [c0000000fe08ae70] [c00000000017d7e4] .kernfs_new_node+0x2c/0x64 [c0000000fe08aef0] [c00000000017db00] .kernfs_create_dir_ns+0x34/0xc8 [c0000000fe08af80] [c00000000018067c] .sysfs_create_dir_ns+0x58/0xcc [c0000000fe08b010] [c0000000002c711c] .kobject_add_internal+0xc8/0x384 [c0000000fe08b0b0] [c0000000002c7644] .kobject_add+0x64/0xc8 [c0000000fe08b140] [c000000000355ebc] .device_add+0x11c/0x654 [c0000000fe08b200] [c0000000002b5988] .add_disk+0x20c/0x4b4 [c0000000fe08b2c0] [c0000000003a21d4] .add_mtd_blktrans_dev+0x340/0x514 [c0000000fe08b350] [c0000000003a3410] .mtdblock_add_mtd+0x74/0xb4 [c0000000fe08b3e0] [c0000000003a32cc] .blktrans_notify_add+0x64/0x94 [c0000000fe08b470] [c00000000039b5b4] .add_mtd_device+0x1d4/0x368 [c0000000fe08b520] [c00000000039b830] .mtd_device_parse_register+0xe8/0x104 [c0000000fe08b5c0] [c0000000003b8408] .of_flash_probe+0x72c/0x734 [c0000000fe08b750] [c00000000035ba40] .platform_drv_probe+0x38/0x84 [c0000000fe08b7d0] [c0000000003599a4] .really_probe+0xa4/0x29c [c0000000fe08b870] [c000000000359d3c] .__driver_attach+0x100/0x104 [c0000000fe08b900] [c00000000035746c] .bus_for_each_dev+0x84/0xe4 [c0000000fe08b9a0] [c0000000003593c0] .driver_attach+0x24/0x38 [c0000000fe08ba10] [c000000000358f24] .bus_add_driver+0x1c8/0x2ac [c0000000fe08bab0] [c00000000035a3a4] .driver_register+0x8c/0x158 [c0000000fe08bb30] [c00000000035b9f4] .__platform_driver_register+0x6c/0x80 [c0000000fe08bba0] [c00000000084e080] .of_flash_driver_init+0x1c/0x30 [c0000000fe08bc10] [c000000000001864] .do_one_initcall+0xbc/0x238 [c0000000fe08bd00] [c00000000082cdc0] .kernel_init_freeable+0x188/0x268 [c0000000fe08bdb0] [c0000000000020a0] .kernel_init+0x1c/0xf7c [c0000000fe08be30] [c000000000000884] .ret_from_kernel_thread+0x58/0xd4 Instruction dump: 41bd0010 480000c8 4bf04eb5 60000000 e94d0028 e93f0000 7cc95214 e8a60008 7fc9502a 2fbe0000 419e00c8 e93f0022 <7f7e482a> 39200000 88ed06b2 992d06b2 ---[ end trace b4c9a94804a42d40 ]--- It seems that the corrupted partition header on my mtd device triggers a bug in the ftl. In function build_maps() it will allocate the buffers needed by the mtd partition, but if something goes wrong such as kmalloc failure, mtd read error or invalid partition header parameter, it will free all allocated buffers and then return non-zero. In my case, it seems that partition header parameter 'NumTransferUnits' is invalid. And the ftl_freepart() is a function which free all the partition buffers allocated by build_maps(). Given the build_maps() is a self cleaning function, so there is no need to invoke this function even if build_maps() return with error. Otherwise it will causes the buffers to be freed twice and then weird things would happen. Signed-off-by: Kevin Hao Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ftl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 19d637266fcd..71e4f6ccae2f 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1075,7 +1075,6 @@ static void ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) return; } - ftl_freepart(partition); kfree(partition); } From 6562c0cc805b391489e2f511983300e802864aea Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 25 Aug 2014 16:15:33 -0700 Subject: [PATCH 66/74] mtd: nand: omap: Fix 1-bit Hamming code scheme, omap_calculate_ecc() commit 40ddbf5069bd4e11447c0088fc75318e0aac53f0 upstream. commit 65b97cf6b8de introduced in v3.7 caused a regression by using a reversed CS_MASK thus causing omap_calculate_ecc to always fail. As the NAND base driver never checks for .calculate()'s return value, the zeroed ECC values are used as is without showing any error to the user. However, this won't work and the NAND device won't be guarded by any error code. Fix the issue by using the correct mask. Code was tested on omap3beagle using the following procedure - flash the primary bootloader (MLO) from the kernel to the first NAND partition using nandwrite. - boot the board from NAND. This utilizes OMAP ROM loader that relies on 1-bit Hamming code ECC. Fixes: 65b97cf6b8de (mtd: nand: omap2: handle nand on gpmc) Signed-off-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/omap2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 8c4eb287bbdb..e9b1797cdb5f 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -948,7 +948,7 @@ static int omap_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u32 val; val = readl(info->reg.gpmc_ecc_config); - if (((val >> ECC_CONFIG_CS_SHIFT) & ~CS_MASK) != info->gpmc_cs) + if (((val >> ECC_CONFIG_CS_SHIFT) & CS_MASK) != info->gpmc_cs) return -EINVAL; /* read ecc result */ From f5b48b7a3d0d1ab761ba939b6cbb4a07d37a750b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 5 Jul 2014 18:43:21 -0400 Subject: [PATCH 67/74] blkcg: don't call into policy draining if root_blkg is already gone commit 2a1b4cf2331d92bc009bf94fa02a24604cdaf24c upstream. While a queue is being destroyed, all the blkgs are destroyed and its ->root_blkg pointer is set to NULL. If someone else starts to drain while the queue is in this state, the following oops happens. NULL pointer dereference at 0000000000000028 IP: [] blk_throtl_drain+0x84/0x230 PGD e4a1067 PUD b773067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: cfq_iosched(-) [last unloaded: cfq_iosched] CPU: 1 PID: 537 Comm: bash Not tainted 3.16.0-rc3-work+ #2 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff88000e222250 ti: ffff88000efd4000 task.ti: ffff88000efd4000 RIP: 0010:[] [] blk_throtl_drain+0x84/0x230 RSP: 0018:ffff88000efd7bf0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff880015091450 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88000efd7c10 R08: 0000000000000000 R09: 0000000000000001 R10: ffff88000e222250 R11: 0000000000000000 R12: ffff880015091450 R13: ffff880015092e00 R14: ffff880015091d70 R15: ffff88001508fc28 FS: 00007f1332650740(0000) GS:ffff88001fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000028 CR3: 0000000009446000 CR4: 00000000000006e0 Stack: ffffffff8144e8f6 ffff880015091450 0000000000000000 ffff880015091d80 ffff88000efd7c28 ffffffff8144ae2f ffff880015091450 ffff88000efd7c58 ffffffff81427641 ffff880015091450 ffffffff82401f00 ffff880015091450 Call Trace: [] blkcg_drain_queue+0x1f/0x60 [] __blk_drain_queue+0x71/0x180 [] blk_queue_bypass_start+0x6e/0xb0 [] blkcg_deactivate_policy+0x38/0x120 [] blk_throtl_exit+0x34/0x50 [] blkcg_exit_queue+0x35/0x40 [] blk_release_queue+0x26/0xd0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] blk_put_queue+0x15/0x20 [] scsi_device_dev_release_usercontext+0x16b/0x1c0 [] execute_in_process_context+0x89/0xa0 [] scsi_device_dev_release+0x1c/0x20 [] device_release+0x32/0xa0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] put_device+0x17/0x20 [] __scsi_remove_device+0xa9/0xe0 [] scsi_remove_device+0x2b/0x40 [] sdev_store_delete+0x27/0x30 [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x3e/0x50 [] kernfs_fop_write+0xe7/0x170 [] vfs_write+0xaf/0x1d0 [] SyS_write+0x4d/0xc0 [] system_call_fastpath+0x16/0x1b 776687bce42b ("block, blk-mq: draining can't be skipped even if bypass_depth was non-zero") made it easier to trigger this bug by making blk_queue_bypass_start() drain even when it loses the first bypass test to blk_cleanup_queue(); however, the bug has always been there even before the commit as blk_queue_bypass_start() could race against queue destruction, win the initial bypass test but perform the actual draining after blk_cleanup_queue() already destroyed all blkgs. Fix it by skippping calling into policy draining if all the blkgs are already gone. Signed-off-by: Tejun Heo Reported-by: Shirish Pargaonkar Reported-by: Sasha Levin Reported-by: Jet Chen Tested-by: Shirish Pargaonkar Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-cgroup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b95219d2168d..1ff8e97f853a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -876,6 +876,13 @@ void blkcg_drain_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + /* + * @q could be exiting and already have destroyed all blkgs as + * indicated by NULL root_blkg. If so, don't confuse policies. + */ + if (!q->root_blkg) + return; + /* * @q could be exiting and already have destroyed all blkgs as * indicated by NULL root_blkg. If so, don't confuse policies. From 70efec16cf060603b54ea71c9cb4499f052efd69 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Jul 2014 15:57:26 +0200 Subject: [PATCH 68/74] IB/srp: Fix deadlock between host removal and multipathd commit bcc05910359183b431da92713e98eed478edf83a upstream. If scsi_remove_host() is invoked after a SCSI device has been blocked, if the fast_io_fail_tmo or dev_loss_tmo work gets scheduled on the workqueue executing srp_remove_work() and if an I/O request is scheduled after the SCSI device had been blocked by e.g. multipathd then the following deadlock can occur: kworker/6:1 D ffff880831f3c460 0 195 2 0x00000000 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x10f/0x2a0 [] msleep+0x2f/0x40 [] __blk_drain_queue+0x4e/0x180 [] blk_cleanup_queue+0x225/0x230 [] __scsi_remove_device+0x62/0xe0 [scsi_mod] [] scsi_forget_host+0x6f/0x80 [scsi_mod] [] scsi_remove_host+0x7a/0x130 [scsi_mod] [] srp_remove_work+0x95/0x180 [ib_srp] [] process_one_work+0x1ea/0x6c0 [] worker_thread+0x11b/0x3a0 [] kthread+0xed/0x110 [] ret_from_fork+0x7c/0xb0 multipathd D ffff880096acc460 0 5340 1 0x00000000 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x10f/0x2a0 [] io_schedule_timeout+0x9b/0xf0 [] wait_for_completion_io_timeout+0xdc/0x110 [] blk_execute_rq+0x9b/0x100 [] sg_io+0x1a5/0x450 [] scsi_cmd_ioctl+0x2a1/0x430 [] scsi_cmd_blk_ioctl+0x42/0x50 [] sd_ioctl+0xbe/0x140 [sd_mod] [] blkdev_ioctl+0x234/0x840 [] block_ioctl+0x41/0x50 [] do_vfs_ioctl+0x300/0x520 [] SyS_ioctl+0x41/0x80 [] tracesys+0xd0/0xd5 Fix this by scheduling removal work on another workqueue than the transport layer timers. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: David Dillow Cc: Sebastian Parschauer Signed-off-by: Roland Dreier Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 38 +++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1954daac0b59..35dd5ff662f1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -93,6 +93,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); static struct scsi_transport_template *ib_srp_transport_template; +static struct workqueue_struct *srp_remove_wq; static struct ib_client srp_client = { .name = "srp", @@ -456,7 +457,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target) spin_unlock_irq(&target->lock); if (changed) - queue_work(system_long_wq, &target->remove_work); + queue_work(srp_remove_wq, &target->remove_work); return changed; } @@ -2530,9 +2531,10 @@ static void srp_remove_one(struct ib_device *device) spin_unlock(&host->target_lock); /* - * Wait for target port removal tasks. + * Wait for tl_err and target port removal tasks. */ flush_workqueue(system_long_wq); + flush_workqueue(srp_remove_wq); kfree(host); } @@ -2577,16 +2579,22 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + srp_remove_wq = create_workqueue("srp_remove"); + if (IS_ERR(srp_remove_wq)) { + ret = PTR_ERR(srp_remove_wq); + goto out; + } + + ret = -ENOMEM; ib_srp_transport_template = srp_attach_transport(&ib_srp_transport_functions); if (!ib_srp_transport_template) - return -ENOMEM; + goto destroy_wq; ret = class_register(&srp_class); if (ret) { pr_err("couldn't register class infiniband_srp\n"); - srp_release_transport(ib_srp_transport_template); - return ret; + goto release_tr; } ib_sa_register_client(&srp_sa_client); @@ -2594,13 +2602,22 @@ static int __init srp_init_module(void) ret = ib_register_client(&srp_client); if (ret) { pr_err("couldn't register IB client\n"); - srp_release_transport(ib_srp_transport_template); - ib_sa_unregister_client(&srp_sa_client); - class_unregister(&srp_class); - return ret; + goto unreg_sa; } - return 0; +out: + return ret; + +unreg_sa: + ib_sa_unregister_client(&srp_sa_client); + class_unregister(&srp_class); + +release_tr: + srp_release_transport(ib_srp_transport_template); + +destroy_wq: + destroy_workqueue(srp_remove_wq); + goto out; } static void __exit srp_cleanup_module(void) @@ -2609,6 +2626,7 @@ static void __exit srp_cleanup_module(void) ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); srp_release_transport(ib_srp_transport_template); + destroy_workqueue(srp_remove_wq); } module_init(srp_init_module); From a6c56468b3f3274896ee8da73608dc48ad4103e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Oct 2013 16:41:01 -0400 Subject: [PATCH 69/74] dcache.c: get rid of pointless macros commit 482db9066199813d6b999b65a3171afdbec040b6 upstream. D_HASH{MASK,BITS} are used once each, both in the same function (d_hash()). At this point they are actively misguiding - they imply that values are compiler constants, which is no longer true. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 9a59653d3449..f867c53a7989 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -96,8 +96,6 @@ static struct kmem_cache *dentry_cache __read_mostly; * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ -#define D_HASHBITS d_hash_shift -#define D_HASHMASK d_hash_mask static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; @@ -108,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); + hash = hash + (hash >> d_hash_shift); + return dentry_hashtable + (hash & d_hash_mask); } /* Statistics gathering. */ From d4c96061fddd129778ce8b70fb093aa532f422d0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 13 Sep 2014 11:30:10 -0700 Subject: [PATCH 70/74] vfs: fix bad hashing of dentries commit 99d263d4c5b2f541dfacb5391e22e8c91ea982a6 upstream. Josef Bacik found a performance regression between 3.2 and 3.10 and narrowed it down to commit bfcfaa77bdf0 ("vfs: use 'unsigned long' accesses for dcache name comparison and hashing"). He reports: "The test case is essentially for (i = 0; i < 1000000; i++) mkdir("a$i"); On xfs on a fio card this goes at about 20k dir/sec with 3.2, and 12k dir/sec with 3.10. This is because we spend waaaaay more time in __d_lookup on 3.10 than in 3.2. The new hashing function for strings is suboptimal for < sizeof(unsigned long) string names (and hell even > sizeof(unsigned long) string names that I've tested). I broke out the old hashing function and the new one into a userspace helper to get real numbers and this is what I'm getting: Old hash table had 1000000 entries, 0 dupes, 0 max dupes New hash table had 12628 entries, 987372 dupes, 900 max dupes We had 11400 buckets with a p50 of 30 dupes, p90 of 240 dupes, p99 of 567 dupes for the new hash My test does the hash, and then does the d_hash into a integer pointer array the same size as the dentry hash table on my system, and then just increments the value at the address we got to see how many entries we overlap with. As you can see the old hash function ended up with all 1 million entries in their own bucket, whereas the new one they are only distributed among ~12.5k buckets, which is why we're using so much more CPU in __d_lookup". The reason for this hash regression is two-fold: - On 64-bit architectures the down-mixing of the original 64-bit word-at-a-time hash into the final 32-bit hash value is very simplistic and suboptimal, and just adds the two 32-bit parts together. In particular, because there is no bit shuffling and the mixing boundary is also a byte boundary, similar character patterns in the low and high word easily end up just canceling each other out. - the old byte-at-a-time hash mixed each byte into the final hash as it hashed the path component name, resulting in the low bits of the hash generally being a good source of hash data. That is not true for the word-at-a-time case, and the hash data is distributed among all the bits. The fix is the same in both cases: do a better job of mixing the bits up and using as much of the hash data as possible. We already have the "hash_32|64()" functions to do that. Reported-by: Josef Bacik Cc: Al Viro Cc: Christoph Hellwig Cc: Chris Mason Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 3 +-- fs/namei.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index f867c53a7989..25c0a1b5f6c0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -106,8 +106,7 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> d_hash_shift); - return dentry_hashtable + (hash & d_hash_mask); + return dentry_hashtable + hash_32(hash, d_hash_shift); } /* Statistics gathering. */ diff --git a/fs/namei.c b/fs/namei.c index 6ac16a37ded2..f7c4393f8535 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1647,8 +1648,7 @@ static inline int can_lookup(struct inode *inode) static inline unsigned int fold_hash(unsigned long hash) { - hash += hash >> (8*sizeof(int)); - return hash; + return hash_64(hash, 32); } #else /* 32-bit case */ From d64269e30131fbd8a2228323266c9e84ee6ab80d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 21 May 2014 18:26:44 -0600 Subject: [PATCH 71/74] tpm: Provide a generic means to override the chip returned timeouts commit 8e54caf407b98efa05409e1fee0e5381abd2b088 upstream. Some Atmel TPMs provide completely wrong timeouts from their TPM_CAP_PROP_TIS_TIMEOUT query. This patch detects that and returns new correct values via a DID/VID table in the TIS driver. Tested on ARM using an AT97SC3204T FW version 37.16 [PHuewe: without this fix these 'broken' Atmel TPMs won't function on older kernels] Signed-off-by: "Berg, Christopher" Signed-off-by: Jason Gunthorpe Signed-off-by: Peter Huewe [bwh: Backported to 3.10: - Adjust filename, context - s/chip->ops->/chip->vendor./] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm.c | 62 +++++++++++++++++++++++++------------- drivers/char/tpm/tpm.h | 3 ++ drivers/char/tpm/tpm_tis.c | 31 +++++++++++++++++++ 3 files changed, 75 insertions(+), 21 deletions(-) diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 01d6968a9e47..f659a571ad23 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -533,11 +533,10 @@ static int tpm_startup(struct tpm_chip *chip, __be16 startup_type) int tpm_get_timeouts(struct tpm_chip *chip) { struct tpm_cmd_t tpm_cmd; - struct timeout_t *timeout_cap; + unsigned long new_timeout[4]; + unsigned long old_timeout[4]; struct duration_t *duration_cap; ssize_t rc; - u32 timeout; - unsigned int scale = 1; tpm_cmd.header.in = tpm_getcap_header; tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; @@ -571,25 +570,46 @@ int tpm_get_timeouts(struct tpm_chip *chip) != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32)) return -EINVAL; - timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout; - /* Don't overwrite default if value is 0 */ - timeout = be32_to_cpu(timeout_cap->a); - if (timeout && timeout < 1000) { - /* timeouts in msec rather usec */ - scale = 1000; - chip->vendor.timeout_adjusted = true; + old_timeout[0] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.a); + old_timeout[1] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.b); + old_timeout[2] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.c); + old_timeout[3] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.d); + memcpy(new_timeout, old_timeout, sizeof(new_timeout)); + + /* + * Provide ability for vendor overrides of timeout values in case + * of misreporting. + */ + if (chip->vendor.update_timeouts != NULL) + chip->vendor.timeout_adjusted = + chip->vendor.update_timeouts(chip, new_timeout); + + if (!chip->vendor.timeout_adjusted) { + /* Don't overwrite default if value is 0 */ + if (new_timeout[0] != 0 && new_timeout[0] < 1000) { + int i; + + /* timeouts in msec rather usec */ + for (i = 0; i != ARRAY_SIZE(new_timeout); i++) + new_timeout[i] *= 1000; + chip->vendor.timeout_adjusted = true; + } } - if (timeout) - chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->b); - if (timeout) - chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->c); - if (timeout) - chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->d); - if (timeout) - chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale); + + /* Report adjusted timeouts */ + if (chip->vendor.timeout_adjusted) { + dev_info(chip->dev, + HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n", + old_timeout[0], new_timeout[0], + old_timeout[1], new_timeout[1], + old_timeout[2], new_timeout[2], + old_timeout[3], new_timeout[3]); + } + + chip->vendor.timeout_a = usecs_to_jiffies(new_timeout[0]); + chip->vendor.timeout_b = usecs_to_jiffies(new_timeout[1]); + chip->vendor.timeout_c = usecs_to_jiffies(new_timeout[2]); + chip->vendor.timeout_d = usecs_to_jiffies(new_timeout[3]); duration: tpm_cmd.header.in = tpm_getcap_header; diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 0770d1d79366..deffda7678a0 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -95,6 +95,9 @@ struct tpm_vendor_specific { int (*send) (struct tpm_chip *, u8 *, size_t); void (*cancel) (struct tpm_chip *); u8 (*status) (struct tpm_chip *); + bool (*update_timeouts)(struct tpm_chip *chip, + unsigned long *timeout_cap); + void (*release) (struct device *); struct miscdevice miscdev; struct attribute_group *attr_group; diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 8a41b6be23a0..72f21377fa02 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -373,6 +373,36 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) return rc; } +struct tis_vendor_timeout_override { + u32 did_vid; + unsigned long timeout_us[4]; +}; + +static const struct tis_vendor_timeout_override vendor_timeout_overrides[] = { + /* Atmel 3204 */ + { 0x32041114, { (TIS_SHORT_TIMEOUT*1000), (TIS_LONG_TIMEOUT*1000), + (TIS_SHORT_TIMEOUT*1000), (TIS_SHORT_TIMEOUT*1000) } }, +}; + +static bool tpm_tis_update_timeouts(struct tpm_chip *chip, + unsigned long *timeout_cap) +{ + int i; + u32 did_vid; + + did_vid = ioread32(chip->vendor.iobase + TPM_DID_VID(0)); + + for (i = 0; i != ARRAY_SIZE(vendor_timeout_overrides); i++) { + if (vendor_timeout_overrides[i].did_vid != did_vid) + continue; + memcpy(timeout_cap, vendor_timeout_overrides[i].timeout_us, + sizeof(vendor_timeout_overrides[i].timeout_us)); + return true; + } + + return false; +} + /* * Early probing for iTPM with STS_DATA_EXPECT flaw. * Try sending command without itpm flag set and if that @@ -475,6 +505,7 @@ static struct tpm_vendor_specific tpm_tis = { .recv = tpm_tis_recv, .send = tpm_tis_send, .cancel = tpm_tis_ready, + .update_timeouts = tpm_tis_update_timeouts, .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID, .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID, .req_canceled = tpm_tis_req_canceled, From 842a5780d61743550cf319f1bb4aee6778088b1c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Jan 2014 20:08:21 +0200 Subject: [PATCH 72/74] libceph: rename ceph_msg::front_max to front_alloc_len commit 3cea4c3071d4e55e9d7356efe9d0ebf92f0c2204 upstream. Rename front_max field of struct ceph_msg to front_alloc_len to make its purpose more clear. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/messenger.h | 2 +- net/ceph/messenger.c | 6 +++--- net/ceph/mon_client.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 7c1420bb1dce..6ade97de7a85 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -157,7 +157,7 @@ struct ceph_msg { bool front_is_vmalloc; bool more_to_follow; bool needs_out_seq; - int front_max; + int front_alloc_len; unsigned long ack_stamp; /* tx: when we were acked */ struct ceph_msgpool *pool; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 6ff7d9dc240f..66e77f380fce 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3144,7 +3144,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, INIT_LIST_HEAD(&m->data); /* front */ - m->front_max = front_len; + m->front_alloc_len = front_len; if (front_len) { if (front_len > PAGE_CACHE_SIZE) { m->front.iov_base = __vmalloc(front_len, flags, @@ -3319,8 +3319,8 @@ EXPORT_SYMBOL(ceph_msg_last_put); void ceph_msg_dump(struct ceph_msg *msg) { - pr_debug("msg_dump %p (front_max %d length %zd)\n", msg, - msg->front_max, msg->data_length); + pr_debug("msg_dump %p (front_alloc_len %d length %zd)\n", msg, + msg->front_alloc_len, msg->data_length); print_hex_dump(KERN_DEBUG, "header: ", DUMP_PREFIX_OFFSET, 16, 1, &msg->hdr, sizeof(msg->hdr), true); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 1fe25cd29d0e..2ac9ef35110b 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -152,7 +152,7 @@ static int __open_session(struct ceph_mon_client *monc) /* initiatiate authentication handshake */ ret = ceph_auth_build_hello(monc->auth, monc->m_auth->front.iov_base, - monc->m_auth->front_max); + monc->m_auth->front_alloc_len); __send_prepared_auth_request(monc, ret); } else { dout("open_session mon%d already open\n", monc->cur_mon); @@ -196,7 +196,7 @@ static void __send_subscribe(struct ceph_mon_client *monc) int num; p = msg->front.iov_base; - end = p + msg->front_max; + end = p + msg->front_alloc_len; num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; ceph_encode_32(&p, num); @@ -897,7 +897,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, msg->front.iov_len, monc->m_auth->front.iov_base, - monc->m_auth->front_max); + monc->m_auth->front_alloc_len); if (ret < 0) { monc->client->auth_err = ret; wake_up_all(&monc->client->auth_wq); @@ -939,7 +939,7 @@ static int __validate_auth(struct ceph_mon_client *monc) return 0; ret = ceph_build_auth(monc->auth, monc->m_auth->front.iov_base, - monc->m_auth->front_max); + monc->m_auth->front_alloc_len); if (ret <= 0) return ret; /* either an error, or no need to authenticate */ __send_prepared_auth_request(monc, ret); From 12477ec830cb1bd188f23b80f6a0d976dd19090e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 4 Aug 2014 07:01:54 -0700 Subject: [PATCH 73/74] libceph: gracefully handle large reply messages from the mon commit 73c3d4812b4c755efeca0140f606f83772a39ce4 upstream. We preallocate a few of the message types we get back from the mon. If we get a larger message than we are expecting, fall back to trying to allocate a new one instead of blindly using the one we have. Signed-off-by: Sage Weil Reviewed-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- net/ceph/mon_client.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2ac9ef35110b..dbcbf5a4707f 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1041,7 +1041,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, if (!m) { pr_info("alloc_msg unknown type %d\n", type); *skip = 1; + } else if (front_len > m->front_alloc_len) { + pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n", + front_len, m->front_alloc_len, + (unsigned int)con->peer_name.type, + le64_to_cpu(con->peer_name.num)); + ceph_msg_put(m); + m = ceph_msg_new(type, front_len, GFP_NOFS, false); } + return m; } From 339f8f37f0203884332585e38c06536c8477d475 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Sep 2014 09:04:18 -0700 Subject: [PATCH 74/74] Linux 3.10.55 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9429aa5e89de..6141df04fcb5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 54 +SUBLEVEL = 55 EXTRAVERSION = NAME = TOSSUG Baby Fish