From 52965d6f52e69bfcc55e60145f116840d8529d9d Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 25 Jun 2018 21:45:37 +0900 Subject: [PATCH 001/812] crypto: skcipher - Fix -Wstringop-truncation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cefd769fd0192c84d638f66da202459ed8ad63ba ] As of GCC 9.0.0 the build is reporting warnings like: crypto/ablkcipher.c: In function ‘crypto_ablkcipher_report’: crypto/ablkcipher.c:374:2: warning: ‘strncpy’ specified bound 64 equals destination size [-Wstringop-truncation] strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sizeof(rblkcipher.geniv)); ~~~~~~~~~~~~~~~~~~~~~~~~~ This means the strnycpy might create a non null terminated string. Fix this by explicitly performing '\0' termination. Cc: Greg Kroah-Hartman Cc: Arnd Bergmann Cc: Max Filippov Cc: Eric Biggers Cc: Nick Desaulniers Signed-off-by: Stafford Horne Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- crypto/ablkcipher.c | 2 ++ crypto/blkcipher.c | 1 + 2 files changed, 3 insertions(+) diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index 149e7a7f04fe..b524f702e658 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -384,6 +384,7 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) strncpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type)); strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "", sizeof(rblkcipher.geniv)); + rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize; @@ -465,6 +466,7 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg) strncpy(rblkcipher.type, "givcipher", sizeof(rblkcipher.type)); strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "", sizeof(rblkcipher.geniv)); + rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize; diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 2d08e59b3212..d524f838eb10 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -515,6 +515,7 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) strncpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type)); strncpy(rblkcipher.geniv, alg->cra_blkcipher.geniv ?: "", sizeof(rblkcipher.geniv)); + rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_blkcipher.min_keysize; From 2c4d20c8c2d6556b793b870b618e76b7d0cc6b74 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Fri, 8 Jun 2018 23:58:15 -0700 Subject: [PATCH 002/812] tsl2550: fix lux1_input error in low light [ Upstream commit ce054546cc2c26891cefa2f284d90d93b52205de ] ADC channel 0 photodiode detects both infrared + visible light, but ADC channel 1 just detects infrared. However, the latter is a bit more sensitive in that range so complete darkness or low light causes a error condition in which the chan0 - chan1 is negative that results in a -EAGAIN. This patch changes the resulting lux1_input sysfs attribute message from "Resource temporarily unavailable" to a user-grokable lux value of 0. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Matt Ranostay Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/tsl2550.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c index 87a13374fdc0..eb5761067310 100644 --- a/drivers/misc/tsl2550.c +++ b/drivers/misc/tsl2550.c @@ -177,7 +177,7 @@ static int tsl2550_calculate_lux(u8 ch0, u8 ch1) } else lux = 0; else - return -EAGAIN; + return 0; /* LUX range check */ return lux > TSL2550_MAX_LUX ? TSL2550_MAX_LUX : lux; From 344edd7fc4083217bf26534e3e3568ea00aa47ca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:33:34 +0300 Subject: [PATCH 003/812] vmci: type promotion bug in qp_host_get_user_memory() [ Upstream commit 7fb2fd4e25fc1fb10dcb30b5519de257cfeae84c ] The problem is that if get_user_pages_fast() fails and returns a negative error code, it gets type promoted to a high positive value and treated as a success. Fixes: 06164d2b72aa ("VMCI: queue pairs implementation.") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_queue_pair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index cc277f7849b0..3877f534fd3f 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -755,7 +755,7 @@ static int qp_host_get_user_memory(u64 produce_uva, retval = get_user_pages_fast((uintptr_t) produce_uva, produce_q->kernel_if->num_pages, 1, produce_q->kernel_if->u.h.header_page); - if (retval < produce_q->kernel_if->num_pages) { + if (retval < (int)produce_q->kernel_if->num_pages) { pr_debug("get_user_pages_fast(produce) failed (retval=%d)", retval); qp_release_pages(produce_q->kernel_if->u.h.header_page, @@ -767,7 +767,7 @@ static int qp_host_get_user_memory(u64 produce_uva, retval = get_user_pages_fast((uintptr_t) consume_uva, consume_q->kernel_if->num_pages, 1, consume_q->kernel_if->u.h.header_page); - if (retval < consume_q->kernel_if->num_pages) { + if (retval < (int)consume_q->kernel_if->num_pages) { pr_debug("get_user_pages_fast(consume) failed (retval=%d)", retval); qp_release_pages(consume_q->kernel_if->u.h.header_page, From 12b093d3b6444e67780840b2a8c30768193eab4d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 6 Jul 2018 09:08:01 -0700 Subject: [PATCH 004/812] x86/numa_emulation: Fix emulated-to-physical node mapping [ Upstream commit 3b6c62f363a19ce82bf378187ab97c9dc01e3927 ] Without this change the distance table calculation for emulated nodes may use the wrong numa node and report an incorrect distance. Signed-off-by: Dan Williams Cc: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wei Yang Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/153089328103.27680.14778434392225818887.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/numa_emulation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index a8f90ce3dedf..dc6d99017f3f 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -60,7 +60,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, eb->nid = nid; if (emu_nid_to_phys[nid] == NUMA_NO_NODE) - emu_nid_to_phys[nid] = nid; + emu_nid_to_phys[nid] = pb->nid; pb->start += size; if (pb->start >= pb->end) { From 6732f6d09bde6b8957c866915c2a28e3a8605183 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Jul 2018 14:27:35 +0100 Subject: [PATCH 005/812] staging: rts5208: fix missing error check on call to rtsx_write_register [ Upstream commit c5fae4f4fd28189b1062fb8ef7b21fec37cb8b17 ] Currently the check on error return from the call to rtsx_write_register is checking the error status from the previous call. Fix this by adding in the missing assignment of retval. Detected by CoverityScan, CID#709877 Fixes: fa590c222fba ("staging: rts5208: add support for rts5208 and rts5288") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rts5208/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rts5208/sd.c b/drivers/staging/rts5208/sd.c index d6c498209b2c..9a876ce92dbd 100644 --- a/drivers/staging/rts5208/sd.c +++ b/drivers/staging/rts5208/sd.c @@ -5031,7 +5031,7 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip) goto SD_Execute_Write_Cmd_Failed; } - rtsx_write_register(chip, SD_BYTE_CNT_L, 0xFF, 0x00); + retval = rtsx_write_register(chip, SD_BYTE_CNT_L, 0xFF, 0x00); if (retval != STATUS_SUCCESS) { rtsx_trace(chip); goto SD_Execute_Write_Cmd_Failed; From e92968792c2bf75c598b5a0675ba1a89bb03247d Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 6 Jul 2018 15:32:53 +0300 Subject: [PATCH 006/812] uwb: hwa-rc: fix memory leak at probe [ Upstream commit 11b71782c1d10d9bccc31825cf84291cd7588a1e ] hwarc_probe() allocates memory for hwarc, but does not free it if uwb_rc_add() or hwarc_get_version() fail. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/hwa-rc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 1212b4b3c5a9..e9ff710a3d12 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -875,6 +875,7 @@ static int hwarc_probe(struct usb_interface *iface, error_rc_add: usb_put_intf(iface); usb_put_dev(hwarc->usb_dev); + kfree(hwarc); error_alloc: uwb_rc_put(uwb_rc); error_rc_alloc: From 97294c24f8682913458d11bcf60dd923c0ee24b2 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 18 Jun 2018 16:54:32 +0100 Subject: [PATCH 007/812] power: vexpress: fix corruption in notifier registration [ Upstream commit 09bebb1adb21ecd04adf7ccb3b06f73e3a851e93 ] Vexpress platforms provide two different restart handlers: SYS_REBOOT that restart the entire system, while DB_RESET only restarts the daughter board containing the CPU. DB_RESET is overridden by SYS_REBOOT if it exists. notifier_chain_register used in register_restart_handler by design relies on notifiers to be registered once only, however vexpress restart notifier can get registered twice. When this happen it corrupts list of notifiers, as result some notifiers can be not called on proper event, traverse on list can be cycled forever, and second unregister can access already freed memory. So far, since this was the only restart handler in the system, no issue was observed even if the same notifier was registered twice. However commit 6c5c0d48b686 ("watchdog: sp805: add restart handler") added support for SP805 restart handlers and since the system under test contains two vexpress restart and two SP805 watchdog instances, it was observed that during the boot traversing the restart handler list looped forever as there's a cycle in that list resulting in boot hang. This patch fixes the issues by ensuring that the notifier is installed only once. Cc: Sebastian Reichel Signed-off-by: Sudeep Holla Fixes: 46c99ac66222 ("power/reset: vexpress: Register with kernel restart handler") Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/power/reset/vexpress-poweroff.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/power/reset/vexpress-poweroff.c b/drivers/power/reset/vexpress-poweroff.c index 6a9bf7089373..ccb619632e46 100644 --- a/drivers/power/reset/vexpress-poweroff.c +++ b/drivers/power/reset/vexpress-poweroff.c @@ -35,6 +35,7 @@ static void vexpress_reset_do(struct device *dev, const char *what) } static struct device *vexpress_power_off_device; +static atomic_t vexpress_restart_nb_refcnt = ATOMIC_INIT(0); static void vexpress_power_off(void) { @@ -99,10 +100,13 @@ static int _vexpress_register_restart_handler(struct device *dev) int err; vexpress_restart_device = dev; - err = register_restart_handler(&vexpress_restart_nb); - if (err) { - dev_err(dev, "cannot register restart handler (err=%d)\n", err); - return err; + if (atomic_inc_return(&vexpress_restart_nb_refcnt) == 1) { + err = register_restart_handler(&vexpress_restart_nb); + if (err) { + dev_err(dev, "cannot register restart handler (err=%d)\n", err); + atomic_dec(&vexpress_restart_nb_refcnt); + return err; + } } device_create_file(dev, &dev_attr_active); From 850f9c0c1b8412ac4b4f571d338f554c0eac53cf Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 25 May 2018 17:54:52 +0800 Subject: [PATCH 008/812] Bluetooth: Add a new Realtek 8723DE ID 0bda:b009 [ Upstream commit 45ae68b8cfc25bdbffc11248001c47ab1b76ff6e ] Without this patch we cannot turn on the Bluethooth adapter on HP 14-bs007la. T: Bus=01 Lev=02 Prnt=03 Port=00 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=b009 Rev= 2.00 S: Manufacturer=Realtek S: Product=802.11n WLAN Adapter S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Jian-Hong Pan Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 4a899b41145e..b0a12e6dae43 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -340,6 +340,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8723DE Bluetooth devices */ + { USB_DEVICE(0x0bda, 0xb009), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8821AE Bluetooth devices */ From 5fe874e1f3499a77f8a0c29712c9a0c85eb59c98 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Jul 2018 17:02:18 +0200 Subject: [PATCH 009/812] USB: serial: kobil_sct: fix modem-status error handling [ Upstream commit a420b5d939ee58f1d950f0ea782834056520aeaa ] Make sure to return -EIO in case of a short modem-status read request. While at it, split the debug message to not include the (zeroed) transfer-buffer content in case of errors. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/kobil_sct.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 813035f51fe7..7d252678c55a 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -408,12 +408,20 @@ static int kobil_tiocmget(struct tty_struct *tty) transfer_buffer_length, KOBIL_TIMEOUT); - dev_dbg(&port->dev, "%s - Send get_status_line_state URB returns: %i. Statusline: %02x\n", - __func__, result, transfer_buffer[0]); + dev_dbg(&port->dev, "Send get_status_line_state URB returns: %i\n", + result); + if (result < 1) { + if (result >= 0) + result = -EIO; + goto out_free; + } + + dev_dbg(&port->dev, "Statusline: %02x\n", transfer_buffer[0]); result = 0; if ((transfer_buffer[0] & SUSBCR_GSL_DSR) != 0) result = TIOCM_DSR; +out_free: kfree(transfer_buffer); return result; } From 35af729caeaa863fc485000a36a5074d5ba0b6aa Mon Sep 17 00:00:00 2001 From: Michael Scott Date: Tue, 19 Jun 2018 16:44:06 -0700 Subject: [PATCH 010/812] 6lowpan: iphc: reset mac_header after decompress to fix panic [ Upstream commit 03bc05e1a4972f73b4eb8907aa373369e825c252 ] After decompression of 6lowpan socket data, an IPv6 header is inserted before the existing socket payload. After this, we reset the network_header value of the skb to account for the difference in payload size from prior to decompression + the addition of the IPv6 header. However, we fail to reset the mac_header value. Leaving the mac_header value untouched here, can cause a calculation error in net/packet/af_packet.c packet_rcv() function when an AF_PACKET socket is opened in SOCK_RAW mode for use on a 6lowpan interface. On line 2088, the data pointer is moved backward by the value returned from skb_mac_header(). If skb->data is adjusted so that it is before the skb->head pointer (which can happen when an old value of mac_header is left in place) the kernel generates a panic in net/core/skbuff.c line 1717. This panic can be generated by BLE 6lowpan interfaces (such as bt0) and 802.15.4 interfaces (such as lowpan0) as they both use the same 6lowpan sources for compression and decompression. Signed-off-by: Michael Scott Acked-by: Alexander Aring Acked-by: Jukka Rissanen Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/6lowpan/iphc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 346b5c1a9185..c40eb04dd856 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -569,6 +569,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, hdr.hop_limit, &hdr.daddr); skb_push(skb, sizeof(hdr)); + skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_copy_to_linear_data(skb, &hdr, sizeof(hdr)); From 825fabcb91aa23dc66e7f29c8fb5d51916fde7b7 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 2 Jul 2018 16:26:24 +0800 Subject: [PATCH 011/812] md-cluster: clear another node's suspend_area after the copy is finished [ Upstream commit 010228e4a932ca1e8365e3b58c8e1e44c16ff793 ] When one node leaves cluster or stops the resyncing (resync or recovery) array, then other nodes need to call recover_bitmaps to continue the unfinished task. But we need to clear suspend_area later after other nodes copy the resync information to their bitmap (by call bitmap_copy_from_slot). Otherwise, all nodes could write to the suspend_area even the suspend_area is not handled by any node, because area_resyncing returns 0 at the beginning of raid1_write_request. Which means one node could write suspend_area while another node is resyncing the same area, then data could be inconsistent. So let's clear suspend_area later to avoid above issue with the protection of bm lock. Also it is straightforward to clear suspend_area after nodes have copied the resync info to bitmap. Signed-off-by: Guoqing Jiang Reviewed-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/md-cluster.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index a7a561af05c9..617a0aefc1c4 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -239,15 +239,6 @@ static void recover_bitmaps(struct md_thread *thread) while (cinfo->recovery_map) { slot = fls64((u64)cinfo->recovery_map) - 1; - /* Clear suspend_area associated with the bitmap */ - spin_lock_irq(&cinfo->suspend_lock); - list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) - if (slot == s->slot) { - list_del(&s->list); - kfree(s); - } - spin_unlock_irq(&cinfo->suspend_lock); - snprintf(str, 64, "bitmap%04d", slot); bm_lockres = lockres_init(mddev, str, NULL, 1); if (!bm_lockres) { @@ -266,6 +257,16 @@ static void recover_bitmaps(struct md_thread *thread) pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); goto dlm_unlock; } + + /* Clear suspend_area associated with the bitmap */ + spin_lock_irq(&cinfo->suspend_lock); + list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) + if (slot == s->slot) { + list_del(&s->list); + kfree(s); + } + spin_unlock_irq(&cinfo->suspend_lock); + if (hi > 0) { /* TODO:Wait for current resync to get over */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); From 96b1608d01f588fdf9d76f3b76b116edd0a3d33d Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 15 May 2018 05:21:45 -0400 Subject: [PATCH 012/812] media: exynos4-is: Prevent NULL pointer dereference in __isp_video_try_fmt() [ Upstream commit 7c1b9a5aeed91bef98988ac0fcf38c8c1f4f9a3a ] This patch fixes potential NULL pointer dereference as indicated by the following static checker warning: drivers/media/platform/exynos4-is/fimc-isp-video.c:408 isp_video_try_fmt_mplane() error: NULL dereference inside function '__isp_video_try_fmt(isp, &f->fmt.pix_mp, (0))()'. Fixes: 34947b8aebe3: ("[media] exynos4-is: Add the FIMC-IS ISP capture DMA driver") Reported-by: Dan Carpenter Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/exynos4-is/fimc-isp-video.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c index 6e6648446f00..667d3720154a 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.c +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c @@ -391,12 +391,17 @@ static void __isp_video_try_fmt(struct fimc_isp *isp, struct v4l2_pix_format_mplane *pixm, const struct fimc_fmt **fmt) { - *fmt = fimc_isp_find_format(&pixm->pixelformat, NULL, 2); + const struct fimc_fmt *__fmt; + + __fmt = fimc_isp_find_format(&pixm->pixelformat, NULL, 2); + + if (fmt) + *fmt = __fmt; pixm->colorspace = V4L2_COLORSPACE_SRGB; pixm->field = V4L2_FIELD_NONE; - pixm->num_planes = (*fmt)->memplanes; - pixm->pixelformat = (*fmt)->fourcc; + pixm->num_planes = __fmt->memplanes; + pixm->pixelformat = __fmt->fourcc; /* * TODO: double check with the docmentation these width/height * constraints are correct. From 3e7de9ebadc344d66b046add2ce981b9cb75e770 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 28 Jun 2018 10:49:56 +0530 Subject: [PATCH 013/812] powerpc/kdump: Handle crashkernel memory reservation failure [ Upstream commit 8950329c4a64c6d3ca0bc34711a1afbd9ce05657 ] Memory reservation for crashkernel could fail if there are holes around kdump kernel offset (128M). Fail gracefully in such cases and print an error message. Signed-off-by: Hari Bathini Tested-by: David Gibson Reviewed-by: Dave Young Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/machine_kexec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 015ae55c1868..8dff2b371219 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -186,7 +186,12 @@ void __init reserve_crashkernel(void) (unsigned long)(crashk_res.start >> 20), (unsigned long)(memblock_phys_mem_size() >> 20)); - memblock_reserve(crashk_res.start, crash_size); + if (!memblock_is_region_memory(crashk_res.start, crash_size) || + memblock_reserve(crashk_res.start, crash_size)) { + pr_err("Failed to reserve memory for crashkernel!\n"); + crashk_res.start = crashk_res.end = 0; + return; + } } int overlaps_crashkernel(unsigned long start, unsigned long size) From 6a18e32b35679b183742e2f7d119e80a2394739f Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 29 Jun 2018 17:49:22 -0400 Subject: [PATCH 014/812] media: fsl-viu: fix error handling in viu_of_probe() [ Upstream commit 662a99e145661c2b35155cf375044deae9b79896 ] viu_of_probe() ignores fails in i2c_get_adapter(), tries to unlock uninitialized mutex on error path. The patch streamlining the error handling in viu_of_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/fsl-viu.c | 38 +++++++++++++++++++------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/media/platform/fsl-viu.c b/drivers/media/platform/fsl-viu.c index ae8c6b35a357..7f0ed5a26da9 100644 --- a/drivers/media/platform/fsl-viu.c +++ b/drivers/media/platform/fsl-viu.c @@ -1417,7 +1417,7 @@ static int viu_of_probe(struct platform_device *op) sizeof(struct viu_reg), DRV_NAME)) { dev_err(&op->dev, "Error while requesting mem region\n"); ret = -EBUSY; - goto err; + goto err_irq; } /* remap registers */ @@ -1425,7 +1425,7 @@ static int viu_of_probe(struct platform_device *op) if (!viu_regs) { dev_err(&op->dev, "Can't map register set\n"); ret = -ENOMEM; - goto err; + goto err_irq; } /* Prepare our private structure */ @@ -1433,7 +1433,7 @@ static int viu_of_probe(struct platform_device *op) if (!viu_dev) { dev_err(&op->dev, "Can't allocate private structure\n"); ret = -ENOMEM; - goto err; + goto err_irq; } viu_dev->vr = viu_regs; @@ -1449,16 +1449,21 @@ static int viu_of_probe(struct platform_device *op) ret = v4l2_device_register(viu_dev->dev, &viu_dev->v4l2_dev); if (ret < 0) { dev_err(&op->dev, "v4l2_device_register() failed: %d\n", ret); - goto err; + goto err_irq; } ad = i2c_get_adapter(0); + if (!ad) { + ret = -EFAULT; + dev_err(&op->dev, "couldn't get i2c adapter\n"); + goto err_v4l2; + } v4l2_ctrl_handler_init(&viu_dev->hdl, 5); if (viu_dev->hdl.error) { ret = viu_dev->hdl.error; dev_err(&op->dev, "couldn't register control\n"); - goto err_vdev; + goto err_i2c; } /* This control handler will inherit the control(s) from the sub-device(s). */ @@ -1476,7 +1481,7 @@ static int viu_of_probe(struct platform_device *op) vdev = video_device_alloc(); if (vdev == NULL) { ret = -ENOMEM; - goto err_vdev; + goto err_hdl; } *vdev = viu_template; @@ -1497,7 +1502,7 @@ static int viu_of_probe(struct platform_device *op) ret = video_register_device(viu_dev->vdev, VFL_TYPE_GRABBER, -1); if (ret < 0) { video_device_release(viu_dev->vdev); - goto err_vdev; + goto err_unlock; } /* enable VIU clock */ @@ -1505,12 +1510,12 @@ static int viu_of_probe(struct platform_device *op) if (IS_ERR(clk)) { dev_err(&op->dev, "failed to lookup the clock!\n"); ret = PTR_ERR(clk); - goto err_clk; + goto err_vdev; } ret = clk_prepare_enable(clk); if (ret) { dev_err(&op->dev, "failed to enable the clock!\n"); - goto err_clk; + goto err_vdev; } viu_dev->clk = clk; @@ -1521,7 +1526,7 @@ static int viu_of_probe(struct platform_device *op) if (request_irq(viu_dev->irq, viu_intr, 0, "viu", (void *)viu_dev)) { dev_err(&op->dev, "Request VIU IRQ failed.\n"); ret = -ENODEV; - goto err_irq; + goto err_clk; } mutex_unlock(&viu_dev->lock); @@ -1529,16 +1534,19 @@ static int viu_of_probe(struct platform_device *op) dev_info(&op->dev, "Freescale VIU Video Capture Board\n"); return ret; -err_irq: - clk_disable_unprepare(viu_dev->clk); err_clk: - video_unregister_device(viu_dev->vdev); + clk_disable_unprepare(viu_dev->clk); err_vdev: - v4l2_ctrl_handler_free(&viu_dev->hdl); + video_unregister_device(viu_dev->vdev); +err_unlock: mutex_unlock(&viu_dev->lock); +err_hdl: + v4l2_ctrl_handler_free(&viu_dev->hdl); +err_i2c: i2c_put_adapter(ad); +err_v4l2: v4l2_device_unregister(&viu_dev->v4l2_dev); -err: +err_irq: irq_dispose_mapping(viu_irq); return ret; } From 4c51e1b78e593dc64710691c07279413aaeed97b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jun 2018 22:31:10 +0300 Subject: [PATCH 015/812] x86/tsc: Add missing header to tsc_msr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dbd0fbc76c77daac08ddd245afdcbade0d506e19 ] Add a missing header otherwise compiler warns about missed prototype: CC arch/x86/kernel/tsc_msr.o arch/x86/kernel/tsc_msr.c:73:15: warning: no previous prototype for ‘cpu_khz_from_msr’ [-Wmissing-prototypes] unsigned long cpu_khz_from_msr(void) ^~~~~~~~~~~~~~~~ Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pavel Tatashin Link: https://lkml.kernel.org/r/20180629193113.84425-4-andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc_msr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 6aa0f4d9eea6..0e37e369b3a0 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -21,6 +21,7 @@ #include #include #include +#include /* CPU reference clock frequency: in KHz */ #define FREQ_83 83200 From a93a755237359205513398a2378c76f2c7a2f415 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 2 Jul 2018 04:47:57 -0600 Subject: [PATCH 016/812] x86/entry/64: Add two more instruction suffixes [ Upstream commit 6709812f094d96543b443645c68daaa32d3d3e77 ] Sadly, other than claimed in: a368d7fd2a ("x86/entry/64: Add instruction suffix") ... there are two more instances which want to be adjusted. As said there, omitting suffixes from instructions in AT&T mode is bad practice when operand size cannot be determined by the assembler from register operands, and is likely going to be warned about by upstream gas in the future (mine does already). Add the other missing suffixes here as well. Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5B3A02DD02000078001CFB78@prv1-mh.provo.novell.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 92b840c94f17..8be48b1d7596 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -90,7 +90,7 @@ ENDPROC(native_usergs_sysret64) .endm .macro TRACE_IRQS_IRETQ_DEBUG - bt $9, EFLAGS(%rsp) /* interrupts off? */ + btl $9, EFLAGS(%rsp) /* interrupts off? */ jnc 1f TRACE_IRQS_ON_DEBUG 1: @@ -620,7 +620,7 @@ retint_kernel: #ifdef CONFIG_PREEMPT /* Interrupts are off */ /* Check if we need preemption */ - bt $9, EFLAGS(%rsp) /* were interrupts off? */ + btl $9, EFLAGS(%rsp) /* were interrupts off? */ jnc 1f 0: cmpl $0, PER_CPU_VAR(__preempt_count) jnz 1f From cce3c1921d8e852d186da4302f8a87d8b30a1f0a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 22 Jun 2018 14:53:01 -0700 Subject: [PATCH 017/812] scsi: target/iscsi: Make iscsit_ta_authentication() respect the output buffer size [ Upstream commit 35bea5c84fd13c643cce63f0b5cd4b148f8c901d ] Fixes: e48354ce078c ("iscsi-target: Add iSCSI fabric support for target v4.1") Signed-off-by: Bart Van Assche Reviewed-by: Mike Christie Cc: Mike Christie Cc: Christoph Hellwig Cc: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_tpg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 63e1dcc5914d..761b065a40bb 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -637,8 +637,7 @@ int iscsit_ta_authentication(struct iscsi_portal_group *tpg, u32 authentication) none = strstr(buf1, NONE); if (none) goto out; - strncat(buf1, ",", strlen(",")); - strncat(buf1, NONE, strlen(NONE)); + strlcat(buf1, "," NONE, sizeof(buf1)); if (iscsi_update_param_value(param, buf1) < 0) return -EINVAL; } From 4e6c6a4fd12c8249ded8578b45b83a43f157929a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 22 Jun 2018 14:54:49 -0700 Subject: [PATCH 018/812] scsi: klist: Make it safe to use klists in atomic context [ Upstream commit 624fa7790f80575a4ec28fbdb2034097dc18d051 ] In the scsi_transport_srp implementation it cannot be avoided to iterate over a klist from atomic context when using the legacy block layer instead of blk-mq. Hence this patch that makes it safe to use klists in atomic context. This patch avoids that lockdep reports the following: WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&k->k_lock)->rlock); local_irq_disable(); lock(&(&q->__queue_lock)->rlock); lock(&(&k->k_lock)->rlock); lock(&(&q->__queue_lock)->rlock); stack backtrace: Workqueue: kblockd blk_timeout_work Call Trace: dump_stack+0xa4/0xf5 check_usage+0x6e6/0x700 __lock_acquire+0x185d/0x1b50 lock_acquire+0xd2/0x260 _raw_spin_lock+0x32/0x50 klist_next+0x47/0x190 device_for_each_child+0x8e/0x100 srp_timed_out+0xaf/0x1d0 [scsi_transport_srp] scsi_times_out+0xd4/0x410 [scsi_mod] blk_rq_timed_out+0x36/0x70 blk_timeout_work+0x1b5/0x220 process_one_work+0x4fe/0xad0 worker_thread+0x63/0x5a0 kthread+0x1c1/0x1e0 ret_from_fork+0x24/0x30 See also commit c9ddf73476ff ("scsi: scsi_transport_srp: Fix shost to rport translation"). Signed-off-by: Bart Van Assche Cc: Martin K. Petersen Cc: James Bottomley Acked-by: Greg Kroah-Hartman Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/klist.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/klist.c b/lib/klist.c index 0507fa5d84c5..f6b547812fe3 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -336,8 +336,9 @@ struct klist_node *klist_prev(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *prev; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { prev = to_klist_node(last->n_node.prev); @@ -356,7 +357,7 @@ struct klist_node *klist_prev(struct klist_iter *i) prev = to_klist_node(prev->n_node.prev); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); @@ -377,8 +378,9 @@ struct klist_node *klist_next(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *next; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { next = to_klist_node(last->n_node.next); @@ -397,7 +399,7 @@ struct klist_node *klist_next(struct klist_iter *i) next = to_klist_node(next->n_node.next); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); From 73b0eecd584437817d73e0d8ee716d375c221ac7 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 26 Jun 2018 17:35:16 -0300 Subject: [PATCH 019/812] scsi: ibmvscsi: Improve strings handling [ Upstream commit 1262dc09dc9ae7bf4ad00b6a2c5ed6a6936bcd10 ] Currently an open firmware property is copied into partition_name variable without keeping a room for \0. Later one, this variable (partition_name), which is 97 bytes long, is strncpyed into ibmvcsci_host_data->madapter_info->partition_name, which is 96 bytes long, possibly truncating it 'again' and removing the \0. This patch simply decreases the partition name to 96 and just copy using strlcpy() which guarantees that the string is \0 terminated. I think there is no issue if this there is a truncation in this very first copy, i.e, when the open firmware property is read and copied into the driver for the very first time; This issue also causes the following warning on GCC 8: drivers/scsi/ibmvscsi/ibmvscsi.c:281:2: warning: strncpy output may be truncated copying 96 bytes from a string of length 96 [-Wstringop-truncation] ... inlined from ibmvscsi_probe at drivers/scsi/ibmvscsi/ibmvscsi.c:2221:7: drivers/scsi/ibmvscsi/ibmvscsi.c:265:3: warning: strncpy specified bound 97 equals destination size [-Wstringop-truncation] CC: Bart Van Assche CC: Tyrel Datwyler Signed-off-by: Breno Leitao Acked-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index adfef9db6f1e..e26747a1b35a 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -93,7 +93,7 @@ static int max_requests = IBMVSCSI_MAX_REQUESTS_DEFAULT; static int max_events = IBMVSCSI_MAX_REQUESTS_DEFAULT + 2; static int fast_fail = 1; static int client_reserve = 1; -static char partition_name[97] = "UNKNOWN"; +static char partition_name[96] = "UNKNOWN"; static unsigned int partition_number = -1; static struct scsi_transport_template *ibmvscsi_transport_template; @@ -261,7 +261,7 @@ static void gather_partition_info(void) ppartition_name = of_get_property(rootdn, "ibm,partition-name", NULL); if (ppartition_name) - strncpy(partition_name, ppartition_name, + strlcpy(partition_name, ppartition_name, sizeof(partition_name)); p_number_ptr = of_get_property(rootdn, "ibm,partition-no", NULL); if (p_number_ptr) From ef9211bb9a767b058679ba7fdeca00e93f1d457c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 1 Jul 2018 19:32:04 +0200 Subject: [PATCH 020/812] usb: wusbcore: security: cast sizeof to int for comparison [ Upstream commit d3ac5598c5010a8999978ebbcca3b1c6188ca36b ] Comparing an int to a size, which is unsigned, causes the int to become unsigned, giving the wrong result. usb_get_descriptor can return a negative error code. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ int x; expression e,e1; identifier f; @@ *x = f(...); ... when != x = e1 when != if (x < 0 || ...) { ... return ...; } *x < sizeof(e) // Signed-off-by: Julia Lawall Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c index b66faaf3e842..4019c11f24e2 100644 --- a/drivers/usb/wusbcore/security.c +++ b/drivers/usb/wusbcore/security.c @@ -230,7 +230,7 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc, result = usb_get_descriptor(usb_dev, USB_DT_SECURITY, 0, secd, sizeof(*secd)); - if (result < sizeof(*secd)) { + if (result < (int)sizeof(*secd)) { dev_err(dev, "Can't read security descriptor or " "not enough data: %d\n", result); goto out; From 6a9e681b391c6fe6679bf647571ccac6b335e417 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 1 Jun 2018 18:06:16 +1000 Subject: [PATCH 021/812] powerpc/powernv/ioda2: Reduce upper limit for DMA window size [ Upstream commit d3d4ffaae439981e1e441ebb125aa3588627c5d8 ] We use PHB in mode1 which uses bit 59 to select a correct DMA window. However there is mode2 which uses bits 59:55 and allows up to 32 DMA windows per a PE. Even though documentation does not clearly specify that, it seems that the actual hardware does not support bits 59:55 even in mode1, in other words we can create a window as big as 1<<58 but DMA simply won't work. This reduces the upper limit from 59 to 55 bits to let the userspace know about the hardware limits. Fixes: 7aafac11e3 "powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested" Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index eac3b7cc78c6..ab7b2594e0f6 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2270,7 +2270,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); - if ((level_shift - 3) * levels + page_shift >= 60) + if ((level_shift - 3) * levels + page_shift >= 55) return -EINVAL; /* Allocate TCE table */ From 2e17841715313cab2464b3b6360a289a1cb2744b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 Jul 2018 09:34:29 +0200 Subject: [PATCH 022/812] alarmtimer: Prevent overflow for relative nanosleep [ Upstream commit 5f936e19cc0ef97dbe3a56e9498922ad5ba1edef ] Air Icy reported: UBSAN: Undefined behaviour in kernel/time/alarmtimer.c:811:7 signed integer overflow: 1529859276030040771 + 9223372036854775807 cannot be represented in type 'long long int' Call Trace: alarm_timer_nsleep+0x44c/0x510 kernel/time/alarmtimer.c:811 __do_sys_clock_nanosleep kernel/time/posix-timers.c:1235 [inline] __se_sys_clock_nanosleep kernel/time/posix-timers.c:1213 [inline] __x64_sys_clock_nanosleep+0x326/0x4e0 kernel/time/posix-timers.c:1213 do_syscall_64+0xb8/0x3a0 arch/x86/entry/common.c:290 alarm_timer_nsleep() uses ktime_add() to add the current time and the relative expiry value. ktime_add() has no sanity checks so the addition can overflow when the relative timeout is large enough. Use ktime_add_safe() which has the necessary sanity checks in place and limits the result to the valid range. Fixes: 9a7adcf5c6de ("timers: Posix interface for alarm-timers") Reported-by: Team OWL337 Signed-off-by: Thomas Gleixner Cc: John Stultz Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1807020926360.1595@nanos.tec.linutronix.de Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/time/alarmtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 6fcc367ad531..e78480b81f8d 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -773,7 +773,8 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now = alarm_bases[type].gettime(); - exp = ktime_add(now, exp); + + exp = ktime_add_safe(now, exp); } if (alarmtimer_do_nsleep(&alarm, exp)) From 0ee9070935b33d714fccdaaee8cf6e55f5cb203a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 17 Jun 2018 00:30:43 +0200 Subject: [PATCH 023/812] s390/extmem: fix gcc 8 stringop-overflow warning [ Upstream commit 6b2ddf33baec23dace85bd647e3fc4ac070963e8 ] arch/s390/mm/extmem.c: In function '__segment_load': arch/s390/mm/extmem.c:436:2: warning: 'strncat' specified bound 7 equals source length [-Wstringop-overflow=] strncat(seg->res_name, " (DCSS)", 7); What gcc complains about here is the misuse of strncat function, which in this case does not limit a number of bytes taken from "src", so it is in the end the same as strcat(seg->res_name, " (DCSS)"); Keeping in mind that a res_name is 15 bytes, strncat in this case would overflow the buffer and write 0 into alignment byte between the fields in the struct. To avoid that increasing res_name size to 16, and reusing strlcat. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/extmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 18fccc303db7..bfd75be6d415 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -79,7 +79,7 @@ struct qin64 { struct dcss_segment { struct list_head list; char dcss_name[8]; - char res_name[15]; + char res_name[16]; unsigned long start_addr; unsigned long end; atomic_t ref_count; @@ -434,7 +434,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long memcpy(&seg->res_name, seg->dcss_name, 8); EBCASC(seg->res_name, 8); seg->res_name[8] = '\0'; - strncat(seg->res_name, " (DCSS)", 7); + strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name)); seg->res->name = seg->res_name; rc = seg->vm_segtype; if (rc == SEG_TYPE_SC || From 8b4ef82205d4147d82a41e71e7698d82fee0d251 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 29 Jun 2018 19:07:42 +0200 Subject: [PATCH 024/812] ALSA: snd-aoa: add of_node_put() in error path [ Upstream commit 222bce5eb88d1af656419db04bcd84b2419fb900 ] Both calls to of_find_node_by_name() and of_get_next_child() return a node pointer with refcount incremented thus it must be explicidly decremented here after the last usage. As we are assured to have a refcounted np either from the initial of_find_node_by_name(NULL, name); or from the of_get_next_child(gpio, np) in the while loop if we reached the error code path below, an x of_node_put(np) is needed. Signed-off-by: Nicholas Mc Guire Fixes: commit f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa") Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/aoa/core/gpio-feature.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/aoa/core/gpio-feature.c b/sound/aoa/core/gpio-feature.c index f34153962d07..585b594bd838 100644 --- a/sound/aoa/core/gpio-feature.c +++ b/sound/aoa/core/gpio-feature.c @@ -88,8 +88,10 @@ static struct device_node *get_gpio(char *name, } reg = of_get_property(np, "reg", NULL); - if (!reg) + if (!reg) { + of_node_put(np); return NULL; + } *gpioptr = *reg; From f63d861f1c6142ebffd30d0ed5c0b4bb3344cd3b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 10 Jun 2018 11:42:01 -0400 Subject: [PATCH 025/812] media: s3c-camif: ignore -ENOIOCTLCMD from v4l2_subdev_call for s_power [ Upstream commit 30ed2b83343bd1e07884ca7355dac70d25ffc158 ] When the subdevice doesn't provide s_power core ops callback, the v4l2_subdev_call for s_power returns -ENOIOCTLCMD. If the subdevice doesn't have the special handling for its power saving mode, the s_power isn't required. So -ENOIOCTLCMD from the v4l2_subdev_call should be ignored. Cc: Hans Verkuil Signed-off-by: Akinobu Mita Acked-by: Sylwester Nawrocki Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s3c-camif/camif-capture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/s3c-camif/camif-capture.c b/drivers/media/platform/s3c-camif/camif-capture.c index fa6af4a7dae1..f97f4bc22ced 100644 --- a/drivers/media/platform/s3c-camif/camif-capture.c +++ b/drivers/media/platform/s3c-camif/camif-capture.c @@ -117,6 +117,8 @@ static int sensor_set_power(struct camif_dev *camif, int on) if (camif->sensor.power_count == !on) err = v4l2_subdev_call(sensor->sd, core, s_power, on); + if (err == -ENOIOCTLCMD) + err = 0; if (!err) sensor->power_count += on ? 1 : -1; From c9fac59ef32a856fed11fde7b827292abac5b532 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 10 Jun 2018 11:42:26 -0400 Subject: [PATCH 026/812] media: soc_camera: ov772x: correct setting of banding filter [ Upstream commit 22216ec41e919682c15345e95928f266e8ba6f9e ] The banding filter ON/OFF is controlled via bit 5 of COM8 register. It is attempted to be enabled in ov772x_set_params() by the following line. ret = ov772x_mask_set(client, COM8, BNDF_ON_OFF, 1); But this unexpectedly results disabling the banding filter, because the mask and set bits are exclusive. On the other hand, ov772x_s_ctrl() correctly sets the bit by: ret = ov772x_mask_set(client, COM8, BNDF_ON_OFF, BNDF_ON_OFF); The same fix was already applied to non-soc_camera version of ov772x driver in the commit commit a024ee14cd36 ("media: ov772x: correct setting of banding filter") Cc: Jacopo Mondi Cc: Laurent Pinchart Cc: Hans Verkuil Signed-off-by: Akinobu Mita Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/soc_camera/ov772x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/soc_camera/ov772x.c b/drivers/media/i2c/soc_camera/ov772x.c index f150a8bd94dc..70f1a80d4e39 100644 --- a/drivers/media/i2c/soc_camera/ov772x.c +++ b/drivers/media/i2c/soc_camera/ov772x.c @@ -834,7 +834,7 @@ static int ov772x_set_params(struct ov772x_priv *priv, * set COM8 */ if (priv->band_filter) { - ret = ov772x_mask_set(client, COM8, BNDF_ON_OFF, 1); + ret = ov772x_mask_set(client, COM8, BNDF_ON_OFF, BNDF_ON_OFF); if (!ret) ret = ov772x_mask_set(client, BDBASE, 0xff, 256 - priv->band_filter); From 7333c519adb86cce26f74516ff7149d1c5a6ceca Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 9 Jun 2018 08:22:45 -0400 Subject: [PATCH 027/812] media: omap3isp: zero-initialize the isp cam_xclk{a,b} initial data [ Upstream commit 2ec7debd44b49927a6e2861521994cc075a389ed ] The struct clk_init_data init variable is declared in the isp_xclk_init() function so is an automatic variable allocated in the stack. But it's not explicitly zero-initialized, so some init fields are left uninitialized. This causes the data structure to have undefined values that may confuse the common clock framework when the clock is registered. For example, the uninitialized .flags field could have the CLK_IS_CRITICAL bit set, causing the framework to wrongly prepare the clk on registration. This leads to the isp_xclk_prepare() callback being called, which in turn calls to the omap3isp_get() function that increments the isp dev refcount. Since this omap3isp_get() call is unexpected, this leads to an unbalanced omap3isp_get() call that prevents the requested IRQ to be later enabled, due the refcount not being 0 when the correct omap3isp_get() call happens. Fixes: 9b28ee3c9122 ("[media] omap3isp: Use the common clock framework") Signed-off-by: Javier Martinez Canillas Reviewed-by: Sebastian Reichel Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/omap3isp/isp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 91e02c1ff392..136ea1848701 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -303,7 +303,7 @@ static struct clk *isp_xclk_src_get(struct of_phandle_args *clkspec, void *data) static int isp_xclk_init(struct isp_device *isp) { struct device_node *np = isp->dev->of_node; - struct clk_init_data init; + struct clk_init_data init = { 0 }; unsigned int i; for (i = 0; i < ARRAY_SIZE(isp->xclks); ++i) From 778ebfa4c973f0960825bab52da019687d530236 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Tue, 19 Jun 2018 17:57:35 -0700 Subject: [PATCH 028/812] staging: android: ashmem: Fix mmap size validation [ Upstream commit 8632c614565d0c5fdde527889601c018e97b6384 ] The ashmem driver did not check that the size/offset of the vma passed to its .mmap() function was not larger than the ashmem object being mapped. This could cause mmap() to succeed, even though accessing parts of the mapping would later fail with a segmentation fault. Ensure an error is returned by the ashmem_mmap() function if the vma size is larger than the ashmem object size. This enables safer handling of the problem in userspace. Cc: Todd Kjos Cc: devel@driverdev.osuosl.org Cc: linux-kernel@vger.kernel.org Cc: kernel-team@android.com Cc: Joel Fernandes Signed-off-by: Alistair Strachan Acked-by: Joel Fernandes (Google) Reviewed-by: Martijn Coenen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 013b33760639..e9c74c41aece 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -370,6 +370,12 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) goto out; } + /* requested mapping size larger than object size */ + if (vma->vm_end - vma->vm_start > PAGE_ALIGN(asma->size)) { + ret = -EINVAL; + goto out; + } + /* requested protection bits must match our allowed protection mask */ if (unlikely((vma->vm_flags & ~calc_vm_prot_bits(asma->prot_mask)) & calc_vm_prot_bits(PROT_MASK))) { From 4e9cf990bf43e290d5fca4d08a6dde7f68521fc0 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Tue, 12 Jun 2018 12:36:25 +0800 Subject: [PATCH 029/812] drivers/tty: add error handling for pcmcia_loop_config [ Upstream commit 85c634e919bd6ef17427f26a52920aeba12e16ee ] When pcmcia_loop_config fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling pcmcia_loop_config. Signed-off-by: Zhouyang Jia Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/serial_cs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/serial_cs.c b/drivers/tty/serial/8250/serial_cs.c index 4d180c9423ef..1a14948c86d6 100644 --- a/drivers/tty/serial/8250/serial_cs.c +++ b/drivers/tty/serial/8250/serial_cs.c @@ -629,8 +629,10 @@ static int serial_config(struct pcmcia_device * link) (link->has_func_id) && (link->socket->pcmcia_pfc == 0) && ((link->func_id == CISTPL_FUNCID_MULTI) || - (link->func_id == CISTPL_FUNCID_SERIAL))) - pcmcia_loop_config(link, serial_check_for_multi, info); + (link->func_id == CISTPL_FUNCID_SERIAL))) { + if (pcmcia_loop_config(link, serial_check_for_multi, info)) + goto failed; + } /* * Apply any multi-port quirk. From 5a816f1495a54e813aff70b6b2abcbf0e1b65f12 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Mon, 11 Jun 2018 00:39:20 -0400 Subject: [PATCH 030/812] media: tm6000: add error handling for dvb_register_adapter [ Upstream commit e95d7c6eb94c634852eaa5ff4caf3db05b5d2e86 ] When dvb_register_adapter fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling dvb_register_adapter. Signed-off-by: Zhouyang Jia [hans.verkuil@cisco.com: use pr_err and fix typo: adater -> adapter] Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/tm6000/tm6000-dvb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/usb/tm6000/tm6000-dvb.c b/drivers/media/usb/tm6000/tm6000-dvb.c index 095f5db1a790..4f317e2686e9 100644 --- a/drivers/media/usb/tm6000/tm6000-dvb.c +++ b/drivers/media/usb/tm6000/tm6000-dvb.c @@ -275,6 +275,11 @@ static int register_dvb(struct tm6000_core *dev) ret = dvb_register_adapter(&dvb->adapter, "Trident TVMaster 6000 DVB-T", THIS_MODULE, &dev->udev->dev, adapter_nr); + if (ret < 0) { + pr_err("tm6000: couldn't register the adapter!\n"); + goto err; + } + dvb->adapter.priv = dev; if (dvb->frontend) { From 3aad9c2a5a13645bf06fc4115733b0e2bb33bdbe Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 28 Jun 2018 15:28:24 +0800 Subject: [PATCH 031/812] ALSA: hda: Add AZX_DCAPS_PM_RUNTIME for AMD Raven Ridge [ Upstream commit 1adca4b0cd65c14cb8b8c9c257720385869c3d5f ] This patch can make audio controller in AMD Raven Ridge gets runtime suspended to D3, to save ~1W power when it's not in use. Cc: Vijendar Mukunda Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index cabccb10210e..95a82e428f37 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2360,7 +2360,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, /* AMD Raven */ { PCI_DEVICE(0x1022, 0x15e3), - .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, + .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB | + AZX_DCAPS_PM_RUNTIME }, /* ATI HDMI */ { PCI_DEVICE(0x1002, 0x0002), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, From 7d73c76e40675bbe75f129070fe650ca91875eae Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 18 Jun 2018 17:00:56 +0300 Subject: [PATCH 032/812] ath10k: protect ath10k_htt_rx_ring_free with rx_ring.lock [ Upstream commit 168f75f11fe68455e0d058a818ebccfc329d8685 ] While debugging driver crashes related to a buggy firmware crashing under load, I noticed that ath10k_htt_rx_ring_free could be called without being under lock. I'm not sure if this is the root cause of the crash or not, but it seems prudent to protect it. Originally tested on 4.16+ kernel with ath10k-ct 10.4 firmware running on 9984 NIC. Signed-off-by: Ben Greear Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/htt_rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index b32c47fe926d..a65b5d7f59f4 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -212,11 +212,12 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar) spin_lock_bh(&htt->rx_ring.lock); ret = ath10k_htt_rx_ring_fill_n(htt, (htt->rx_ring.fill_level - htt->rx_ring.fill_cnt)); - spin_unlock_bh(&htt->rx_ring.lock); if (ret) ath10k_htt_rx_ring_free(htt); + spin_unlock_bh(&htt->rx_ring.lock); + return ret; } @@ -230,7 +231,9 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt) skb_queue_purge(&htt->rx_compl_q); skb_queue_purge(&htt->rx_in_ord_compl_q); + spin_lock_bh(&htt->rx_ring.lock); ath10k_htt_rx_ring_free(htt); + spin_unlock_bh(&htt->rx_ring.lock); dma_free_coherent(htt->ar->dev, (htt->rx_ring.size * From a080594683912893f80ba2191730c9a3326ff6e4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Jun 2018 14:31:39 +0300 Subject: [PATCH 033/812] rndis_wlan: potential buffer overflow in rndis_wlan_auth_indication() [ Upstream commit ae636fb1554833ee5133ca47bf4b2791b6739c52 ] This is a static checker fix, not something I have tested. The issue is that on the second iteration through the loop, we jump forward by le32_to_cpu(auth_req->length) bytes. The problem is that if the length is more than "buflen" then we end up with a negative "buflen". A negative buflen is type promoted to a high positive value and the loop continues but it's accessing beyond the end of the buffer. I believe the "auth_req->length" comes from the firmware and if the firmware is malicious or buggy, you're already toasted so the impact of this bug is probably not very severe. Fixes: 030645aceb3d ("rndis_wlan: handle 802.11 indications from device") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rndis_wlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 259590013382..c76e0cfbb8b4 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2919,6 +2919,8 @@ static void rndis_wlan_auth_indication(struct usbnet *usbdev, while (buflen >= sizeof(*auth_req)) { auth_req = (void *)buf; + if (buflen < le32_to_cpu(auth_req->length)) + return; type = "unknown"; flags = le32_to_cpu(auth_req->flags); pairwise_error = false; From 3fdd34643ffc378b5924941fad40352c04610294 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 19 Jun 2018 02:43:35 -0700 Subject: [PATCH 034/812] wlcore: Add missing PM call for wlcore_cmd_wait_for_event_or_timeout() [ Upstream commit 4ec7cece87b3ed21ffcd407c62fb2f151a366bc1 ] Otherwise we can get: WARNING: CPU: 0 PID: 55 at drivers/net/wireless/ti/wlcore/io.h:84 I've only seen this few times with the runtime PM patches enabled so this one is probably not needed before that. This seems to work currently based on the current PM implementation timer. Let's apply this separately though in case others are hitting this issue. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ti/wlcore/cmd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index f01d24baff7c..15dc7a398b90 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -35,6 +35,7 @@ #include "wl12xx_80211.h" #include "cmd.h" #include "event.h" +#include "ps.h" #include "tx.h" #include "hw_ops.h" @@ -191,6 +192,10 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT); + ret = wl1271_ps_elp_wakeup(wl); + if (ret < 0) + return ret; + do { if (time_after(jiffies, timeout_time)) { wl1271_debug(DEBUG_CMD, "timeout waiting for event %d", @@ -222,6 +227,7 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, } while (!event); out: + wl1271_ps_elp_sleep(wl); kfree(events_vector); return ret; } From e4b4dd50034f6209ceb433586c7d934c02259cda Mon Sep 17 00:00:00 2001 From: Ethan Tuttle Date: Tue, 19 Jun 2018 21:31:08 -0700 Subject: [PATCH 035/812] ARM: mvebu: declare asm symbols as character arrays in pmsu.c [ Upstream commit d0d378ff451a66e486488eec842e507d28145813 ] With CONFIG_FORTIFY_SOURCE, memcpy uses the declared size of operands to detect buffer overflows. If src or dest is declared as a char, attempts to copy more than byte will result in a fortify_panic(). Address this problem in mvebu_setup_boot_addr_wa() by declaring mvebu_boot_wa_start and mvebu_boot_wa_end as character arrays. Also remove a couple addressof operators to avoid "arithmetic on pointer to an incomplete type" compiler error. See commit 54a7d50b9205 ("x86: mark kprobe templates as character arrays, not single characters") for a similar fix. Fixes "detected buffer overflow in memcpy" error during init on some mvebu systems (armada-370-xp, armada-375): (fortify_panic) from (mvebu_setup_boot_addr_wa+0xb0/0xb4) (mvebu_setup_boot_addr_wa) from (mvebu_v7_cpu_pm_init+0x154/0x204) (mvebu_v7_cpu_pm_init) from (do_one_initcall+0x7c/0x1a8) (do_one_initcall) from (kernel_init_freeable+0x1bc/0x254) (kernel_init_freeable) from (kernel_init+0x8/0x114) (kernel_init) from (ret_from_fork+0x14/0x2c) Signed-off-by: Ethan Tuttle Tested-by: Ethan Tuttle Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-mvebu/pmsu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index ed8fda4cd055..45fd4b173dac 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -117,8 +117,8 @@ void mvebu_pmsu_set_cpu_boot_addr(int hw_cpu, void *boot_addr) PMSU_BOOT_ADDR_REDIRECT_OFFSET(hw_cpu)); } -extern unsigned char mvebu_boot_wa_start; -extern unsigned char mvebu_boot_wa_end; +extern unsigned char mvebu_boot_wa_start[]; +extern unsigned char mvebu_boot_wa_end[]; /* * This function sets up the boot address workaround needed for SMP @@ -131,7 +131,7 @@ int mvebu_setup_boot_addr_wa(unsigned int crypto_eng_target, phys_addr_t resume_addr_reg) { void __iomem *sram_virt_base; - u32 code_len = &mvebu_boot_wa_end - &mvebu_boot_wa_start; + u32 code_len = mvebu_boot_wa_end - mvebu_boot_wa_start; mvebu_mbus_del_window(BOOTROM_BASE, BOOTROM_SIZE); mvebu_mbus_add_window_by_id(crypto_eng_target, crypto_eng_attribute, From e645e3c0029037fa8088b012591a58e8dfa8e390 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Thu, 14 Jun 2018 21:37:17 +0800 Subject: [PATCH 036/812] HID: hid-ntrig: add error handling for sysfs_create_group [ Upstream commit 44d4d51de9a3534a2b63d69efda02a10e66541e4 ] When sysfs_create_group fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling sysfs_create_group. Signed-off-by: Zhouyang Jia Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ntrig.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c index 756d1ef9bd99..6124fd6e04d1 100644 --- a/drivers/hid/hid-ntrig.c +++ b/drivers/hid/hid-ntrig.c @@ -955,6 +955,8 @@ static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id) ret = sysfs_create_group(&hdev->dev.kobj, &ntrig_attribute_group); + if (ret) + hid_err(hdev, "cannot create sysfs group\n"); return 0; err_free: From 7e1b660dc70e486fcf104c40cde784084a6e9b6b Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Tue, 12 Jun 2018 11:13:00 +0800 Subject: [PATCH 037/812] scsi: bnx2i: add error handling for ioremap_nocache [ Upstream commit aa154ea885eb0c2407457ce9c1538d78c95456fa ] When ioremap_nocache fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling ioremap_nocache. Signed-off-by: Zhouyang Jia Reviewed-by: Johannes Thumshirn Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bnx2i/bnx2i_hwi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index fb072cc5e9fd..dada9ce4e702 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -2742,6 +2742,8 @@ int bnx2i_map_ep_dbell_regs(struct bnx2i_endpoint *ep) BNX2X_DOORBELL_PCI_BAR); reg_off = (1 << BNX2X_DB_SHIFT) * (cid_num & 0x1FFFF); ep->qp.ctx_base = ioremap_nocache(reg_base + reg_off, 4); + if (!ep->qp.ctx_base) + return -ENOMEM; goto arm_cq; } From 55e16f49e210a6bad5f856a1ecfa954d7a0dbae7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 12 Jun 2018 14:43:35 +0200 Subject: [PATCH 038/812] EDAC, i7core: Fix memleaks and use-after-free on probe and remove [ Upstream commit 6c974d4dfafe5e9ee754f2a6fba0eb1864f1649e ] Make sure to free and deregister the addrmatch and chancounts devices allocated during probe in all error paths. Also fix use-after-free in a probe error path and in the remove success path where the devices were being put before before deregistration. Signed-off-by: Johan Hovold Cc: Mauro Carvalho Chehab Cc: linux-edac Fixes: 356f0a30860d ("i7core_edac: change the mem allocation scheme to make Documentation/kobject.txt happy") Link: http://lkml.kernel.org/r/20180612124335.6420-2-johan@kernel.org Signed-off-by: Borislav Petkov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/edac/i7core_edac.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 792bdae2b91d..d14c8ffea910 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1187,15 +1187,14 @@ static int i7core_create_sysfs_devices(struct mem_ctl_info *mci) rc = device_add(pvt->addrmatch_dev); if (rc < 0) - return rc; + goto err_put_addrmatch; if (!pvt->is_registered) { pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev), GFP_KERNEL); if (!pvt->chancounts_dev) { - put_device(pvt->addrmatch_dev); - device_del(pvt->addrmatch_dev); - return -ENOMEM; + rc = -ENOMEM; + goto err_del_addrmatch; } pvt->chancounts_dev->type = &all_channel_counts_type; @@ -1209,9 +1208,18 @@ static int i7core_create_sysfs_devices(struct mem_ctl_info *mci) rc = device_add(pvt->chancounts_dev); if (rc < 0) - return rc; + goto err_put_chancounts; } return 0; + +err_put_chancounts: + put_device(pvt->chancounts_dev); +err_del_addrmatch: + device_del(pvt->addrmatch_dev); +err_put_addrmatch: + put_device(pvt->addrmatch_dev); + + return rc; } static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci) @@ -1221,11 +1229,11 @@ static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci) edac_dbg(1, "\n"); if (!pvt->is_registered) { - put_device(pvt->chancounts_dev); device_del(pvt->chancounts_dev); + put_device(pvt->chancounts_dev); } - put_device(pvt->addrmatch_dev); device_del(pvt->addrmatch_dev); + put_device(pvt->addrmatch_dev); } /**************************************************************************** From 436dae6ae264e9976e323aae234d4b7d02708363 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Thu, 14 Jun 2018 20:26:42 +0100 Subject: [PATCH 039/812] ASoC: dapm: Fix potential DAI widget pointer deref when linking DAIs [ Upstream commit e01b4f624278d5efe5fb5da585ca371947b16680 ] Sometime a component or topology may configure a DAI widget with no private data leading to a dev_dbg() dereferencne of this data. Fix this to check for non NULL private data and let users know if widget is missing DAI. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 9e784cc3e5d2..0aefed8ab0cf 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3864,6 +3864,13 @@ int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card) continue; } + /* let users know there is no DAI to link */ + if (!dai_w->priv) { + dev_dbg(card->dev, "dai widget %s has no DAI\n", + dai_w->name); + continue; + } + dai = dai_w->priv; /* ...find all widgets with the same stream and link them */ From 10d721285daf864f31d5562200d3002c81626c0e Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 5 Jun 2018 10:22:52 +0200 Subject: [PATCH 040/812] module: exclude SHN_UNDEF symbols from kallsyms api [ Upstream commit 9f2d1e68cf4d641def734adaccfc3823d3575e6c ] Livepatch modules are special in that we preserve their entire symbol tables in order to be able to apply relocations after module load. The unwanted side effect of this is that undefined (SHN_UNDEF) symbols of livepatch modules are accessible via the kallsyms api and this can confuse symbol resolution in livepatch (klp_find_object_symbol()) and cause subtle bugs in livepatch. Have the module kallsyms api skip over SHN_UNDEF symbols. These symbols are usually not available for normal modules anyway as we cut down their symbol tables to just the core (non-undefined) symbols, so this should really just affect livepatch modules. Note that this patch doesn't affect the display of undefined symbols in /proc/kallsyms. Reported-by: Josh Poimboeuf Tested-by: Josh Poimboeuf Reviewed-by: Josh Poimboeuf Signed-off-by: Jessica Yu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index aa81f41f2b19..bcc78f4c15e9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3860,7 +3860,7 @@ static unsigned long mod_find_symname(struct module *mod, const char *name) for (i = 0; i < kallsyms->num_symtab; i++) if (strcmp(name, symname(kallsyms, i)) == 0 && - kallsyms->symtab[i].st_info != 'U') + kallsyms->symtab[i].st_shndx != SHN_UNDEF) return kallsyms->symtab[i].st_value; return 0; } @@ -3906,6 +3906,10 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, if (mod->state == MODULE_STATE_UNFORMED) continue; for (i = 0; i < kallsyms->num_symtab; i++) { + + if (kallsyms->symtab[i].st_shndx == SHN_UNDEF) + continue; + ret = fn(data, symname(kallsyms, i), mod, kallsyms->symtab[i].st_value); if (ret != 0) From 953b51b7a398b15bc172e1743181b29b5235b2df Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 13 Jun 2018 15:21:35 -0400 Subject: [PATCH 041/812] nfsd: fix corrupted reply to badly ordered compound [ Upstream commit 5b7b15aee641904ae269be9846610a3950cbd64c ] We're encoding a single op in the reply but leaving the number of ops zero, so the reply makes no sense. Somewhat academic as this isn't a case any real client will hit, though in theory perhaps that could change in a future protocol extension. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bfbee8ddf978..c67064d94096 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1632,6 +1632,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (status) { op = &args->ops[0]; op->status = status; + resp->opcnt = 1; goto encode_op; } From 7b9d445c2005a8e063aa98909b81779122009c59 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Mon, 21 May 2018 13:08:32 -0700 Subject: [PATCH 042/812] ARM: dts: dra7: fix DCAN node addresses [ Upstream commit 949bdcc8a97c6078f21c8d4966436b117f2e4cd3 ] Fix the DT node addresses to match the reg property addresses, which were verified to match the TRM: http://www.ti.com/lit/pdf/sprui30 Cc: Roger Quadros Signed-off-by: Kevin Hilman Acked-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 02bd6312d1d9..e6a3a94bac69 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1549,7 +1549,7 @@ phy_sel: cpsw-phy-sel@4a002554 { }; }; - dcan1: can@481cc000 { + dcan1: can@4ae3c000 { compatible = "ti,dra7-d_can"; ti,hwmods = "dcan1"; reg = <0x4ae3c000 0x2000>; @@ -1559,7 +1559,7 @@ dcan1: can@481cc000 { status = "disabled"; }; - dcan2: can@481d0000 { + dcan2: can@48480000 { compatible = "ti,dra7-d_can"; ti,hwmods = "dcan2"; reg = <0x48480000 0x2000>; From ef0a9f76fda4a7ffce6ceee5abdd554a719e8bd8 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 20 Sep 2018 09:09:48 -0600 Subject: [PATCH 043/812] floppy: Do not copy a kernel pointer to user memory in FDGETPRM ioctl commit 65eea8edc315589d6c993cf12dbb5d0e9ef1fe4e upstream. The final field of a floppy_struct is the field "name", which is a pointer to a string in kernel memory. The kernel pointer should not be copied to user memory. The FDGETPRM ioctl copies a floppy_struct to user memory, including this "name" field. This pointer cannot be used by the user and it will leak a kernel address to user-space, which will reveal the location of kernel code and data and undermine KASLR protection. Model this code after the compat ioctl which copies the returned data to a previously cleared temporary structure on the stack (excluding the name pointer) and copy out to userspace from there. As we already have an inparam union with an appropriate member and that memory is already cleared even for read only calls make use of that as a temporary store. Based on an initial patch by Brian Belleville. CVE-2018-7755 Signed-off-by: Andy Whitcroft Broke up long line. Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/floppy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 331363e7de0f..2daa5b84abbc 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3459,6 +3459,9 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int (struct floppy_struct **)&outparam); if (ret) return ret; + memcpy(&inparam.g, outparam, + offsetof(struct floppy_struct, name)); + outparam = &inparam.g; break; case FDMSGON: UDP->flags |= FTD_MSG; From da9d7fd84db0b99bbc769a25e6ba44d8157054c5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Sep 2018 10:32:50 +0000 Subject: [PATCH 044/812] serial: cpm_uart: return immediately from console poll commit be28c1e3ca29887e207f0cbcd294cefe5074bab6 upstream. kgdb expects poll function to return immediately and returning NO_POLL_CHAR when no character is available. Fixes: f5316b4aea024 ("kgdb,8250,pl011: Return immediately from console poll") Cc: Jason Wessel Cc: Signed-off-by: Christophe Leroy Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index d3e3d42c0c12..0040c29f651a 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -1068,8 +1068,8 @@ static int poll_wait_key(char *obuf, struct uart_cpm_port *pinfo) /* Get the address of the host memory buffer. */ bdp = pinfo->rx_cur; - while (bdp->cbd_sc & BD_SC_EMPTY) - ; + if (bdp->cbd_sc & BD_SC_EMPTY) + return NO_POLL_CHAR; /* If the buffer address is in the CPM DPRAM, don't * convert it. @@ -1104,7 +1104,11 @@ static int cpm_get_poll_char(struct uart_port *port) poll_chars = 0; } if (poll_chars <= 0) { - poll_chars = poll_wait_key(poll_buf, pinfo); + int ret = poll_wait_key(poll_buf, pinfo); + + if (ret == NO_POLL_CHAR) + return ret; + poll_chars = ret; pollp = poll_buf; } poll_chars--; From 7c5a3916b59a45f06be760fb683b22b6774f7bd2 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Wed, 29 Aug 2018 08:47:57 +0200 Subject: [PATCH 045/812] spi: tegra20-slink: explicitly enable/disable clock commit 7001cab1dabc0b72b2b672ef58a90ab64f5e2343 upstream. Depending on the SPI instance one may get an interrupt storm upon requesting resp. interrupt unless the clock is explicitly enabled beforehand. This has been observed trying to bring up instance 4 on T20. Signed-off-by: Marcel Ziswiler Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-tegra20-slink.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 85c91f58b42f..af2880d0c112 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1063,6 +1063,24 @@ static int tegra_slink_probe(struct platform_device *pdev) goto exit_free_master; } + /* disabled clock may cause interrupt storm upon request */ + tspi->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(tspi->clk)) { + ret = PTR_ERR(tspi->clk); + dev_err(&pdev->dev, "Can not get clock %d\n", ret); + goto exit_free_master; + } + ret = clk_prepare(tspi->clk); + if (ret < 0) { + dev_err(&pdev->dev, "Clock prepare failed %d\n", ret); + goto exit_free_master; + } + ret = clk_enable(tspi->clk); + if (ret < 0) { + dev_err(&pdev->dev, "Clock enable failed %d\n", ret); + goto exit_free_master; + } + spi_irq = platform_get_irq(pdev, 0); tspi->irq = spi_irq; ret = request_threaded_irq(tspi->irq, tegra_slink_isr, @@ -1071,14 +1089,7 @@ static int tegra_slink_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n", tspi->irq); - goto exit_free_master; - } - - tspi->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(tspi->clk)) { - dev_err(&pdev->dev, "can not get clock\n"); - ret = PTR_ERR(tspi->clk); - goto exit_free_irq; + goto exit_clk_disable; } tspi->rst = devm_reset_control_get(&pdev->dev, "spi"); @@ -1138,6 +1149,8 @@ static int tegra_slink_probe(struct platform_device *pdev) tegra_slink_deinit_dma_param(tspi, true); exit_free_irq: free_irq(spi_irq, tspi); +exit_clk_disable: + clk_disable(tspi->clk); exit_free_master: spi_master_put(master); return ret; @@ -1150,6 +1163,8 @@ static int tegra_slink_remove(struct platform_device *pdev) free_irq(tspi->irq, tspi); + clk_disable(tspi->clk); + if (tspi->tx_dma_chan) tegra_slink_deinit_dma_param(tspi, false); From 6b35618c6e97d96148caacefcf9c7f484f17bb65 Mon Sep 17 00:00:00 2001 From: Gaku Inami Date: Wed, 5 Sep 2018 10:49:36 +0200 Subject: [PATCH 046/812] spi: sh-msiof: Fix invalid SPI use during system suspend commit ffa69d6a16f686efe45269342474e421f2aa58b2 upstream. If the SPI queue is running during system suspend, the system may lock up. Fix this by stopping/restarting the queue during system suspend/resume by calling spi_master_suspend()/spi_master_resume() from the PM callbacks. In-kernel users will receive an -ESHUTDOWN error while system suspend/resume is in progress. Signed-off-by: Gaku Inami Signed-off-by: Hiromitsu Yamasaki [geert: Cleanup, reword] Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sh-msiof.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 3de39bd794b6..19905e6caa14 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -1275,12 +1275,37 @@ static const struct platform_device_id spi_driver_ids[] = { }; MODULE_DEVICE_TABLE(platform, spi_driver_ids); +#ifdef CONFIG_PM_SLEEP +static int sh_msiof_spi_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev); + + return spi_master_suspend(p->master); +} + +static int sh_msiof_spi_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev); + + return spi_master_resume(p->master); +} + +static SIMPLE_DEV_PM_OPS(sh_msiof_spi_pm_ops, sh_msiof_spi_suspend, + sh_msiof_spi_resume); +#define DEV_PM_OPS &sh_msiof_spi_pm_ops +#else +#define DEV_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + static struct platform_driver sh_msiof_spi_drv = { .probe = sh_msiof_spi_probe, .remove = sh_msiof_spi_remove, .id_table = spi_driver_ids, .driver = { .name = "spi_sh_msiof", + .pm = DEV_PM_OPS, .of_match_table = of_match_ptr(sh_msiof_match), }, }; From 348abf931415cc4032b6d66ddb4eb2692ea5c65d Mon Sep 17 00:00:00 2001 From: Hiromitsu Yamasaki Date: Wed, 5 Sep 2018 10:49:37 +0200 Subject: [PATCH 047/812] spi: sh-msiof: Fix handling of write value for SISTR register commit 31a5fae4c5a009898da6d177901d5328051641ff upstream. This patch changes writing to the SISTR register according to the H/W user's manual. The TDREQ bit and RDREQ bits of SISTR are read-only, and must be written their initial values of zero. Signed-off-by: Hiromitsu Yamasaki [geert: reword] Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sh-msiof.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 19905e6caa14..03b566848da6 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -374,7 +374,8 @@ static void sh_msiof_spi_set_mode_regs(struct sh_msiof_spi_priv *p, static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p) { - sh_msiof_write(p, STR, sh_msiof_read(p, STR)); + sh_msiof_write(p, STR, + sh_msiof_read(p, STR) & ~(STR_TDREQ | STR_RDREQ)); } static void sh_msiof_spi_write_fifo_8(struct sh_msiof_spi_priv *p, From 3613559be6e456328fed06633d6114598679e9e0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Sep 2018 10:49:38 +0200 Subject: [PATCH 048/812] spi: rspi: Fix invalid SPI use during system suspend commit c1ca59c22c56930b377a665fdd1b43351887830b upstream. If the SPI queue is running during system suspend, the system may lock up. Fix this by stopping/restarting the queue during system suspend/resume, by calling spi_master_suspend()/spi_master_resume() from the PM callbacks. In-kernel users will receive an -ESHUTDOWN error while system suspend/resume is in progress. Based on a patch for sh-msiof by Gaku Inami. Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-rspi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 818843336932..489d192b4f29 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -1303,12 +1303,36 @@ static const struct platform_device_id spi_driver_ids[] = { MODULE_DEVICE_TABLE(platform, spi_driver_ids); +#ifdef CONFIG_PM_SLEEP +static int rspi_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rspi_data *rspi = platform_get_drvdata(pdev); + + return spi_master_suspend(rspi->master); +} + +static int rspi_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rspi_data *rspi = platform_get_drvdata(pdev); + + return spi_master_resume(rspi->master); +} + +static SIMPLE_DEV_PM_OPS(rspi_pm_ops, rspi_suspend, rspi_resume); +#define DEV_PM_OPS &rspi_pm_ops +#else +#define DEV_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + static struct platform_driver rspi_driver = { .probe = rspi_probe, .remove = rspi_remove, .id_table = spi_driver_ids, .driver = { .name = "renesas_spi", + .pm = DEV_PM_OPS, .of_match_table = of_match_ptr(rspi_of_match), }, }; From 3851c90a348ca177d2fd3d7f3dde549f379235ef Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Sep 2018 10:49:39 +0200 Subject: [PATCH 049/812] spi: rspi: Fix interrupted DMA transfers commit 8dbbaa47b96f6ea5f09f922b4effff3c505cd8cf upstream. When interrupted, wait_event_interruptible_timeout() returns -ERESTARTSYS, and the SPI transfer in progress will fail, as expected: m25p80 spi0.0: SPI transfer failed: -512 spi_master spi0: failed to transfer one message from queue However, as the underlying DMA transfers may not have completed, all subsequent SPI transfers may start to fail: spi_master spi0: receive timeout qspi_transfer_out_in() returned -110 m25p80 spi0.0: SPI transfer failed: -110 spi_master spi0: failed to transfer one message from queue Fix this by calling dmaengine_terminate_all() not only for timeouts, but also for errors. This can be reproduced on r8a7991/koelsch, using "hd /dev/mtd0" followed by CTRL-C. Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-rspi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 489d192b4f29..9882d93e7566 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -587,11 +587,13 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx, ret = wait_event_interruptible_timeout(rspi->wait, rspi->dma_callbacked, HZ); - if (ret > 0 && rspi->dma_callbacked) + if (ret > 0 && rspi->dma_callbacked) { ret = 0; - else if (!ret) { - dev_err(&rspi->master->dev, "DMA timeout\n"); - ret = -ETIMEDOUT; + } else { + if (!ret) { + dev_err(&rspi->master->dev, "DMA timeout\n"); + ret = -ETIMEDOUT; + } if (tx) dmaengine_terminate_all(rspi->master->dma_tx); if (rx) From ae96945ac60cca9f0045cb7116c7ea231eb48037 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 13:59:59 -0400 Subject: [PATCH 050/812] USB: fix error handling in usb_driver_claim_interface() commit bd729f9d67aa9a303d8925bb8c4f06af25f407d1 upstream. The syzbot fuzzing project found a use-after-free bug in the USB core. The bug was caused by usbfs not unbinding from an interface when the USB device file was closed, which led another process to attempt the unbind later on, after the private data structure had been deallocated. The reason usbfs did not unbind the interface at the appropriate time was because it thought the interface had never been claimed in the first place. This was caused by the fact that usb_driver_claim_interface() does not clean up properly when device_bind_driver() returns an error. Although the error code gets passed back to the caller, the iface->dev.driver pointer remains set and iface->condition remains equal to USB_INTERFACE_BOUND. This patch adds proper error handling to usb_driver_claim_interface(). Signed-off-by: Alan Stern Reported-by: syzbot+f84aa7209ccec829536f@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 0bb380a9fcf7..3c9e495d0ecb 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -562,6 +562,21 @@ int usb_driver_claim_interface(struct usb_driver *driver, if (!lpm_disable_error) usb_unlocked_enable_lpm(udev); + if (retval) { + dev->driver = NULL; + usb_set_intfdata(iface, NULL); + iface->needs_remote_wakeup = 0; + iface->condition = USB_INTERFACE_UNBOUND; + + /* + * Unbound interfaces are always runtime-PM-disabled + * and runtime-PM-suspended + */ + if (driver->supports_autosuspend) + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + } + return retval; } EXPORT_SYMBOL_GPL(usb_driver_claim_interface); From 06fc0ef3ef7f52ea90eb7f3190b261cb0f5b11f9 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 14:00:53 -0400 Subject: [PATCH 051/812] USB: handle NULL config in usb_find_alt_setting() commit c9a4cb204e9eb7fa7dfbe3f7d3a674fa530aa193 upstream. usb_find_alt_setting() takes a pointer to a struct usb_host_config as an argument; it searches for an interface with specified interface and alternate setting numbers in that config. However, it crashes if the usb_host_config pointer argument is NULL. Since this is a general-purpose routine, available for use in many places, we want to to be more robust. This patch makes it return NULL whenever the config argument is NULL. Signed-off-by: Alan Stern Reported-by: syzbot+19c3aaef85a89d451eac@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index f8bbd0b6d9fe..ad308c8e9af5 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -95,6 +95,8 @@ struct usb_host_interface *usb_find_alt_setting( struct usb_interface_cache *intf_cache = NULL; int i; + if (!config) + return NULL; for (i = 0; i < config->desc.bNumInterfaces; i++) { if (config->intf_cache[i]->altsetting[0].desc.bInterfaceNumber == iface_num) { From de2aac8ee0fb52a743a854d781e4b4a32600fa8a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:10 -0700 Subject: [PATCH 052/812] slub: make ->cpu_partial unsigned int commit e5d9998f3e09359b372a037a6ac55ba235d95d57 upstream. /* * cpu_partial determined the maximum number of objects * kept in the per cpu partial lists of a processor. */ Can't be negative. Link: http://lkml.kernel.org/r/20180305200730.15812-15-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: zhong jiang Signed-off-by: Greg Kroah-Hartman --- include/linux/slub_def.h | 3 ++- mm/slub.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 33885118523c..9b681f21c2a9 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -67,7 +67,8 @@ struct kmem_cache { int size; /* The size of an object including meta data */ int object_size; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ - int cpu_partial; /* Number of per cpu partial objects to keep around */ + /* Number of per cpu partial objects to keep around */ + unsigned int cpu_partial; struct kmem_cache_order_objects oo; /* Allocation and freeing of slabs */ diff --git a/mm/slub.c b/mm/slub.c index 2284c4333857..c33b0e13cca7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1661,7 +1661,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, { struct page *page, *page2; void *object = NULL; - int available = 0; + unsigned int available = 0; int objects; /* @@ -4674,10 +4674,10 @@ static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, size_t length) { - unsigned long objects; + unsigned int objects; int err; - err = kstrtoul(buf, 10, &objects); + err = kstrtouint(buf, 10, &objects); if (err) return err; if (objects && !kmem_cache_has_cpu_partial(s)) From 78769e876f5762f0a3cbd2e0c08e93dd81463339 Mon Sep 17 00:00:00 2001 From: ming_qian Date: Tue, 8 May 2018 22:13:08 -0400 Subject: [PATCH 053/812] media: uvcvideo: Support realtek's UVC 1.5 device commit f620d1d7afc7db57ab59f35000752840c91f67e7 upstream. media: uvcvideo: Support UVC 1.5 video probe & commit controls The length of UVC 1.5 video control is 48, and it is 34 for UVC 1.1. Change it to 48 for UVC 1.5 device, and the UVC 1.5 device can be recognized. More changes to the driver are needed for full UVC 1.5 compatibility. However, at least the UVC 1.5 Realtek RTS5847/RTS5852 cameras have been reported to work well. [laurent.pinchart@ideasonboard.com: Factor out code to helper function, update size checks] Cc: stable@vger.kernel.org Signed-off-by: ming_qian Signed-off-by: Laurent Pinchart Tested-by: Kai-Heng Feng Tested-by: Ana Guerrero Lopez Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_video.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index 2b276ab7764f..a4048a04d236 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -163,14 +163,27 @@ static void uvc_fixup_video_ctrl(struct uvc_streaming *stream, } } +static size_t uvc_video_ctrl_size(struct uvc_streaming *stream) +{ + /* + * Return the size of the video probe and commit controls, which depends + * on the protocol version. + */ + if (stream->dev->uvc_version < 0x0110) + return 26; + else if (stream->dev->uvc_version < 0x0150) + return 34; + else + return 48; +} + static int uvc_get_video_ctrl(struct uvc_streaming *stream, struct uvc_streaming_control *ctrl, int probe, __u8 query) { + __u16 size = uvc_video_ctrl_size(stream); __u8 *data; - __u16 size; int ret; - size = stream->dev->uvc_version >= 0x0110 ? 34 : 26; if ((stream->dev->quirks & UVC_QUIRK_PROBE_DEF) && query == UVC_GET_DEF) return -EIO; @@ -225,7 +238,7 @@ static int uvc_get_video_ctrl(struct uvc_streaming *stream, ctrl->dwMaxVideoFrameSize = get_unaligned_le32(&data[18]); ctrl->dwMaxPayloadTransferSize = get_unaligned_le32(&data[22]); - if (size == 34) { + if (size >= 34) { ctrl->dwClockFrequency = get_unaligned_le32(&data[26]); ctrl->bmFramingInfo = data[30]; ctrl->bPreferedVersion = data[31]; @@ -254,11 +267,10 @@ static int uvc_get_video_ctrl(struct uvc_streaming *stream, static int uvc_set_video_ctrl(struct uvc_streaming *stream, struct uvc_streaming_control *ctrl, int probe) { + __u16 size = uvc_video_ctrl_size(stream); __u8 *data; - __u16 size; int ret; - size = stream->dev->uvc_version >= 0x0110 ? 34 : 26; data = kzalloc(size, GFP_KERNEL); if (data == NULL) return -ENOMEM; @@ -275,7 +287,7 @@ static int uvc_set_video_ctrl(struct uvc_streaming *stream, put_unaligned_le32(ctrl->dwMaxVideoFrameSize, &data[18]); put_unaligned_le32(ctrl->dwMaxPayloadTransferSize, &data[22]); - if (size == 34) { + if (size >= 34) { put_unaligned_le32(ctrl->dwClockFrequency, &data[26]); data[30] = ctrl->bmFramingInfo; data[31] = ctrl->bPreferedVersion; From 26eaa502770cfe4412e57a8151c8eea7247d1ec4 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 5 Sep 2018 12:07:02 +0200 Subject: [PATCH 054/812] USB: usbdevfs: sanitize flags more commit 7a68d9fb851012829c29e770621905529bd9490b upstream. Requesting a ZERO_PACKET or not is sensible only for output. In the input direction the device decides. Likewise accepting short packets makes sense only for input. This allows operation with panic_on_warn without opening up a local DOS. Signed-off-by: Oliver Neukum Reported-by: syzbot+843efa30c8821bd69f53@syzkaller.appspotmail.com Fixes: 0cb54a3e47cb ("USB: debugging code shouldn't alter control flow") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ad2e6d235c30..26141a1db9f6 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1289,10 +1289,13 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb struct async *as = NULL; struct usb_ctrlrequest *dr = NULL; unsigned int u, totlen, isofrmlen; - int i, ret, is_in, num_sgs = 0, ifnum = -1; + int i, ret, num_sgs = 0, ifnum = -1; int number_of_packets = 0; unsigned int stream_id = 0; void *buf; + bool is_in; + bool allow_short = false; + bool allow_zero = false; unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK | USBDEVFS_URB_BULK_CONTINUATION | USBDEVFS_URB_NO_FSBR | @@ -1326,6 +1329,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: + if (is_in) + allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1366,6 +1371,10 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb break; case USBDEVFS_URB_TYPE_BULK: + if (!is_in) + allow_zero = true; + else + allow_short = true; switch (usb_endpoint_type(&ep->desc)) { case USB_ENDPOINT_XFER_CONTROL: case USB_ENDPOINT_XFER_ISOC: @@ -1386,6 +1395,10 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb if (!usb_endpoint_xfer_int(&ep->desc)) return -EINVAL; interrupt_urb: + if (!is_in) + allow_zero = true; + else + allow_short = true; break; case USBDEVFS_URB_TYPE_ISO: @@ -1512,11 +1525,11 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = (is_in ? URB_DIR_IN : URB_DIR_OUT); if (uurb->flags & USBDEVFS_URB_ISO_ASAP) u |= URB_ISO_ASAP; - if (uurb->flags & USBDEVFS_URB_SHORT_NOT_OK && is_in) + if (allow_short && uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) u |= URB_SHORT_NOT_OK; if (uurb->flags & USBDEVFS_URB_NO_FSBR) u |= URB_NO_FSBR; - if (uurb->flags & USBDEVFS_URB_ZERO_PACKET) + if (allow_zero && uurb->flags & USBDEVFS_URB_ZERO_PACKET) u |= URB_ZERO_PACKET; if (uurb->flags & USBDEVFS_URB_NO_INTERRUPT) u |= URB_NO_INTERRUPT; From 30948c685b98042991459f7d386d48ac8a80b5cf Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 5 Sep 2018 12:07:03 +0200 Subject: [PATCH 055/812] USB: usbdevfs: restore warning for nonsensical flags commit 81e0403b26d94360abd1f6a57311337973bc82cd upstream. If we filter flags before they reach the core we need to generate our own warnings. Signed-off-by: Oliver Neukum Fixes: 0cb54a3e47cb ("USB: debugging code shouldn't alter control flow") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 26141a1db9f6..5e0af15aebc4 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1535,6 +1535,11 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u |= URB_NO_INTERRUPT; as->urb->transfer_flags = u; + if (!allow_short && uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) + dev_warn(&ps->dev->dev, "Requested nonsensical USBDEVFS_URB_SHORT_NOT_OK.\n"); + if (!allow_zero && uurb->flags & USBDEVFS_URB_ZERO_PACKET) + dev_warn(&ps->dev->dev, "Requested nonsensical USBDEVFS_URB_ZERO_PACKET.\n"); + as->urb->transfer_buffer_length = uurb->buffer_length; as->urb->setup_packet = (unsigned char *)dr; dr = NULL; From acdc177dcc870f7fcb20fe3d9d6f221fbe8f8206 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 11 Sep 2018 10:00:44 +0200 Subject: [PATCH 056/812] Revert "usb: cdc-wdm: Fix a sleep-in-atomic-context bug in service_outstanding_interrupt()" commit e871db8d78df1c411032cbb3acfdf8930509360e upstream. This reverts commit 6e22e3af7bb3a7b9dc53cb4687659f6e63fca427. The bug the patch describes to, has been already fixed in commit 2df6948428542 ("USB: cdc-wdm: don't enable interrupts in USB-giveback") so need to this, revert it. Fixes: 6e22e3af7bb3 ("usb: cdc-wdm: Fix a sleep-in-atomic-context bug in service_outstanding_interrupt()") Cc: stable Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 4380e4f600ab..61ea87917433 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -453,7 +453,7 @@ static int clear_wdm_read_flag(struct wdm_device *desc) set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); - rv = usb_submit_urb(desc->response, GFP_ATOMIC); + rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { dev_err(&desc->intf->dev, From 1df1f7369637f0554bc44bac2f443b396e6b6915 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 13:58:51 -0400 Subject: [PATCH 057/812] USB: remove LPM management from usb_driver_claim_interface() commit c183813fcee44a249339b7c46e1ad271ca1870aa upstream. usb_driver_claim_interface() disables and re-enables Link Power Management, but it shouldn't do either one, for the reasons listed below. This patch removes the two LPM-related function calls from the routine. The reason for disabling LPM in the analogous function usb_probe_interface() is so that drivers won't have to deal with unwanted LPM transitions in their probe routine. But usb_driver_claim_interface() doesn't call the driver's probe routine (or any other callbacks), so that reason doesn't apply here. Furthermore, no driver other than usbfs will ever call usb_driver_claim_interface() unless it is already bound to another interface in the same device, which means disabling LPM here would be redundant. usbfs doesn't interact with LPM at all. Lastly, the error return from usb_unlocked_disable_lpm() isn't handled properly; the code doesn't clean up its earlier actions before returning. Signed-off-by: Alan Stern Fixes: 8306095fd2c1 ("USB: Disable USB 3.0 LPM in critical sections.") CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 3c9e495d0ecb..e9d6cf146fcc 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -509,7 +509,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, struct device *dev; struct usb_device *udev; int retval = 0; - int lpm_disable_error = -ENODEV; if (!iface) return -ENODEV; @@ -530,16 +529,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, iface->condition = USB_INTERFACE_BOUND; - /* See the comment about disabling LPM in usb_probe_interface(). */ - if (driver->disable_hub_initiated_lpm) { - lpm_disable_error = usb_unlocked_disable_lpm(udev); - if (lpm_disable_error) { - dev_err(&iface->dev, "%s Failed to disable LPM for driver %s\n.", - __func__, driver->name); - return -ENOMEM; - } - } - /* Claimed interfaces are initially inactive (suspended) and * runtime-PM-enabled, but only if the driver has autosuspend * support. Otherwise they are marked active, to prevent the @@ -558,10 +547,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, if (device_is_registered(dev)) retval = device_bind_driver(dev); - /* Attempt to re-enable USB3 LPM, if the disable was successful. */ - if (!lpm_disable_error) - usb_unlocked_enable_lpm(udev); - if (retval) { dev->driver = NULL; usb_set_intfdata(iface, NULL); From 640ddaa0c07ca3f6dc2a5b6cb2823477d1763e55 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Tue, 18 Sep 2018 09:32:22 -0700 Subject: [PATCH 058/812] Input: elantech - enable middle button of touchpad on ThinkPad P72 commit 91a97507323e1ad4bfc10f4a5922e67cdaf8b3cd upstream. Adding 2 new touchpad IDs to support middle button support. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elantech.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 174bb52c578b..84aead19622c 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1180,6 +1180,8 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = { static const char * const middle_button_pnp_ids[] = { "LEN2131", /* ThinkPad P52 w/ NFC */ "LEN2132", /* ThinkPad P52 */ + "LEN2133", /* ThinkPad P72 w/ NFC */ + "LEN2134", /* ThinkPad P72 */ NULL }; From 9c5da711131164b8b64dfe9e5498d79fef60c865 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 17 Sep 2018 18:10:05 -0700 Subject: [PATCH 059/812] IB/srp: Avoid that sg_reset -d ${srp_device} triggers an infinite loop commit ee92efe41cf358f4b99e73509f2bfd4733609f26 upstream. Use different loop variables for the inner and outer loop. This avoids that an infinite loop occurs if there are more RDMA channels than target->req_ring_size. Fixes: d92c0da71a35 ("IB/srp: Add multichannel support") Cc: Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 4fd2892613dd..1897c4080346 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2594,7 +2594,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_rdma_ch *ch; - int i; + int i, j; u8 status; shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); @@ -2608,8 +2608,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - for (i = 0; i < target->req_ring_size; ++i) { - struct srp_request *req = &ch->req_ring[i]; + for (j = 0; j < target->req_ring_size; ++j) { + struct srp_request *req = &ch->req_ring[j]; srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); } From 739d24491c953eabe3cf42423bdbce127457d8c7 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sun, 9 Sep 2018 04:09:27 +0000 Subject: [PATCH 060/812] scsi: target: iscsi: Use bin2hex instead of a re-implementation commit 8c39e2699f8acb2e29782a834e56306da24937fe upstream. Signed-off-by: Vincent Pelletier Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen [plr.vincent@gmail.com: hunk context change for 4.4 and 4.9, no code change] Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_auth.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index b380bc7ee10a..3184e023a052 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -26,15 +26,6 @@ #include "iscsi_target_nego.h" #include "iscsi_target_auth.h" -static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len) -{ - int i; - - for (i = 0; i < src_len; i++) { - sprintf(&dst[i*2], "%02x", (int) src[i] & 0xff); - } -} - static void chap_gen_challenge( struct iscsi_conn *conn, int caller, @@ -47,7 +38,7 @@ static void chap_gen_challenge( memset(challenge_asciihex, 0, CHAP_CHALLENGE_LENGTH * 2 + 1); get_random_bytes(chap->challenge, CHAP_CHALLENGE_LENGTH); - chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge, + bin2hex(challenge_asciihex, chap->challenge, CHAP_CHALLENGE_LENGTH); /* * Set CHAP_C, and copy the generated challenge into c_str. @@ -287,7 +278,7 @@ static int chap_server_compute_md5( } crypto_free_hash(tfm); - chap_binaryhex_to_asciihex(response, server_digest, MD5_SIGNATURE_SIZE); + bin2hex(response, server_digest, MD5_SIGNATURE_SIZE); pr_debug("[server] MD5 Server Digest: %s\n", response); if (memcmp(server_digest, client_digest, MD5_SIGNATURE_SIZE) != 0) { @@ -431,7 +422,7 @@ static int chap_server_compute_md5( /* * Convert response from binary hex to ascii hext. */ - chap_binaryhex_to_asciihex(response, digest, MD5_SIGNATURE_SIZE); + bin2hex(response, digest, MD5_SIGNATURE_SIZE); *nr_out_len += sprintf(nr_out_ptr + *nr_out_len, "CHAP_R=0x%s", response); *nr_out_len += 1; From 9aeb6fd7e2d9b459df8cce696f51e5c7dba2d73a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 20 Sep 2018 14:11:17 +0200 Subject: [PATCH 061/812] serial: imx: restore handshaking irq for imx1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7e620984b62532783912312e334f3c48cdacbd5d upstream. Back in 2015 when irda was dropped from the driver imx1 was broken. This change reintroduces the support for the third interrupt of the UART. Fixes: afe9cbb1a6ad ("serial: imx: drop support for IRDA") Cc: stable Signed-off-by: Uwe Kleine-König Reviewed-by: Leonard Crestez Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 07ede982b472..f5f46c121ee3 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1997,6 +1997,14 @@ static int serial_imx_probe(struct platform_device *pdev) dev_name(&pdev->dev), sport); if (ret) return ret; + + ret = devm_request_irq(&pdev->dev, rtsirq, imx_rtsint, 0, + dev_name(&pdev->dev), sport); + if (ret) { + dev_err(&pdev->dev, "failed to request rts irq: %d\n", + ret); + return ret; + } } else { ret = devm_request_irq(&pdev->dev, rxirq, imx_int, 0, dev_name(&pdev->dev), sport); From 14a65511bc7c1eec560764fe53018b0765fb8d2d Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 27 Sep 2018 16:53:21 +0100 Subject: [PATCH 062/812] arm64: KVM: Tighten guest core register access from userspace commit d26c25a9d19b5976b319af528886f89cf455692d upstream. We currently allow userspace to access the core register file in about any possible way, including straddling multiple registers and doing unaligned accesses. This is not the expected use of the ABI, and nobody is actually using it that way. Let's tighten it by explicitly checking the size and alignment for each field of the register file. Cc: Fixes: 2f4a07c5f9fe ("arm64: KVM: guest one-reg interface") Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Dave Martin [maz: rewrote Dave's initial patch to be more easily backported] Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/guest.c | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3039f080e2d5..83aa598ed81e 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -48,6 +48,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -67,6 +106,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -89,6 +131,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; From cd3d6463759d21f4093d3434effacc358dd0caf8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Jun 2018 15:40:48 -0400 Subject: [PATCH 063/812] ext4: never move the system.data xattr out of the inode body commit 8cdb5240ec5928b20490a2bb34cb87e9a5f40226 upstream. When expanding the extra isize space, we must never move the system.data xattr out of the inode body. For performance reasons, it doesn't make any sense, and the inline data implementation assumes that system.data xattr is never in the external xattr block. This addresses CVE-2018-10880 https://bugzilla.kernel.org/show_bug.cgi?id=200005 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Zubin Mithra Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 9fb2a751fce4..b51bb73b06a6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1386,6 +1386,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, /* Find the entry best suited to be pushed into EA block */ entry = NULL; for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + /* never move system.data out of the inode */ + if ((last->e_name_len == 4) && + (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) && + !memcmp(last->e_name, "data", 4)) + continue; total_size = EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + EXT4_XATTR_LEN(last->e_name_len); From 644c07d0185bc85aa0f075e86f84cae5b0639e7e Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 31 Jul 2018 00:56:49 +0800 Subject: [PATCH 064/812] thermal: of-thermal: disable passive polling when thermal zone is disabled [ Upstream commit 152395fd03d4ce1e535a75cdbf58105e50587611 ] When thermal zone is in passive mode, disabling its mode from sysfs is NOT taking effect at all, it is still polling the temperature of the disabled thermal zone and handling all thermal trips, it makes user confused. The disabling operation should disable the thermal zone behavior completely, for both active and passive mode, this patch clears the passive_delay when thermal zone is disabled and restores it when it is enabled. Signed-off-by: Anson Huang Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/of-thermal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index be4eedcb839a..236c4eb5cf78 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -284,10 +284,13 @@ static int of_thermal_set_mode(struct thermal_zone_device *tz, mutex_lock(&tz->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED) { tz->polling_delay = data->polling_delay; - else + tz->passive_delay = data->passive_delay; + } else { tz->polling_delay = 0; + tz->passive_delay = 0; + } mutex_unlock(&tz->lock); From aa1c657ae2c320372fceeea98c0e6d25102f9d33 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:10 +0800 Subject: [PATCH 065/812] net: hns: fix length and page_offset overflow when CONFIG_ARM64_64K_PAGES [ Upstream commit 3ed614dce3ca9912d22be215ff0f11104b69fe62 ] When enable the config item "CONFIG_ARM64_64K_PAGES", the size of PAGE_SIZE is 65536(64K). But the type of length and page_offset are u16, they will overflow. So change them to u32. Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem hnae framework support") Signed-off-by: Huazhong Tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns/hnae.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index cec95ac8687d..fe37fc7ec76e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -171,10 +171,10 @@ struct hnae_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack*/ void *priv; - u16 page_offset; - u16 reuse_flag; + u32 page_offset; + u32 length; /* length of the buffer */ - u16 length; /* length of the buffer */ + u16 reuse_flag; /* desc type, used by the ring user to mark the type of the priv data */ u16 type; From c5bf8b49cc5a8416aea41645e64cdb8aa613c788 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 23 Jul 2018 09:01:29 -0700 Subject: [PATCH 066/812] e1000: check on netif_running() before calling e1000_up() [ Upstream commit cf1acec008f8d7761aa3fd7c4bca7e17b2d2512d ] When the device is not up, the call to 'e1000_up()' from the error handling path of 'e1000_set_ringparam()' causes a kernel oops with a null-pointer dereference. The null-pointer dereference is triggered in function 'e1000_alloc_rx_buffers()' at line 'buffer_info = &rx_ring->buffer_info[i]'. This bug was reported by COD, a tool for testing kernel module binaries I am building. This bug was also detected by KFI from Dr. Kai Cong. This patch fixes the bug by checking on 'netif_running()' before calling 'e1000_up()' in 'e1000_set_ringparam()'. Signed-off-by: Bo Chen Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 83e557c7f279..52129b214c09 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -665,7 +665,8 @@ static int e1000_set_ringparam(struct net_device *netdev, err_alloc_rx: kfree(txdr); err_alloc_tx: - e1000_up(adapter); + if (netif_running(adapter->netdev)) + e1000_up(adapter); err_setup: clear_bit(__E1000_RESETTING, &adapter->flags); return err; From b8d10a09f81a0b7566a4291c756a08e402abbd4c Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 23 Jul 2018 09:01:30 -0700 Subject: [PATCH 067/812] e1000: ensure to free old tx/rx rings in set_ringparam() [ Upstream commit ee400a3f1bfe7004a3e14b81c38ccc5583c26295 ] In 'e1000_set_ringparam()', the tx_ring and rx_ring are updated with new value and the old tx/rx rings are freed only when the device is up. There are resource leaks on old tx/rx rings when the device is not up. This bug is reported by COD, a tool for testing kernel module binaries I am building. This patch fixes the bug by always calling 'kfree()' on old tx/rx rings in 'e1000_set_ringparam()'. Signed-off-by: Bo Chen Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 52129b214c09..5ae8874bbf72 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -645,14 +645,14 @@ static int e1000_set_ringparam(struct net_device *netdev, adapter->tx_ring = tx_old; e1000_free_all_rx_resources(adapter); e1000_free_all_tx_resources(adapter); - kfree(tx_old); - kfree(rx_old); adapter->rx_ring = rxdr; adapter->tx_ring = txdr; err = e1000_up(adapter); if (err) goto err_setup; } + kfree(tx_old); + kfree(rx_old); clear_bit(__E1000_RESETTING, &adapter->flags); return 0; From 94516f10f3030a2c6640b5e1a845f0ae4395ef4e Mon Sep 17 00:00:00 2001 From: Lothar Felten Date: Tue, 14 Aug 2018 09:09:37 +0200 Subject: [PATCH 068/812] hwmon: (ina2xx) fix sysfs shunt resistor read access [ Upstream commit 3ad867001c91657c46dcf6656d52eb6080286fd5 ] fix the sysfs shunt resistor read access: return the shunt resistor value, not the calibration register contents. update email address Signed-off-by: Lothar Felten Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/hwmon/ina2xx | 2 +- drivers/hwmon/ina2xx.c | 13 +++++++++++-- include/linux/platform_data/ina2xx.h | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Documentation/hwmon/ina2xx b/Documentation/hwmon/ina2xx index cfd31d94c872..f8bf14055c2f 100644 --- a/Documentation/hwmon/ina2xx +++ b/Documentation/hwmon/ina2xx @@ -32,7 +32,7 @@ Supported chips: Datasheet: Publicly available at the Texas Instruments website http://www.ti.com/ -Author: Lothar Felten +Author: Lothar Felten Description ----------- diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index ac63e562071f..9ac6e1673375 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -17,7 +17,7 @@ * Bi-directional Current/Power Monitor with I2C Interface * Datasheet: http://www.ti.com/product/ina230 * - * Copyright (C) 2012 Lothar Felten + * Copyright (C) 2012 Lothar Felten * Thanks to Jan Volkering * * This program is free software; you can redistribute it and/or modify @@ -328,6 +328,15 @@ static int ina2xx_set_shunt(struct ina2xx_data *data, long val) return 0; } +static ssize_t ina2xx_show_shunt(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct ina2xx_data *data = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%li\n", data->rshunt); +} + static ssize_t ina2xx_store_shunt(struct device *dev, struct device_attribute *da, const char *buf, size_t count) @@ -402,7 +411,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, /* shunt resistance */ static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, - ina2xx_show_value, ina2xx_store_shunt, + ina2xx_show_shunt, ina2xx_store_shunt, INA2XX_CALIBRATION); /* update interval (ina226 only) */ diff --git a/include/linux/platform_data/ina2xx.h b/include/linux/platform_data/ina2xx.h index 9abc0ca7259b..9f0aa1b48c78 100644 --- a/include/linux/platform_data/ina2xx.h +++ b/include/linux/platform_data/ina2xx.h @@ -1,7 +1,7 @@ /* * Driver for Texas Instruments INA219, INA226 power monitor chips * - * Copyright (C) 2012 Lothar Felten + * Copyright (C) 2012 Lothar Felten * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as From 55aaf8e0d9cfac87cdac31d6227b7f699a52f63f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Aug 2018 13:07:47 +0300 Subject: [PATCH 069/812] hwmon: (adt7475) Make adt7475_read_word() return errors [ Upstream commit f196dec6d50abb2e65fb54a0621b2f1b4d922995 ] The adt7475_read_word() function was meant to return negative error codes on failure. Signed-off-by: Dan Carpenter Reviewed-by: Tokunori Ikegami Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/adt7475.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 3cefd1aeb24f..9c262d955331 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -274,14 +274,18 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn) return clamp_val(reg, 0, 1023) & (0xff << 2); } -static u16 adt7475_read_word(struct i2c_client *client, int reg) +static int adt7475_read_word(struct i2c_client *client, int reg) { - u16 val; + int val1, val2; - val = i2c_smbus_read_byte_data(client, reg); - val |= (i2c_smbus_read_byte_data(client, reg + 1) << 8); + val1 = i2c_smbus_read_byte_data(client, reg); + if (val1 < 0) + return val1; + val2 = i2c_smbus_read_byte_data(client, reg + 1); + if (val2 < 0) + return val2; - return val; + return val1 | (val2 << 8); } static void adt7475_write_word(struct i2c_client *client, int reg, u16 val) From 4ff0d84e6d990383f0709e363417315a95c20255 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 30 Aug 2018 11:50:13 +0300 Subject: [PATCH 070/812] i2c: i801: Allow ACPI AML access I/O ports not reserved for SMBus [ Upstream commit 7fd6d98b89f382d414e1db528e29a67bbd749457 ] Commit 7ae81952cda ("i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR") made it possible for AML code to access SMBus I/O ports by installing custom SystemIO OpRegion handler and blocking i80i driver access upon first AML read/write to this OpRegion. However, while ThinkPad T560 does have SystemIO OpRegion declared under the SMBus device, it does not access any of the SMBus registers: Device (SMBU) { ... OperationRegion (SMBP, PCI_Config, 0x50, 0x04) Field (SMBP, DWordAcc, NoLock, Preserve) { , 5, TCOB, 11, Offset (0x04) } Name (TCBV, 0x00) Method (TCBS, 0, NotSerialized) { If ((TCBV == 0x00)) { TCBV = (\_SB.PCI0.SMBU.TCOB << 0x05) } Return (TCBV) /* \_SB_.PCI0.SMBU.TCBV */ } OperationRegion (TCBA, SystemIO, TCBS (), 0x10) Field (TCBA, ByteAcc, NoLock, Preserve) { Offset (0x04), , 9, CPSC, 1 } } Problem with the current approach is that it blocks all I/O port access and because this system has touchpad connected to the SMBus controller after first AML access (happens during suspend/resume cycle) the touchpad fails to work anymore. Fix this so that we allow ACPI AML I/O port access if it does not touch the region reserved for the SMBus. Fixes: 7ae81952cda ("i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR") Link: https://bugzilla.kernel.org/show_bug.cgi?id=200737 Reported-by: Yussuf Khalil Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 47581c32b1e1..f78069cd8d53 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1272,6 +1272,13 @@ static void i801_add_tco(struct i801_priv *priv) } #ifdef CONFIG_ACPI +static bool i801_acpi_is_smbus_ioport(const struct i801_priv *priv, + acpi_physical_address address) +{ + return address >= priv->smba && + address <= pci_resource_end(priv->pci_dev, SMBBAR); +} + static acpi_status i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, u64 *value, void *handler_context, void *region_context) @@ -1287,7 +1294,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, */ mutex_lock(&priv->acpi_lock); - if (!priv->acpi_reserved) { + if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) { priv->acpi_reserved = true; dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n"); From fdbd625224311d5f622f0a4251114ba63293a5d5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 18 Apr 2016 10:28:36 +0100 Subject: [PATCH 071/812] arm64: cpufeature: Track 32bit EL0 support commit 042446a31e3803d81c7e618dd80928dc3dce70c5 upstream. Add cpu_hwcap bit for keeping track of the support for 32bit EL0. Tested-by: Yury Norov Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 8 +++++++- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8884b5d5f48c..ad83c245781c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -31,8 +31,9 @@ #define ARM64_WORKAROUND_CAVIUM_23154 6 #define ARM64_WORKAROUND_834220 7 #define ARM64_WORKAROUND_CAVIUM_27456 8 +#define ARM64_HAS_32BIT_EL0 9 -#define ARM64_NCAPS 9 +#define ARM64_NCAPS 10 #ifndef __ASSEMBLY__ @@ -180,6 +181,11 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } +static inline bool system_supports_32bit_el0(void) +{ + return cpus_have_cap(ARM64_HAS_32BIT_EL0); +} + static inline bool system_supports_mixed_endian_el0(void) { return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d48ab5b41f52..50150320f80d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -109,6 +109,7 @@ #define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 #define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2735bf814592..c1eddc07d996 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -653,6 +653,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 2, }, #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ + { + .desc = "32-bit EL0 Support", + .capability = ARM64_HAS_32BIT_EL0, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_EL0_SHIFT, + .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, + }, {}, }; From ae22586bb579b89c43231a206f074a181a04da13 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 16:53:22 +0100 Subject: [PATCH 072/812] arm64: KVM: Sanitize PSTATE.M when being set from userspace commit 2a3f93459d689d990b3ecfbe782fec89b97d3279 upstream. Not all execution modes are valid for a guest, and some of them depend on what the HW actually supports. Let's verify that what userspace provides is compatible with both the VM settings and the HW capabilities. Cc: Fixes: 0d854a60b1d7 ("arm64: KVM: enable initialization of a 32bit vcpu") Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/kvm/guest.c | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 25a40213bd9b..0729a2f94482 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -41,6 +41,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.hcr_el2 & HCR_RW); +} + static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 83aa598ed81e..79705fde8cc8 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -143,17 +143,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; + u64 mode = (*(u64 *)valp) & COMPAT_PSR_MODE_MASK; switch (mode) { case COMPAT_PSR_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case COMPAT_PSR_MODE_FIQ: case COMPAT_PSR_MODE_IRQ: case COMPAT_PSR_MODE_SVC: case COMPAT_PSR_MODE_ABT: case COMPAT_PSR_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; From bbbc4dabca5d51c608b0fd608eb3cd8659266653 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 11 Sep 2018 05:32:37 -0400 Subject: [PATCH 073/812] media: v4l: event: Prevent freeing event subscriptions while accessed commit ad608fbcf166fec809e402d548761768f602702c upstream. The event subscriptions are added to the subscribed event list while holding a spinlock, but that lock is subsequently released while still accessing the subscription object. This makes it possible to unsubscribe the event --- and freeing the subscription object's memory --- while the subscription object is simultaneously accessed. Prevent this by adding a mutex to serialise the event subscription and unsubscription. This also gives a guarantee to the callback ops that the add op has returned before the del op is called. This change also results in making the elems field less special: subscriptions are only added to the event list once they are fully initialised. Signed-off-by: Sakari Ailus Reviewed-by: Hans Verkuil Reviewed-by: Laurent Pinchart Cc: stable@vger.kernel.org # for 4.14 and up Fixes: c3b5b0241f62 ("V4L/DVB: V4L: Events: Add backend") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-event.c | 37 ++++++++++++++-------------- drivers/media/v4l2-core/v4l2-fh.c | 2 ++ include/media/v4l2-fh.h | 1 + 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c index 8d3171c6bee8..b47ac4e053d0 100644 --- a/drivers/media/v4l2-core/v4l2-event.c +++ b/drivers/media/v4l2-core/v4l2-event.c @@ -119,14 +119,6 @@ static void __v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *e if (sev == NULL) return; - /* - * If the event has been added to the fh->subscribed list, but its - * add op has not completed yet elems will be 0, treat this as - * not being subscribed. - */ - if (!sev->elems) - return; - /* Increase event sequence number on fh. */ fh->sequence++; @@ -212,6 +204,7 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, struct v4l2_subscribed_event *sev, *found_ev; unsigned long flags; unsigned i; + int ret = 0; if (sub->type == V4L2_EVENT_ALL) return -EINVAL; @@ -229,31 +222,36 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, sev->flags = sub->flags; sev->fh = fh; sev->ops = ops; + sev->elems = elems; + + mutex_lock(&fh->subscribe_lock); spin_lock_irqsave(&fh->vdev->fh_lock, flags); found_ev = v4l2_event_subscribed(fh, sub->type, sub->id); - if (!found_ev) - list_add(&sev->list, &fh->subscribed); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (found_ev) { + /* Already listening */ kfree(sev); - return 0; /* Already listening */ + goto out_unlock; } if (sev->ops && sev->ops->add) { - int ret = sev->ops->add(sev, elems); + ret = sev->ops->add(sev, elems); if (ret) { - sev->ops = NULL; - v4l2_event_unsubscribe(fh, sub); - return ret; + kfree(sev); + goto out_unlock; } } - /* Mark as ready for use */ - sev->elems = elems; + spin_lock_irqsave(&fh->vdev->fh_lock, flags); + list_add(&sev->list, &fh->subscribed); + spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); - return 0; +out_unlock: + mutex_unlock(&fh->subscribe_lock); + + return ret; } EXPORT_SYMBOL_GPL(v4l2_event_subscribe); @@ -292,6 +290,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, return 0; } + mutex_lock(&fh->subscribe_lock); + spin_lock_irqsave(&fh->vdev->fh_lock, flags); sev = v4l2_event_subscribed(fh, sub->type, sub->id); @@ -310,6 +310,7 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, sev->ops->del(sev); kfree(sev); + mutex_unlock(&fh->subscribe_lock); return 0; } diff --git a/drivers/media/v4l2-core/v4l2-fh.c b/drivers/media/v4l2-core/v4l2-fh.c index c97067a25bd2..1d076deb05a9 100644 --- a/drivers/media/v4l2-core/v4l2-fh.c +++ b/drivers/media/v4l2-core/v4l2-fh.c @@ -49,6 +49,7 @@ void v4l2_fh_init(struct v4l2_fh *fh, struct video_device *vdev) INIT_LIST_HEAD(&fh->available); INIT_LIST_HEAD(&fh->subscribed); fh->sequence = -1; + mutex_init(&fh->subscribe_lock); } EXPORT_SYMBOL_GPL(v4l2_fh_init); @@ -93,6 +94,7 @@ void v4l2_fh_exit(struct v4l2_fh *fh) if (fh->vdev == NULL) return; v4l2_event_unsubscribe_all(fh); + mutex_destroy(&fh->subscribe_lock); fh->vdev = NULL; } EXPORT_SYMBOL_GPL(v4l2_fh_exit); diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index 803516775162..4fdcd0d807d7 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -43,6 +43,7 @@ struct v4l2_fh { wait_queue_head_t wait; struct list_head subscribed; /* Subscribed events */ struct list_head available; /* Dequeueable event */ + struct mutex subscribe_lock; unsigned int navailable; u32 sequence; From c7bb863d8ec048e9ee63d2fc028544507ab857e9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 20 Aug 2018 16:05:45 +1000 Subject: [PATCH 074/812] KVM: PPC: Book3S HV: Don't truncate HPTE index in xlate function [ Upstream commit 46dec40fb741f00f1864580130779aeeaf24fb3d ] This fixes a bug which causes guest virtual addresses to get translated to guest real addresses incorrectly when the guest is using the HPT MMU and has more than 256GB of RAM, or more specifically has a HPT larger than 2GB. This has showed up in testing as a failure of the host to emulate doorbell instructions correctly on POWER9 for HPT guests with more than 256GB of RAM. The bug is that the HPTE index in kvmppc_mmu_book3s_64_hv_xlate() is stored as an int, and in forming the HPTE address, the index gets shifted left 4 bits as an int before being signed-extended to 64 bits. The simple fix is to make the variable a long int, matching the return type of kvmppc_hv_find_lock_hpte(), which is what calculates the index. Fixes: 697d3899dcb4 ("KVM: PPC: Implement MMIO emulation support for Book3S HV guests") Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index fb37290a57b4..366965ae37bd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -314,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long pp, key; unsigned long v, gr; __be64 *hptep; - int index; + long int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); /* Get SLB entry */ From 3ba72689c8d142df3bd9c4d432fa2a27005e4631 Mon Sep 17 00:00:00 2001 From: Danek Duvall Date: Wed, 22 Aug 2018 16:01:04 -0700 Subject: [PATCH 075/812] mac80211: correct use of IEEE80211_VHT_CAP_RXSTBC_X [ Upstream commit 67d1ba8a6dc83d90cd58b89fa6cbf9ae35a0cf7f ] The mod mask for VHT capabilities intends to say that you can override the number of STBC receive streams, and it does, but only by accident. The IEEE80211_VHT_CAP_RXSTBC_X aren't bits to be set, but values (albeit left-shifted). ORing the bits together gets the right answer, but we should use the _MASK macro here instead. Signed-off-by: Danek Duvall Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 2ee53dc1ddf7..d784d29a4fd6 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -460,10 +460,7 @@ static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = { cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160 | - IEEE80211_VHT_CAP_RXSTBC_1 | - IEEE80211_VHT_CAP_RXSTBC_2 | - IEEE80211_VHT_CAP_RXSTBC_3 | - IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_RXSTBC_MASK | IEEE80211_VHT_CAP_TXSTBC | IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | From cef1e353282eb63cec3bfb325bacac45b1c156f2 Mon Sep 17 00:00:00 2001 From: Danek Duvall Date: Wed, 22 Aug 2018 16:01:05 -0700 Subject: [PATCH 076/812] mac80211_hwsim: correct use of IEEE80211_VHT_CAP_RXSTBC_X [ Upstream commit d7c863a2f65e48f442379f4ee1846d52e0c5d24d ] The mac80211_hwsim driver intends to say that it supports up to four STBC receive streams, but instead it ends up saying something undefined. The IEEE80211_VHT_CAP_RXSTBC_X macros aren't independent bits that can be ORed together, but values. In this case, _4 is the appropriate one to use. Signed-off-by: Danek Duvall Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e8b770a95f7a..c98cb962b454 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2453,9 +2453,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160 | IEEE80211_VHT_CAP_TXSTBC | - IEEE80211_VHT_CAP_RXSTBC_1 | - IEEE80211_VHT_CAP_RXSTBC_2 | - IEEE80211_VHT_CAP_RXSTBC_3 | IEEE80211_VHT_CAP_RXSTBC_4 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; sband->vht_cap.vht_mcs.rx_mcs_map = From 880b5a109a50a8a15ff355de2f64e91bc9d8710c Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 13 Aug 2018 15:57:44 +0200 Subject: [PATCH 077/812] gpio: adp5588: Fix sleep-in-atomic-context bug [ Upstream commit 6537886cdc9a637711fd6da980dbb87c2c87c9aa ] This fixes: [BUG] gpio: gpio-adp5588: A possible sleep-in-atomic-context bug in adp5588_gpio_write() [BUG] gpio: gpio-adp5588: A possible sleep-in-atomic-context bug in adp5588_gpio_direction_input() Reported-by: Jia-Ju Bai Signed-off-by: Michael Hennerich Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-adp5588.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-adp5588.c b/drivers/gpio/gpio-adp5588.c index 984186ee58a0..f5f7b5368da6 100644 --- a/drivers/gpio/gpio-adp5588.c +++ b/drivers/gpio/gpio-adp5588.c @@ -41,6 +41,8 @@ struct adp5588_gpio { uint8_t int_en[3]; uint8_t irq_mask[3]; uint8_t irq_stat[3]; + uint8_t int_input_en[3]; + uint8_t int_lvl_cached[3]; }; static int adp5588_gpio_read(struct i2c_client *client, u8 reg) @@ -177,12 +179,28 @@ static void adp5588_irq_bus_sync_unlock(struct irq_data *d) struct adp5588_gpio *dev = irq_data_get_irq_chip_data(d); int i; - for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) + for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) { + if (dev->int_input_en[i]) { + mutex_lock(&dev->lock); + dev->dir[i] &= ~dev->int_input_en[i]; + dev->int_input_en[i] = 0; + adp5588_gpio_write(dev->client, GPIO_DIR1 + i, + dev->dir[i]); + mutex_unlock(&dev->lock); + } + + if (dev->int_lvl_cached[i] != dev->int_lvl[i]) { + dev->int_lvl_cached[i] = dev->int_lvl[i]; + adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + i, + dev->int_lvl[i]); + } + if (dev->int_en[i] ^ dev->irq_mask[i]) { dev->int_en[i] = dev->irq_mask[i]; adp5588_gpio_write(dev->client, GPIO_INT_EN1 + i, dev->int_en[i]); } + } mutex_unlock(&dev->irq_lock); } @@ -225,9 +243,7 @@ static int adp5588_irq_set_type(struct irq_data *d, unsigned int type) else return -EINVAL; - adp5588_gpio_direction_input(&dev->gpio_chip, gpio); - adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + bank, - dev->int_lvl[bank]); + dev->int_input_en[bank] |= bit; return 0; } From dd23c326d63a4383dfe0668781830f3353a3a9b9 Mon Sep 17 00:00:00 2001 From: Yuan-Chi Pang Date: Wed, 29 Aug 2018 09:30:08 +0800 Subject: [PATCH 078/812] mac80211: mesh: fix HWMP sequence numbering to follow standard [ Upstream commit 1f631c3201fe5491808df143d8fcba81b3197ffd ] IEEE 802.11-2016 14.10.8.3 HWMP sequence numbering says: If it is a target mesh STA, it shall update its own HWMP SN to maximum (current HWMP SN, target HWMP SN in the PREQ element) + 1 immediately before it generates a PREP element in response to a PREQ element. Signed-off-by: Yuan-Chi Pang Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh_hwmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index e68a409fc351..33d5271a9e32 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -552,6 +552,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, forward = false; reply = true; target_metric = 0; + + if (SN_GT(target_sn, ifmsh->sn)) + ifmsh->sn = target_sn; + if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || time_before(jiffies, ifmsh->last_sn_update)) { From 7445b711282eeee0c033c1d13af9f2be349bf2d6 Mon Sep 17 00:00:00 2001 From: Arunk Khandavalli Date: Thu, 30 Aug 2018 00:40:16 +0300 Subject: [PATCH 079/812] cfg80211: nl80211_update_ft_ies() to validate NL80211_ATTR_IE [ Upstream commit 4f0223bfe9c3e62d8f45a85f1ef1b18a8a263ef9 ] nl80211_update_ft_ies() tried to validate NL80211_ATTR_IE with is_valid_ie_attr() before dereferencing it, but that helper function returns true in case of NULL pointer (i.e., attribute not included). This can result to dereferencing a NULL pointer. Fix that by explicitly checking that NL80211_ATTR_IE is included. Fixes: 355199e02b83 ("cfg80211: Extend support for IEEE 802.11r Fast BSS Transition") Signed-off-by: Arunk Khandavalli Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b07fd8b8b50c..642a78079ae1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10014,6 +10014,7 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MDID] || + !info->attrs[NL80211_ATTR_IE] || !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; From e02c60d8b79abb4c96043bcf3cb70e98461ceb42 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Thu, 30 Aug 2018 15:57:09 +0800 Subject: [PATCH 080/812] RAID10 BUG_ON in raise_barrier when force is true and conf->barrier is 0 [ Upstream commit 1d0ffd264204eba1861865560f1f7f7a92919384 ] In raid10 reshape_request it gets max_sectors in read_balance. If the underlayer disks have bad blocks, the max_sectors is less than last. It will call goto read_more many times. It calls raise_barrier(conf, sectors_done != 0) every time. In this condition sectors_done is not 0. So the value passed to the argument force of raise_barrier is true. In raise_barrier it checks conf->barrier when force is true. If force is true and conf->barrier is 0, it panic. In this case reshape_request submits bio to under layer disks. And in the callback function of the bio it calls lower_barrier. If the bio finishes before calling raise_barrier again, it can trigger the BUG_ON. Add one pair of raise_barrier/lower_barrier to fix this bug. Signed-off-by: Xiao Ni Suggested-by: Neil Brown Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8a731bdd268e..89111d455b71 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4336,11 +4336,12 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, allow_barrier(conf); } + raise_barrier(conf, 0); read_more: /* Now schedule reads for blocks from sector_nr to last */ r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); r10_bio->state = 0; - raise_barrier(conf, sectors_done != 0); + raise_barrier(conf, 1); atomic_set(&r10_bio->remaining, 0); r10_bio->mddev = mddev; r10_bio->sector = sector_nr; @@ -4445,6 +4446,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, if (sector_nr <= last) goto read_more; + lower_barrier(conf); + /* Now that we have done the whole section we can * update reshape_progress */ From 3a20876cd27dc80c3aec27e161e3cfb20ca57d74 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Aug 2018 23:30:47 +0900 Subject: [PATCH 081/812] i2c: uniphier: issue STOP only for last message or I2C_M_STOP [ Upstream commit 38f5d8d8cbb2ffa2b54315118185332329ec891c ] This driver currently emits a STOP if the next message is not I2C_MD_RD. It should not do it because it disturbs the I2C_RDWR ioctl, where read/write transactions are combined without STOP between. Issue STOP only when the message is the last one _or_ flagged with I2C_M_STOP. Fixes: dd6fd4a32793 ("i2c: uniphier: add UniPhier FIFO-less I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-uniphier.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c index e3c3861c3325..ad5eb8bacc6d 100644 --- a/drivers/i2c/busses/i2c-uniphier.c +++ b/drivers/i2c/busses/i2c-uniphier.c @@ -247,11 +247,8 @@ static int uniphier_i2c_master_xfer(struct i2c_adapter *adap, return ret; for (msg = msgs; msg < emsg; msg++) { - /* If next message is read, skip the stop condition */ - bool stop = !(msg + 1 < emsg && msg[1].flags & I2C_M_RD); - /* but, force it if I2C_M_STOP is set */ - if (msg->flags & I2C_M_STOP) - stop = true; + /* Emit STOP if it is the last message or I2C_M_STOP is set. */ + bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP); ret = uniphier_i2c_master_xfer_one(adap, msg, stop); if (ret) From 293c9b05879be53b17f80985123d93537264a875 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Aug 2018 23:30:48 +0900 Subject: [PATCH 082/812] i2c: uniphier-f: issue STOP only for last message or I2C_M_STOP [ Upstream commit 4c85609b08c4761eca0a40fd7beb06bc650f252d ] This driver currently emits a STOP if the next message is not I2C_MD_RD. It should not do it because it disturbs the I2C_RDWR ioctl, where read/write transactions are combined without STOP between. Issue STOP only when the message is the last one _or_ flagged with I2C_M_STOP. Fixes: 6a62974b667f ("i2c: uniphier_f: add UniPhier FIFO-builtin I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-uniphier-f.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index e8d03bcfe3e0..3f6b43fe4d5d 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -394,11 +394,8 @@ static int uniphier_fi2c_master_xfer(struct i2c_adapter *adap, return ret; for (msg = msgs; msg < emsg; msg++) { - /* If next message is read, skip the stop condition */ - bool stop = !(msg + 1 < emsg && msg[1].flags & I2C_M_RD); - /* but, force it if I2C_M_STOP is set */ - if (msg->flags & I2C_M_STOP) - stop = true; + /* Emit STOP if it is the last message or I2C_M_STOP is set. */ + bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP); ret = uniphier_fi2c_master_xfer_one(adap, msg, stop); if (ret) From ca8aa9f49b569ac6647dd5dad3ba4072e2e70c1e Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 1 Sep 2018 20:11:05 +0800 Subject: [PATCH 083/812] net: cadence: Fix a sleep-in-atomic-context bug in macb_halt_tx() [ Upstream commit 16fe10cf92783ed9ceb182d6ea2b8adf5e8ec1b8 ] The kernel module may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16 are: [FUNC] usleep_range drivers/net/ethernet/cadence/macb_main.c, 648: usleep_range in macb_halt_tx drivers/net/ethernet/cadence/macb_main.c, 730: macb_halt_tx in macb_tx_error_task drivers/net/ethernet/cadence/macb_main.c, 721: _raw_spin_lock_irqsave in macb_tx_error_task To fix this bug, usleep_range() is replaced with udelay(). This bug is found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 8d54e7b41bbf..8c698d464716 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -523,7 +523,7 @@ static int macb_halt_tx(struct macb *bp) if (!(status & MACB_BIT(TGO))) return 0; - usleep_range(10, 250); + udelay(250); } while (time_before(halt_time, timeout)); return -ETIMEDOUT; From 2b2ccb29f307c382ce4c19c3cf5733be2abd2dba Mon Sep 17 00:00:00 2001 From: Jon Kuhn Date: Mon, 9 Jul 2018 14:33:14 +0000 Subject: [PATCH 084/812] fs/cifs: don't translate SFM_SLASH (U+F026) to backslash [ Upstream commit c15e3f19a6d5c89b1209dc94b40e568177cb0921 ] When a Mac client saves an item containing a backslash to a file server the backslash is represented in the CIFS/SMB protocol as as U+F026. Before this change, listing a directory containing an item with a backslash in its name will return that item with the backslash represented with a true backslash character (U+005C) because convert_sfm_character mapped U+F026 to U+005C when interpretting the CIFS/SMB protocol response. However, attempting to open or stat the path using a true backslash will result in an error because convert_to_sfm_char does not map U+005C back to U+F026 causing the CIFS/SMB request to be made with the backslash represented as U+005C. This change simply prevents the U+F026 to U+005C conversion from happenning. This is analogous to how the code does not do any translation of UNI_SLASH (U+F000). Signed-off-by: Jon Kuhn Signed-off-by: Steve French Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_unicode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index a0b3e7d1be48..211ac472cb9d 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -101,9 +101,6 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_LESSTHAN: *target = '<'; break; - case SFM_SLASH: - *target = '\\'; - break; case SFM_SPACE: *target = ' '; break; From 2c81860b8ebd47ab1a4b3f719874937acf1a2e24 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 31 Aug 2018 11:10:55 +0300 Subject: [PATCH 085/812] cfg80211: fix a type issue in ieee80211_chandef_to_operating_class() [ Upstream commit 8442938c3a2177ba16043b3a935f2c78266ad399 ] The "chandef->center_freq1" variable is a u32 but "freq" is a u16 so we are truncating away the high bits. I noticed this bug because in commit 9cf0a0b4b64a ("cfg80211: Add support for 60GHz band channels 5 and 6") we made "freq <= 56160 + 2160 * 6" a valid requency when before it was only "freq <= 56160 + 2160 * 4" that was valid. It introduces a static checker warning: net/wireless/util.c:1571 ieee80211_chandef_to_operating_class() warn: always true condition '(freq <= 56160 + 2160 * 6) => (0-u16max <= 69120)' But really we probably shouldn't have been truncating the high bits away to begin with. Signed-off-by: Dan Carpenter Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index baf7218cec15..1d239564baa3 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1360,7 +1360,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, u8 *op_class) { u8 vht_opclass; - u16 freq = chandef->center_freq1; + u32 freq = chandef->center_freq1; if (freq >= 2412 && freq <= 2472) { if (chandef->width > NL80211_CHAN_WIDTH_40) From 1d9e59cbd3c22f8f255b539c7ac74cb2d6ffee64 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 31 Aug 2018 11:31:06 +0300 Subject: [PATCH 086/812] mac80211: fix a race between restart and CSA flows [ Upstream commit f3ffb6c3a28963657eb8b02a795d75f2ebbd5ef4 ] We hit a problem with iwlwifi that was caused by a bug in mac80211. A bug in iwlwifi caused the firwmare to crash in certain cases in channel switch. Because of that bug, drv_pre_channel_switch would fail and trigger the restart flow. Now we had the hw restart worker which runs on the system's workqueue and the csa_connection_drop_work worker that runs on mac80211's workqueue that can run together. This is obviously problematic since the restart work wants to reconfigure the connection, while the csa_connection_drop_work worker does the exact opposite: it tries to disconnect. Fix this by cancelling the csa_connection_drop_work worker in the restart worker. Note that this can sound racy: we could have: driver iface_work CSA_work restart_work +++++++++++++++++++++++++++++++++++++++++++++ | <--drv_cs ---| -CS FAILED--> | | | cancel_work(CSA) schedule | CSA work | | | Race between those 2 But this is not possible because we flush the workqueue in the restart worker before we cancel the CSA worker. That would be bullet proof if we could guarantee that we schedule the CSA worker only from the iface_work which runs on the workqueue (and not on the system's workqueue), but unfortunately we do have an instance in which we schedule the CSA work outside the context of the workqueue (ieee80211_chswitch_done). Note also that we should probably cancel other workers like beacon_connection_loss_work and possibly others for different types of interfaces, at the very least, IBSS should suffer from the exact same problem, but for now, do the minimum to fix the actual bug that was actually experienced and reproduced. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d784d29a4fd6..15d23aeea634 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -253,8 +253,27 @@ static void ieee80211_restart_work(struct work_struct *work) "%s called with hardware scan in progress\n", __func__); rtnl_lock(); - list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(sdata, &local->interfaces, list) { + /* + * XXX: there may be more work for other vif types and even + * for station mode: a good thing would be to run most of + * the iface type's dependent _stop (ieee80211_mg_stop, + * ieee80211_ibss_stop) etc... + * For now, fix only the specific bug that was seen: race + * between csa_connection_drop_work and us. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + /* + * This worker is scheduled from the iface worker that + * runs on mac80211's workqueue, so we can't be + * scheduling this worker after the cancel right here. + * The exception is ieee80211_chswitch_done. + * Then we can have a race... + */ + cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work); + } flush_delayed_work(&sdata->dec_tailroom_needed_wk); + } ieee80211_scan_cancel(local); ieee80211_reconfig(local); rtnl_unlock(); From 8ac8c00f263cb02193233937715d53b416472081 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 31 Aug 2018 11:31:10 +0300 Subject: [PATCH 087/812] mac80211: Fix station bandwidth setting after channel switch [ Upstream commit 0007e94355fdb71a1cf5dba0754155cba08f0666 ] When performing a channel switch flow for a managed interface, the flow did not update the bandwidth of the AP station and the rate scale algorithm. In case of a channel width downgrade, this would result with the rate scale algorithm using a bandwidth that does not match the interface channel configuration. Fix this by updating the AP station bandwidth and rate scaling algorithm before the actual channel change in case of a bandwidth downgrade, or after the actual channel change in case of a bandwidth upgrade. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 005cd8796505..a5e11280f405 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1021,6 +1021,10 @@ static void ieee80211_chswitch_work(struct work_struct *work) */ if (sdata->reserved_chanctx) { + struct ieee80211_supported_band *sband = NULL; + struct sta_info *mgd_sta = NULL; + enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20; + /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their @@ -1029,6 +1033,48 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (sdata->reserved_ready) goto out; + if (sdata->vif.bss_conf.chandef.width != + sdata->csa_chandef.width) { + /* + * For managed interface, we need to also update the AP + * station bandwidth and align the rate scale algorithm + * on the bandwidth change. Here we only consider the + * bandwidth of the new channel definition (as channel + * switch flow does not have the full HT/VHT/HE + * information), assuming that if additional changes are + * required they would be done as part of the processing + * of the next beacon from the AP. + */ + switch (sdata->csa_chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + default: + bw = IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_40: + bw = IEEE80211_STA_RX_BW_40; + break; + case NL80211_CHAN_WIDTH_80: + bw = IEEE80211_STA_RX_BW_80; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + bw = IEEE80211_STA_RX_BW_160; + break; + } + + mgd_sta = sta_info_get(sdata, ifmgd->bssid); + sband = + local->hw.wiphy->bands[sdata->csa_chandef.chan->band]; + } + + if (sdata->vif.bss_conf.chandef.width > + sdata->csa_chandef.width) { + mgd_sta->sta.bandwidth = bw; + rate_control_rate_update(local, sband, mgd_sta, + IEEE80211_RC_BW_CHANGED); + } + ret = ieee80211_vif_use_reserved_context(sdata); if (ret) { sdata_info(sdata, @@ -1039,6 +1085,13 @@ static void ieee80211_chswitch_work(struct work_struct *work) goto out; } + if (sdata->vif.bss_conf.chandef.width < + sdata->csa_chandef.width) { + mgd_sta->sta.bandwidth = bw; + rate_control_rate_update(local, sband, mgd_sta, + IEEE80211_RC_BW_CHANGED); + } + goto out; } From e712dfc2284196788194cb770ee86c642ee37e10 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 31 Aug 2018 11:31:13 +0300 Subject: [PATCH 088/812] mac80211: shorten the IBSS debug messages [ Upstream commit c6e57b3896fc76299913b8cfd82d853bee8a2c84 ] When tracing is enabled, all the debug messages are recorded and must not exceed MAX_MSG_LEN (100) columns. Longer debug messages grant the user with: WARNING: CPU: 3 PID: 32642 at /tmp/wifi-core-20180806094828/src/iwlwifi-stack-dev/net/mac80211/./trace_msg.h:32 trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211] Workqueue: phy1 ieee80211_iface_work [mac80211] RIP: 0010:trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211] Call Trace: __sdata_dbg+0xbd/0x120 [mac80211] ieee80211_ibss_rx_queued_mgmt+0x15f/0x510 [mac80211] ieee80211_iface_work+0x21d/0x320 [mac80211] Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/ibss.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 24ba31601fc9..f2af19673b26 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -948,8 +948,8 @@ static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, if (len < IEEE80211_DEAUTH_FRAME_LEN) return; - ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, reason); + ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (reason: %d)\n", mgmt->bssid, reason); sta_info_destroy_addr(sdata, mgmt->sa); } @@ -967,9 +967,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); - ibss_dbg(sdata, - "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction); + ibss_dbg(sdata, "RX Auth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (auth_transaction=%d)\n", + mgmt->bssid, auth_transaction); if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; @@ -1174,10 +1174,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, rx_timestamp = drv_get_tsf(local, sdata); } - ibss_dbg(sdata, - "RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + ibss_dbg(sdata, "RX beacon SA=%pM BSSID=%pM TSF=0x%llx\n", mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, + (unsigned long long)rx_timestamp); + ibss_dbg(sdata, "\tBCN=0x%llx diff=%lld @%lu\n", (unsigned long long)beacon_timestamp, (unsigned long long)(rx_timestamp - beacon_timestamp), jiffies); @@ -1536,9 +1536,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, tx_last_beacon = drv_tx_last_beacon(local); - ibss_dbg(sdata, - "RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon); + ibss_dbg(sdata, "RX ProbeReq SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (tx_last_beacon=%d)\n", + mgmt->bssid, tx_last_beacon); if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da)) return; From 0a3026d30f48edeba214bdd53c03ea4250f99d16 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 4 Sep 2018 15:45:48 -0700 Subject: [PATCH 089/812] tools/vm/slabinfo.c: fix sign-compare warning [ Upstream commit 904506562e0856f2535d876407d087c9459d345b ] Currently we get the following compiler warning: slabinfo.c:854:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] if (s->object_size < min_objsize) ^ due to the mismatch of signed/unsigned comparison. ->object_size and ->slab_size are never expected to be negative, so let's define them as unsigned int. [n-horiguchi@ah.jp.nec.com: convert everything - none of these can be negative] Link: http://lkml.kernel.org/r/20180826234947.GA9787@hori1.linux.bs1.fc.nec.co.jp Link: http://lkml.kernel.org/r/1535103134-20239-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/vm/slabinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 499b8819d4c6..5173a191cd03 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -29,8 +29,8 @@ struct slabinfo { int alias; int refs; int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; - int hwcache_align, object_size, objs_per_slab; - int sanity_checks, slab_size, store_user, trace; + unsigned int hwcache_align, object_size, objs_per_slab; + unsigned int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; unsigned long partial, objects, slabs, objects_partial, objects_total; unsigned long alloc_fastpath, alloc_slowpath; From 38ecf178f64a0e866f4bd479f0aca988c2ac9958 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 4 Sep 2018 15:45:51 -0700 Subject: [PATCH 090/812] tools/vm/page-types.c: fix "defined but not used" warning [ Upstream commit 7ab660f8baecfe26c1c267fa8e64d2073feae2bb ] debugfs_known_mountpoints[] is not used any more, so let's remove it. Link: http://lkml.kernel.org/r/1535102651-19418-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/vm/page-types.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 5a6016224bb9..c7fcc84fc0c0 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -150,12 +150,6 @@ static const char * const page_flag_names[] = { }; -static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug", - "/debug", - 0, -}; - /* * data structures */ From 82660490d5e81b95bb74e7cc0811f3ea6559d9ad Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Fri, 5 Oct 2018 15:52:19 -0700 Subject: [PATCH 091/812] mm: madvise(MADV_DODUMP): allow hugetlbfs pages commit d41aa5252394c065d1f04d1ceea885b70d00c9c6 upstream. Reproducer, assuming 2M of hugetlbfs available: Hugetlbfs mounted, size=2M and option user=testuser # mount | grep ^hugetlbfs hugetlbfs on /dev/hugepages type hugetlbfs (rw,pagesize=2M,user=dan) # sysctl vm.nr_hugepages=1 vm.nr_hugepages = 1 # grep Huge /proc/meminfo AnonHugePages: 0 kB ShmemHugePages: 0 kB HugePages_Total: 1 HugePages_Free: 1 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 2048 kB Hugetlb: 2048 kB Code: #include #include #define SIZE 2*1024*1024 int main() { void *ptr; ptr = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB | MAP_ANONYMOUS, -1, 0); madvise(ptr, SIZE, MADV_DONTDUMP); madvise(ptr, SIZE, MADV_DODUMP); } Compile and strace: mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x7ff7c9200000 madvise(0x7ff7c9200000, 2097152, MADV_DONTDUMP) = 0 madvise(0x7ff7c9200000, 2097152, MADV_DODUMP) = -1 EINVAL (Invalid argument) hugetlbfs pages have VM_DONTEXPAND in the VmFlags driver pages based on author testing with analysis from Florian Weimer[1]. The inclusion of VM_DONTEXPAND into the VM_SPECIAL defination was a consequence of the large useage of VM_DONTEXPAND in device drivers. A consequence of [2] is that VM_DONTEXPAND marked pages are unable to be marked DODUMP. A user could quite legitimately madvise(MADV_DONTDUMP) their hugetlbfs memory for a while and later request that madvise(MADV_DODUMP) on the same memory. We correct this omission by allowing madvice(MADV_DODUMP) on hugetlbfs pages. [1] https://stackoverflow.com/questions/52548260/madvisedodump-on-the-same-ptr-size-as-a-successful-madvisedontdump-fails-wit [2] commit 0103bd16fb90 ("mm: prepare VM_DONTDUMP for using in drivers") Link: http://lkml.kernel.org/r/20180930054629.29150-1-daniel@linux.ibm.com Link: https://lists.launchpad.net/maria-discuss/msg05245.html Fixes: 0103bd16fb90 ("mm: prepare VM_DONTDUMP for using in drivers") Reported-by: Kenneth Penza Signed-off-by: Daniel Black Reviewed-by: Mike Kravetz Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 2a0f9a4504f1..f548c66154ee 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -76,7 +76,7 @@ static long madvise_behavior(struct vm_area_struct *vma, new_flags |= VM_DONTDUMP; break; case MADV_DODUMP: - if (new_flags & VM_SPECIAL) { + if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) { error = -EINVAL; goto out; } From 093da71c6ead5dd84b590262e64e2ff381349acf Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Tue, 7 Aug 2018 14:44:48 +0300 Subject: [PATCH 092/812] usb: gadget: fotg210-udc: Fix memory leak of fotg210->ep[i] [ Upstream commit c37bd52836296ecc9a0fc8060b819089aebdbcde ] There is no deallocation of fotg210->ep[i] elements, allocated at fotg210_udc_probe. The patch adds deallocation of fotg210->ep array elements and simplifies error path of fotg210_udc_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/fotg210-udc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/gadget/udc/fotg210-udc.c index 6ba122cc7490..95df2b3bb6a1 100644 --- a/drivers/usb/gadget/udc/fotg210-udc.c +++ b/drivers/usb/gadget/udc/fotg210-udc.c @@ -1066,12 +1066,15 @@ static struct usb_gadget_ops fotg210_gadget_ops = { static int fotg210_udc_remove(struct platform_device *pdev) { struct fotg210_udc *fotg210 = platform_get_drvdata(pdev); + int i; usb_del_gadget_udc(&fotg210->gadget); iounmap(fotg210->reg); free_irq(platform_get_irq(pdev, 0), fotg210); fotg210_ep_free_request(&fotg210->ep[0]->ep, fotg210->ep0_req); + for (i = 0; i < FOTG210_MAX_NUM_EP; i++) + kfree(fotg210->ep[i]); kfree(fotg210); return 0; @@ -1102,7 +1105,7 @@ static int fotg210_udc_probe(struct platform_device *pdev) /* initialize udc */ fotg210 = kzalloc(sizeof(struct fotg210_udc), GFP_KERNEL); if (fotg210 == NULL) - goto err_alloc; + goto err; for (i = 0; i < FOTG210_MAX_NUM_EP; i++) { _ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL); @@ -1114,7 +1117,7 @@ static int fotg210_udc_probe(struct platform_device *pdev) fotg210->reg = ioremap(res->start, resource_size(res)); if (fotg210->reg == NULL) { pr_err("ioremap error.\n"); - goto err_map; + goto err_alloc; } spin_lock_init(&fotg210->lock); @@ -1162,7 +1165,7 @@ static int fotg210_udc_probe(struct platform_device *pdev) fotg210->ep0_req = fotg210_ep_alloc_request(&fotg210->ep[0]->ep, GFP_KERNEL); if (fotg210->ep0_req == NULL) - goto err_req; + goto err_map; fotg210_init(fotg210); @@ -1190,12 +1193,14 @@ static int fotg210_udc_probe(struct platform_device *pdev) fotg210_ep_free_request(&fotg210->ep[0]->ep, fotg210->ep0_req); err_map: - if (fotg210->reg) - iounmap(fotg210->reg); + iounmap(fotg210->reg); err_alloc: + for (i = 0; i < FOTG210_MAX_NUM_EP; i++) + kfree(fotg210->ep[i]); kfree(fotg210); +err: return ret; } From 0482fe19f6c6a9c0e1b18ad506c5f71408d6ae35 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 28 Aug 2018 14:38:48 +0530 Subject: [PATCH 093/812] perf probe powerpc: Ignore SyS symbols irrespective of endianness [ Upstream commit fa694160cca6dbba17c57dc7efec5f93feaf8795 ] This makes sure that the SyS symbols are ignored for any powerpc system, not just the big endian ones. Reported-by: Naveen N. Rao Signed-off-by: Sandipan Das Reviewed-by: Kamalesh Babulal Acked-by: Naveen N. Rao Cc: Jiri Olsa Cc: Ravi Bangoria Fixes: fb6d59423115 ("perf probe ppc: Use the right prefix when ignoring SyS symbols on ppc") Link: http://lkml.kernel.org/r/20180828090848.1914-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/powerpc/util/sym-handling.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index bbc1a50768dd..873f19f1a771 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -27,15 +27,16 @@ void arch__elf_sym_adjust(GElf_Sym *sym) #endif #endif -#if !defined(_CALL_ELF) || _CALL_ELF != 2 int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { char *sym = syma->name; +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Skip over any initial dot */ if (*sym == '.') sym++; +#endif /* Avoid "SyS" kernel syscall aliases */ if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3)) @@ -46,6 +47,7 @@ int arch__choose_best_symbol(struct symbol *syma, return SYMBOL_A; } +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Allow matching against dot variants */ int arch__compare_symbol_names(const char *namea, const char *nameb) { From 378ee2546842993fe27a2678f82abac683b5e795 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 3 Sep 2018 18:54:14 +0200 Subject: [PATCH 094/812] RDMA/ucma: check fd type in ucma_migrate_id() [ Upstream commit 0d23ba6034b9cf48b8918404367506da3e4b3ee5 ] The current code grabs the private_data of whatever file descriptor userspace has supplied and implicitly casts it to a `struct ucma_file *`, potentially causing a type confusion. This is probably fine in practice because the pointer is only used for comparisons, it is never actually dereferenced; and even in the comparisons, it is unlikely that a file from another filesystem would have a ->private_data pointer that happens to also be valid in this context. But ->private_data is not always guaranteed to be a valid pointer to an object owned by the file's filesystem; for example, some filesystems just cram numbers in there. Check the type of the supplied file descriptor to be safe, analogous to how other places in the kernel do it. Fixes: 88314e4dda1e ("RDMA/cma: add support for rdma_migrate_id()") Signed-off-by: Jann Horn Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 55aa8d3d752f..9712a63957e1 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -123,6 +123,8 @@ static DEFINE_MUTEX(mut); static DEFINE_IDR(ctx_idr); static DEFINE_IDR(multicast_idr); +static const struct file_operations ucma_fops; + static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) { @@ -1535,6 +1537,10 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, f = fdget(cmd.fd); if (!f.file) return -ENOENT; + if (f.file->f_op != &ucma_fops) { + ret = -EINVAL; + goto file_put; + } /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(f.file->private_data, cmd.id); From 6b63d77fbe177f3845bce24da34e68ca32672f86 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 15 Aug 2018 21:45:37 +0100 Subject: [PATCH 095/812] USB: yurex: Check for truncation in yurex_read() [ Upstream commit 14427b86837a4baf1c121934c6599bdb67dfa9fc ] snprintf() always returns the full length of the string it could have printed, even if it was truncated because the buffer was too small. So in case the counter value is truncated, we will over-read from in_buffer and over-write to the caller's buffer. I don't think it's actually possible for this to happen, but in case truncation occurs, WARN and return -EIO. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index e8e8702d5adf..5594a4a4a83f 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -431,6 +431,9 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, spin_unlock_irqrestore(&dev->lock, flags); mutex_unlock(&dev->io_mutex); + if (WARN_ON_ONCE(len >= sizeof(in_buffer))) + return -EIO; + return simple_read_from_buffer(buffer, count, ppos, in_buffer, len); } From ec5947c8aa7ae9a821f973552a1b48580ae30549 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 4 Sep 2018 15:56:57 +1000 Subject: [PATCH 096/812] drm/nouveau/TBDdevinit: don't fail when PMU/PRE_OS is missing from VBIOS [ Upstream commit 0a6986c6595e9afd20ff7280dab36431c1e467f8 ] This Falcon application doesn't appear to be present on some newer systems, so let's not fail init if we can't find it. TBD: is there a way to determine whether it *should* be there? Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c index 2b9c3f11b7a8..ba42ed86148a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c @@ -161,7 +161,8 @@ gm204_devinit_post(struct nvkm_devinit *base, bool post) } /* load and execute some other ucode image (bios therm?) */ - return pmu_load(init, 0x01, post, NULL, NULL); + pmu_load(init, 0x01, post, NULL, NULL); + return 0; } static const struct nvkm_devinit_func From 8c47defad8510820d609a66d1c8d3cac64913ac5 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 3 Sep 2018 13:15:58 +1000 Subject: [PATCH 097/812] fs/cifs: suppress a string overflow warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bcfb84a996f6fa90b5e6e2954b2accb7a4711097 ] A powerpc build of cifs with gcc v8.2.0 produces this warning: fs/cifs/cifssmb.c: In function ‘CIFSSMBNegotiate’: fs/cifs/cifssmb.c:605:3: warning: ‘strncpy’ writing 16 bytes into a region of size 1 overflows the destination [-Wstringop-overflow=] strncpy(pSMB->DialectsArray+count, protocols[i].name, 16); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Since we are already doing a strlen() on the source, change the strncpy to a memcpy(). Signed-off-by: Stephen Rothwell Signed-off-by: Steve French Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifssmb.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 63aea21e6298..b9b8f19dce0e 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -577,10 +577,15 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) } count = 0; + /* + * We know that all the name entries in the protocols array + * are short (< 16 bytes anyway) and are NUL terminated. + */ for (i = 0; i < CIFS_NUM_PROT; i++) { - strncpy(pSMB->DialectsArray+count, protocols[i].name, 16); - count += strlen(protocols[i].name) + 1; - /* null at end of source and target buffers anyway */ + size_t len = strlen(protocols[i].name) + 1; + + memcpy(pSMB->DialectsArray+count, protocols[i].name, len); + count += len; } inc_rfc1001_len(pSMB, count); pSMB->ByteCount = cpu_to_le16(count); From a438d505966601c324ebe3c231de9ed9d5ef8642 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 10 Sep 2018 16:50:09 +0100 Subject: [PATCH 098/812] dm thin metadata: try to avoid ever aborting transactions [ Upstream commit 3ab91828166895600efd9cdc3a0eb32001f7204a ] Committing a transaction can consume some metadata of it's own, we now reserve a small amount of metadata to cover this. Free metadata reported by the kernel will not include this reserve. If any of the reserve has been used after a commit we enter a new internal state PM_OUT_OF_METADATA_SPACE. This is reported as PM_READ_ONLY, so no userland changes are needed. If the metadata device is resized the pool will move back to PM_WRITE. These changes mean we never need to abort and rollback a transaction due to running out of metadata space. This is particularly important because there have been a handful of reports of data corruption against DM thin-provisioning that can all be attributed to the thin-pool having ran out of metadata space. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 36 ++++++++++++++++- drivers/md/dm-thin.c | 73 +++++++++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index e339f4288e8f..14ab86424c6a 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -189,6 +189,12 @@ struct dm_pool_metadata { unsigned long flags; sector_t data_block_size; + /* + * We reserve a section of the metadata for commit overhead. + * All reported space does *not* include this. + */ + dm_block_t metadata_reserve; + /* * Set if a transaction has to be aborted but the attempt to roll back * to the previous (good) transaction failed. The only pool metadata @@ -827,6 +833,22 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) return dm_tm_commit(pmd->tm, sblock); } +static void __set_metadata_reserve(struct dm_pool_metadata *pmd) +{ + int r; + dm_block_t total; + dm_block_t max_blocks = 4096; /* 16M */ + + r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total); + if (r) { + DMERR("could not get size of metadata device"); + pmd->metadata_reserve = max_blocks; + } else { + sector_div(total, 10); + pmd->metadata_reserve = min(max_blocks, total); + } +} + struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, sector_t data_block_size, bool format_device) @@ -860,6 +882,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(r); } + __set_metadata_reserve(pmd); + return pmd; } @@ -1763,6 +1787,13 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, down_read(&pmd->root_lock); if (!pmd->fail_io) r = dm_sm_get_nr_free(pmd->metadata_sm, result); + + if (!r) { + if (*result < pmd->metadata_reserve) + *result = 0; + else + *result -= pmd->metadata_reserve; + } up_read(&pmd->root_lock); return r; @@ -1875,8 +1906,11 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_cou int r = -EINVAL; down_write(&pmd->root_lock); - if (!pmd->fail_io) + if (!pmd->fail_io) { r = __resize_space_map(pmd->metadata_sm, new_count); + if (!r) + __set_metadata_reserve(pmd); + } up_write(&pmd->root_lock); return r; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 315767e8ae4d..bc4e6825ff62 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -200,7 +200,13 @@ struct dm_thin_new_mapping; enum pool_mode { PM_WRITE, /* metadata may be changed */ PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */ + + /* + * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY. + */ + PM_OUT_OF_METADATA_SPACE, PM_READ_ONLY, /* metadata may not be changed */ + PM_FAIL, /* all I/O fails */ }; @@ -1301,7 +1307,35 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); static void requeue_bios(struct pool *pool); -static void check_for_space(struct pool *pool) +static bool is_read_only_pool_mode(enum pool_mode mode) +{ + return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY); +} + +static bool is_read_only(struct pool *pool) +{ + return is_read_only_pool_mode(get_pool_mode(pool)); +} + +static void check_for_metadata_space(struct pool *pool) +{ + int r; + const char *ooms_reason = NULL; + dm_block_t nr_free; + + r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free); + if (r) + ooms_reason = "Could not get free metadata blocks"; + else if (!nr_free) + ooms_reason = "No free metadata blocks"; + + if (ooms_reason && !is_read_only(pool)) { + DMERR("%s", ooms_reason); + set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE); + } +} + +static void check_for_data_space(struct pool *pool) { int r; dm_block_t nr_free; @@ -1327,14 +1361,16 @@ static int commit(struct pool *pool) { int r; - if (get_pool_mode(pool) >= PM_READ_ONLY) + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) return -EINVAL; r = dm_pool_commit_metadata(pool->pmd); if (r) metadata_operation_failed(pool, "dm_pool_commit_metadata", r); - else - check_for_space(pool); + else { + check_for_metadata_space(pool); + check_for_data_space(pool); + } return r; } @@ -1400,6 +1436,19 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) return r; } + r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks); + if (r) { + metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r); + return r; + } + + if (!free_blocks) { + /* Let's commit before we use up the metadata reserve. */ + r = commit(pool); + if (r) + return r; + } + return 0; } @@ -1431,6 +1480,7 @@ static int should_error_unserviceable_bio(struct pool *pool) case PM_OUT_OF_DATA_SPACE: return pool->pf.error_if_no_space ? -ENOSPC : 0; + case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: case PM_FAIL: return -EIO; @@ -2401,8 +2451,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) error_retry_list(pool); break; + case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: - if (old_mode != new_mode) + if (!is_read_only_pool_mode(old_mode)) notify_of_pool_mode_change(pool, "read-only"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_read_only; @@ -3333,6 +3384,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) DMINFO("%s: growing the metadata device from %llu to %llu blocks", dm_device_name(pool->pool_md), sb_metadata_dev_size, metadata_dev_size); + + if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE) + set_pool_mode(pool, PM_WRITE); + r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); if (r) { metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r); @@ -3636,7 +3691,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - if (get_pool_mode(pool) >= PM_READ_ONLY) { + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) { DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode", dm_device_name(pool->pool_md)); return -EOPNOTSUPP; @@ -3710,6 +3765,7 @@ static void pool_status(struct dm_target *ti, status_type_t type, dm_block_t nr_blocks_data; dm_block_t nr_blocks_metadata; dm_block_t held_root; + enum pool_mode mode; char buf[BDEVNAME_SIZE]; char buf2[BDEVNAME_SIZE]; struct pool_c *pt = ti->private; @@ -3780,9 +3836,10 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("- "); - if (pool->pf.mode == PM_OUT_OF_DATA_SPACE) + mode = get_pool_mode(pool); + if (mode == PM_OUT_OF_DATA_SPACE) DMEMIT("out_of_data_space "); - else if (pool->pf.mode == PM_READ_ONLY) + else if (is_read_only_pool_mode(mode)) DMEMIT("ro "); else DMEMIT("rw "); From 80908be20fc65757628ab0c337a0e845616f220d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 20 Jul 2018 20:17:35 -0700 Subject: [PATCH 099/812] arch/hexagon: fix kernel/dma.c build warning [ Upstream commit 200f351e27f014fcbf69b544b0b4b72aeaf45fd3 ] Fix build warning in arch/hexagon/kernel/dma.c by casting a void * to unsigned long to match the function parameter type. ../arch/hexagon/kernel/dma.c: In function 'arch_dma_alloc': ../arch/hexagon/kernel/dma.c:51:5: warning: passing argument 2 of 'gen_pool_add' makes integer from pointer without a cast [enabled by default] ../include/linux/genalloc.h:112:19: note: expected 'long unsigned int' but argument is of type 'void *' Signed-off-by: Randy Dunlap Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Patch-mainline: linux-kernel @ 07/20/2018, 20:17 [rkuo@codeaurora.org: fixed architecture name] Signed-off-by: Richard Kuo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/hexagon/kernel/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 9e3ddf792bd3..2704e0b8de43 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -68,7 +68,7 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, panic("Can't create %s() memory pool!", __func__); else gen_pool_add(coherent_pool, - pfn_to_virt(max_low_pfn), + (unsigned long)pfn_to_virt(max_low_pfn), hexagon_coherent_pool_size, -1); } From ad4f63ba582b52b153fd452417a2fe59c4fa08e9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 22 Jul 2018 16:03:58 -0700 Subject: [PATCH 100/812] hexagon: modify ffs() and fls() to return int [ Upstream commit 5c41aaad409c097cf1ef74f2c649fed994744ef5 ] Building drivers/mtd/nand/raw/nandsim.c on arch/hexagon/ produces a printk format build warning. This is due to hexagon's ffs() being coded as returning long instead of int. Fix the printk format warning by changing all of hexagon's ffs() and fls() functions to return int instead of long. The variables that they return are already int instead of long. This return type matches the return type in . ../drivers/mtd/nand/raw/nandsim.c: In function 'init_nandsim': ../drivers/mtd/nand/raw/nandsim.c:760:2: warning: format '%u' expects argument of type 'unsigned int', but argument 2 has type 'long int' [-Wformat] There are no ffs() or fls() allmodconfig build errors after making this change. Signed-off-by: Randy Dunlap Cc: Richard Kuo Cc: linux-hexagon@vger.kernel.org Cc: Geert Uytterhoeven Patch-mainline: linux-kernel @ 07/22/2018, 16:03 Signed-off-by: Richard Kuo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/hexagon/include/asm/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 5e4a59b3ec1b..2691a1857d20 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -211,7 +211,7 @@ static inline long ffz(int x) * This is defined the same way as ffs. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline long fls(int x) +static inline int fls(int x) { int r; @@ -232,7 +232,7 @@ static inline long fls(int x) * the libc and compiler builtin ffs routines, therefore * differs in spirit from the above ffz (man ffs). */ -static inline long ffs(int x) +static inline int ffs(int x) { int r; From d1271cd1348ce5a86ff85f2bb410e9b36182f968 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 9 Sep 2018 17:47:31 +0200 Subject: [PATCH 101/812] arm64: jump_label.h: use asm_volatile_goto macro instead of "asm goto" [ Upstream commit 13aceef06adfaf93d52e01e28a8bc8a0ad471d83 ] All other uses of "asm goto" go through asm_volatile_goto, which avoids a miscompile when using GCC < 4.8.2. Replace our open-coded "asm goto" statements with the asm_volatile_goto macro to avoid issues with older toolchains. Cc: Catalin Marinas Reviewed-by: Nick Desaulniers Signed-off-by: Miguel Ojeda Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 1b5e0e843c3a..7e2b3e360086 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -28,7 +28,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm goto("1: nop\n\t" + asm_volatile_goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" @@ -42,7 +42,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm goto("1: b %l[l_yes]\n\t" + asm_volatile_goto("1: b %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" From 02975ee7377f64f24a94dde614dbe832b4078a9b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 11 Sep 2018 01:51:43 +0800 Subject: [PATCH 102/812] r8169: Clear RTL_FLAG_TASK_*_PENDING when clearing RTL_FLAG_TASK_ENABLED [ Upstream commit 6ad569019999300afd8e614d296fdc356550b77f ] After system suspend, sometimes the r8169 doesn't work when ethernet cable gets pluggued. This issue happens because rtl_reset_work() doesn't get called from rtl8169_runtime_resume(), after system suspend. In rtl_task(), RTL_FLAG_TASK_* only gets cleared if this condition is met: if (!netif_running(dev) || !test_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags)) ... If RTL_FLAG_TASK_ENABLED was cleared during system suspend while RTL_FLAG_TASK_RESET_PENDING was set, the next rtl_schedule_task() won't schedule task as the flag is still there. So in addition to clearing RTL_FLAG_TASK_ENABLED, also clears other flags. Cc: Heiner Kallweit Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 8b4069ea52ce..c6782ebd35e1 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -759,7 +759,7 @@ struct rtl8169_tc_offsets { }; enum rtl_flag { - RTL_FLAG_TASK_ENABLED, + RTL_FLAG_TASK_ENABLED = 0, RTL_FLAG_TASK_SLOW_PENDING, RTL_FLAG_TASK_RESET_PENDING, RTL_FLAG_TASK_PHY_PENDING, @@ -7618,7 +7618,8 @@ static int rtl8169_close(struct net_device *dev) rtl8169_update_counters(dev); rtl_lock_work(tp); - clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + /* Clear all task flags */ + bitmap_zero(tp->wk.flags, RTL_FLAG_MAX); rtl8169_down(dev); rtl_unlock_work(tp); @@ -7795,7 +7796,9 @@ static void rtl8169_net_suspend(struct net_device *dev) rtl_lock_work(tp); napi_disable(&tp->napi); - clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + /* Clear all task flags */ + bitmap_zero(tp->wk.flags, RTL_FLAG_MAX); + rtl_unlock_work(tp); rtl_pll_power_down(tp); From e12134726af8cbfd2dfa119a00d4e933e6a2ebc8 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 12 Sep 2018 15:31:35 +0200 Subject: [PATCH 103/812] s390/qeth: don't dump past end of unknown HW header [ Upstream commit 0ac1487c4b2de383b91ecad1be561b8f7a2c15f4 ] For inbound data with an unsupported HW header format, only dump the actual HW header. We have no idea how much payload follows it, and what it contains. Worst case, we dump past the end of the Inbound Buffer and access whatever is located next in memory. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index acdb5ccb0ab9..34d3b7aff513 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -523,7 +523,7 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card, default: dev_kfree_skb_any(skb); QETH_CARD_TEXT(card, 3, "inbunkno"); - QETH_DBF_HEX(CTRL, 3, hdr, QETH_DBF_CTRL_LEN); + QETH_DBF_HEX(CTRL, 3, hdr, sizeof(*hdr)); continue; } work_done++; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index bbdb3b6c54bb..2cc9bc1ef1e3 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1902,7 +1902,7 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, default: dev_kfree_skb_any(skb); QETH_CARD_TEXT(card, 3, "inbunkno"); - QETH_DBF_HEX(CTRL, 3, hdr, QETH_DBF_CTRL_LEN); + QETH_DBF_HEX(CTRL, 3, hdr, sizeof(*hdr)); continue; } work_done++; From cd65a43f4dfb75a6bb5211ce28e65e5429b7c499 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Sep 2018 12:47:01 +0300 Subject: [PATCH 104/812] cifs: read overflow in is_valid_oplock_break() [ Upstream commit 097f5863b1a0c9901f180bbd56ae7d630655faaa ] We need to verify that the "data_offset" is within bounds. Reported-by: Dr Silvio Cesare of InfoSect Signed-off-by: Dan Carpenter Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/misc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 0cc699d9b932..61a09ab2752e 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -406,9 +406,17 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) (struct smb_com_transaction_change_notify_rsp *)buf; struct file_notify_information *pnotify; __u32 data_offset = 0; + size_t len = srv->total_read - sizeof(pSMBr->hdr.smb_buf_length); + if (get_bcc(buf) > sizeof(struct file_notify_information)) { data_offset = le32_to_cpu(pSMBr->DataOffset); + if (data_offset > + len - sizeof(struct file_notify_information)) { + cifs_dbg(FYI, "invalid data_offset %u\n", + data_offset); + return true; + } pnotify = (struct file_notify_information *) ((char *)&pSMBr->hdr.Protocol + data_offset); cifs_dbg(FYI, "dnotify on %s Action: 0x%x\n", From 6899f2fe641c66569327284ce02242f9cc4f7b8d Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 6 Sep 2018 13:26:08 +0200 Subject: [PATCH 105/812] xen/manage: don't complain about an empty value in control/sysrq node [ Upstream commit 87dffe86d406bee8782cac2db035acb9a28620a7 ] When guest receives a sysrq request from the host it acknowledges it by writing '\0' to control/sysrq xenstore node. This, however, make xenstore watch fire again but xenbus_scanf() fails to parse empty value with "%c" format string: sysrq: SysRq : Emergency Sync Emergency Sync complete xen:manage: Error -34 reading sysrq code in control/sysrq Ignore -ERANGE the same way we already ignore -ENOENT, empty value in control/sysrq is totally legal. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Wei Liu Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/manage.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 2dd285827169..f494126aaecd 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -280,9 +280,11 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, /* * The Xenstore watch fires directly after registering it and * after a suspend/resume cycle. So ENOENT is no error but - * might happen in those cases. + * might happen in those cases. ERANGE is observed when we get + * an empty value (''), this happens when we acknowledge the + * request by writing '\0' below. */ - if (err != -ENOENT) + if (err != -ENOENT && err != -ERANGE) pr_err("Error %d reading sysrq code in control/sysrq\n", err); xenbus_transaction_end(xbt, 1); From 3ff6443780f19de31827528e9486a6d4b8441e1e Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Fri, 7 Sep 2018 16:31:35 +0200 Subject: [PATCH 106/812] xen: avoid crash in disable_hotplug_cpu [ Upstream commit 3366cdb6d350d95466ee430ac50f3c8415ca8f46 ] The command 'xl vcpu-set 0 0', issued in dom0, will crash dom0: BUG: unable to handle kernel NULL pointer dereference at 00000000000002d8 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 7 PID: 65 Comm: xenwatch Not tainted 4.19.0-rc2-1.ga9462db-default #1 openSUSE Tumbleweed (unreleased) Hardware name: Intel Corporation S5520UR/S5520UR, BIOS S5500.86B.01.00.0050.050620101605 05/06/2010 RIP: e030:device_offline+0x9/0xb0 Code: 77 24 00 e9 ce fe ff ff 48 8b 13 e9 68 ff ff ff 48 8b 13 e9 29 ff ff ff 48 8b 13 e9 ea fe ff ff 90 66 66 66 66 90 41 54 55 53 87 d8 02 00 00 01 0f 85 88 00 00 00 48 c7 c2 20 09 60 81 31 f6 RSP: e02b:ffffc90040f27e80 EFLAGS: 00010203 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8801f3800000 RSI: ffffc90040f27e70 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffffffff820e47b3 R09: 0000000000000000 R10: 0000000000007ff0 R11: 0000000000000000 R12: ffffffff822e6d30 R13: dead000000000200 R14: dead000000000100 R15: ffffffff8158b4e0 FS: 00007ffa595158c0(0000) GS:ffff8801f39c0000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000002d8 CR3: 00000001d9602000 CR4: 0000000000002660 Call Trace: handle_vcpu_hotplug_event+0xb5/0xc0 xenwatch_thread+0x80/0x140 ? wait_woken+0x80/0x80 kthread+0x112/0x130 ? kthread_create_worker_on_cpu+0x40/0x40 ret_from_fork+0x3a/0x50 This happens because handle_vcpu_hotplug_event is called twice. In the first iteration cpu_present is still true, in the second iteration cpu_present is false which causes get_cpu_device to return NULL. In case of cpu#0, cpu_online is apparently always true. Fix this crash by checking if the cpu can be hotplugged, which is false for a cpu that was just removed. Also check if the cpu was actually offlined by device_remove, otherwise leave the cpu_present state as it is. Rearrange to code to do all work with device_hotplug_lock held. Signed-off-by: Olaf Hering Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/cpu_hotplug.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index 5676aefdf2bc..f4e59c445964 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -18,15 +18,16 @@ static void enable_hotplug_cpu(int cpu) static void disable_hotplug_cpu(int cpu) { - if (cpu_online(cpu)) { - lock_device_hotplug(); + if (!cpu_is_hotpluggable(cpu)) + return; + lock_device_hotplug(); + if (cpu_online(cpu)) device_offline(get_cpu_device(cpu)); - unlock_device_hotplug(); - } - if (cpu_present(cpu)) + if (!cpu_online(cpu) && cpu_present(cpu)) { xen_arch_unregister_cpu(cpu); - - set_cpu_present(cpu, false); + set_cpu_present(cpu, false); + } + unlock_device_hotplug(); } static int vcpu_online(unsigned int cpu) From 17a6446ac75dba5c9f7d9f2775d7a9956b9486f9 Mon Sep 17 00:00:00 2001 From: Josh Abraham Date: Wed, 12 Sep 2018 15:13:54 -1000 Subject: [PATCH 107/812] xen: fix GCC warning and remove duplicate EVTCHN_ROW/EVTCHN_COL usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4dca864b59dd150a221730775e2f21f49779c135 ] This patch removes duplicate macro useage in events_base.c. It also fixes gcc warning: variable ‘col’ set but not used [-Wunused-but-set-variable] Signed-off-by: Joshua Abraham Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 21d679f88dfa..878a40950a3a 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -139,7 +139,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) clear_evtchn_to_irq_row(row); } - evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq; + evtchn_to_irq[row][col] = irq; return 0; } From ec2a4f06e31f410e027ff10237c8c824f36fa259 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 17 May 2018 16:35:07 +0200 Subject: [PATCH 108/812] smb2: fix missing files in root share directory listing commit 0595751f267994c3c7027377058e4185b3a28e75 upstream. When mounting a Windows share that is the root of a drive (eg. C$) the server does not return . and .. directory entries. This results in the smb2 code path erroneously skipping the 2 first entries. Pseudo-code of the readdir() code path: cifs_readdir(struct file, struct dir_context) initiate_cifs_search <-- if no reponse cached yet server->ops->query_dir_first dir_emit_dots dir_emit <-- adds "." and ".." if we're at pos=0 find_cifs_entry initiate_cifs_search <-- if pos < start of current response (restart search) server->ops->query_dir_next <-- if pos > end of current response (fetch next search res) for(...) <-- loops over cur response entries starting at pos cifs_filldir <-- skip . and .., emit entry cifs_fill_dirent dir_emit pos++ A) dir_emit_dots() always adds . & .. and sets the current dir pos to 2 (0 and 1 are done). Therefore we always want the index_to_find to be 2 regardless of if the response has . and .. B) smb1 code initializes index_of_last_entry with a +2 offset in cifssmb.c CIFSFindFirst(): psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + psrch_inf->entries_in_buffer; Later in find_cifs_entry() we want to find the next dir entry at pos=2 as a result of (A) first_entry_in_buffer = cfile->srch_inf.index_of_last_entry - cfile->srch_inf.entries_in_buffer; This var is the dir pos that the first entry in the buffer will have therefore it must be 2 in the first call. If we don't offset index_of_last_entry by 2 (like in (B)), first_entry_in_buffer=0 but we were instructed to get pos=2 so this code in find_cifs_entry() skips the 2 first which is ok for non-root shares, as it skips . and .. from the response but is not ok for root shares where the 2 first are actual files pos_in_buf = index_to_find - first_entry_in_buffer; // pos_in_buf=2 // we skip 2 first response entries :( for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) { /* go entry by entry figuring out which is first */ cur_ent = nxt_dir_entry(cur_ent, end_of_smb, cfile->srch_inf.info_level); } C) cifs_filldir() skips . and .. so we can safely ignore them for now. Sample program: int main(int argc, char **argv) { const char *path = argc >= 2 ? argv[1] : "."; DIR *dh; struct dirent *de; printf("listing path <%s>\n", path); dh = opendir(path); if (!dh) { printf("opendir error %d\n", errno); return 1; } while (1) { de = readdir(dh); if (!de) { if (errno) { printf("readdir error %d\n", errno); return 1; } printf("end of listing\n"); break; } printf("off=%lu <%s>\n", de->d_off, de->d_name); } return 0; } Before the fix with SMB1 on root shares: <.> off=1 <..> off=2 <$Recycle.Bin> off=3 off=4 and on non-root shares: <.> off=1 <..> off=4 <-- after adding .., the offsets jumps to +2 because <2536> off=5 we skipped . and .. from response buffer (C) <411> off=6 but still incremented pos off=7 off=8 Therefore the fix for smb2 is to mimic smb1 behaviour and offset the index_of_last_entry by 2. Test results comparing smb1 and smb2 before/after the fix on root share, non-root shares and on large directories (ie. multi-response dir listing): PRE FIX ======= pre-1-root VS pre-2-root: ERR pre-2-root is missing [bootmgr, $Recycle.Bin] pre-1-nonroot VS pre-2-nonroot: OK~ same files, same order, different offsets pre-1-nonroot-large VS pre-2-nonroot-large: OK~ same files, same order, different offsets POST FIX ======== post-1-root VS post-2-root: OK same files, same order, same offsets post-1-nonroot VS post-2-nonroot: OK same files, same order, same offsets post-1-nonroot-large VS post-2-nonroot-large: OK same files, same order, same offsets REGRESSION? =========== pre-1-root VS post-1-root: OK same files, same order, same offsets pre-1-nonroot VS post-1-nonroot: OK same files, same order, same offsets BugLink: https://bugzilla.samba.org/show_bug.cgi?id=13107 Signed-off-by: Aurelien Aptel Signed-off-by: Paulo Alcantara Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e6b1795fbf2a..2725085a3f9f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -914,7 +914,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, } srch_inf->entries_in_buffer = 0; - srch_inf->index_of_last_entry = 0; + srch_inf->index_of_last_entry = 2; rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, fid->volatile_fid, 0, srch_inf); From c3ae79907424401d241bbabaf623f58eb5bafe94 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 4 Oct 2018 11:39:42 +0800 Subject: [PATCH 109/812] ALSA: hda/realtek - Cannot adjust speaker's volume on Dell XPS 27 7760 commit 709ae62e8e6d9ac4df7dadb3b8ae432675c45ef9 upstream. The issue is the same as commit dd9aa335c880 ("ALSA: hda/realtek - Can't adjust speaker's volume on a Dell AIO"), the output requires to connect to a node with Amp-out capability. Applying the same fixup ALC298_FIXUP_SPK_VOLUME can fix the issue. BugLink: https://bugs.launchpad.net/bugs/1775068 Signed-off-by: Kai-Heng Feng Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d706a416b587..0467e5ba82e0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5642,6 +5642,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), From 8ec9cef30515e0d78183cb71d6fc3e82e1427d9e Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 21 Sep 2018 18:03:18 +0300 Subject: [PATCH 110/812] crypto: mxs-dcp - Fix wait logic on chan threads commit d80771c08363ad7fbf0f56f5301e7ca65065c582 upstream. When compiling with CONFIG_DEBUG_ATOMIC_SLEEP=y the mxs-dcp driver prints warnings such as: WARNING: CPU: 0 PID: 120 at kernel/sched/core.c:7736 __might_sleep+0x98/0x9c do not call blocking ops when !TASK_RUNNING; state=1 set at [<8081978c>] dcp_chan_thread_sha+0x3c/0x2ec The problem is that blocking ops will manipulate current->state themselves so it is not allowed to call them between set_current_state(TASK_INTERRUPTIBLE) and schedule(). Fix this by converting the per-chan mutex to a spinlock (it only protects tiny list ops anyway) and rearranging the wait logic so that callbacks are called current->state as TASK_RUNNING. Those callbacks will indeed call blocking ops themselves so this is required. Cc: Signed-off-by: Leonard Crestez Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/mxs-dcp.c | 53 +++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 59ed54e464a9..fe8cfe24c518 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -63,7 +63,7 @@ struct dcp { struct dcp_coherent_block *coh; struct completion completion[DCP_MAX_CHANS]; - struct mutex mutex[DCP_MAX_CHANS]; + spinlock_t lock[DCP_MAX_CHANS]; struct task_struct *thread[DCP_MAX_CHANS]; struct crypto_queue queue[DCP_MAX_CHANS]; }; @@ -349,13 +349,20 @@ static int dcp_chan_thread_aes(void *data) int ret; - do { - __set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&sdcp->mutex[chan]); + spin_lock(&sdcp->lock[chan]); backlog = crypto_get_backlog(&sdcp->queue[chan]); arq = crypto_dequeue_request(&sdcp->queue[chan]); - mutex_unlock(&sdcp->mutex[chan]); + spin_unlock(&sdcp->lock[chan]); + + if (!backlog && !arq) { + schedule(); + continue; + } + + set_current_state(TASK_RUNNING); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -363,11 +370,8 @@ static int dcp_chan_thread_aes(void *data) if (arq) { ret = mxs_dcp_aes_block_crypt(arq); arq->complete(arq, ret); - continue; } - - schedule(); - } while (!kthread_should_stop()); + } return 0; } @@ -407,9 +411,9 @@ static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb) rctx->ecb = ecb; actx->chan = DCP_CHAN_CRYPTO; - mutex_lock(&sdcp->mutex[actx->chan]); + spin_lock(&sdcp->lock[actx->chan]); ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); - mutex_unlock(&sdcp->mutex[actx->chan]); + spin_unlock(&sdcp->lock[actx->chan]); wake_up_process(sdcp->thread[actx->chan]); @@ -645,13 +649,20 @@ static int dcp_chan_thread_sha(void *data) struct ahash_request *req; int ret, fini; - do { - __set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&sdcp->mutex[chan]); + spin_lock(&sdcp->lock[chan]); backlog = crypto_get_backlog(&sdcp->queue[chan]); arq = crypto_dequeue_request(&sdcp->queue[chan]); - mutex_unlock(&sdcp->mutex[chan]); + spin_unlock(&sdcp->lock[chan]); + + if (!backlog && !arq) { + schedule(); + continue; + } + + set_current_state(TASK_RUNNING); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -663,12 +674,8 @@ static int dcp_chan_thread_sha(void *data) ret = dcp_sha_req_to_buf(arq); fini = rctx->fini; arq->complete(arq, ret); - if (!fini) - continue; } - - schedule(); - } while (!kthread_should_stop()); + } return 0; } @@ -726,9 +733,9 @@ static int dcp_sha_update_fx(struct ahash_request *req, int fini) rctx->init = 1; } - mutex_lock(&sdcp->mutex[actx->chan]); + spin_lock(&sdcp->lock[actx->chan]); ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); - mutex_unlock(&sdcp->mutex[actx->chan]); + spin_unlock(&sdcp->lock[actx->chan]); wake_up_process(sdcp->thread[actx->chan]); mutex_unlock(&actx->mutex); @@ -984,7 +991,7 @@ static int mxs_dcp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sdcp); for (i = 0; i < DCP_MAX_CHANS; i++) { - mutex_init(&sdcp->mutex[i]); + spin_lock_init(&sdcp->lock[i]); init_completion(&sdcp->completion[i]); crypto_init_queue(&sdcp->queue[i], 50); } From 574757073482f77ec10caea5e57726190a2837fa Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:51:58 -0700 Subject: [PATCH 111/812] proc: restrict kernel stack dumps to root commit f8a00cef17206ecd1b30d3d9f99e10d9fa707aa7 upstream. Currently, you can use /proc/self/task/*/stack to cause a stack walk on a task you control while it is running on another CPU. That means that the stack can change under the stack walker. The stack walker does have guards against going completely off the rails and into random kernel memory, but it can interpret random data from your kernel stack as instruction pointers and stack pointers. This can cause exposure of kernel stack contents to userspace. Restrict the ability to inspect kernel stacks of arbitrary tasks to root in order to prevent a local attacker from exploiting racy stack unwinding to leak kernel task stack contents. See the added comment for a longer rationale. There don't seem to be any users of this userspace API that can't gracefully bail out if reading from the file fails. Therefore, I believe that this change is unlikely to break things. In the case that this patch does end up needing a revert, the next-best solution might be to fake a single-entry stack based on wchan. Link: http://lkml.kernel.org/r/20180927153316.200286-1-jannh@google.com Fixes: 2ec220e27f50 ("proc: add /proc/*/stack") Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Alexey Dobriyan Cc: Ken Chen Cc: Will Deacon Cc: Laura Abbott Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Josh Poimboeuf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . Peter Anvin" Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 5f9cec2db6c3..4beed301e224 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -471,6 +471,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, int err; int i; + /* + * The ability to racily run the kernel stack unwinder on a running task + * and then observe the unwinder output is scary; while it is useful for + * debugging kernel issues, it can also allow an attacker to leak kernel + * stack contents. + * Doing this in a manner that is at least safe from races would require + * some work to ensure that the remote task can not be scheduled; and + * even then, this would still expose the unwinder as local attack + * surface. + * Therefore, this interface is restricted to root. + */ + if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) + return -EACCES; + entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); if (!entries) return -ENOMEM; From 20ba8a53a1ef1cffd0f795206d69181572ae84e8 Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Fri, 5 Oct 2018 15:52:15 -0700 Subject: [PATCH 112/812] ocfs2: fix locking for res->tracking and dlm->tracking_list commit cbe355f57c8074bc4f452e5b6e35509044c6fa23 upstream. In dlm_init_lockres() we access and modify res->tracking and dlm->tracking_list without holding dlm->track_lock. This can cause list corruptions and can end up in kernel panic. Fix this by locking res->tracking and dlm->tracking_list with dlm->track_lock instead of dlm->spinlock. Link: http://lkml.kernel.org/r/1529951192-4686-1-git-send-email-ashish.samant@oracle.com Signed-off-by: Ashish Samant Reviewed-by: Changwei Ge Acked-by: Joseph Qi Acked-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmmaster.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 4e2162b355db..0cefb036a17e 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -589,9 +589,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, res->last_used = 0; - spin_lock(&dlm->spinlock); + spin_lock(&dlm->track_lock); list_add_tail(&res->tracking, &dlm->tracking_list); - spin_unlock(&dlm->spinlock); + spin_unlock(&dlm->track_lock); memset(res->lvb, 0, DLM_LVB_LEN); memset(res->refmap, 0, sizeof(res->refmap)); From 93ae8552ec98405e84d194f9c5b084996c20ccdf Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 13 Sep 2018 21:16:20 -0400 Subject: [PATCH 113/812] dm thin metadata: fix __udivdi3 undefined on 32-bit commit 013ad043906b2befd4a9bfb06219ed9fedd92716 upstream. sector_div() is only viable for use with sector_t. dm_block_t is typedef'd to uint64_t -- so use div_u64() instead. Fixes: 3ab918281 ("dm thin metadata: try to avoid ever aborting transactions") Signed-off-by: Mike Snitzer Cc: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 14ab86424c6a..2711aa965445 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -843,10 +843,8 @@ static void __set_metadata_reserve(struct dm_pool_metadata *pmd) if (r) { DMERR("could not get size of metadata device"); pmd->metadata_reserve = max_blocks; - } else { - sector_div(total, 10); - pmd->metadata_reserve = min(max_blocks, total); - } + } else + pmd->metadata_reserve = min(max_blocks, div_u64(total, 10)); } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, From 11abaca7d5b923f1a14c578746d4232bc4ca7d72 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Oct 2018 08:52:13 +0200 Subject: [PATCH 114/812] Linux 4.4.160 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 06d5c6a6a0f6..607394a56036 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 159 +SUBLEVEL = 160 EXTRAVERSION = NAME = Blurry Fish Butt From c6f1725b9cbd208c469142ebfc5bff476352cc08 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:52:07 -0700 Subject: [PATCH 115/812] mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly commit 58bc4c34d249bf1bc50730a9a209139347cfacfe upstream. 5dd0b16cdaff ("mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP") made the availability of the NR_TLB_REMOTE_FLUSH* counters inside the kernel unconditional to reduce #ifdef soup, but (either to avoid showing dummy zero counters to userspace, or because that code was missed) didn't update the vmstat_array, meaning that all following counters would be shown with incorrect values. This only affects kernel builds with CONFIG_VM_EVENT_COUNTERS=y && CONFIG_DEBUG_TLBFLUSH=y && CONFIG_SMP=n. Link: http://lkml.kernel.org/r/20181001143138.95119-2-jannh@google.com Fixes: 5dd0b16cdaff ("mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP") Signed-off-by: Jann Horn Reviewed-by: Kees Cook Reviewed-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Christoph Lameter Cc: Kemi Wang Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vmstat.c b/mm/vmstat.c index 5712cdaae964..8895eff2d735 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -858,6 +858,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", +#else + "", /* nr_tlb_remote_flush */ + "", /* nr_tlb_remote_flush_received */ #endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", From 937b51457b5fd9560a2ddb300b5dfbec87082919 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 26 Sep 2018 18:11:22 +0200 Subject: [PATCH 116/812] fbdev/omapfb: fix omapfb_memory_read infoleak commit 1bafcbf59fed92af58955024452f45430d3898c5 upstream. OMAPFB_MEMORY_READ ioctl reads pixels from the LCD's memory and copies them to a userspace buffer. The code has two issues: - The user provided width and height could be large enough to overflow the calculations - The copy_to_user() can copy uninitialized memory to the userspace, which might contain sensitive kernel information. Fix these by limiting the width & height parameters, and only copying the amount of data that we actually received from the LCD. Signed-off-by: Tomi Valkeinen Reported-by: Jann Horn Cc: stable@vger.kernel.org Cc: security@kernel.org Cc: Will Deacon Cc: Jann Horn Cc: Tony Lindgren Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c index 9ddfdd63b84c..34ab4f950f0a 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c @@ -496,6 +496,9 @@ static int omapfb_memory_read(struct fb_info *fbi, if (!access_ok(VERIFY_WRITE, mr->buffer, mr->buffer_size)) return -EFAULT; + if (mr->w > 4096 || mr->h > 4096) + return -EINVAL; + if (mr->w * mr->h * 3 > mr->buffer_size) return -EINVAL; @@ -509,7 +512,7 @@ static int omapfb_memory_read(struct fb_info *fbi, mr->x, mr->y, mr->w, mr->h); if (r > 0) { - if (copy_to_user(mr->buffer, buf, mr->buffer_size)) + if (copy_to_user(mr->buffer, buf, r)) r = -EFAULT; } From 5961c3d0064fdc4326d5390b30ceb50ae5d1459e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 1 Oct 2018 12:52:15 -0700 Subject: [PATCH 117/812] x86/vdso: Fix asm constraints on vDSO syscall fallbacks commit 715bd9d12f84d8f5cc8ad21d888f9bc304a8eb0b upstream. The syscall fallbacks in the vDSO have incorrect asm constraints. They are not marked as writing to their outputs -- instead, they are marked as clobbering "memory", which is useless. In particular, gcc is smart enough to know that the timespec parameter hasn't escaped, so a memory clobber doesn't clobber it. And passing a pointer as an asm *input* does not tell gcc that the pointed-to value is changed. Add in the fact that the asm instructions weren't volatile, and gcc was free to omit them entirely unless their sole output (the return value) is used. Which it is (phew!), but that stops happening with some upcoming patches. As a trivial example, the following code: void test_fallback(struct timespec *ts) { vdso_fallback_gettime(CLOCK_MONOTONIC, ts); } compiles to: 00000000000000c0 : c0: c3 retq To add insult to injury, the RCX and R11 clobbers on 64-bit builds were missing. The "memory" clobber is also unnecessary -- no ordering with respect to other memory operations is needed, but that's going to be fixed in a separate not-for-stable patch. Fixes: 2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/2c0231690551989d2fafa60ed0e7b5cc8b403908.1538422295.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/vclock_gettime.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 5dd363d54348..722cbed8026b 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -51,8 +51,9 @@ extern u8 pvclock_page notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*ts) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : + "memory", "rcx", "r11"); return ret; } @@ -60,8 +61,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : + "memory", "rcx", "r11"); return ret; } @@ -143,12 +145,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" "mov %2, %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) + : "=a" (ret), "=m" (*ts) : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) : "memory", "edx"); return ret; @@ -158,12 +160,12 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" "mov %2, %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) + : "=a" (ret), "=m" (*tv), "=m" (*tz) : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) : "memory", "edx"); return ret; From e8dc08a109cb0ceb84bd0f5b9e7f13f054f7fba5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 3 Oct 2018 16:23:49 -0700 Subject: [PATCH 118/812] x86/vdso: Fix vDSO syscall fallback asm constraint regression commit 02e425668f5c9deb42787d10001a3b605993ad15 upstream. When I added the missing memory outputs, I failed to update the index of the first argument (ebx) on 32-bit builds, which broke the fallbacks. Somehow I must have screwed up my testing or gotten lucky. Add another test to cover gettimeofday() as well. Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 715bd9d12f84 ("x86/vdso: Fix asm constraints on vDSO syscall fallbacks") Link: http://lkml.kernel.org/r/21bd45ab04b6d838278fa5bebfa9163eceffa13c.1538608971.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/vclock_gettime.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 722cbed8026b..049327ee8868 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -147,11 +147,11 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[clock], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" : "=a" (ret), "=m" (*ts) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) : "memory", "edx"); return ret; } @@ -162,11 +162,11 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[tv], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" : "=a" (ret), "=m" (*tv), "=m" (*tz) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) : "memory", "edx"); return ret; } From 5527ae62617e69400435aa2f8b95656ad41626a2 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 27 Sep 2018 15:47:33 -0500 Subject: [PATCH 119/812] PCI: Reprogram bridge prefetch registers on resume commit 083874549fdfefa629dfa752785e20427dde1511 upstream. On 38+ Intel-based ASUS products, the NVIDIA GPU becomes unusable after S3 suspend/resume. The affected products include multiple generations of NVIDIA GPUs and Intel SoCs. After resume, nouveau logs many errors such as: fifo: fault 00 [READ] at 0000005555555000 engine 00 [GR] client 04 [HUB/FE] reason 4a [] on channel -1 [007fa91000 unknown] DRM: failed to idle channel 0 [DRM] Similarly, the NVIDIA proprietary driver also fails after resume (black screen, 100% CPU usage in Xorg process). We shipped a sample to NVIDIA for diagnosis, and their response indicated that it's a problem with the parent PCI bridge (on the Intel SoC), not the GPU. Runtime suspend/resume works fine, only S3 suspend is affected. We found a workaround: on resume, rewrite the Intel PCI bridge 'Prefetchable Base Upper 32 Bits' register (PCI_PREF_BASE_UPPER32). In the cases that I checked, this register has value 0 and we just have to rewrite that value. Linux already saves and restores PCI config space during suspend/resume, but this register was being skipped because upon resume, it already has value 0 (the correct, pre-suspend value). Intel appear to have previously acknowledged this behaviour and the requirement to rewrite this register: https://bugzilla.kernel.org/show_bug.cgi?id=116851#c23 Based on that, rewrite the prefetch register values even when that appears unnecessary. We have confirmed this solution on all the affected models we have in-hands (X542UQ, UX533FD, X530UN, V272UN). Additionally, this solves an issue where r8169 MSI-X interrupts were broken after S3 suspend/resume on ASUS X441UAR. This issue was recently worked around in commit 7bb05b85bc2d ("r8169: don't use MSI-X on RTL8106e"). It also fixes the same issue on RTL6186evl/8111evl on an Aimfor-tech laptop that we had not yet patched. I suspect it will also fix the issue that was worked around in commit 7c53a722459c ("r8169: don't use MSI-X on RTL8168g"). Thomas Martitz reports that this change also solves an issue where the AMD Radeon Polaris 10 GPU on the HP Zbook 14u G5 is unresponsive after S3 suspend/resume. Link: https://bugzilla.kernel.org/show_bug.cgi?id=201069 Signed-off-by: Daniel Drake Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Reviewed-By: Peter Wu CC: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 295bf1472d02..5073ab023123 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1064,12 +1064,12 @@ int pci_save_state(struct pci_dev *dev) EXPORT_SYMBOL(pci_save_state); static void pci_restore_config_dword(struct pci_dev *pdev, int offset, - u32 saved_val, int retry) + u32 saved_val, int retry, bool force) { u32 val; pci_read_config_dword(pdev, offset, &val); - if (val == saved_val) + if (!force && val == saved_val) return; for (;;) { @@ -1088,25 +1088,36 @@ static void pci_restore_config_dword(struct pci_dev *pdev, int offset, } static void pci_restore_config_space_range(struct pci_dev *pdev, - int start, int end, int retry) + int start, int end, int retry, + bool force) { int index; for (index = end; index >= start; index--) pci_restore_config_dword(pdev, 4 * index, pdev->saved_config_space[index], - retry); + retry, force); } static void pci_restore_config_space(struct pci_dev *pdev) { if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) { - pci_restore_config_space_range(pdev, 10, 15, 0); + pci_restore_config_space_range(pdev, 10, 15, 0, false); /* Restore BARs before the command register. */ - pci_restore_config_space_range(pdev, 4, 9, 10); - pci_restore_config_space_range(pdev, 0, 3, 0); + pci_restore_config_space_range(pdev, 4, 9, 10, false); + pci_restore_config_space_range(pdev, 0, 3, 0, false); + } else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + pci_restore_config_space_range(pdev, 12, 15, 0, false); + + /* + * Force rewriting of prefetch registers to avoid S3 resume + * issues on Intel PCI bridges that occur when these + * registers are not explicitly written. + */ + pci_restore_config_space_range(pdev, 9, 11, 0, true); + pci_restore_config_space_range(pdev, 0, 8, 0, false); } else { - pci_restore_config_space_range(pdev, 0, 15, 0); + pci_restore_config_space_range(pdev, 0, 15, 0, false); } } From 24479b9d2d03c1fde26dae957b18da8164caf3b0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 29 Sep 2018 16:01:58 +0200 Subject: [PATCH 120/812] mac80211: fix setting IEEE80211_KEY_FLAG_RX_MGMT for AP mode keys commit 211710ca74adf790b46ab3867fcce8047b573cd1 upstream. key->sta is only valid after ieee80211_key_link, which is called later in this function. Because of that, the IEEE80211_KEY_FLAG_RX_MGMT is never set when management frame protection is enabled. Fixes: e548c49e6dc6b ("mac80211: add key flag for management keys") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1f930032253a..67348d8ac35d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -219,7 +219,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ - if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP)) + if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: From 1516d9fa636f1e6276bf37086d06f9847c25bc59 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Oct 2018 11:08:12 +0200 Subject: [PATCH 121/812] PM / core: Clear the direct_complete flag on errors commit 69e445ab8b66a9f30519842ef18be555d3ee9b51 upstream. If __device_suspend() runs asynchronously (in which case the device passed to it is in dpm_suspended_list at that point) and it returns early on an error or pending wakeup, and the power.direct_complete flag has been set for the device already, the subsequent device_resume() will be confused by that and it will call pm_runtime_enable() incorrectly, as runtime PM has not been disabled for the device by __device_suspend(). To avoid that, clear power.direct_complete if __device_suspend() is not going to disable runtime PM for the device before returning. Fixes: aae4518b3124 (PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily) Reported-by: Al Cooper Tested-by: Al Cooper Reviewed-by: Ulf Hansson Cc: 3.16+ # 3.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e9b713675c7c..05409141ec07 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1355,8 +1355,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dpm_wait_for_children(dev, async); - if (async_error) + if (async_error) { + dev->power.direct_complete = false; goto Complete; + } /* * If a device configured to wake up the system from sleep states @@ -1368,6 +1370,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { + dev->power.direct_complete = false; async_error = -EBUSY; goto Complete; } From dce708809daaced8d63eda3447b59951186f3f1a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 25 Sep 2018 20:56:02 -0400 Subject: [PATCH 122/812] dm cache: fix resize crash if user doesn't reload cache table commit 5d07384a666d4b2f781dc056bfeec2c27fbdf383 upstream. A reload of the cache's DM table is needed during resize because otherwise a crash will occur when attempting to access smq policy entries associated with the portion of the cache that was recently extended. The reason is cache-size based data structures in the policy will not be resized, the only way to safely extend the cache is to allow for a proper cache policy initialization that occurs when the cache table is loaded. For example the smq policy's space_init(), init_allocator(), calc_hotspot_params() must be sized based on the extended cache size. The fix for this is to disallow cache resizes of this pattern: 1) suspend "cache" target's device 2) resize the fast device used for the cache 3) resume "cache" target's device Instead, the last step must be a full reload of the cache's DM table. Fixes: 66a636356 ("dm cache: add stochastic-multi-queue (smq) policy") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b59615ddf6ba..531d6f3a786e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3391,8 +3391,13 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) static bool can_resize(struct cache *cache, dm_cblock_t new_size) { - if (from_cblock(new_size) > from_cblock(cache->cache_size)) - return true; + if (from_cblock(new_size) > from_cblock(cache->cache_size)) { + if (cache->sized) { + DMERR("%s: unable to extend cache due to missing cache table reload", + cache_device_name(cache)); + return false; + } + } /* * We can't drop a dirty block when shrinking the cache. From ed8649a4691b52228bbbeca87780e284f2c62456 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 1 Oct 2018 18:36:07 +0300 Subject: [PATCH 123/812] xhci: Add missing CAS workaround for Intel Sunrise Point xHCI commit ffe84e01bb1b38c7eb9c6b6da127a6c136d251df upstream. The workaround for missing CAS bit is also needed for xHC on Intel sunrisepoint PCH. For more details see: Intel 100/c230 series PCH specification update Doc #332692-006 Errata #8 Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index cbf3be66f89c..d6e2199bcfe5 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -174,6 +174,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) xhci->quirks |= XHCI_MISSING_CAS; From d70a6783f730d83709ca8173558483dba0741b88 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Sep 2018 15:28:10 +0200 Subject: [PATCH 124/812] USB: serial: simple: add Motorola Tetra MTP6550 id commit f5fad711c06e652f90f581fc7c2caee327c33d31 upstream. Add device-id for the Motorola Tetra radio MTP6550. Bus 001 Device 004: ID 0cad:9012 Motorola CGISS Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x0cad Motorola CGISS idProduct 0x9012 bcdDevice 24.16 iManufacturer 1 Motorola Solutions, Inc. iProduct 2 TETRA PEI interface iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 55 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 3 Generic Serial config bmAttributes 0x80 (Bus Powered) MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Device Qualifier (for other device speed): bLength 10 bDescriptorType 6 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 bNumConfigurations 1 Device Status: 0x0000 (Bus Powered) Reported-by: Hans Hult Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 2674da40d9cd..6d6acf2c07c3 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -87,7 +87,8 @@ DEVICE(moto_modem, MOTO_IDS); /* Motorola Tetra driver */ #define MOTOROLA_TETRA_IDS() \ - { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */ + { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ + { USB_DEVICE(0x0cad, 0x9012) } /* MTP6550 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); /* Novatel Wireless GPS driver */ From 26f9ef6cebbe7af1893efc7720d3270e0139b9cf Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 25 Sep 2018 21:06:24 -0700 Subject: [PATCH 125/812] of: unittest: Disable interrupt node tests for old world MAC systems commit 8894891446c9380709451b99ab45c5c53adfd2fc upstream. On systems with OF_IMAP_OLDWORLD_MAC set in of_irq_workarounds, the devicetree interrupt parsing code is different, causing unit tests of devicetree interrupt nodes to fail. Due to a bug in unittest code, which tries to dereference an uninitialized pointer, this results in a crash. OF: /testcase-data/phandle-tests/consumer-a: arguments longer than property Unable to handle kernel paging request for data at address 0x00bc616e Faulting instruction address: 0xc08e9468 Oops: Kernel access of bad area, sig: 11 [#1] BE PREEMPT PowerMac Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.14.72-rc1-yocto-standard+ #1 task: cf8e0000 task.stack: cf8da000 NIP: c08e9468 LR: c08ea5bc CTR: c08ea5ac REGS: cf8dbb50 TRAP: 0300 Not tainted (4.14.72-rc1-yocto-standard+) MSR: 00001032 CR: 82004044 XER: 00000000 DAR: 00bc616e DSISR: 40000000 GPR00: c08ea5bc cf8dbc00 cf8e0000 c13ca517 c13ca517 c13ca8a0 00000066 00000002 GPR08: 00000063 00bc614e c0b05865 000affff 82004048 00000000 c00047f0 00000000 GPR16: c0a80000 c0a9cc34 c13ca517 c0ad1134 05ffffff 000affff c0b05860 c0abeef8 GPR24: cecec278 cecec278 c0a8c4d0 c0a885e0 c13ca8a0 05ffffff c13ca8a0 c13ca517 NIP [c08e9468] device_node_gen_full_name+0x30/0x15c LR [c08ea5bc] device_node_string+0x190/0x3c8 Call Trace: [cf8dbc00] [c007f670] trace_hardirqs_on_caller+0x118/0x1fc (unreliable) [cf8dbc40] [c08ea5bc] device_node_string+0x190/0x3c8 [cf8dbcb0] [c08eb794] pointer+0x25c/0x4d0 [cf8dbd00] [c08ebcbc] vsnprintf+0x2b4/0x5ec [cf8dbd60] [c08ec00c] vscnprintf+0x18/0x48 [cf8dbd70] [c008e268] vprintk_store+0x4c/0x22c [cf8dbda0] [c008ecac] vprintk_emit+0x94/0x130 [cf8dbdd0] [c008ff54] printk+0x5c/0x6c [cf8dbe10] [c0b8ddd4] of_unittest+0x2220/0x26f8 [cf8dbea0] [c0004434] do_one_initcall+0x4c/0x184 [cf8dbf00] [c0b4534c] kernel_init_freeable+0x13c/0x1d8 [cf8dbf30] [c0004814] kernel_init+0x24/0x118 [cf8dbf40] [c0013398] ret_from_kernel_thread+0x5c/0x64 The problem was observed when running a qemu test for the g3beige machine with devicetree unittests enabled. Disable interrupt node tests on affected systems to avoid both false unittest failures and the crash. With this patch in place, unittest on the affected system passes with the following message. dt-test ### end of unittest - 144 passed, 0 failed Fixes: 53a42093d96ef ("of: Add device tree selftests") Signed-off-by: Guenter Roeck Reviewed-by: Frank Rowand Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/unittest.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 2a547ca3d443..2eac3df7dd29 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -553,6 +553,9 @@ static void __init of_unittest_parse_interrupts(void) struct of_phandle_args args; int i, rc; + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return; + np = of_find_node_by_path("/testcase-data/interrupts/interrupts0"); if (!np) { pr_err("missing testcase data\n"); @@ -627,6 +630,9 @@ static void __init of_unittest_parse_interrupts_extended(void) struct of_phandle_args args; int i, rc; + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return; + np = of_find_node_by_path("/testcase-data/interrupts/interrupts-extended0"); if (!np) { pr_err("missing testcase data\n"); @@ -778,15 +784,19 @@ static void __init of_unittest_platform_populate(void) pdev = of_find_device_by_node(np); unittest(pdev, "device 1 creation failed\n"); - irq = platform_get_irq(pdev, 0); - unittest(irq == -EPROBE_DEFER, "device deferred probe failed - %d\n", irq); + if (!(of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)) { + irq = platform_get_irq(pdev, 0); + unittest(irq == -EPROBE_DEFER, + "device deferred probe failed - %d\n", irq); - /* Test that a parsing failure does not return -EPROBE_DEFER */ - np = of_find_node_by_path("/testcase-data/testcase-device2"); - pdev = of_find_device_by_node(np); - unittest(pdev, "device 2 creation failed\n"); - irq = platform_get_irq(pdev, 0); - unittest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); + /* Test that a parsing failure does not return -EPROBE_DEFER */ + np = of_find_node_by_path("/testcase-data/testcase-device2"); + pdev = of_find_device_by_node(np); + unittest(pdev, "device 2 creation failed\n"); + irq = platform_get_irq(pdev, 0); + unittest(irq < 0 && irq != -EPROBE_DEFER, + "device parsing error failed - %d\n", irq); + } np = of_find_node_by_path("/testcase-data/platform-tests"); unittest(np, "No testcase data in device tree\n"); From fb751efb29d037ee051f326b6f622881ca0b9a14 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 00:51:28 -0400 Subject: [PATCH 126/812] ext4: always verify the magic number in xattr blocks commit 513f86d73855ce556ea9522b6bfd79f87356dc3a upstream. If there an inode points to a block which is also some other type of metadata block (such as a block allocation bitmap), the buffer_verified flag can be set when it was validated as that other metadata block type; however, it would make a really terrible external attribute block. The reason why we use the verified flag is to avoid constantly reverifying the block. However, it doesn't take much overhead to make sure the magic number of the xattr block is correct, and this will avoid potential crashes. This addresses CVE-2018-10879. https://bugzilla.kernel.org/show_bug.cgi?id=200001 Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger [Backported to 4.4: adjust context] Signed-off-by: Daniel Rosenberg Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b51bb73b06a6..d0aaf338fa9f 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -220,12 +220,12 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) { int error; - if (buffer_verified(bh)) - return 0; - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) return -EFSCORRUPTED; + if (buffer_verified(bh)) + return 0; + if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) return -EFSBADCRC; error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, From 75fe5488b22f10844334c0960b12b2cf5c606028 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Tue, 19 Dec 2017 12:56:57 +0530 Subject: [PATCH 127/812] cgroup: Fix deadlock in cpu hotplug path commit 116d2f7496c51b2e02e8e4ecdd2bdf5fb9d5a641 upstream. Deadlock during cgroup migration from cpu hotplug path when a task T is being moved from source to destination cgroup. kworker/0:0 cpuset_hotplug_workfn() cpuset_hotplug_update_tasks() hotplug_update_tasks_legacy() remove_tasks_in_empty_cpuset() cgroup_transfer_tasks() // stuck in iterator loop cgroup_migrate() cgroup_migrate_add_task() In cgroup_migrate_add_task() it checks for PF_EXITING flag of task T. Task T will not migrate to destination cgroup. css_task_iter_start() will keep pointing to task T in loop waiting for task T cg_list node to be removed. Task T do_exit() exit_signals() // sets PF_EXITING exit_task_namespaces() switch_task_namespaces() free_nsproxy() put_mnt_ns() drop_collected_mounts() namespace_unlock() synchronize_rcu() _synchronize_rcu_expedited() schedule_work() // on cpu0 low priority worker pool wait_event() // waiting for work item to execute Task T inserted a work item in the worklist of cpu0 low priority worker pool. It is waiting for expedited grace period work item to execute. This work item will only be executed once kworker/0:0 complete execution of cpuset_hotplug_workfn(). kworker/0:0 ==> Task T ==>kworker/0:0 In case of PF_EXITING task being migrated from source to destination cgroup, migrate next available task in source cgroup. Signed-off-by: Prateek Sood Signed-off-by: Tejun Heo [AmitP: Upstream commit cherry-pick failed, so I picked the backported changes from CAF/msm-4.9 tree instead: https://source.codeaurora.org/quic/la/kernel/msm-4.9/commit/?id=49b74f1696417b270c89cd893ca9f37088928078] Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4cb94b678e9f..5299618d6308 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4083,7 +4083,11 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) */ do { css_task_iter_start(&from->self, &it); - task = css_task_iter_next(&it); + + do { + task = css_task_iter_next(&it); + } while (task && (task->flags & PF_EXITING)); + if (task) get_task_struct(task); css_task_iter_end(&it); From 023fdb64ee8d171f691a97bffa1d168bef750c7f Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Mon, 5 Mar 2018 14:44:02 +0800 Subject: [PATCH 128/812] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait commit 9ef0f58ed7b4a55da4a64641d538e0d9e46579ac upstream. The skb may be freed in tx completion context before trace_ath10k_wmi_cmd is called. This can be easily captured when KASAN(Kernel Address Sanitizer) is enabled. The fix is to move trace_ath10k_wmi_cmd before the send operation. As the ret has no meaning in trace_ath10k_wmi_cmd then, so remove this parameter too. Signed-off-by: Carl Huang Tested-by: Brian Norris Reviewed-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/trace.h | 12 ++++-------- drivers/net/wireless/ath/ath10k/wmi.c | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/trace.h b/drivers/net/wireless/ath/ath10k/trace.h index 71bdb368813d..0194bebbdbf7 100644 --- a/drivers/net/wireless/ath/ath10k/trace.h +++ b/drivers/net/wireless/ath/ath10k/trace.h @@ -152,10 +152,9 @@ TRACE_EVENT(ath10k_log_dbg_dump, ); TRACE_EVENT(ath10k_wmi_cmd, - TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len, - int ret), + TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len), - TP_ARGS(ar, id, buf, buf_len, ret), + TP_ARGS(ar, id, buf, buf_len), TP_STRUCT__entry( __string(device, dev_name(ar->dev)) @@ -163,7 +162,6 @@ TRACE_EVENT(ath10k_wmi_cmd, __field(unsigned int, id) __field(size_t, buf_len) __dynamic_array(u8, buf, buf_len) - __field(int, ret) ), TP_fast_assign( @@ -171,17 +169,15 @@ TRACE_EVENT(ath10k_wmi_cmd, __assign_str(driver, dev_driver_string(ar->dev)); __entry->id = id; __entry->buf_len = buf_len; - __entry->ret = ret; memcpy(__get_dynamic_array(buf), buf, buf_len); ), TP_printk( - "%s %s id %d len %zu ret %d", + "%s %s id %d len %zu", __get_str(driver), __get_str(device), __entry->id, - __entry->buf_len, - __entry->ret + __entry->buf_len ) ); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 7569db0f69b5..5bb1be478954 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1642,8 +1642,8 @@ int ath10k_wmi_cmd_send_nowait(struct ath10k *ar, struct sk_buff *skb, cmd_hdr->cmd_id = __cpu_to_le32(cmd); memset(skb_cb, 0, sizeof(*skb_cb)); + trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len); ret = ath10k_htc_send(&ar->htc, ar->wmi.eid, skb); - trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len, ret); if (ret) goto err_pull; From 0cbf366a69887b623108f0a20f8850331a7ffac8 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Sat, 27 May 2017 17:46:15 +0200 Subject: [PATCH 129/812] powerpc/fadump: Return error when fadump registration fails commit 98b8cd7f75643e0a442d7a4c1cef2c9d53b7e92b upstream. - log an error message when registration fails and no error code listed in the switch is returned - translate the hv error code to posix error code and return it from fw_register - return the posix error code from fw_register to the process writing to sysfs - return EEXIST on re-registration - return success on deregistration when fadump is not registered - return ENODEV when no memory is reserved for fadump Signed-off-by: Michal Suchanek Tested-by: Hari Bathini [mpe: Use pr_err() to shrink the error print] Signed-off-by: Michael Ellerman Cc: Kleber Sacilotto de Souza Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/fadump.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index c3c835290131..ca3ad5ebcd41 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -360,9 +360,9 @@ static int __init early_fadump_reserve_mem(char *p) } early_param("fadump_reserve_mem", early_fadump_reserve_mem); -static void register_fw_dump(struct fadump_mem_struct *fdm) +static int register_fw_dump(struct fadump_mem_struct *fdm) { - int rc; + int rc, err; unsigned int wait_time; pr_debug("Registering for firmware-assisted kernel dump...\n"); @@ -379,7 +379,11 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) } while (wait_time); + err = -EIO; switch (rc) { + default: + pr_err("Failed to register. Unknown Error(%d).\n", rc); + break; case -1: printk(KERN_ERR "Failed to register firmware-assisted kernel" " dump. Hardware Error(%d).\n", rc); @@ -387,18 +391,22 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) case -3: printk(KERN_ERR "Failed to register firmware-assisted kernel" " dump. Parameter Error(%d).\n", rc); + err = -EINVAL; break; case -9: printk(KERN_ERR "firmware-assisted kernel dump is already " " registered."); fw_dump.dump_registered = 1; + err = -EEXIST; break; case 0: printk(KERN_INFO "firmware-assisted kernel dump registration" " is successful\n"); fw_dump.dump_registered = 1; + err = 0; break; } + return err; } void crash_fadump(struct pt_regs *regs, const char *str) @@ -997,7 +1005,7 @@ static unsigned long init_fadump_header(unsigned long addr) return addr; } -static void register_fadump(void) +static int register_fadump(void) { unsigned long addr; void *vaddr; @@ -1008,7 +1016,7 @@ static void register_fadump(void) * assisted dump. */ if (!fw_dump.reserve_dump_area_size) - return; + return -ENODEV; ret = fadump_setup_crash_memory_ranges(); if (ret) @@ -1023,7 +1031,7 @@ static void register_fadump(void) fadump_create_elfcore_headers(vaddr); /* register the future kernel dump with firmware. */ - register_fw_dump(&fdm); + return register_fw_dump(&fdm); } static int fadump_unregister_dump(struct fadump_mem_struct *fdm) @@ -1208,7 +1216,6 @@ static ssize_t fadump_register_store(struct kobject *kobj, switch (buf[0]) { case '0': if (fw_dump.dump_registered == 0) { - ret = -EINVAL; goto unlock_out; } /* Un-register Firmware-assisted dump */ @@ -1216,11 +1223,11 @@ static ssize_t fadump_register_store(struct kobject *kobj, break; case '1': if (fw_dump.dump_registered == 1) { - ret = -EINVAL; + ret = -EEXIST; goto unlock_out; } /* Register Firmware-assisted dump */ - register_fadump(); + ret = register_fadump(); break; default: ret = -EINVAL; From b4dd80c33338df403868222045d6427295ff54e5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 5 Oct 2018 12:48:48 -0700 Subject: [PATCH 130/812] ARC: clone syscall to setp r25 as thread pointer commit c58a584f05e35d1d4342923cd7aac07d9c3d3d16 upstream. Per ARC TLS ABI, r25 is designated TP (thread pointer register). However so far kernel didn't do any special treatment, like setting up usermode r25, even for CLONE_SETTLS. We instead relied on libc runtime to do this, in say clone libc wrapper [1]. This was deliberate to keep kernel ABI agnostic (userspace could potentially change TP, specially for different ARC ISA say ARCompact vs. ARCv2 with different spare registers etc) However userspace setting up r25, after clone syscall opens a race, if child is not scheduled and gets a signal instead. It starts off in userspace not in clone but in a signal handler and anything TP sepcific there such as pthread_self() fails which showed up with uClibc testsuite nptl/tst-kill6 [2] Fix this by having kernel populate r25 to TP value. So this locks in ABI, but it was not going to change anyways, and fwiw is same for both ARCompact (arc700 core) and ARCvs (HS3x cores) [1] https://cgit.uclibc-ng.org/cgi/cgit/uclibc-ng.git/tree/libc/sysdeps/linux/arc/clone.S [2] https://github.com/wbx-github/uclibc-ng-test/blob/master/test/nptl/tst-kill6.c Fixes: ARC STAR 9001378481 Cc: stable@vger.kernel.org Reported-by: Nikita Sobolev Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/process.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index a3f750e76b68..8f40c6c5d77e 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -153,6 +153,26 @@ int copy_thread(unsigned long clone_flags, task_thread_info(current)->thr_ptr; } + + /* + * setup usermode thread pointer #1: + * when child is picked by scheduler, __switch_to() uses @c_callee to + * populate usermode callee regs: this works (despite being in a kernel + * function) since special return path for child @ret_from_fork() + * ensures those regs are not clobbered all the way to RTIE to usermode + */ + c_callee->r25 = task_thread_info(p)->thr_ptr; + +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * setup usermode thread pointer #2: + * however for this special use of r25 in kernel, __switch_to() sets + * r25 for kernel needs and only in the final return path is usermode + * r25 setup, from pt_regs->user_r25. So set that up as well + */ + c_regs->user_r25 = c_callee->r25; +#endif + return 0; } From 38ea605f919c61fc0c3ca65abeaa87ca83f2110e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 12 Sep 2018 16:27:44 -0700 Subject: [PATCH 131/812] ucma: fix a use-after-free in ucma_resolve_ip() commit 5fe23f262e0548ca7f19fb79f89059a60d087d22 upstream. There is a race condition between ucma_close() and ucma_resolve_ip(): CPU0 CPU1 ucma_resolve_ip(): ucma_close(): ctx = ucma_get_ctx(file, cmd.id); list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); ... mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); rdma_destroy_id(ctx->cm_id); ... ucma_free_ctx(ctx); ret = rdma_resolve_addr(); ucma_put_ctx(ctx); Before idr_remove(), ucma_get_ctx() could still find the ctx and after rdma_destroy_id(), rdma_resolve_addr() may still access id_priv pointer. Also, ucma_put_ctx() may use ctx after ucma_free_ctx() too. ucma_close() should call ucma_put_ctx() too which tests the refcnt and waits for the last one releasing it. The similar pattern is already used by ucma_destroy_id(). Reported-and-tested-by: syzbot+da2591e115d57a9cbb8b@syzkaller.appspotmail.com Reported-by: syzbot+cfe3c1e8ef634ba8964b@syzkaller.appspotmail.com Cc: Jason Gunthorpe Cc: Doug Ledford Cc: Leon Romanovsky Signed-off-by: Cong Wang Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 9712a63957e1..7525e9f6949e 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1709,6 +1709,8 @@ static int ucma_close(struct inode *inode, struct file *filp) mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); /* rdma_destroy_id ensures that no event handlers are * inflight for that id before releasing it. */ From c4c84454902516b7648a18d5173304da7dffa9d6 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 3 Sep 2018 23:06:23 +0200 Subject: [PATCH 132/812] ubifs: Check for name being NULL while mounting commit 37f31b6ca4311b94d985fb398a72e5399ad57925 upstream. The requested device name can be NULL or an empty string. Check for that and refuse to continue. UBIFS has to do this manually since we cannot use mount_bdev(), which checks for this condition. Fixes: 1e51764a3c2ac ("UBIFS: add new flash file system") Reported-by: syzbot+38bd0f7865e5c6379280@syzkaller.appspotmail.com Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 0bb6de356451..7968b7a5e787 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1918,6 +1918,9 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) int dev, vol; char *endptr; + if (!name || !*name) + return ERR_PTR(-EINVAL); + /* First, try to open using the device node path method */ ubi = ubi_open_volume_path(name, mode); if (!IS_ERR(ubi)) From ec7055c62714326c56dabcf7757069ac7f276bda Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Sep 2018 16:24:05 +0800 Subject: [PATCH 133/812] tcp: increment sk_drops for dropped rx packets [ Upstream commit 532182cd610782db8c18230c2747626562032205 ] Now ss can report sk_drops, we can instruct TCP to increment this per socket counter when it drops an incoming frame, to refine monitoring and debugging. Following patch takes care of listeners drops. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Mao Wenan Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 7 +++++++ net/ipv4/tcp_input.c | 33 ++++++++++++++++++++------------- net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 3d5ff7436f41..577075713ad5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2139,6 +2139,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops); } +static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) +{ + int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + + atomic_add(segs, &sk->sk_drops); +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c4c6cd0316e..9df4cbb9c25a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4296,6 +4296,12 @@ static bool tcp_try_coalesce(struct sock *sk, return true; } +static void tcp_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + __kfree_skb(skb); +} + /* This one checks to see if we can put data from the * out_of_order queue into the receive_queue. */ @@ -4320,7 +4326,7 @@ static void tcp_ofo_queue(struct sock *sk) __skb_unlink(skb, &tp->out_of_order_queue); if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { SOCK_DEBUG(sk, "ofo packet was already received\n"); - __kfree_skb(skb); + tcp_drop(sk, skb); continue; } SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n", @@ -4372,7 +4378,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); - __kfree_skb(skb); + tcp_drop(sk, skb); return; } @@ -4436,7 +4442,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb); + tcp_drop(sk, skb); skb = NULL; tcp_dsack_set(sk, seq, end_seq); goto add_sack; @@ -4475,7 +4481,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb1); + tcp_drop(sk, skb1); } add_sack: @@ -4558,12 +4564,13 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - int eaten = -1; bool fragstolen = false; + int eaten = -1; - if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) - goto drop; - + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { + __kfree_skb(skb); + return; + } skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); @@ -4645,7 +4652,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); inet_csk_schedule_ack(sk); drop: - __kfree_skb(skb); + tcp_drop(sk, skb); return; } @@ -5236,7 +5243,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, return true; discard: - __kfree_skb(skb); + tcp_drop(sk, skb); return false; } @@ -5454,7 +5461,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); discard: - __kfree_skb(skb); + tcp_drop(sk, skb); } EXPORT_SYMBOL(tcp_rcv_established); @@ -5684,7 +5691,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, TCP_DELACK_MAX, TCP_RTO_MAX); discard: - __kfree_skb(skb); + tcp_drop(sk, skb); return 0; } else { tcp_send_ack(sk); @@ -6041,7 +6048,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!queued) { discard: - __kfree_skb(skb); + tcp_drop(sk, skb); } return 0; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eeda67c3dd11..01715fc75169 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1716,6 +1716,7 @@ int tcp_v4_rcv(struct sk_buff *skb) return 0; discard_and_relse: + sk_drops_add(sk, skb); sock_put(sk); goto discard_it; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 90abe88e1b40..d6c191158e07 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1505,6 +1505,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) return 0; discard_and_relse: + sk_drops_add(sk, skb); sock_put(sk); goto discard_it; From 4666b6e2b27d91e05a5b8459e40e4a05dbc1c7b0 Mon Sep 17 00:00:00 2001 From: Yaogong Wang Date: Fri, 14 Sep 2018 16:24:06 +0800 Subject: [PATCH 134/812] tcp: use an RB tree for ooo receive queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9f5afeae51526b3ad7b7cb21ee8b145ce6ea7a7a ] Over the years, TCP BDP has increased by several orders of magnitude, and some people are considering to reach the 2 Gbytes limit. Even with current window scale limit of 14, ~1 Gbytes maps to ~740,000 MSS. In presence of packet losses (or reorders), TCP stores incoming packets into an out of order queue, and number of skbs sitting there waiting for the missing packets to be received can be in the 10^5 range. Most packets are appended to the tail of this queue, and when packets can finally be transferred to receive queue, we scan the queue from its head. However, in presence of heavy losses, we might have to find an arbitrary point in this queue, involving a linear scan for every incoming packet, throwing away cpu caches. This patch converts it to a RB tree, to get bounded latencies. Yaogong wrote a preliminary patch about 2 years ago. Eric did the rebase, added ofo_last_skb cache, polishing and tests. Tested with network dropping between 1 and 10 % packets, with good success (about 30 % increase of throughput in stress tests) Next step would be to also use an RB tree for the write queue at sender side ;) Signed-off-by: Yaogong Wang Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Ilpo Järvinen Acked-By: Ilpo Järvinen Signed-off-by: David S. Miller Signed-off-by: Mao Wenan Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 8 + include/linux/tcp.h | 7 +- include/net/tcp.h | 2 +- net/core/skbuff.c | 19 ++ net/ipv4/tcp.c | 4 +- net/ipv4/tcp_input.c | 362 +++++++++++++++++++++++---------------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 1 - 8 files changed, 244 insertions(+), 161 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c28bd8be290a..a490dd718654 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2273,6 +2273,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +void skb_rbtree_purge(struct rb_root *root); + void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, @@ -2807,6 +2809,12 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) return __pskb_trim(skb, len); } +#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) +#define skb_rb_first(root) rb_to_skb(rb_first(root)) +#define skb_rb_last(root) rb_to_skb(rb_last(root)) +#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) +#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5b6df1a8dc74..747404dbe506 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -279,10 +279,9 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - /* OOO segments go in this list. Note that socket lock must be held, - * as we do not use sk_buff_head lock. - */ - struct sk_buff_head out_of_order_queue; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6c89238f192e..a99f75ef6a73 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -649,7 +649,7 @@ static inline void tcp_fast_path_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (skb_queue_empty(&tp->out_of_order_queue) && + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && tp->rcv_wnd && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && !tp->urg_data) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 55be076706e5..9703924ed071 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2377,6 +2377,25 @@ void skb_queue_purge(struct sk_buff_head *list) } EXPORT_SYMBOL(skb_queue_purge); +/** + * skb_rbtree_purge - empty a skb rbtree + * @root: root of the rbtree to empty + * + * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from + * the list and one reference dropped. This function does not take + * any lock. Synchronization should be handled by the caller (e.g., TCP + * out-of-order queue is protected by the socket lock). + */ +void skb_rbtree_purge(struct rb_root *root) +{ + struct sk_buff *skb, *next; + + rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) + kfree_skb(skb); + + *root = RB_ROOT; +} + /** * skb_queue_head - queue a buffer at the list head * @list: list to use diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5e162b8ab184..b7492aabe710 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -382,7 +382,7 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - __skb_queue_head_init(&tp->out_of_order_queue); + tp->out_of_order_queue = RB_ROOT; tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); @@ -2240,7 +2240,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); tcp_write_queue_purge(sk); - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); inet->inet_dport = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9df4cbb9c25a..7832d0d107cd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4073,7 +4073,7 @@ static void tcp_fin(struct sock *sk) /* It _is_ possible, that we have something out-of-order _after_ FIN. * Probably, we should reset in this case. For now drop them. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_mem_reclaim(sk); @@ -4233,7 +4233,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int this_sack; /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ - if (skb_queue_empty(&tp->out_of_order_queue)) { + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tp->rx_opt.num_sacks = 0; return; } @@ -4309,10 +4309,13 @@ static void tcp_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); __u32 dsack_high = tp->rcv_nxt; + bool fin, fragstolen, eaten; struct sk_buff *skb, *tail; - bool fragstolen, eaten; + struct rb_node *p; - while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { + p = rb_first(&tp->out_of_order_queue); + while (p) { + skb = rb_entry(p, struct sk_buff, rbnode); if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; @@ -4322,9 +4325,10 @@ static void tcp_ofo_queue(struct sock *sk) dsack_high = TCP_SKB_CB(skb)->end_seq; tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } + p = rb_next(p); + rb_erase(&skb->rbnode, &tp->out_of_order_queue); - __skb_unlink(skb, &tp->out_of_order_queue); - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); tcp_drop(sk, skb); continue; @@ -4336,12 +4340,19 @@ static void tcp_ofo_queue(struct sock *sk) tail = skb_peek_tail(&sk->sk_receive_queue); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); + fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) __skb_queue_tail(&sk->sk_receive_queue, skb); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - tcp_fin(sk); - if (eaten) + else kfree_skb_partial(skb, fragstolen); + + if (unlikely(fin)) { + tcp_fin(sk); + /* tcp_fin() purges tp->out_of_order_queue, + * so we must end this loop right now. + */ + break; + } } } @@ -4371,8 +4382,10 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct rb_node **p, *q, *parent; struct sk_buff *skb1; u32 seq, end_seq; + bool fragstolen; tcp_ecn_check_ce(sk, skb); @@ -4387,89 +4400,86 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) inet_csk_schedule_ack(sk); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); + seq = TCP_SKB_CB(skb)->seq; + end_seq = TCP_SKB_CB(skb)->end_seq; SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + tp->rcv_nxt, seq, end_seq); - skb1 = skb_peek_tail(&tp->out_of_order_queue); - if (!skb1) { + p = &tp->out_of_order_queue.rb_node; + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; - tp->selective_acks[0].end_seq = - TCP_SKB_CB(skb)->end_seq; + tp->selective_acks[0].start_seq = seq; + tp->selective_acks[0].end_seq = end_seq; } - __skb_queue_head(&tp->out_of_order_queue, skb); + rb_link_node(&skb->rbnode, NULL, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); + tp->ooo_last_skb = skb; goto end; } - seq = TCP_SKB_CB(skb)->seq; - end_seq = TCP_SKB_CB(skb)->end_seq; - - if (seq == TCP_SKB_CB(skb1)->end_seq) { - bool fragstolen; - - if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - } else { - tcp_grow_window(sk, skb); - kfree_skb_partial(skb, fragstolen); - skb = NULL; - } - - if (!tp->rx_opt.num_sacks || - tp->selective_acks[0].end_seq != seq) - goto add_sack; - - /* Common case: data arrive in order after hole. */ - tp->selective_acks[0].end_seq = end_seq; - goto end; + /* In the typical case, we are adding an skb to the end of the list. + * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. + */ + if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { +coalesce_done: + tcp_grow_window(sk, skb); + kfree_skb_partial(skb, fragstolen); + skb = NULL; + goto add_sack; } - /* Find place to insert this segment. */ - while (1) { - if (!after(TCP_SKB_CB(skb1)->seq, seq)) - break; - if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { - skb1 = NULL; - break; + /* Find place to insert this segment. Handle overlaps on the way. */ + parent = NULL; + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(seq, TCP_SKB_CB(skb1)->seq)) { + p = &parent->rb_left; + continue; } - skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + + if (before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + /* All the bits are present. Drop. */ + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb); + skb = NULL; + tcp_dsack_set(sk, seq, end_seq); + goto add_sack; + } + if (after(seq, TCP_SKB_CB(skb1)->seq)) { + /* Partial overlap. */ + tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); + } else { + /* skb's seq == skb1's seq and skb covers skb1. + * Replace skb1 with skb. + */ + rb_replace_node(&skb1->rbnode, &skb->rbnode, + &tp->out_of_order_queue); + tcp_dsack_extend(sk, + TCP_SKB_CB(skb1)->seq, + TCP_SKB_CB(skb1)->end_seq); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb1); + goto add_sack; + } + } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + goto coalesce_done; + } + p = &parent->rb_right; } - /* Do skb overlap to previous one? */ - if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - /* All the bits are present. Drop. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - tcp_drop(sk, skb); - skb = NULL; - tcp_dsack_set(sk, seq, end_seq); - goto add_sack; - } - if (after(seq, TCP_SKB_CB(skb1)->seq)) { - /* Partial overlap. */ - tcp_dsack_set(sk, seq, - TCP_SKB_CB(skb1)->end_seq); - } else { - if (skb_queue_is_first(&tp->out_of_order_queue, - skb1)) - skb1 = NULL; - else - skb1 = skb_queue_prev( - &tp->out_of_order_queue, - skb1); - } - } - if (!skb1) - __skb_queue_head(&tp->out_of_order_queue, skb); - else - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - - /* And clean segments covered by new one as whole. */ - while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { - skb1 = skb_queue_next(&tp->out_of_order_queue, skb); + /* Insert segment into RB tree. */ + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); + /* Remove other segments covered by skb. */ + while ((q = rb_next(&skb->rbnode)) != NULL) { + skb1 = rb_entry(q, struct sk_buff, rbnode); if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) break; if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { @@ -4477,12 +4487,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) end_seq); break; } - __skb_unlink(skb1, &tp->out_of_order_queue); + rb_erase(&skb1->rbnode, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb1); } + /* If there is no skb after us, we are the last_skb ! */ + if (!q) + tp->ooo_last_skb = skb; add_sack: if (tcp_is_sack(tp)) @@ -4621,13 +4634,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); - if (!skb_queue_empty(&tp->out_of_order_queue)) { + if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); /* RFC2581. 4.2. SHOULD send immediate ACK, when * gap in queue is filled. */ - if (skb_queue_empty(&tp->out_of_order_queue)) + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) inet_csk(sk)->icsk_ack.pingpong = 0; } @@ -4679,48 +4692,76 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) tcp_data_queue_ofo(sk, skb); } -static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *list) +static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff *next = NULL; + if (list) + return !skb_queue_is_last(list, skb) ? skb->next : NULL; - if (!skb_queue_is_last(list, skb)) - next = skb_queue_next(list, skb); + return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); +} + +static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *list, + struct rb_root *root) +{ + struct sk_buff *next = tcp_skb_next(skb, list); + + if (list) + __skb_unlink(skb, list); + else + rb_erase(&skb->rbnode, root); - __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); return next; } +/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */ +static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct sk_buff *skb1; + + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, root); +} + /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. * - * If tail is NULL, this means until the end of the list. + * If tail is NULL, this means until the end of the queue. * * Segments with FIN/SYN are not collapsed (only because this * simplifies code) */ static void -tcp_collapse(struct sock *sk, struct sk_buff_head *list, - struct sk_buff *head, struct sk_buff *tail, - u32 start, u32 end) +tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, + struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end) { - struct sk_buff *skb, *n; + struct sk_buff *skb = head, *n; + struct sk_buff_head tmp; bool end_of_skbs; /* First, check that queue is collapsible and find - * the point where collapsing can be useful. */ - skb = head; + * the point where collapsing can be useful. + */ restart: - end_of_skbs = true; - skb_queue_walk_from_safe(list, skb, n) { - if (skb == tail) - break; + for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) { + n = tcp_skb_next(skb, list); + /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb) break; goto restart; @@ -4738,13 +4779,10 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, break; } - if (!skb_queue_is_last(list, skb)) { - struct sk_buff *next = skb_queue_next(list, skb); - if (next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) { - end_of_skbs = false; - break; - } + if (n && n != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { + end_of_skbs = false; + break; } /* Decided to skip this, advance start seq. */ @@ -4754,17 +4792,22 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) return; + __skb_queue_head_init(&tmp); + while (before(start, end)) { int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start); struct sk_buff *nskb; nskb = alloc_skb(copy, GFP_ATOMIC); if (!nskb) - return; + break; memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_queue_before(list, skb, nskb); + if (list) + __skb_queue_before(list, skb, nskb); + else + __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -4782,14 +4825,17 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, start += size; } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) - return; + goto end; } } } +end: + skb_queue_walk_safe(&tmp, skb, n) + tcp_rbtree_insert(root, skb); } /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs @@ -4799,34 +4845,39 @@ static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 range_truesize, sum_tiny = 0; - struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); - struct sk_buff *head; + struct sk_buff *skb, *head; + struct rb_node *p; u32 start, end; - if (!skb) + p = rb_first(&tp->out_of_order_queue); + skb = rb_entry_safe(p, struct sk_buff, rbnode); +new_range: + if (!skb) { + p = rb_last(&tp->out_of_order_queue); + /* Note: This is possible p is NULL here. We do not + * use rb_entry_safe(), as ooo_last_skb is valid only + * if rbtree is not empty. + */ + tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode); return; - + } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; range_truesize = skb->truesize; - head = skb; - for (;;) { - struct sk_buff *next = NULL; + for (head = skb;;) { + skb = tcp_skb_next(skb, NULL); - if (!skb_queue_is_last(&tp->out_of_order_queue, skb)) - next = skb_queue_next(&tp->out_of_order_queue, skb); - skb = next; - - /* Segment is terminated when we see gap or when - * we are at the end of all the queue. */ + /* Range is terminated when we see a gap or when + * we are at the queue end. + */ if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { /* Do not attempt collapsing tiny skbs */ if (range_truesize != head->truesize || end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) { - tcp_collapse(sk, &tp->out_of_order_queue, + tcp_collapse(sk, NULL, &tp->out_of_order_queue, head, skb, start, end); } else { sum_tiny += range_truesize; @@ -4834,20 +4885,14 @@ static void tcp_collapse_ofo_queue(struct sock *sk) return; } - head = skb; - if (!skb) - break; - /* Start new segment */ - start = TCP_SKB_CB(skb)->seq; - end = TCP_SKB_CB(skb)->end_seq; - range_truesize = skb->truesize; - } else { - range_truesize += skb->truesize; - if (before(TCP_SKB_CB(skb)->seq, start)) - start = TCP_SKB_CB(skb)->seq; - if (after(TCP_SKB_CB(skb)->end_seq, end)) - end = TCP_SKB_CB(skb)->end_seq; + goto new_range; } + + range_truesize += skb->truesize; + if (unlikely(before(TCP_SKB_CB(skb)->seq, start))) + start = TCP_SKB_CB(skb)->seq; + if (after(TCP_SKB_CB(skb)->end_seq, end)) + end = TCP_SKB_CB(skb)->end_seq; } } @@ -4858,23 +4903,36 @@ static void tcp_collapse_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - bool res = false; + struct rb_node *node, *prev; - if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); - __skb_queue_purge(&tp->out_of_order_queue); + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) + return false; - /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. - */ - if (tp->rx_opt.sack_ok) - tcp_sack_reset(&tp->rx_opt); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); + + node = &tp->ooo_last_skb->rbnode; + do { + prev = rb_prev(node); + rb_erase(node, &tp->out_of_order_queue); + __kfree_skb(rb_to_skb(node)); sk_mem_reclaim(sk); - res = true; - } - return res; + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !tcp_under_memory_pressure(sk)) + break; + + node = prev; + } while (node); + tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); + + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if (tp->rx_opt.sack_ok) + tcp_sack_reset(&tp->rx_opt); + + return true; } /* Reduce allocated memory if we can, trying to get @@ -4902,7 +4960,7 @@ static int tcp_prune_queue(struct sock *sk) tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) - tcp_collapse(sk, &sk->sk_receive_queue, + tcp_collapse(sk, &sk->sk_receive_queue, NULL, skb_peek(&sk->sk_receive_queue), NULL, tp->copied_seq, tp->rcv_nxt); @@ -5007,7 +5065,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* We have out of order data. */ - (ofo_possible && skb_peek(&tp->out_of_order_queue))) { + (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) { /* Then ack it now */ tcp_send_ack(sk); } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 01715fc75169..ee8399f11fd0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1830,7 +1830,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_write_queue_purge(sk); /* Cleans up our, hopefully empty, out_of_order_queue. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d270870bf492..a48846d81b41 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -496,7 +496,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd_cnt = 0; tcp_init_xmit_timers(newsk); - __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; From e747775172a2d4dc4dae794f248f9687ba793f54 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Sep 2018 16:24:07 +0800 Subject: [PATCH 135/812] tcp: fix a stale ooo_last_skb after a replace [ Upstream commit 76f0dcbb5ae1a7c3dbeec13dd98233b8e6b0b32a ] When skb replaces another one in ooo queue, I forgot to also update tp->ooo_last_skb as well, if the replaced skb was the last one in the queue. To fix this, we simply can re-use the code that runs after an insertion, trying to merge skbs at the right of current skb. This not only fixes the bug, but also remove all small skbs that might be a subset of the new one. Example: We receive segments 2001:3001, 4001:5001 Then we receive 2001:8001 : We should replace 2001:3001 with the big skb, but also remove 4001:50001 from the queue to save space. packetdrill test demonstrating the bug 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +0.100 < . 1:1(0) ack 1 win 1024 +0 accept(3, ..., ...) = 4 +0.01 < . 1001:2001(1000) ack 1 win 1024 +0 > . 1:1(0) ack 1 +0.01 < . 1001:3001(2000) ack 1 win 1024 +0 > . 1:1(0) ack 1 Fixes: 9f5afeae5152 ("tcp: use an RB tree for ooo receive queue") Signed-off-by: Eric Dumazet Reported-by: Yuchung Cheng Cc: Yaogong Wang Signed-off-by: David S. Miller Signed-off-by: Mao Wenan Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7832d0d107cd..4cc0a53652ea 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4465,7 +4465,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); __kfree_skb(skb1); - goto add_sack; + goto merge_right; } } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { goto coalesce_done; @@ -4477,6 +4477,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) rb_link_node(&skb->rbnode, parent, p); rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); +merge_right: /* Remove other segments covered by skb. */ while ((q = rb_next(&skb->rbnode)) != NULL) { skb1 = rb_entry(q, struct sk_buff, rbnode); From 352b66932a23fb11f0a7c316361220648bca3c79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Sep 2018 16:24:08 +0800 Subject: [PATCH 136/812] tcp: free batches of packets in tcp_prune_ofo_queue() [ Upstream commit 72cd43ba64fc172a443410ce01645895850844c8 ] Juha-Matti Tilli reported that malicious peers could inject tiny packets in out_of_order_queue, forcing very expensive calls to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for every incoming packet. out_of_order_queue rb-tree can contain thousands of nodes, iterating over all of them is not nice. Before linux-4.9, we would have pruned all packets in ofo_queue in one go, every XXXX packets. XXXX depends on sk_rcvbuf and skbs truesize, but is about 7000 packets with tcp_rmem[2] default of 6 MB. Since we plan to increase tcp_rmem[2] in the future to cope with modern BDP, can not revert to the old behavior, without great pain. Strategy taken in this patch is to purge ~12.5 % of the queue capacity. Fixes: 36a6503fedda ("tcp: refine tcp_prune_ofo_queue() to not drop all packets") Signed-off-by: Eric Dumazet Reported-by: Juha-Matti Tilli Acked-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Mao Wenan Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4cc0a53652ea..4739a93425ed 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4899,27 +4899,33 @@ static void tcp_collapse_ofo_queue(struct sock *sk) /* * Purge the out-of-order queue. + * Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks. * Return true if queue was pruned. */ static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct rb_node *node, *prev; + int goal; if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); - + goal = sk->sk_rcvbuf >> 3; node = &tp->ooo_last_skb->rbnode; do { prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); + goal -= rb_to_skb(node)->truesize; __kfree_skb(rb_to_skb(node)); - sk_mem_reclaim(sk); - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && - !tcp_under_memory_pressure(sk)) - break; + if (!prev || goal <= 0) { + sk_mem_reclaim(sk); + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !tcp_under_memory_pressure(sk)) + break; + goal = sk->sk_rcvbuf >> 3; + } node = prev; } while (node); From be288481479ca8c1beba02a7e779ffeaa11f1597 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Sep 2018 16:24:09 +0800 Subject: [PATCH 137/812] tcp: call tcp_drop() from tcp_data_queue_ofo() [ Upstream commit 8541b21e781a22dce52a74fef0b9bed00404a1cd ] In order to be able to give better diagnostics and detect malicious traffic, we need to have better sk->sk_drops tracking. Fixes: 9f5afeae5152 ("tcp: use an RB tree for ooo receive queue") Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Mao Wenan Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4739a93425ed..cbe0ca03bde8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4445,7 +4445,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* All the bits are present. Drop. */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb); + tcp_drop(sk, skb); skb = NULL; tcp_dsack_set(sk, seq, end_seq); goto add_sack; @@ -4464,7 +4464,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb1); + tcp_drop(sk, skb1); goto merge_right; } } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { From eee1af4e268e10fecb76bce42a8d7343aeb2a5e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Sep 2018 16:24:10 +0800 Subject: [PATCH 138/812] tcp: add tcp_ooo_try_coalesce() helper [ Upstream commit 58152ecbbcc6a0ce7fddd5bf5f6ee535834ece0c ] In case skb in out_or_order_queue is the result of multiple skbs coalescing, we would like to get a proper gso_segs counter tracking, so that future tcp_drop() can report an accurate number. I chose to not implement this tracking for skbs in receive queue, since they are not dropped, unless socket is disconnected. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Mao Wenan Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cbe0ca03bde8..1aff93d76f24 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4296,6 +4296,23 @@ static bool tcp_try_coalesce(struct sock *sk, return true; } +static bool tcp_ooo_try_coalesce(struct sock *sk, + struct sk_buff *to, + struct sk_buff *from, + bool *fragstolen) +{ + bool res = tcp_try_coalesce(sk, to, from, fragstolen); + + /* In case tcp_drop() is called later, update to->gso_segs */ + if (res) { + u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) + + max_t(u16, 1, skb_shinfo(from)->gso_segs); + + skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF); + } + return res; +} + static void tcp_drop(struct sock *sk, struct sk_buff *skb) { sk_drops_add(sk, skb); @@ -4422,7 +4439,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* In the typical case, we are adding an skb to the end of the list. * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. */ - if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { + if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, + skb, &fragstolen)) { coalesce_done: tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); @@ -4467,7 +4485,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tcp_drop(sk, skb1); goto merge_right; } - } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + } else if (tcp_ooo_try_coalesce(sk, skb1, + skb, &fragstolen)) { goto coalesce_done; } p = &parent->rb_right; From 367222df917632b354d193cc861ba0d7e622654f Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Mon, 18 Jun 2018 17:00:39 +0300 Subject: [PATCH 139/812] ath10k: fix scan crash due to incorrect length calculation commit c8291988806407e02a01b4b15b4504eafbcc04e0 upstream. Length of WMI scan message was not calculated correctly. The allocated buffer was smaller than what we expected. So WMI message corrupted skb_info, which is at the end of skb->data. This fix takes TLV header into account even if the element is zero-length. Crash log: [49.629986] Unhandled kernel unaligned access[#1]: [49.634932] CPU: 0 PID: 1176 Comm: logd Not tainted 4.4.60 #180 [49.641040] task: 83051460 ti: 8329c000 task.ti: 8329c000 [49.646608] $ 0 : 00000000 00000001 80984a80 00000000 [49.652038] $ 4 : 45259e89 8046d484 8046df30 8024ba70 [49.657468] $ 8 : 00000000 804cc4c0 00000001 20306320 [49.662898] $12 : 33322037 000110f2 00000000 31203930 [49.668327] $16 : 82792b40 80984a80 00000001 804207fc [49.673757] $20 : 00000000 0000012c 00000040 80470000 [49.679186] $24 : 00000000 8024af7c [49.684617] $28 : 8329c000 8329db88 00000001 802c58d0 [49.690046] Hi : 00000000 [49.693022] Lo : 453c0000 [49.696013] epc : 800efae4 put_page+0x0/0x58 [49.700615] ra : 802c58d0 skb_release_data+0x148/0x1d4 [49.706184] Status: 1000fc03 KERNEL EXL IE [49.710531] Cause : 00800010 (ExcCode 04) [49.714669] BadVA : 45259e89 [49.717644] PrId : 00019374 (MIPS 24Kc) Signed-off-by: Zhi Chen Signed-off-by: Kalle Valo Cc: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index c72eb4464de9..c27fff39ddae 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -1459,10 +1459,10 @@ ath10k_wmi_tlv_op_gen_start_scan(struct ath10k *ar, bssid_len = arg->n_bssids * sizeof(struct wmi_mac_addr); ie_len = roundup(arg->ie_len, 4); len = (sizeof(*tlv) + sizeof(*cmd)) + - (arg->n_channels ? sizeof(*tlv) + chan_len : 0) + - (arg->n_ssids ? sizeof(*tlv) + ssid_len : 0) + - (arg->n_bssids ? sizeof(*tlv) + bssid_len : 0) + - (arg->ie_len ? sizeof(*tlv) + ie_len : 0); + sizeof(*tlv) + chan_len + + sizeof(*tlv) + ssid_len + + sizeof(*tlv) + bssid_len + + sizeof(*tlv) + ie_len; skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) From 3a07d58f20c8e0efd9e5d43ee929c3f5c5f35874 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 16 May 2017 09:30:18 +0800 Subject: [PATCH 140/812] ebtables: arpreply: Add the standard target sanity check commit c953d63548207a085abcb12a15fefc8a11ffdf0a upstream. The info->target comes from userspace and it would be used directly. So we need to add the sanity check to make sure it is a valid standard target, although the ebtables tool has already checked it. Kernel needs to validate anything coming from userspace. If the target is set as an evil value, it would break the ebtables and cause a panic. Because the non-standard target is treated as one offset. Now add one helper function ebt_invalid_target, and we would replace the macro INVALID_TARGET later. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso Cc: Loic Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter_bridge/ebtables.h | 5 +++++ net/bridge/netfilter/ebt_arpreply.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2ea517c7c6b9..bffd096fae3b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) +static inline bool ebt_invalid_target(int target) +{ + return (target < -NUM_STANDARD_TARGETS || target >= 0); +} + #endif diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 070cf134a22f..f2660c1b29e4 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -67,6 +67,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) return -EINVAL; + if (ebt_invalid_target(info->target)) + return -EINVAL; + return 0; } From b001adea66f0e0a7803adfbf9128a2d7969daa4e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 13 Oct 2018 09:11:36 +0200 Subject: [PATCH 141/812] Linux 4.4.161 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 607394a56036..57e4ff1a8b96 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 160 +SUBLEVEL = 161 EXTRAVERSION = NAME = Blurry Fish Butt From 1e3a440cf2bdec90023ca2451e6f121ab9bd7b68 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 Aug 2018 22:49:36 -0500 Subject: [PATCH 142/812] ASoC: wm8804: Add ACPI support [ Upstream commit 960cdd50ca9fdfeb82c2757107bcb7f93c8d7d41 ] HID made of either Wolfson/CirrusLogic PCI ID + 8804 identifier. This helps enumerate the HifiBerry Digi+ HAT boards on the Up2 platform. The scripts at https://github.com/thesofproject/acpi-scripts can be used to add the ACPI initrd overlays. Signed-off-by: Pierre-Louis Bossart Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8804-i2c.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8804-i2c.c b/sound/soc/codecs/wm8804-i2c.c index f27464c2c5ba..79541960f45d 100644 --- a/sound/soc/codecs/wm8804-i2c.c +++ b/sound/soc/codecs/wm8804-i2c.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "wm8804.h" @@ -40,17 +41,29 @@ static const struct i2c_device_id wm8804_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, wm8804_i2c_id); +#if defined(CONFIG_OF) static const struct of_device_id wm8804_of_match[] = { { .compatible = "wlf,wm8804", }, { } }; MODULE_DEVICE_TABLE(of, wm8804_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id wm8804_acpi_match[] = { + { "1AEC8804", 0 }, /* Wolfson PCI ID + part ID */ + { "10138804", 0 }, /* Cirrus Logic PCI ID + part ID */ + { }, +}; +MODULE_DEVICE_TABLE(acpi, wm8804_acpi_match); +#endif static struct i2c_driver wm8804_i2c_driver = { .driver = { .name = "wm8804", .pm = &wm8804_pm, - .of_match_table = wm8804_of_match, + .of_match_table = of_match_ptr(wm8804_of_match), + .acpi_match_table = ACPI_PTR(wm8804_acpi_match), }, .probe = wm8804_i2c_probe, .remove = wm8804_i2c_remove, From a5aa8bd85a4f99055f4c4e59a775e936d0442ced Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 23 Aug 2018 10:26:20 +0200 Subject: [PATCH 143/812] ASoC: sigmadsp: safeload should not have lower byte limit [ Upstream commit 5ea752c6efdf5aa8a57aed816d453a8f479f1b0a ] Fixed range in safeload conditional to allow safeload to up to 20 bytes, without a lower limit. Signed-off-by: Danny Smith Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/sigmadsp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c index d53680ac78e4..6df158669420 100644 --- a/sound/soc/codecs/sigmadsp.c +++ b/sound/soc/codecs/sigmadsp.c @@ -117,8 +117,7 @@ static int sigmadsp_ctrl_write(struct sigmadsp *sigmadsp, struct sigmadsp_control *ctrl, void *data) { /* safeload loads up to 20 bytes in a atomic operation */ - if (ctrl->num_bytes > 4 && ctrl->num_bytes <= 20 && sigmadsp->ops && - sigmadsp->ops->safeload) + if (ctrl->num_bytes <= 20 && sigmadsp->ops && sigmadsp->ops->safeload) return sigmadsp->ops->safeload(sigmadsp, ctrl->addr, data, ctrl->num_bytes); else From ac38645226f310d2525e66cb0dbec265b0be3754 Mon Sep 17 00:00:00 2001 From: Lei Yang Date: Wed, 5 Sep 2018 11:14:49 +0800 Subject: [PATCH 144/812] selftests/efivarfs: add required kernel configs [ Upstream commit 53cf59d6c0ad3edc4f4449098706a8f8986258b6 ] add config file Signed-off-by: Lei Yang Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/efivarfs/config | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/efivarfs/config diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config new file mode 100644 index 000000000000..4e151f1005b2 --- /dev/null +++ b/tools/testing/selftests/efivarfs/config @@ -0,0 +1 @@ +CONFIG_EFIVAR_FS=y From 8c2077219e252f752ceaa14fd724192d69dcced6 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 25 Apr 2018 07:29:22 -0700 Subject: [PATCH 145/812] mfd: omap-usb-host: Fix dts probe of children [ Upstream commit 10492ee8ed9188d6d420e1f79b2b9bdbc0624e65 ] It currently only works if the parent bus uses "simple-bus". We currently try to probe children with non-existing compatible values. And we're missing .probe. I noticed this while testing devices configured to probe using ti-sysc interconnect target module driver. For that we also may want to rebind the driver, so let's remove __init and __exit. Signed-off-by: Tony Lindgren Acked-by: Roger Quadros Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/omap-usb-host.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 1d924d1533c0..b9dc2fcd8f26 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -548,8 +548,8 @@ static int usbhs_omap_get_dt_pdata(struct device *dev, } static const struct of_device_id usbhs_child_match_table[] = { - { .compatible = "ti,omap-ehci", }, - { .compatible = "ti,omap-ohci", }, + { .compatible = "ti,ehci-omap", }, + { .compatible = "ti,ohci-omap3", }, { } }; @@ -875,6 +875,7 @@ static struct platform_driver usbhs_omap_driver = { .pm = &usbhsomap_dev_pm_ops, .of_match_table = usbhs_omap_dt_ids, }, + .probe = usbhs_omap_probe, .remove = usbhs_omap_remove, }; @@ -884,9 +885,9 @@ MODULE_ALIAS("platform:" USBHS_DRIVER_NAME); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI"); -static int __init omap_usbhs_drvinit(void) +static int omap_usbhs_drvinit(void) { - return platform_driver_probe(&usbhs_omap_driver, usbhs_omap_probe); + return platform_driver_register(&usbhs_omap_driver); } /* @@ -898,7 +899,7 @@ static int __init omap_usbhs_drvinit(void) */ fs_initcall_sync(omap_usbhs_drvinit); -static void __exit omap_usbhs_drvexit(void) +static void omap_usbhs_drvexit(void) { platform_driver_unregister(&usbhs_omap_driver); } From ba996b37908ccafefa452f85747975829fcb25eb Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 11 Sep 2018 15:14:04 -0600 Subject: [PATCH 146/812] sound: enable interrupt after dma buffer initialization [ Upstream commit b61749a89f826eb61fc59794d9e4697bd246eb61 ] In snd_hdac_bus_init_chip(), we enable interrupt before snd_hdac_bus_init_cmd_io() initializing dma buffers. If irq has been acquired and irq handler uses the dma buffer, kernel may crash when interrupt comes in. Fix the problem by postponing enabling irq after dma buffer initialization. And warn once on null dma buffer pointer during the initialization. Reviewed-by: Takashi Iwai Signed-off-by: Yu Zhao Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/hda/hdac_controller.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index b5a17cb510a0..4727f5b80e76 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -40,6 +40,8 @@ static void azx_clear_corbrp(struct hdac_bus *bus) */ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus) { + WARN_ON_ONCE(!bus->rb.area); + spin_lock_irq(&bus->reg_lock); /* CORB set up */ bus->corb.addr = bus->rb.addr; @@ -377,13 +379,15 @@ bool snd_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset) /* reset controller */ azx_reset(bus, full_reset); - /* initialize interrupts */ + /* clear interrupts */ azx_int_clear(bus); - azx_int_enable(bus); /* initialize the codec command I/O */ snd_hdac_bus_init_cmd_io(bus); + /* enable interrupts after CORB/RIRB buffers are initialized above */ + azx_int_enable(bus); + /* program the position buffer */ if (bus->use_posbuf && bus->posbuf.addr) { snd_hdac_chip_writel(bus, DPLBASE, (u32)bus->posbuf.addr); From bd66767a8c0ecbd3f5eb0ad58da9b7fb975ee444 Mon Sep 17 00:00:00 2001 From: Jongsung Kim Date: Thu, 13 Sep 2018 18:32:21 +0900 Subject: [PATCH 147/812] stmmac: fix valid numbers of unicast filter entries [ Upstream commit edf2ef7242805e53ec2e0841db26e06d8bc7da70 ] Synopsys DWC Ethernet MAC can be configured to have 1..32, 64, or 128 unicast filter entries. (Table 7-8 MAC Address Registers from databook) Fix dwmac1000_validate_ucast_entries() to accept values between 1 and 32 in addition. Signed-off-by: Jongsung Kim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index d02691ba3d7f..20aa34f45f07 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -71,7 +71,7 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins) * Description: * This function validates the number of Unicast address entries supported * by a particular Synopsys 10/100/1000 controller. The Synopsys controller - * supports 1, 32, 64, or 128 Unicast filter entries for it's Unicast filter + * supports 1..32, 64, or 128 Unicast filter entries for it's Unicast filter * logic. This function validates a valid, supported configuration is * selected, and defaults to 1 Unicast address if an unsupported * configuration is selected. @@ -81,8 +81,7 @@ static int dwmac1000_validate_ucast_entries(int ucast_entries) int x = ucast_entries; switch (x) { - case 1: - case 32: + case 1 ... 32: case 64: case 128: break; From f74eda8ed5d90d40ed3e0f46ddfbfe57bc6786f2 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 14 Sep 2018 17:48:10 +0200 Subject: [PATCH 148/812] net: macb: disable scatter-gather for macb on sama5d3 [ Upstream commit eb4ed8e2d7fecb5f40db38e4498b9ee23cddf196 ] Create a new configuration for the sama5d3-macb new compatibility string. This configuration disables scatter-gather because we experienced lock down of the macb interface of this particular SoC under very high load. Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 8c698d464716..085f77f273ba 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2743,6 +2743,13 @@ static const struct macb_config at91sam9260_config = { .init = macb_init, }; +static const struct macb_config sama5d3macb_config = { + .caps = MACB_CAPS_SG_DISABLED + | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, + .clk_init = macb_clk_init, + .init = macb_init, +}; + static const struct macb_config pc302gem_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, .dma_burst_length = 16, @@ -2801,6 +2808,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,gem", .data = &pc302gem_config }, { .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config }, { .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config }, + { .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config }, { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config }, { .compatible = "cdns,at91rm9200-emac", .data = &emac_config }, { .compatible = "cdns,emac", .data = &emac_config }, From 51b6ff44c6d4c06489c15955ed52ba67cf20eecf Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 14 Sep 2018 17:48:11 +0200 Subject: [PATCH 149/812] ARM: dts: at91: add new compatibility string for macb on sama5d3 [ Upstream commit 321cc359d899a8e988f3725d87c18a628e1cc624 ] We need this new compatibility string as we experienced different behavior for this 10/100Mbits/s macb interface on this particular SoC. Backward compatibility is preserved as we keep the alternative strings. Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/macb.txt | 1 + arch/arm/boot/dts/sama5d3_emac.dtsi | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index b5d79761ac97..410c044166e2 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -8,6 +8,7 @@ Required properties: Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on the Cadence GEM, or the generic form: "cdns,gem". Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs. + Use "atmel,sama5d3-macb" for the 10/100Mbit IP available on Atmel sama5d3 SoCs. Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs. Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs. Use "cdns,zynqmp-gem" for Zynq Ultrascale+ MPSoC. diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi index 7cb235ef0fb6..6e9e1c2f9def 100644 --- a/arch/arm/boot/dts/sama5d3_emac.dtsi +++ b/arch/arm/boot/dts/sama5d3_emac.dtsi @@ -41,7 +41,7 @@ macb1_clk: macb1_clk { }; macb1: ethernet@f802c000 { - compatible = "cdns,at91sam9260-macb", "cdns,macb"; + compatible = "atmel,sama5d3-macb", "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xf802c000 0x100>; interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; From d6a6d4e4f784be0a6957043779e85bd6a410fd4d Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Wed, 12 Sep 2018 21:42:18 -0400 Subject: [PATCH 150/812] drm/amdgpu: Fix SDMA HQD destroy error on gfx_v7 [ Upstream commit caaa4c8a6be2a275bd14f2369ee364978ff74704 ] A wrong register bit was examinated for checking SDMA status so it reports false failures. This typo only appears on gfx_v7. gfx_v8 checks the correct bit. Acked-by: Alex Deucher Signed-off-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index b233cf8436b0..2e1e84c98034 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -504,7 +504,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, while (true) { temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); - if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; if (timeout == 0) return -ETIME; From 7a3d844f83d47c432fba7d28570bc68b5015ecd0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 00:23:11 -0400 Subject: [PATCH 151/812] ext4: add corruption check in ext4_xattr_set_entry() commit 5369a762c882c0b6e9599e4ebbb3a9ba9eee7e2d upstream. In theory this should have been caught earlier when the xattr list was verified, but in case it got missed, it's simple enough to add check to make sure we don't overrun the xattr buffer. This addresses CVE-2018-10879. https://bugzilla.kernel.org/show_bug.cgi?id=200001 Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger [bwh: Backported to 3.16: - Add inode parameter to ext4_xattr_set_entry() and update callers - Adjust context] Signed-off-by: Ben Hutchings [adjusted for 4.4 context] Signed-off-by: Daniel Rosenberg Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index d0aaf338fa9f..d6bae37489af 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -638,14 +638,20 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, } static int -ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) +ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s, + struct inode *inode) { - struct ext4_xattr_entry *last; + struct ext4_xattr_entry *last, *next; size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); /* Compute min_offs and last. */ last = s->first; - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + for (; !IS_LAST_ENTRY(last); last = next) { + next = EXT4_XATTR_NEXT(last); + if ((void *)next >= s->end) { + EXT4_ERROR_INODE(inode, "corrupted xattr entries"); + return -EFSCORRUPTED; + } if (!last->e_value_block && last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) @@ -825,7 +831,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ce = NULL; } ea_bdebug(bs->bh, "modifying in-place"); - error = ext4_xattr_set_entry(i, s); + error = ext4_xattr_set_entry(i, s, inode); if (!error) { if (!IS_LAST_ENTRY(s->first)) ext4_xattr_rehash(header(s->base), @@ -875,7 +881,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, s->end = s->base + sb->s_blocksize; } - error = ext4_xattr_set_entry(i, s); + error = ext4_xattr_set_entry(i, s, inode); if (error == -EFSCORRUPTED) goto bad_block; if (error) @@ -1037,7 +1043,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; - error = ext4_xattr_set_entry(i, s); + error = ext4_xattr_set_entry(i, s, inode); if (error) { if (error == -ENOSPC && ext4_has_inline_data(inode)) { @@ -1049,7 +1055,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, error = ext4_xattr_ibody_find(inode, i, is); if (error) return error; - error = ext4_xattr_set_entry(i, s); + error = ext4_xattr_set_entry(i, s, inode); } if (error) return error; @@ -1075,7 +1081,7 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; - error = ext4_xattr_set_entry(i, s); + error = ext4_xattr_set_entry(i, s, inode); if (error) return error; header = IHDR(inode, ext4_raw_inode(&is->iloc)); From 0f0ad8d548588419b5f1b8ca36673af32d3dafdf Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:52:03 -0700 Subject: [PATCH 152/812] mm/vmstat.c: fix outdated vmstat_text commit 28e2c4bb99aa40f9d5f07ac130cbc4da0ea93079 upstream. 7a9cdebdcc17 ("mm: get rid of vmacache_flush_all() entirely") removed the VMACACHE_FULL_FLUSHES statistics, but didn't remove the corresponding entry in vmstat_text. This causes an out-of-bounds access in vmstat_show(). Luckily this only affects kernels with CONFIG_DEBUG_VM_VMACACHE=y, which is probably very rare. Link: http://lkml.kernel.org/r/20181001143138.95119-1-jannh@google.com Fixes: 7a9cdebdcc17 ("mm: get rid of vmacache_flush_all() entirely") Signed-off-by: Jann Horn Reviewed-by: Kees Cook Reviewed-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Christoph Lameter Cc: Kemi Wang Cc: Andy Lutomirski Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 8895eff2d735..a2d70ef74db7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -869,7 +869,6 @@ const char * const vmstat_text[] = { #ifdef CONFIG_DEBUG_VM_VMACACHE "vmacache_find_calls", "vmacache_find_hits", - "vmacache_full_flushes", #endif #endif /* CONFIG_VM_EVENTS_COUNTERS */ }; From 2f41f387b0f44fd5ba325d893edead8b52314437 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 17 Aug 2018 15:19:37 -0400 Subject: [PATCH 153/812] mach64: detect the dot clock divider correctly on sparc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 76ebebd2464c5c8a4453c98b6dbf9c95a599e810 upstream. On Sun Ultra 5, it happens that the dot clock is not set up properly for some videomodes. For example, if we set the videomode "r1024x768x60" in the firmware, Linux would incorrectly set a videomode with refresh rate 180Hz when booting (suprisingly, my LCD monitor can display it, although display quality is very low). The reason is this: Older mach64 cards set the divider in the register VCLK_POST_DIV. The register has four 2-bit fields (the field that is actually used is specified in the lowest two bits of the register CLOCK_CNTL). The 2 bits select divider "1, 2, 4, 8". On newer mach64 cards, there's another bit added - the top four bits of PLL_EXT_CNTL extend the divider selection, so we have possible dividers "1, 2, 4, 8, 3, 5, 6, 12". The Linux driver clears the top four bits of PLL_EXT_CNTL and never sets them, so it can work regardless if the card supports them. However, the sparc64 firmware may set these extended dividers during boot - and the mach64 driver detects incorrect dot clock in this case. This patch makes the driver read the additional divider bit from PLL_EXT_CNTL and calculate the initial refresh rate properly. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Acked-by: David S. Miller Reviewed-by: Ville Syrjälä Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/aty/atyfb.h | 3 ++- drivers/video/fbdev/aty/atyfb_base.c | 7 ++++--- drivers/video/fbdev/aty/mach64_ct.c | 10 +++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/video/fbdev/aty/atyfb.h b/drivers/video/fbdev/aty/atyfb.h index 63c4842eb224..46e0e8b39b76 100644 --- a/drivers/video/fbdev/aty/atyfb.h +++ b/drivers/video/fbdev/aty/atyfb.h @@ -332,6 +332,8 @@ extern const struct aty_pll_ops aty_pll_ct; /* Integrated */ extern void aty_set_pll_ct(const struct fb_info *info, const union aty_pll *pll); extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par); +extern const u8 aty_postdividers[8]; + /* * Hardware cursor support @@ -358,7 +360,6 @@ static inline void wait_for_idle(struct atyfb_par *par) extern void aty_reset_engine(const struct atyfb_par *par); extern void aty_init_engine(struct atyfb_par *par, struct fb_info *info); -extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par); void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area); void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 7f658fa4d22a..9755a0ec6591 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3093,17 +3093,18 @@ static int atyfb_setup_sparc(struct pci_dev *pdev, struct fb_info *info, /* * PLL Reference Divider M: */ - M = pll_regs[2]; + M = pll_regs[PLL_REF_DIV]; /* * PLL Feedback Divider N (Dependent on CLOCK_CNTL): */ - N = pll_regs[7 + (clock_cntl & 3)]; + N = pll_regs[VCLK0_FB_DIV + (clock_cntl & 3)]; /* * PLL Post Divider P (Dependent on CLOCK_CNTL): */ - P = 1 << (pll_regs[6] >> ((clock_cntl & 3) << 1)); + P = aty_postdividers[((pll_regs[VCLK_POST_DIV] >> ((clock_cntl & 3) << 1)) & 3) | + ((pll_regs[PLL_EXT_CNTL] >> (2 + (clock_cntl & 3))) & 4)]; /* * PLL Divider Q: diff --git a/drivers/video/fbdev/aty/mach64_ct.c b/drivers/video/fbdev/aty/mach64_ct.c index 51f29d627ceb..af54256a20a1 100644 --- a/drivers/video/fbdev/aty/mach64_ct.c +++ b/drivers/video/fbdev/aty/mach64_ct.c @@ -114,7 +114,7 @@ static void aty_st_pll_ct(int offset, u8 val, const struct atyfb_par *par) */ #define Maximum_DSP_PRECISION 7 -static u8 postdividers[] = {1,2,4,8,3}; +const u8 aty_postdividers[8] = {1,2,4,8,3,5,6,12}; static int aty_dsp_gt(const struct fb_info *info, u32 bpp, struct pll_ct *pll) { @@ -221,7 +221,7 @@ static int aty_valid_pll_ct(const struct fb_info *info, u32 vclk_per, struct pll pll->vclk_post_div += (q < 64*8); pll->vclk_post_div += (q < 32*8); } - pll->vclk_post_div_real = postdividers[pll->vclk_post_div]; + pll->vclk_post_div_real = aty_postdividers[pll->vclk_post_div]; // pll->vclk_post_div <<= 6; pll->vclk_fb_div = q * pll->vclk_post_div_real / 8; pllvclk = (1000000 * 2 * pll->vclk_fb_div) / @@ -512,7 +512,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) u8 mclk_fb_div, pll_ext_cntl; pll->ct.pll_ref_div = aty_ld_pll_ct(PLL_REF_DIV, par); pll_ext_cntl = aty_ld_pll_ct(PLL_EXT_CNTL, par); - pll->ct.xclk_post_div_real = postdividers[pll_ext_cntl & 0x07]; + pll->ct.xclk_post_div_real = aty_postdividers[pll_ext_cntl & 0x07]; mclk_fb_div = aty_ld_pll_ct(MCLK_FB_DIV, par); if (pll_ext_cntl & PLL_MFB_TIMES_4_2B) mclk_fb_div <<= 1; @@ -534,7 +534,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) xpost_div += (q < 64*8); xpost_div += (q < 32*8); } - pll->ct.xclk_post_div_real = postdividers[xpost_div]; + pll->ct.xclk_post_div_real = aty_postdividers[xpost_div]; pll->ct.mclk_fb_div = q * pll->ct.xclk_post_div_real / 8; #ifdef CONFIG_PPC @@ -583,7 +583,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) mpost_div += (q < 64*8); mpost_div += (q < 32*8); } - sclk_post_div_real = postdividers[mpost_div]; + sclk_post_div_real = aty_postdividers[mpost_div]; pll->ct.sclk_fb_div = q * sclk_post_div_real / 8; pll->ct.spll_cntl2 = mpost_div << 4; #ifdef DEBUG From 49f80e6ef438982c42d9acb9175ead40d64074c1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Sep 2018 14:45:03 +0300 Subject: [PATCH 154/812] perf script python: Fix export-to-postgresql.py occasional failure commit 25e11700b54c7b6b5ebfc4361981dae12299557b upstream. Occasional export failures were found to be caused by truncating 64-bit pointers to 32-bits. Fix by explicitly setting types for all ctype arguments and results. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180911114504.28516-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/scripts/python/export-to-postgresql.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 1b02cdc0cab6..84cb5913b05a 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -205,14 +205,23 @@ from ctypes import * libpq = CDLL("libpq.so.5") PQconnectdb = libpq.PQconnectdb PQconnectdb.restype = c_void_p +PQconnectdb.argtypes = [ c_char_p ] PQfinish = libpq.PQfinish +PQfinish.argtypes = [ c_void_p ] PQstatus = libpq.PQstatus +PQstatus.restype = c_int +PQstatus.argtypes = [ c_void_p ] PQexec = libpq.PQexec PQexec.restype = c_void_p +PQexec.argtypes = [ c_void_p, c_char_p ] PQresultStatus = libpq.PQresultStatus +PQresultStatus.restype = c_int +PQresultStatus.argtypes = [ c_void_p ] PQputCopyData = libpq.PQputCopyData +PQputCopyData.restype = c_int PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ] PQputCopyEnd = libpq.PQputCopyEnd +PQputCopyEnd.restype = c_int PQputCopyEnd.argtypes = [ c_void_p, c_void_p ] sys.path.append(os.environ['PERF_EXEC_PATH'] + \ From 863334098b60d163d39fde83d6044c54ca60d53e Mon Sep 17 00:00:00 2001 From: Edgar Cherkasov Date: Thu, 27 Sep 2018 11:56:03 +0300 Subject: [PATCH 155/812] i2c: i2c-scmi: fix for i2c_smbus_write_block_data commit 08d9db00fe0e300d6df976e6c294f974988226dd upstream. The i2c-scmi driver crashes when the SMBus Write Block transaction is executed: WARNING: CPU: 9 PID: 2194 at mm/page_alloc.c:3931 __alloc_pages_slowpath+0x9db/0xec0 Call Trace: ? get_page_from_freelist+0x49d/0x11f0 ? alloc_pages_current+0x6a/0xe0 ? new_slab+0x499/0x690 __alloc_pages_nodemask+0x265/0x280 alloc_pages_current+0x6a/0xe0 kmalloc_order+0x18/0x40 kmalloc_order_trace+0x24/0xb0 ? acpi_ut_allocate_object_desc_dbg+0x62/0x10c __kmalloc+0x203/0x220 acpi_os_allocate_zeroed+0x34/0x36 acpi_ut_copy_eobject_to_iobject+0x266/0x31e acpi_evaluate_object+0x166/0x3b2 acpi_smbus_cmi_access+0x144/0x530 [i2c_scmi] i2c_smbus_xfer+0xda/0x370 i2cdev_ioctl_smbus+0x1bd/0x270 i2cdev_ioctl+0xaa/0x250 do_vfs_ioctl+0xa4/0x600 SyS_ioctl+0x79/0x90 do_syscall_64+0x73/0x130 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 ACPI Error: Evaluating _SBW: 4 (20170831/smbus_cmi-185) This problem occurs because the length of ACPI Buffer object is not defined/initialized in the code before a corresponding ACPI method is called. The obvious patch below fixes this issue. Signed-off-by: Edgar Cherkasov Acked-by: Viktor Krasnov Acked-by: Michael Brunner Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-scmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index 7aa7b9cb6203..efefcfa24a4c 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -152,6 +152,7 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags, mt_params[3].type = ACPI_TYPE_INTEGER; mt_params[3].integer.value = len; mt_params[4].type = ACPI_TYPE_BUFFER; + mt_params[4].buffer.length = len; mt_params[4].buffer.pointer = data->block + 1; } break; From f37a3ff6e4c7cb93eb65c6fc6c549fafa2673863 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 12 Feb 2018 14:24:47 +0200 Subject: [PATCH 156/812] xhci: Don't print a warning when setting link state for disabled ports commit 1208d8a84fdcae6b395c57911cdf907450d30e70 upstream. When disabling a USB3 port the hub driver will set the port link state to U3 to prevent "ejected" or "safely removed" devices that are still physically connected from immediately re-enumerating. If the device was really unplugged, then error messages were printed as the hub tries to set the U3 link state for a port that is no longer enabled. xhci-hcd ee000000.usb: Cannot set link state. usb usb8-port1: cannot disable (err = -32) Don't print error message in xhci-hub if hub tries to set port link state for a disabled port. Return -ENODEV instead which also silences hub driver. Signed-off-by: Mathias Nyman Tested-by: Yoshihiro Shimoda Signed-off-by: Ross Zwisler Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 9df00101bfe1..f788f80fe1aa 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1048,17 +1048,17 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, temp = readl(port_array[wIndex]); break; } - - /* Software should not attempt to set - * port link state above '3' (U3) and the port - * must be enabled. - */ - if ((temp & PORT_PE) == 0 || - (link_state > USB_SS_PORT_LS_U3)) { - xhci_warn(xhci, "Cannot set link state.\n"); + /* Port must be enabled */ + if (!(temp & PORT_PE)) { + retval = -ENODEV; + break; + } + /* Can't set port link state above '3' (U3) */ + if (link_state > USB_SS_PORT_LS_U3) { + xhci_warn(xhci, "Cannot set port %d link state %d\n", + wIndex, link_state); goto error; } - if (link_state == USB_SS_PORT_LS_U3) { slot_id = xhci_find_slot_id_by_port(hcd, xhci, wIndex + 1); From 5ac147ebfa1c50cc0f25e3c7f42862cfe6b40d0d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 12 Oct 2018 14:01:26 +0800 Subject: [PATCH 157/812] jffs2: return -ERANGE when xattr buffer is too small When a file have multiple xattrs and the passed buffer is smaller than the required size, jffs2_listxattr() should return -ERANGE instead of continue, else Oops may occur due to memory corruption. Also remove the unnecessary check ("rc < 0"), because xhandle->list(...) will not return an error number. Spotted by generic/377 in xfstests-dev. NB: The problem had been fixed by commit 764a5c6b1fa4 ("xattr handlers: Simplify list operation") in v4.5-rc1, but the modification in that commit may be too much because it modifies all file-systems which implement xattr, so I create a single patch for jffs2 to fix the problem. Signed-off-by: Hou Tao Cc: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 4c2c03663533..8e1427762eeb 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -1004,12 +1004,14 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size) rc = xhandle->list(xhandle, dentry, buffer + len, size - len, xd->xname, xd->name_len); + if (rc > size - len) { + rc = -ERANGE; + goto out; + } } else { rc = xhandle->list(xhandle, dentry, NULL, 0, xd->xname, xd->name_len); } - if (rc < 0) - goto out; len += rc; } rc = len; From 4348b96650bb03e0fa6cf5704ffc1a6517db4970 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 26 Sep 2018 00:41:04 -0400 Subject: [PATCH 158/812] bnxt_en: Fix TX timeout during netpoll. [ Upstream commit 73f21c653f930f438d53eed29b5e4c65c8a0f906 ] The current netpoll implementation in the bnxt_en driver has problems that may miss TX completion events. bnxt_poll_work() in effect is only handling at most 1 TX packet before exiting. In addition, there may be in flight TX completions that ->poll() may miss even after we fix bnxt_poll_work() to handle all visible TX completions. netpoll may not call ->poll() again and HW may not generate IRQ because the driver does not ARM the IRQ when the budget (0 for netpoll) is reached. We fix it by handling all TX completions and to always ARM the IRQ when we exit ->poll() with 0 budget. Also, the logic to ACK the completion ring in case it is almost filled with TX completions need to be adjusted to take care of the 0 budget case, as discussed with Eric Dumazet Reported-by: Song Liu Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4ffacafddacb..fea8116da06a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1343,8 +1343,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. */ - if (unlikely(tx_pkts > bp->tx_wake_thresh)) + if (unlikely(tx_pkts > bp->tx_wake_thresh)) { rx_pkts = budget; + raw_cons = NEXT_RAW_CMP(raw_cons); + break; + } } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &agg_event); if (likely(rc >= 0)) @@ -1362,7 +1365,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) } raw_cons = NEXT_RAW_CMP(raw_cons); - if (rx_pkts == budget) + if (rx_pkts && rx_pkts == budget) break; } @@ -1404,8 +1407,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget) while (1) { work_done += bnxt_poll_work(bp, bnapi, budget - work_done); - if (work_done >= budget) + if (work_done >= budget) { + if (!budget) + BNXT_CP_DB_REARM(cpr->cp_doorbell, + cpr->cp_raw_cons); break; + } if (!bnxt_has_work(bp, cpr)) { napi_complete(napi); From 2691921c7d26efc18400162b77c794d6a921afe5 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 24 Sep 2018 14:40:11 -0700 Subject: [PATCH 159/812] bonding: avoid possible dead-lock [ Upstream commit d4859d749aa7090ffb743d15648adb962a1baeae ] Syzkaller reported this on a slightly older kernel but it's still applicable to the current kernel - ====================================================== WARNING: possible circular locking dependency detected 4.18.0-next-20180823+ #46 Not tainted ------------------------------------------------------ syz-executor4/26841 is trying to acquire lock: 00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652 but task is already holding lock: 00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] 00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (rtnl_mutex){+.+.}: __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline] bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320 process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153 worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 -> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}: process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129 worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 -> #0 ((wq_completion)bond_dev->name){+.+.}: lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 register_netdevice+0x337/0x1100 net/core/dev.c:8410 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 __sys_sendmsg+0x11d/0x290 net/socket.c:2153 __do_sys_sendmsg net/socket.c:2162 [inline] __se_sys_sendmsg net/socket.c:2160 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Chain exists of: (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock((work_completion)(&(&nnw->work)->work)); lock(rtnl_mutex); lock((wq_completion)bond_dev->name); *** DEADLOCK *** 1 lock held by syz-executor4/26841: stack backtrace: CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222 check_prev_add kernel/locking/lockdep.c:1862 [inline] check_prevs_add kernel/locking/lockdep.c:1975 [inline] validate_chain kernel/locking/lockdep.c:2416 [inline] __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412 lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 register_netdevice+0x337/0x1100 net/core/dev.c:8410 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 __sys_sendmsg+0x11d/0x290 net/socket.c:2153 __do_sys_sendmsg net/socket.c:2162 [inline] __se_sys_sendmsg net/socket.c:2160 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457089 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001 Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 43 +++++++++++++-------------------- include/net/bonding.h | 7 +----- 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 339118f3c718..78da1b7b4d86 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -216,6 +216,7 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, static void bond_slave_arr_handler(struct work_struct *work); static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, int mod); +static void bond_netdev_notify_work(struct work_struct *work); /*---------------------------- General routines -----------------------------*/ @@ -1237,6 +1238,8 @@ static struct slave *bond_alloc_slave(struct bonding *bond) return NULL; } } + INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); + return slave; } @@ -1244,6 +1247,7 @@ static void bond_free_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); + cancel_delayed_work_sync(&slave->notify_work); if (BOND_MODE(bond) == BOND_MODE_8023AD) kfree(SLAVE_AD_INFO(slave)); @@ -1265,39 +1269,26 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } -static void bond_netdev_notify(struct net_device *dev, - struct netdev_bonding_info *info) -{ - rtnl_lock(); - netdev_bonding_info_change(dev, info); - rtnl_unlock(); -} - static void bond_netdev_notify_work(struct work_struct *_work) { - struct netdev_notify_work *w = - container_of(_work, struct netdev_notify_work, work.work); + struct slave *slave = container_of(_work, struct slave, + notify_work.work); - bond_netdev_notify(w->dev, &w->bonding_info); - dev_put(w->dev); - kfree(w); + if (rtnl_trylock()) { + struct netdev_bonding_info binfo; + + bond_fill_ifslave(slave, &binfo.slave); + bond_fill_ifbond(slave->bond, &binfo.master); + netdev_bonding_info_change(slave->dev, &binfo); + rtnl_unlock(); + } else { + queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); + } } void bond_queue_slave_event(struct slave *slave) { - struct bonding *bond = slave->bond; - struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); - - if (!nnw) - return; - - dev_hold(slave->dev); - nnw->dev = slave->dev; - bond_fill_ifslave(slave, &nnw->bonding_info.slave); - bond_fill_ifbond(bond, &nnw->bonding_info.master); - INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); - - queue_delayed_work(slave->bond->wq, &nnw->work, 0); + queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); } /* enslave device to bond device */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 93abe5f6188d..d5abd3a80896 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -146,12 +146,6 @@ struct bond_parm_tbl { int mode; }; -struct netdev_notify_work { - struct delayed_work work; - struct net_device *dev; - struct netdev_bonding_info bonding_info; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -177,6 +171,7 @@ struct slave { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif + struct delayed_work notify_work; struct kobject kobj; struct rtnl_link_stats64 slave_stats; }; From 20f16d1a3817523350e9c5b5a231c4cb2cf0ab3f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 19 Sep 2018 15:02:07 +0200 Subject: [PATCH 160/812] ip6_tunnel: be careful when accessing the inner header [ Upstream commit 76c0ddd8c3a683f6e2c6e60e11dc1a1558caf4bc ] the ip6 tunnel xmit ndo assumes that the processed skb always contains an ip[v6] header, but syzbot has found a way to send frames that fall short of this assumption, leading to the following splat: BUG: KMSAN: uninit-value in ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] BUG: KMSAN: uninit-value in ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 CPU: 0 PID: 4504 Comm: syz-executor558 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 __netdev_start_xmit include/linux/netdevice.h:4066 [inline] netdev_start_xmit include/linux/netdevice.h:4075 [inline] xmit_one net/core/dev.c:3026 [inline] dev_hard_start_xmit+0x5f1/0xc70 net/core/dev.c:3042 __dev_queue_xmit+0x27ee/0x3520 net/core/dev.c:3557 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 packet_snd net/packet/af_packet.c:2944 [inline] packet_sendmsg+0x7c70/0x8a30 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 SyS_sendmmsg+0x63/0x90 net/socket.c:2162 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x441819 RSP: 002b:00007ffe58ee8268 EFLAGS: 00000213 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000441819 RDX: 0000000000000002 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006cd018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000402510 R13: 00000000004025a0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 packet_alloc_skb net/packet/af_packet.c:2803 [inline] packet_snd net/packet/af_packet.c:2894 [inline] packet_sendmsg+0x6454/0x8a30 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 SyS_sendmmsg+0x63/0x90 net/socket.c:2162 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 This change addresses the issue adding the needed check before accessing the inner header. The ipv4 side of the issue is apparently there since the ipv4 over ipv6 initial support, and the ipv6 side predates git history. Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+3fde91d4d394747d6db4@syzkaller.appspotmail.com Tested-by: Alexander Potapenko Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e8f21dd520b2..7c7a74ea2b0d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1096,7 +1096,7 @@ static inline int ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; int encap_limit = -1; struct flowi6 fl6; __u8 dsfield; @@ -1104,6 +1104,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + /* ensure we can access the full inner ip header */ + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return -1; + + iph = ip_hdr(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); tproto = ACCESS_ONCE(t->parms.proto); @@ -1140,7 +1145,7 @@ static inline int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; int encap_limit = -1; __u16 offset; struct flowi6 fl6; @@ -1149,6 +1154,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + return -1; + + ipv6h = ipv6_hdr(skb); tproto = ACCESS_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || ip6_tnl_addr_conflict(t, ipv6h)) From f9d3572816ba724129da0ec2ecd590d5c899e86e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 24 Sep 2018 15:48:19 +0200 Subject: [PATCH 161/812] ip_tunnel: be careful when accessing the inner header [ Upstream commit ccfec9e5cb2d48df5a955b7bf47f7782157d3bc2] Cong noted that we need the same checks introduced by commit 76c0ddd8c3a6 ("ip6_tunnel: be careful when accessing the inner header") even for ipv4 tunnels. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Suggested-by: Cong Wang Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 3d62feb65932..9d3176b080a4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -597,6 +597,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); + unsigned int inner_nhdr_len = 0; const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; @@ -607,6 +608,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, int err; bool connected; + /* ensure we can access the inner net header, for several users below */ + if (skb->protocol == htons(ETH_P_IP)) + inner_nhdr_len = sizeof(struct iphdr); + else if (skb->protocol == htons(ETH_P_IPV6)) + inner_nhdr_len = sizeof(struct ipv6hdr); + if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) + goto tx_error; + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); From d7148eeb647fd55909f985ad14b0176ff1fae335 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 30 Sep 2018 11:33:39 -0700 Subject: [PATCH 162/812] ipv4: fix use-after-free in ip_cmsg_recv_dstaddr() [ Upstream commit 64199fc0a46ba211362472f7f942f900af9492fd ] Caching ip_hdr(skb) before a call to pskb_may_pull() is buggy, do not do it. Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 88426a6a7a85..3f8caf7d19b8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -134,7 +134,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - const struct iphdr *iph = ip_hdr(skb); __be16 *ports; int end; @@ -149,7 +148,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; - sin.sin_addr.s_addr = iph->daddr; + sin.sin_addr.s_addr = ip_hdr(skb)->daddr; sin.sin_port = ports[1]; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); From 2b7e4c735933be79882aba2bed9afa789e03c62f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 9 Oct 2018 17:48:14 +0200 Subject: [PATCH 163/812] net: ipv4: update fnhe_pmtu when first hop's MTU changes [ Upstream commit af7d6cce53694a88d6a1bb60c9a239a6a5144459 ] Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions"), exceptions get deprecated separately from cached routes. In particular, administrative changes don't clear PMTU anymore. As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes on PMTU of exceptions for MTU-less routes"), the PMTU discovered before the local MTU change can become stale: - if the local MTU is now lower than the PMTU, that PMTU is now incorrect - if the local MTU was the lowest value in the path, and is increased, we might discover a higher PMTU Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those cases. If the exception was locked, the discovered PMTU was smaller than the minimal accepted PMTU. In that case, if the new local MTU is smaller than the current PMTU, let PMTU discovery figure out if locking of the exception is still needed. To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU notifier. By the time the notifier is called, dev->mtu has been changed. This patch adds the old MTU as additional information in the notifier structure, and a new call_netdevice_notifiers_u32() function. Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 7 ++++++ include/net/ip_fib.h | 1 + net/core/dev.c | 28 ++++++++++++++++++++-- net/ipv4/fib_frontend.c | 12 ++++++---- net/ipv4/fib_semantics.c | 50 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fc54049e8286..0b211d482c96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2168,6 +2168,13 @@ struct netdev_notifier_info { struct net_device *dev; }; +struct netdev_notifier_info_ext { + struct netdev_notifier_info info; /* must be first */ + union { + u32 mtu; + } ext; +}; + struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsigned int flags_changed; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3afb7c4c7098..2a25b53cd427 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -322,6 +322,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net *net, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); extern u32 fib_multipath_secret __read_mostly; diff --git a/net/core/dev.c b/net/core/dev.c index 191cf880d805..7366feb8b5b3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1660,6 +1660,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +/** + * call_netdevice_notifiers_mtu - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @arg: additional u32 argument passed to the notifier function + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ +static int call_netdevice_notifiers_mtu(unsigned long val, + struct net_device *dev, u32 arg) +{ + struct netdev_notifier_info_ext info = { + .info.dev = dev, + .ext.mtu = arg, + }; + + BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); + + return call_netdevice_notifiers_info(val, dev, &info.info); +} + #ifdef CONFIG_NET_INGRESS static struct static_key ingress_needed __read_mostly; @@ -6134,14 +6156,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) err = __dev_set_mtu(dev, new_mtu); if (!err) { - err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + orig_mtu); err = notifier_to_errno(err); if (err) { /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. */ __dev_set_mtu(dev, orig_mtu); - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + new_mtu); } } return err; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 015c33712803..ce646572b912 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1170,7 +1170,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *upper_info = ptr; + struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@ -1205,16 +1206,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); - /* fall through */ + rt_cache_flush(net); + break; case NETDEV_CHANGEMTU: + fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: - info = ptr; + upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ - if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + if (upper_info->upper_dev && + netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 03ebff3950d8..3109b9bb95d2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1373,6 +1373,56 @@ int fib_sync_down_addr(struct net *net, __be32 local) return ret; } +/* Update the PMTU of exceptions when: + * - the new MTU of the first hop becomes smaller than the PMTU + * - the old MTU was the same as the PMTU, and it limited discovery of + * larger MTUs on the path. With that limit raised, we can now + * discover larger MTUs + * A special case is locked exceptions, for which the PMTU is smaller + * than the minimal accepted PMTU: + * - if the new MTU is greater than the PMTU, don't make any change + * - otherwise, unlock and set PMTU + */ +static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +{ + struct fnhe_hash_bucket *bucket; + int i; + + bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!bucket) + return; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); + fnhe; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { + if (fnhe->fnhe_mtu_locked) { + if (new <= fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + fnhe->fnhe_mtu_locked = false; + } + } else if (new < fnhe->fnhe_pmtu || + orig == fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + } + } + } +} + +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) +{ + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->nh_dev == dev) + nh_update_mtu(nh, dev->mtu, orig_mtu); + } +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host From 9ee4a60d618e8cf9ba6228139fae35c4ba3d4df0 Mon Sep 17 00:00:00 2001 From: Jeff Barnhill <0xeffeff@gmail.com> Date: Fri, 21 Sep 2018 00:45:27 +0000 Subject: [PATCH 164/812] net/ipv6: Display all addresses in output of /proc/net/if_inet6 [ Upstream commit 86f9bd1ff61c413a2a251fa736463295e4e24733 ] The backend handling for /proc/net/if_inet6 in addrconf.c doesn't properly handle starting/stopping the iteration. The problem is that at some point during the iteration, an overflow is detected and the process is subsequently stopped. The item being shown via seq_printf() when the overflow occurs is not actually shown, though. When start() is subsequently called to resume iterating, it returns the next item, and thus the item that was being processed when the overflow occurred never gets printed. Alter the meaning of the private data member "offset". Currently, when it is not 0 (which only happens at the very beginning), "offset" represents the next hlist item to be printed. After this change, "offset" always represents the current item. This is also consistent with the private data member "bucket", which represents the current bucket, and also the use of "pos" as defined in seq_file.txt: The pos passed to start() will always be either zero, or the most recent pos used in the previous session. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0613be57513e..582e757e5727 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3786,7 +3786,6 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) p++; continue; } - state->offset++; return ifa; } @@ -3810,13 +3809,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, return ifa; } + state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { - state->offset = 0; hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; - state->offset++; return ifa; } } From 1f3a2366922b3e0e1af3df011dae20929d3a8204 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Thu, 20 Sep 2018 14:29:45 -0600 Subject: [PATCH 165/812] netlabel: check for IPV4MASK in addrinfo_get [ Upstream commit f88b4c01b97e09535505cf3c327fdbce55c27f00 ] netlbl_unlabel_addrinfo_get() assumes that if it finds the NLBL_UNLABEL_A_IPV4ADDR attribute, it must also have the NLBL_UNLABEL_A_IPV4MASK attribute as well. However, this is not necessarily the case as the current checks in netlbl_unlabel_staticadd() and friends are not sufficent to enforce this. If passed a netlink message with NLBL_UNLABEL_A_IPV4ADDR, NLBL_UNLABEL_A_IPV6ADDR, and NLBL_UNLABEL_A_IPV6MASK attributes, these functions will all call netlbl_unlabel_addrinfo_get() which will then attempt dereference NULL when fetching the non-existent NLBL_UNLABEL_A_IPV4MASK attribute: Unable to handle kernel NULL pointer dereference at virtual address 0 Process unlab (pid: 31762, stack limit = 0xffffff80502d8000) Call trace: netlbl_unlabel_addrinfo_get+0x44/0xd8 netlbl_unlabel_staticremovedef+0x98/0xe0 genl_rcv_msg+0x354/0x388 netlink_rcv_skb+0xac/0x118 genl_rcv+0x34/0x48 netlink_unicast+0x158/0x1f0 netlink_sendmsg+0x32c/0x338 sock_sendmsg+0x44/0x60 ___sys_sendmsg+0x1d0/0x2a8 __sys_sendmsg+0x64/0xb4 SyS_sendmsg+0x34/0x4c el0_svc_naked+0x34/0x38 Code: 51001149 7100113f 540000a0 f9401508 (79400108) ---[ end trace f6438a488e737143 ]--- Kernel panic - not syncing: Fatal exception Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlabel/netlabel_unlabeled.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 3f33ec44bd28..9f4ec16abfcf 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -787,7 +787,8 @@ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, { u32 addr_len; - if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { + if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && + info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); if (addr_len != sizeof(struct in_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) From 0aa059ea2dacc7383052f9b2380edd3964f23033 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 28 Sep 2018 17:04:30 -0600 Subject: [PATCH 166/812] net/usb: cancel pending work when unbinding smsc75xx [ Upstream commit f7b2a56e1f3dcbdb4cf09b2b63e859ffe0e09df8 ] Cancel pending work before freeing smsc75xx private data structure during binding. This fixes the following crash in the driver: BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 IP: mutex_lock+0x2b/0x3f Workqueue: events smsc75xx_deferred_multicast_write [smsc75xx] task: ffff8caa83e85700 task.stack: ffff948b80518000 RIP: 0010:mutex_lock+0x2b/0x3f Call Trace: smsc75xx_deferred_multicast_write+0x40/0x1af [smsc75xx] process_one_work+0x18d/0x2fc worker_thread+0x1a2/0x269 ? pr_cont_work+0x58/0x58 kthread+0xfa/0x10a ? pr_cont_work+0x58/0x58 ? rcu_read_unlock_sched_notrace+0x48/0x48 ret_from_fork+0x22/0x40 Signed-off-by: Yu Zhao Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc75xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 478937418a33..8dbe086e0a96 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1506,6 +1506,7 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); if (pdata) { + cancel_work_sync(&pdata->set_multicast); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); pdata = NULL; From 2f04cacdcd5bc4227d0d4ed6be08da6f959168c8 Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Wed, 26 Sep 2018 12:41:10 -0700 Subject: [PATCH 167/812] qlcnic: fix Tx descriptor corruption on 82xx devices [ Upstream commit c333fa0c4f220f8f7ea5acd6b0ebf3bf13fd684d ] In regular NIC transmission flow, driver always configures MAC using Tx queue zero descriptor as a part of MAC learning flow. But with multi Tx queue supported NIC, regular transmission can occur on any non-zero Tx queue and from that context it uses Tx queue zero descriptor to configure MAC, at the same time TX queue zero could be used by another CPU for regular transmission which could lead to Tx queue zero descriptor corruption and cause FW abort. This patch fixes this in such a way that driver always configures learned MAC address from the same Tx queue which is used for regular transmission. Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism") Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 +++++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 ++++++------ 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 55007f1e6bbc..12cd8aef1881 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1802,7 +1802,8 @@ struct qlcnic_hardware_ops { int (*config_loopback) (struct qlcnic_adapter *, u8); int (*clear_loopback) (struct qlcnic_adapter *, u8); int (*config_promisc_mode) (struct qlcnic_adapter *, u32); - void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); + void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *tx_ring); int (*get_board_info) (struct qlcnic_adapter *); void (*set_mac_filter_count) (struct qlcnic_adapter *); void (*free_mac_list) (struct qlcnic_adapter *); @@ -2044,9 +2045,10 @@ static inline int qlcnic_nic_set_promisc(struct qlcnic_adapter *adapter, } static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, - u64 *addr, u16 id) + u64 *addr, u16 vlan, + struct qlcnic_host_tx_ring *tx_ring) { - adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id); + adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring); } static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index b4f3cb55605e..7f7aea9758e7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2132,7 +2132,8 @@ int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr, } void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, - u16 vlan_id) + u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring) { u8 mac[ETH_ALEN]; memcpy(&mac, addr, ETH_ALEN); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index 331ae2c20f40..c8e012b3f7e7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct qlcnic_adapter *, ulong, u32); int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32); int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); -void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); +void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *ring); int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h index 4bb33af8e2b3..56a3bd9e37dc 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h @@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, struct net_device *netdev); void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *); void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, - u64 *uaddr, u16 vlan_id); + u64 *uaddr, u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring); int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *, struct ethtool_coalesce *); int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index d4b5085a21fa..98042a3701b5 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -269,13 +269,12 @@ static void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, } void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, - u16 vlan_id) + u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring) { struct cmd_desc_type0 *hwdesc; struct qlcnic_nic_req *req; struct qlcnic_mac_req *mac_req; struct qlcnic_vlan_req *vlan_req; - struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; u32 producer; u64 word; @@ -302,7 +301,8 @@ void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *first_desc, - struct sk_buff *skb) + struct sk_buff *skb, + struct qlcnic_host_tx_ring *tx_ring) { struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data); struct ethhdr *phdr = (struct ethhdr *)(skb->data); @@ -336,7 +336,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, tmp_fil->vlan_id == vlan_id) { if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) qlcnic_change_filter(adapter, &src_addr, - vlan_id); + vlan_id, tx_ring); tmp_fil->ftime = jiffies; return; } @@ -351,7 +351,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, if (!fil) return; - qlcnic_change_filter(adapter, &src_addr, vlan_id); + qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); fil->ftime = jiffies; fil->vlan_id = vlan_id; memcpy(fil->faddr, &src_addr, ETH_ALEN); @@ -767,7 +767,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } if (adapter->drv_mac_learn) - qlcnic_send_filter(adapter, first_desc, skb); + qlcnic_send_filter(adapter, first_desc, skb, tx_ring); tx_ring->tx_stats.tx_bytes += skb->len; tx_ring->tx_stats.xmit_called++; From e0fff89962d3d30cf677b3c0e7a2f379d7ef0f3a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Oct 2018 12:21:59 +0300 Subject: [PATCH 168/812] team: Forbid enslaving team device to itself [ Upstream commit 471b83bd8bbe4e89743683ef8ecb78f7029d8288 ] team's ndo_add_slave() acquires 'team->lock' and later tries to open the newly enslaved device via dev_open(). This emits a 'NETDEV_UP' event that causes the VLAN driver to add VLAN 0 on the team device. team's ndo_vlan_rx_add_vid() will also try to acquire 'team->lock' and deadlock. Fix this by checking early at the enslavement function that a team device is not being enslaved to itself. A similar check was added to the bond driver in commit 09a89c219baf ("bonding: disallow enslaving a bond to itself"). WARNING: possible recursive locking detected 4.18.0-rc7+ #176 Not tainted -------------------------------------------- syz-executor4/6391 is trying to acquire lock: (____ptrval____) (&team->lock){+.+.}, at: team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 but task is already holding lock: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&team->lock); lock(&team->lock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by syz-executor4/6391: #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4662 #1: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 stack backtrace: CPU: 1 PID: 6391 Comm: syz-executor4 Not tainted 4.18.0-rc7+ #176 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_deadlock_bug kernel/locking/lockdep.c:1765 [inline] check_deadlock kernel/locking/lockdep.c:1809 [inline] validate_chain kernel/locking/lockdep.c:2405 [inline] __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435 lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 __mutex_lock_common kernel/locking/mutex.c:757 [inline] __mutex_lock+0x176/0x1820 kernel/locking/mutex.c:894 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:909 team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 vlan_add_rx_filter_info+0x14a/0x1d0 net/8021q/vlan_core.c:210 __vlan_vid_add net/8021q/vlan_core.c:278 [inline] vlan_vid_add+0x63e/0x9d0 net/8021q/vlan_core.c:308 vlan_device_event.cold.12+0x2a/0x2f net/8021q/vlan.c:381 notifier_call_chain+0x180/0x390 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735 call_netdevice_notifiers net/core/dev.c:1753 [inline] dev_open+0x173/0x1b0 net/core/dev.c:1433 team_port_add drivers/net/team/team.c:1219 [inline] team_add_slave+0xa8b/0x1c30 drivers/net/team/team.c:1948 do_set_master+0x1c9/0x220 net/core/rtnetlink.c:2248 do_setlink+0xba4/0x3e10 net/core/rtnetlink.c:2382 rtnl_setlink+0x2a9/0x400 net/core/rtnetlink.c:2636 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4665 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2455 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4683 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfd0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:642 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:652 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2126 __sys_sendmsg+0x11d/0x290 net/socket.c:2164 __do_sys_sendmsg net/socket.c:2173 [inline] __se_sys_sendmsg net/socket.c:2171 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2171 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x456b29 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f9706bf8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f9706bf96d4 RCX: 0000000000456b29 RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000004 RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d3548 R14: 00000000004c8227 R15: 0000000000000000 Fixes: 87002b03baab ("net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls") Signed-off-by: Ido Schimmel Reported-and-tested-by: syzbot+bd051aba086537515cdb@syzkaller.appspotmail.com Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 49174837c2ba..33ffb573fd67 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1142,6 +1142,11 @@ static int team_port_add(struct team *team, struct net_device *port_dev) return -EBUSY; } + if (dev == port_dev) { + netdev_err(dev, "Cannot enslave team device to itself\n"); + return -EINVAL; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n", From 2b2c9306e3cd90d508384a3d94dabf59d24824e6 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 5 Oct 2018 09:04:40 +0200 Subject: [PATCH 169/812] net: mvpp2: Extract the correct ethtype from the skb for tx csum offload [ Upstream commit 35f3625c21852ad839f20c91c7d81c4c1101e207 ] When offloading the L3 and L4 csum computation on TX, we need to extract the l3_proto from the ethtype, independently of the presence of a vlan tag. The actual driver uses skb->protocol as-is, resulting in packets with the wrong L4 checksum being sent when there's a vlan tag in the packet header and checksum offloading is enabled. This commit makes use of vlan_protocol_get() to get the correct ethtype regardless the presence of a vlan tag. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index ac92685dd4e5..42305f3234ff 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -4268,7 +4269,7 @@ static void mvpp2_txq_desc_put(struct mvpp2_tx_queue *txq) } /* Set Tx descriptors fields relevant for CSUM calculation */ -static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto, +static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto, int ip_hdr_len, int l4_proto) { u32 command; @@ -5032,14 +5033,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { int ip_hdr_len = 0; u8 l4_proto; + __be16 l3_proto = vlan_get_protocol(skb); - if (skb->protocol == htons(ETH_P_IP)) { + if (l3_proto == htons(ETH_P_IP)) { struct iphdr *ip4h = ip_hdr(skb); /* Calculate IPv4 checksum and L4 checksum */ ip_hdr_len = ip4h->ihl; l4_proto = ip4h->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (l3_proto == htons(ETH_P_IPV6)) { struct ipv6hdr *ip6h = ipv6_hdr(skb); /* Read l4_protocol from one of IPv6 extra headers */ @@ -5051,7 +5053,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) } return mvpp2_txq_desc_csum(skb_network_offset(skb), - skb->protocol, ip_hdr_len, l4_proto); + l3_proto, ip_hdr_len, l4_proto); } return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE; From 59b6d2e59b290ebd597c3e249477d6866d275ce2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 2 Oct 2018 16:52:03 -0700 Subject: [PATCH 170/812] net: systemport: Fix wake-up interrupt race during resume [ Upstream commit 45ec318578c0c22a11f5b9927d064418e1ab1905 ] The AON_PM_L2 is normally used to trigger and identify the source of a wake-up event. Since the RX_SYS clock is no longer turned off, we also have an interrupt being sent to the SYSTEMPORT INTRL_2_0 controller, and that interrupt remains active up until the magic packet detector is disabled which happens much later during the driver resumption. The race happens if we have a CPU that is entering the SYSTEMPORT INTRL2_0 handler during resume, and another CPU has managed to clear the wake-up interrupt during bcm_sysport_resume_from_wol(). In that case, we have the first CPU stuck in the interrupt handler with an interrupt cause that has been cleared under its feet, and so we keep returning IRQ_NONE and we never make any progress. This was not a problem before because we would always turn off the RX_SYS clock during WoL, so the SYSTEMPORT INTRL2_0 would also be turned off as well, thus not latching the interrupt. The fix is to make sure we do not enable either the MPD or BRCM_TAG_MATCH interrupts since those are redundant with what the AON_PM_L2 interrupt controller already processes and they would cause such a race to occur. Fixes: bb9051a2b230 ("net: systemport: Add support for WAKE_FILTER") Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index af9ec57bbebf..7a6dd5e5e498 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -850,14 +850,22 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) { u32 reg; - /* Stop monitoring MPD interrupt */ - intrl2_0_mask_set(priv, INTRL2_0_MPD); - /* Clear the MagicPacket detection logic */ reg = umac_readl(priv, UMAC_MPD_CTRL); reg &= ~MPD_EN; umac_writel(priv, reg, UMAC_MPD_CTRL); + reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS); + if (reg & INTRL2_0_MPD) + netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); + + if (reg & INTRL2_0_BRCM_MATCH_TAG) { + reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & + RXCHK_BRCM_TAG_MATCH_MASK; + netdev_info(priv->netdev, + "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); + } + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); } @@ -890,11 +898,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) bcm_sysport_tx_reclaim_all(priv); - if (priv->irq0_stat & INTRL2_0_MPD) { - netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n"); - bcm_sysport_resume_from_wol(priv); - } - return IRQ_HANDLED; } @@ -1915,9 +1918,6 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) /* UniMAC receive needs to be turned on */ umac_enable_set(priv, CMD_RX_EN, 1); - /* Enable the interrupt wake-up source */ - intrl2_0_mask_clear(priv, INTRL2_0_MPD); - netif_dbg(priv, wol, ndev, "entered WOL mode\n"); return 0; From dcea9310efa8d0f790509770341be4ea5bfc63b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Oct 2018 15:47:35 -0700 Subject: [PATCH 171/812] rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096 [ Upstream commit 0e1d6eca5113858ed2caea61a5adc03c595f6096 ] We have an impressive number of syzkaller bugs that are linked to the fact that syzbot was able to create a networking device with millions of TX (or RX) queues. Let's limit the number of RX/TX queues to 4096, this really should cover all known cases. A separate patch will add various cond_resched() in the loops handling sysfs entries at device creation and dismantle. Tested: lpaa6:~# ip link add gre-4097 numtxqueues 4097 numrxqueues 4097 type ip6gretap RTNETLINK answers: Invalid argument lpaa6:~# time ip link add gre-4096 numtxqueues 4096 numrxqueues 4096 type ip6gretap real 0m0.180s user 0m0.000s sys 0m0.107s Fixes: 76ff5cc91935 ("rtnl: allow to specify number of rx and tx queues on device creation") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 96c9c0f0905a..f1df04c7d395 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2116,6 +2116,12 @@ struct net_device *rtnl_create_link(struct net *net, else if (ops->get_num_rx_queues) num_rx_queues = ops->get_num_rx_queues(); + if (num_tx_queues < 1 || num_tx_queues > 4096) + return ERR_PTR(-EINVAL); + + if (num_rx_queues < 1 || num_rx_queues > 4096) + return ERR_PTR(-EINVAL); + err = -ENOMEM; dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, ops->setup, num_tx_queues, num_rx_queues); From 84037eebce3a51a135149bcb16fe01bcdf6d7711 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 8 Mar 2016 10:00:11 +0100 Subject: [PATCH 172/812] KVM: x86: remove eager_fpu field of struct kvm_vcpu_arch commit 5a5fbdc0e3f1159a734f1890da60fce70e98271d upstream. It is now equal to use_eager_fpu(), which simply tests a cpufeature bit. Signed-off-by: Paolo Bonzini Signed-off-by: Daniel Sangorrin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/cpuid.c | 3 +-- arch/x86/kvm/x86.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 74fda1a453bd..3a37cdbdfbaa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -439,7 +439,6 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_header_cache; struct fpu guest_fpu; - bool eager_fpu; u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 83d6369c45f5..7b4ea5e2a68f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -104,8 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - vcpu->arch.eager_fpu = use_eager_fpu(); - if (vcpu->arch.eager_fpu) + if (use_eager_fpu()) kvm_x86_ops->fpu_activate(vcpu); /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 53d43d22a84b..660d8a0575cf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7325,7 +7325,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) * Every 255 times fpu_counter rolls over to 0; a guest that uses * the FPU in bursts will revert to loading it on demand. */ - if (!vcpu->arch.eager_fpu) { + if (!use_eager_fpu()) { if (++vcpu->fpu_counter < 5) kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); } From 8c6b69cf4bd3e1f4a35d5e8ab2b4c428fcff348f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 4 Oct 2016 20:34:33 -0400 Subject: [PATCH 173/812] x86/fpu: Remove use_eager_fpu() commit c592b57347069abfc0dcad3b3a302cf882602597 upstream. This removes all the obvious code paths that depend on lazy FPU mode. It shouldn't change the generated code at all. Signed-off-by: Andy Lutomirski Signed-off-by: Rik van Riel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Cc: pbonzini@redhat.com Link: http://lkml.kernel.org/r/1475627678-20788-5-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Daniel Sangorrin Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/crc32c-intel_glue.c | 17 +++---------- arch/x86/include/asm/fpu/internal.h | 34 +------------------------- arch/x86/kernel/fpu/core.c | 38 ++++------------------------- arch/x86/kernel/fpu/signal.c | 8 +++--- arch/x86/kvm/cpuid.c | 4 +-- arch/x86/kvm/x86.c | 10 -------- 6 files changed, 14 insertions(+), 97 deletions(-) diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 715399b14ed7..c194d5717ae5 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -48,21 +48,13 @@ #ifdef CONFIG_X86_64 /* * use carryless multiply version of crc32c when buffer - * size is >= 512 (when eager fpu is enabled) or - * >= 1024 (when eager fpu is disabled) to account + * size is >= 512 to account * for fpu state save/restore overhead. */ -#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512 -#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024 +#define CRC32C_PCL_BREAKEVEN 512 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, unsigned int crc_init); -static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU; -#define set_pcl_breakeven_point() \ -do { \ - if (!use_eager_fpu()) \ - crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \ -} while (0) #endif /* CONFIG_X86_64 */ static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) @@ -185,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, * use faster PCL version if datasize is large enough to * overcome kernel fpu state save/restore overhead */ - if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { + if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { kernel_fpu_begin(); *crcp = crc_pcl(data, len, *crcp); kernel_fpu_end(); @@ -197,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { - if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { + if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { kernel_fpu_begin(); *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); kernel_fpu_end(); @@ -256,7 +248,6 @@ static int __init crc32c_intel_mod_init(void) alg.update = crc32c_pcl_intel_update; alg.finup = crc32c_pcl_intel_finup; alg.digest = crc32c_pcl_intel_digest; - set_pcl_breakeven_point(); } #endif return crypto_register_shash(&alg); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index ec2aedb6f92a..2ff03cca9254 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -57,11 +57,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void); /* * FPU related CPU feature flag helper routines: */ -static __always_inline __pure bool use_eager_fpu(void) -{ - return true; -} - static __always_inline __pure bool use_xsaveopt(void) { return static_cpu_has(X86_FEATURE_XSAVEOPT); @@ -498,24 +493,6 @@ static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) } -/* - * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' - * idiom, which is then paired with the sw-flag (fpregs_active) later on: - */ - -static inline void __fpregs_activate_hw(void) -{ - if (!use_eager_fpu()) - clts(); -} - -static inline void __fpregs_deactivate_hw(void) -{ - if (!use_eager_fpu()) - stts(); -} - -/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */ static inline void __fpregs_deactivate(struct fpu *fpu) { WARN_ON_FPU(!fpu->fpregs_active); @@ -524,7 +501,6 @@ static inline void __fpregs_deactivate(struct fpu *fpu) this_cpu_write(fpu_fpregs_owner_ctx, NULL); } -/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ static inline void __fpregs_activate(struct fpu *fpu) { WARN_ON_FPU(fpu->fpregs_active); @@ -549,22 +525,17 @@ static inline int fpregs_active(void) } /* - * Encapsulate the CR0.TS handling together with the - * software flag. - * * These generally need preemption protection to work, * do try to avoid using these on their own. */ static inline void fpregs_activate(struct fpu *fpu) { - __fpregs_activate_hw(); __fpregs_activate(fpu); } static inline void fpregs_deactivate(struct fpu *fpu) { __fpregs_deactivate(fpu); - __fpregs_deactivate_hw(); } /* @@ -591,8 +562,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) * or if the past 5 consecutive context-switches used math. */ fpu.preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->fpstate_active && - (use_eager_fpu() || new_fpu->counter > 5); + new_fpu->fpstate_active; if (old_fpu->fpregs_active) { if (!copy_fpregs_to_fpstate(old_fpu)) @@ -608,8 +578,6 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) new_fpu->counter++; __fpregs_activate(new_fpu); prefetch(&new_fpu->state); - } else { - __fpregs_deactivate_hw(); } } else { old_fpu->counter = 0; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 6aa0b519c851..b282364c075c 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -53,27 +53,9 @@ static bool kernel_fpu_disabled(void) return this_cpu_read(in_kernel_fpu); } -/* - * Were we in an interrupt that interrupted kernel mode? - * - * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that - * pair does nothing at all: the thread must not have fpu (so - * that we don't try to save the FPU state), and TS must - * be set (so that the clts/stts pair does nothing that is - * visible in the interrupted kernel thread). - * - * Except for the eagerfpu case when we return true; in the likely case - * the thread has FPU but we are not going to set/clear TS. - */ static bool interrupted_kernel_fpu_idle(void) { - if (kernel_fpu_disabled()) - return false; - - if (use_eager_fpu()) - return true; - - return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); + return !kernel_fpu_disabled(); } /* @@ -121,7 +103,6 @@ void __kernel_fpu_begin(void) copy_fpregs_to_fpstate(fpu); } else { this_cpu_write(fpu_fpregs_owner_ctx, NULL); - __fpregs_activate_hw(); } } EXPORT_SYMBOL(__kernel_fpu_begin); @@ -132,8 +113,6 @@ void __kernel_fpu_end(void) if (fpu->fpregs_active) copy_kernel_to_fpregs(&fpu->state); - else - __fpregs_deactivate_hw(); kernel_fpu_enable(); } @@ -194,10 +173,7 @@ void fpu__save(struct fpu *fpu) preempt_disable(); if (fpu->fpregs_active) { if (!copy_fpregs_to_fpstate(fpu)) { - if (use_eager_fpu()) - copy_kernel_to_fpregs(&fpu->state); - else - fpregs_deactivate(fpu); + copy_kernel_to_fpregs(&fpu->state); } } preempt_enable(); @@ -245,8 +221,7 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) * Don't let 'init optimized' areas of the XSAVE area * leak into the child task: */ - if (use_eager_fpu()) - memset(&dst_fpu->state.xsave, 0, xstate_size); + memset(&dst_fpu->state.xsave, 0, xstate_size); /* * Save current FPU registers directly into the child @@ -268,10 +243,7 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) if (!copy_fpregs_to_fpstate(dst_fpu)) { memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); - if (use_eager_fpu()) - copy_kernel_to_fpregs(&src_fpu->state); - else - fpregs_deactivate(src_fpu); + copy_kernel_to_fpregs(&src_fpu->state); } preempt_enable(); } @@ -437,7 +409,7 @@ void fpu__clear(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ - if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) { + if (!static_cpu_has(X86_FEATURE_FPU)) { /* FPU state will be reallocated lazily at the first use. */ fpu__drop(fpu); } else { diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 3de077116218..9be3e79eb629 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -319,11 +319,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } fpu->fpstate_active = 1; - if (use_eager_fpu()) { - preempt_disable(); - fpu__restore(fpu); - preempt_enable(); - } + preempt_disable(); + fpu__restore(fpu); + preempt_enable(); return err; } else { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7b4ea5e2a68f..338d13d4fd2f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -16,7 +16,6 @@ #include #include #include -#include /* For use_eager_fpu. Ugh! */ #include #include #include "cpuid.h" @@ -104,8 +103,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - if (use_eager_fpu()) - kvm_x86_ops->fpu_activate(vcpu); + kvm_x86_ops->fpu_activate(vcpu); /* * The existing code assumes virtual address is 48-bit in the canonical diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 660d8a0575cf..e6ab034f0bc7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7319,16 +7319,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); __kernel_fpu_end(); ++vcpu->stat.fpu_reload; - /* - * If using eager FPU mode, or if the guest is a frequent user - * of the FPU, just leave the FPU active for next time. - * Every 255 times fpu_counter rolls over to 0; a guest that uses - * the FPU in bursts will revert to loading it on demand. - */ - if (!use_eager_fpu()) { - if (++vcpu->fpu_counter < 5) - kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); - } trace_kvm_fpu(0); } From 6e7d7bea15e866838110b7e12dcea9f4ca414b05 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 4 Oct 2016 20:34:34 -0400 Subject: [PATCH 174/812] x86/fpu: Remove struct fpu::counter commit 3913cc3507575273beb165a5e027a081913ed507 upstream. With the lazy FPU code gone, we no longer use the counter field in struct fpu for anything. Get rid it. Signed-off-by: Rik van Riel Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Cc: pbonzini@redhat.com Link: http://lkml.kernel.org/r/1475627678-20788-6-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Daniel Sangorrin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/fpu/internal.h | 3 --- arch/x86/include/asm/fpu/types.h | 11 ----------- arch/x86/kernel/fpu/core.c | 3 --- 3 files changed, 17 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 2ff03cca9254..16825dda18dc 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -575,15 +575,12 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { - new_fpu->counter++; __fpregs_activate(new_fpu); prefetch(&new_fpu->state); } } else { - old_fpu->counter = 0; old_fpu->last_cpu = -1; if (fpu.preload) { - new_fpu->counter++; if (fpu_want_lazy_restore(new_fpu, cpu)) fpu.preload = 0; else diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 1c6f6ac52ad0..dcb85a9ac9b4 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -302,17 +302,6 @@ struct fpu { */ unsigned char fpregs_active; - /* - * @counter: - * - * This counter contains the number of consecutive context switches - * during which the FPU stays used. If this is over a threshold, the - * lazy FPU restore logic becomes eager, to save the trap overhead. - * This is an unsigned char so that after 256 iterations the counter - * wraps and the context switch behavior turns lazy again; this is to - * deal with bursty apps that only use the FPU for a short time: - */ - unsigned char counter; /* * @state: * diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b282364c075c..b322325424bc 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -250,7 +250,6 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { - dst_fpu->counter = 0; dst_fpu->fpregs_active = 0; dst_fpu->last_cpu = -1; @@ -353,7 +352,6 @@ void fpu__restore(struct fpu *fpu) kernel_fpu_disable(); fpregs_activate(fpu); copy_kernel_to_fpregs(&fpu->state); - fpu->counter++; kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(fpu__restore); @@ -370,7 +368,6 @@ EXPORT_SYMBOL_GPL(fpu__restore); void fpu__drop(struct fpu *fpu) { preempt_disable(); - fpu->counter = 0; if (fpu->fpregs_active) { /* Ignore delayed exceptions from user space */ From 4e7ef8fc8001589339083872f3b6260734cc0e76 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 17 Oct 2016 14:40:11 -0700 Subject: [PATCH 175/812] x86/fpu: Finish excising 'eagerfpu' commit e63650840e8b053aa09ad934877e87e9941ed135 upstream. Now that eagerfpu= is gone, remove it from the docs and some comments. Also sync the changes to tools/. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/cf430dd4481d41280e93ac6cf0def1007a67fc8e.1476740397.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Daniel Sangorrin Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 5 ----- arch/x86/include/asm/cpufeatures.h | 1 - arch/x86/include/asm/fpu/types.h | 23 ----------------------- 3 files changed, 29 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3fd53e193b7f..da515c535e62 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -961,11 +961,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See Documentation/x86/intel_mpx.txt for more information about the feature. - eagerfpu= [X86] - on enable eager fpu restore - off disable eager fpu restore - auto selects the default scheme, which automatically - enables eagerfpu restore for xsaveopt. module.async_probe [KNL] Enable asynchronous probe on this module. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dd2269dcbc47..a5fa3195a230 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -104,7 +104,6 @@ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ -/* free, was #define X86_FEATURE_EAGER_FPU ( 3*32+29) * "eagerfpu" Non lazy FPU restore */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index dcb85a9ac9b4..0d81c7d6fe96 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -310,29 +310,6 @@ struct fpu { * the registers in the FPU are more recent than this state * copy. If the task context-switches away then they get * saved here and represent the FPU state. - * - * After context switches there may be a (short) time period - * during which the in-FPU hardware registers are unchanged - * and still perfectly match this state, if the tasks - * scheduled afterwards are not using the FPU. - * - * This is the 'lazy restore' window of optimization, which - * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. - * - * We detect whether a subsequent task uses the FPU via setting - * CR0::TS to 1, which causes any FPU use to raise a #NM fault. - * - * During this window, if the task gets scheduled again, we - * might be able to skip having to do a restore from this - * memory buffer to the hardware registers - at the cost of - * incurring the overhead of #NM fault traps. - * - * Note that on modern CPUs that support the XSAVEOPT (or other - * optimized XSAVE instructions), we don't use #NM traps anymore, - * as the hardware can track whether FPU registers need saving - * or not. On such CPUs we activate the non-lazy ('eagerfpu') - * logic, which unconditionally saves/restores all FPU state - * across context switches. (if FPU state exists.) */ union fpregs_state state; /* From 2a585d445050f16030b372ada1f4ce076de27833 Mon Sep 17 00:00:00 2001 From: Jozef Balga Date: Tue, 21 Aug 2018 05:01:04 -0400 Subject: [PATCH 176/812] media: af9035: prevent buffer overflow on write [ Upstream commit 312f73b648626a0526a3aceebb0a3192aaba05ce ] When less than 3 bytes are written to the device, memcpy is called with negative array size which leads to buffer overflow and kernel panic. This patch adds a condition and returns -EOPNOTSUPP instead. Fixes bugzilla issue 64871 [mchehab+samsung@kernel.org: fix a merge conflict and changed the condition to match the patch's comment, e. g. len == 3 could also be valid] Signed-off-by: Jozef Balga Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/af9035.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/af9035.c b/drivers/media/usb/dvb-usb-v2/af9035.c index 6e02a15d39ce..abddb621d9e6 100644 --- a/drivers/media/usb/dvb-usb-v2/af9035.c +++ b/drivers/media/usb/dvb-usb-v2/af9035.c @@ -389,8 +389,10 @@ static int af9035_i2c_master_xfer(struct i2c_adapter *adap, msg[0].addr == (state->af9033_i2c_addr[1] >> 1)) reg |= 0x100000; - ret = af9035_wr_regs(d, reg, &msg[0].buf[3], - msg[0].len - 3); + ret = (msg[0].len >= 3) ? af9035_wr_regs(d, reg, + &msg[0].buf[3], + msg[0].len - 3) + : -EOPNOTSUPP; } else { /* I2C write */ u8 buf[MAX_XFER_SIZE]; From be7e497bf0908851c8744d30c513655b47e386bf Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 8 Aug 2018 18:44:59 +0530 Subject: [PATCH 177/812] clocksource/drivers/ti-32k: Add CLOCK_SOURCE_SUSPEND_NONSTOP flag for non-am43 SoCs [ Upstream commit 3b7d96a0dbb6b630878597a1838fc39f808b761b ] The 32k clocksource is NONSTOP for non-am43 SoCs. Hence add the flag for all the other SoCs. Reported-by: Tony Lindgren Signed-off-by: Keerthy Acked-by: Tony Lindgren Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-ti-32k.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c index 8518d9dfba5c..73c990867c01 100644 --- a/drivers/clocksource/timer-ti-32k.c +++ b/drivers/clocksource/timer-ti-32k.c @@ -98,6 +98,9 @@ static void __init ti_32k_timer_init(struct device_node *np) return; } + if (!of_machine_is_compatible("ti,am43")) + ti_32k_timer.cs.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; + ti_32k_timer.counter = ti_32k_timer.base; /* From d200ea29009ebd4eb8e932e364d565c4c1a5b837 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 17 Sep 2018 12:43:34 -0700 Subject: [PATCH 178/812] Input: atakbd - fix Atari keymap [ Upstream commit 9e62df51be993035c577371ffee5477697a56aad ] Fix errors in Atari keymap (mostly in keypad, help and undo keys). Patch provided on debian-68k ML by Andreas Schwab , keymap array size and unhandled scancode limit adjusted to 0x73 by me. Tested-by: Michael Schmitz Signed-off-by: Michael Schmitz Signed-off-by: Andreas Schwab Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/atakbd.c | 64 ++++++++++++++------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c index f1235831283d..524a72bee55a 100644 --- a/drivers/input/keyboard/atakbd.c +++ b/drivers/input/keyboard/atakbd.c @@ -79,8 +79,7 @@ MODULE_LICENSE("GPL"); */ -static unsigned char atakbd_keycode[0x72] = { /* American layout */ - [0] = KEY_GRAVE, +static unsigned char atakbd_keycode[0x73] = { /* American layout */ [1] = KEY_ESC, [2] = KEY_1, [3] = KEY_2, @@ -121,9 +120,9 @@ static unsigned char atakbd_keycode[0x72] = { /* American layout */ [38] = KEY_L, [39] = KEY_SEMICOLON, [40] = KEY_APOSTROPHE, - [41] = KEY_BACKSLASH, /* FIXME, '#' */ + [41] = KEY_GRAVE, [42] = KEY_LEFTSHIFT, - [43] = KEY_GRAVE, /* FIXME: '~' */ + [43] = KEY_BACKSLASH, [44] = KEY_Z, [45] = KEY_X, [46] = KEY_C, @@ -149,45 +148,34 @@ static unsigned char atakbd_keycode[0x72] = { /* American layout */ [66] = KEY_F8, [67] = KEY_F9, [68] = KEY_F10, - [69] = KEY_ESC, - [70] = KEY_DELETE, - [71] = KEY_KP7, - [72] = KEY_KP8, - [73] = KEY_KP9, + [71] = KEY_HOME, + [72] = KEY_UP, [74] = KEY_KPMINUS, - [75] = KEY_KP4, - [76] = KEY_KP5, - [77] = KEY_KP6, + [75] = KEY_LEFT, + [77] = KEY_RIGHT, [78] = KEY_KPPLUS, - [79] = KEY_KP1, - [80] = KEY_KP2, - [81] = KEY_KP3, - [82] = KEY_KP0, - [83] = KEY_KPDOT, - [90] = KEY_KPLEFTPAREN, - [91] = KEY_KPRIGHTPAREN, - [92] = KEY_KPASTERISK, /* FIXME */ - [93] = KEY_KPASTERISK, - [94] = KEY_KPPLUS, - [95] = KEY_HELP, + [80] = KEY_DOWN, + [82] = KEY_INSERT, + [83] = KEY_DELETE, [96] = KEY_102ND, - [97] = KEY_KPASTERISK, /* FIXME */ - [98] = KEY_KPSLASH, + [97] = KEY_UNDO, + [98] = KEY_HELP, [99] = KEY_KPLEFTPAREN, [100] = KEY_KPRIGHTPAREN, [101] = KEY_KPSLASH, [102] = KEY_KPASTERISK, - [103] = KEY_UP, - [104] = KEY_KPASTERISK, /* FIXME */ - [105] = KEY_LEFT, - [106] = KEY_RIGHT, - [107] = KEY_KPASTERISK, /* FIXME */ - [108] = KEY_DOWN, - [109] = KEY_KPASTERISK, /* FIXME */ - [110] = KEY_KPASTERISK, /* FIXME */ - [111] = KEY_KPASTERISK, /* FIXME */ - [112] = KEY_KPASTERISK, /* FIXME */ - [113] = KEY_KPASTERISK /* FIXME */ + [103] = KEY_KP7, + [104] = KEY_KP8, + [105] = KEY_KP9, + [106] = KEY_KP4, + [107] = KEY_KP5, + [108] = KEY_KP6, + [109] = KEY_KP1, + [110] = KEY_KP2, + [111] = KEY_KP3, + [112] = KEY_KP0, + [113] = KEY_KPDOT, + [114] = KEY_KPENTER, }; static struct input_dev *atakbd_dev; @@ -195,7 +183,7 @@ static struct input_dev *atakbd_dev; static void atakbd_interrupt(unsigned char scancode, char down) { - if (scancode < 0x72) { /* scancodes < 0xf2 are keys */ + if (scancode < 0x73) { /* scancodes < 0xf3 are keys */ // report raw events here? @@ -209,7 +197,7 @@ static void atakbd_interrupt(unsigned char scancode, char down) input_report_key(atakbd_dev, scancode, down); input_sync(atakbd_dev); } - } else /* scancodes >= 0xf2 are mouse data, most likely */ + } else /* scancodes >= 0xf3 are mouse data, most likely */ printk(KERN_INFO "atakbd: unhandled scancode %x\n", scancode); return; From 6df8eb6ae2295ad0fa830d943585d9eb69785074 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Mon, 17 Sep 2018 15:27:49 -0700 Subject: [PATCH 179/812] Input: atakbd - fix Atari CapsLock behaviour [ Upstream commit 52d2c7bf7c90217fbe875d2d76f310979c48eb83 ] The CapsLock key on Atari keyboards is not a toggle, it does send the normal make and break scancodes. Drop the CapsLock toggle handling code, which did cause the CapsLock key to merely act as a Shift key. Tested-by: Michael Schmitz Signed-off-by: Michael Schmitz Signed-off-by: Andreas Schwab Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/atakbd.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c index 524a72bee55a..fdeda0b0fbd6 100644 --- a/drivers/input/keyboard/atakbd.c +++ b/drivers/input/keyboard/atakbd.c @@ -189,14 +189,8 @@ static void atakbd_interrupt(unsigned char scancode, char down) scancode = atakbd_keycode[scancode]; - if (scancode == KEY_CAPSLOCK) { /* CapsLock is a toggle switch key on Amiga */ - input_report_key(atakbd_dev, scancode, 1); - input_report_key(atakbd_dev, scancode, 0); - input_sync(atakbd_dev); - } else { - input_report_key(atakbd_dev, scancode, down); - input_sync(atakbd_dev); - } + input_report_key(atakbd_dev, scancode, down); + input_sync(atakbd_dev); } else /* scancodes >= 0xf3 are mouse data, most likely */ printk(KERN_INFO "atakbd: unhandled scancode %x\n", scancode); From 0d02fdd080d2435101f09d02e843eae17606fb59 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 21 Sep 2018 02:44:12 -0700 Subject: [PATCH 180/812] net/mlx4: Use cpumask_available for eq->affinity_mask [ Upstream commit 8ac1ee6f4d62e781e3b3fd8b9c42b70371427669 ] Clang warns that the address of a pointer will always evaluated as true in a boolean context: drivers/net/ethernet/mellanox/mlx4/eq.c:243:11: warning: address of array 'eq->affinity_mask' will always evaluate to 'true' [-Wpointer-bool-conversion] if (!eq->affinity_mask || cpumask_empty(eq->affinity_mask)) ~~~~~^~~~~~~~~~~~~ 1 warning generated. Use cpumask_available, introduced in commit f7e30f01a9e2 ("cpumask: Add helper cpumask_available()"), which does the proper checking and avoids this warning. Link: https://github.com/ClangBuiltLinux/linux/issues/86 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/eq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index ff77b8b608bd..7417605c3cf6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -228,7 +228,8 @@ static void mlx4_set_eq_affinity_hint(struct mlx4_priv *priv, int vec) struct mlx4_dev *dev = &priv->dev; struct mlx4_eq *eq = &priv->eq_table.eq[vec]; - if (!eq->affinity_mask || cpumask_empty(eq->affinity_mask)) + if (!cpumask_available(eq->affinity_mask) || + cpumask_empty(eq->affinity_mask)) return; hint_err = irq_set_affinity_hint(eq->irq, eq->affinity_mask); From 6939b3b00c124d1b88c3e1c57d1f18e426df2fa3 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 24 Sep 2018 17:27:04 +1000 Subject: [PATCH 181/812] powerpc/tm: Fix userspace r13 corruption [ Upstream commit cf13435b730a502e814c63c84d93db131e563f5f ] When we treclaim we store the userspace checkpointed r13 to a scratch SPR and then later save the scratch SPR to the user thread struct. Unfortunately, this doesn't work as accessing the user thread struct can take an SLB fault and the SLB fault handler will write the same scratch SPRG that now contains the userspace r13. To fix this, we store r13 to the kernel stack (which can't fault) before we access the user thread struct. Found by running P8 guest + powervm + disable_1tb_segments + TM. Seen as a random userspace segfault with r13 looking like a kernel address. Signed-off-by: Michael Neuling Reviewed-by: Breno Leitao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/tm.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index b7019b559ddb..cf30c2c36502 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -199,13 +199,20 @@ dont_backup_fp: std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) - /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + /* + * Store r13 away so we can free up the scratch SPR for the SLB fault + * handler (needed once we start accessing the thread_struct). + */ + GET_SCRATCH0(r11) + std r11, GPR13(r1) + /* Reset MSR RI so we can take SLB faults again */ li r11, MSR_RI mtmsrd r11, 1 + /* Store the PPR in r11 and reset to decent value */ mfspr r11, SPRN_PPR HMT_MEDIUM @@ -234,7 +241,7 @@ dont_backup_fp: ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ - GET_SCRATCH0(8) /* user r13 */ + ld r8, GPR13(r1) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) std r5, GPR11(r7) From 5a51c55493c039e5579d9d87bc7601ae119b6efb Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 25 Sep 2018 19:36:47 +1000 Subject: [PATCH 182/812] powerpc/tm: Avoid possible userspace r1 corruption on reclaim [ Upstream commit 96dc89d526ef77604376f06220e3d2931a0bfd58 ] Current we store the userspace r1 to PACATMSCRATCH before finally saving it to the thread struct. In theory an exception could be taken here (like a machine check or SLB miss) that could write PACATMSCRATCH and hence corrupt the userspace r1. The SLB fault currently doesn't touch PACATMSCRATCH, but others do. We've never actually seen this happen but it's theoretically possible. Either way, the code is fragile as it is. This patch saves r1 to the kernel stack (which can't fault) before we turn MSR[RI] back on. PACATMSCRATCH is still used but only with MSR[RI] off. We then copy r1 from the kernel stack to the thread struct once we have MSR[RI] back on. Suggested-by: Breno Leitao Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/tm.S | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index cf30c2c36502..2d2860711e07 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -201,6 +201,13 @@ dont_backup_fp: std r11, GPR11(r1) /* Temporary stash */ + /* + * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is + * clobbered by an exception once we turn on MSR_RI below. + */ + ld r11, PACATMSCRATCH(r13) + std r11, GPR1(r1) + /* * Store r13 away so we can free up the scratch SPR for the SLB fault * handler (needed once we start accessing the thread_struct). @@ -237,7 +244,7 @@ dont_backup_fp: SAVE_GPR(8, r7) /* user r8 */ SAVE_GPR(9, r7) /* user r9 */ SAVE_GPR(10, r7) /* user r10 */ - ld r3, PACATMSCRATCH(r13) /* user r1 */ + ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ From 466dda64e443101da297bd26e911df8e0aa6576c Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 13 Sep 2018 23:24:28 +0300 Subject: [PATCH 183/812] ARC: build: Get rid of toolchain check commit 615f64458ad890ef94abc879a66d8b27236e733a upstream. This check is very naive: we simply test if GCC invoked without "-mcpu=XXX" has ARC700 define set. In that case we think that GCC was built with "--with-cpu=arc700" and has libgcc built for ARC700. Otherwise if ARC700 is not defined we think that everythng was built for ARCv2. But in reality our life is much more interesting. 1. Regardless of GCC configuration (i.e. what we pass in "--with-cpu" it may generate code for any ARC core). 2. libgcc might be built with explicitly specified "--mcpu=YYY" That's exactly what happens in case of multilibbed toolchains: - GCC is configured with default settings - All the libs built for many different CPU flavors I.e. that check gets in the way of usage of multilibbed toolchains. And even non-multilibbed toolchains are affected. OpenEmbedded also builds GCC without "--with-cpu" because each and every target component later is compiled with explicitly set "-mcpu=ZZZ". Acked-by: Rob Herring Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/Makefile | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index b9f7306412e5..9d64eacdd2aa 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -18,20 +18,6 @@ cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs -is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0) - -ifdef CONFIG_ISA_ARCOMPACT -ifeq ($(is_700), 0) - $(error Toolchain not configured for ARCompact builds) -endif -endif - -ifdef CONFIG_ISA_ARCV2 -ifeq ($(is_700), 1) - $(error Toolchain not configured for ARCv2 builds) -endif -endif - ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using From 68a318670c663cac76ac359784d2f58d8d19cfc7 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Wed, 16 Aug 2017 14:30:10 -0600 Subject: [PATCH 184/812] usb: gadget: serial: fix oops when data rx'd after close commit daa35bd95634a2a2d72d1049c93576a02711cb1a upstream. When the gadget serial device has no associated TTY, do not pass any received data into the TTY layer for processing; simply drop it instead. This prevents the TTY layer from calling back into the gadget serial driver, which will then crash in e.g. gs_write_room() due to lack of gadget serial device to TTY association (i.e. a NULL pointer dereference). Signed-off-by: Stephen Warren Signed-off-by: Felipe Balbi Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index f7771d86ad6c..4ea44f7122ee 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -518,7 +518,7 @@ static void gs_rx_push(unsigned long _port) } /* push data to (open) tty */ - if (req->actual) { + if (req->actual && tty) { char *packet = req->buf; unsigned size = req->actual; unsigned n; From f91b8b1fd69c57764ffd404e33f565aa75357a1e Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 14 Dec 2015 16:01:57 -0800 Subject: [PATCH 185/812] Drivers: hv: utils: Invoke the poll function after handshake commit 2d0c3b5ad739697a68dc8a444f5b9f4817cf8f8f upstream. When the handshake with daemon is complete, we should poll the channel since during the handshake, we will not be processing any messages. This is a potential bug if the host is waiting for a response from the guest. I would like to thank Dexuan for pointing this out. Signed-off-by: K. Y. Srinivasan Signed-off-by: Dexuan Cui Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_kvp.c | 2 +- drivers/hv/hv_snapshot.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index ce4d3a935491..b97ef3e094fd 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -155,7 +155,7 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg) pr_debug("KVP: userspace daemon ver. %d registered\n", KVP_OP_REGISTER); kvp_register(dm_reg_value); - kvp_transaction.state = HVUTIL_READY; + hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); return 0; } diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index faad79ae318a..c5fb249e34cc 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -114,7 +114,7 @@ static int vss_handle_handshake(struct hv_vss_msg *vss_msg) default: return -EINVAL; } - vss_transaction.state = HVUTIL_READY; + hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper); pr_debug("VSS: userspace daemon ver. %d registered\n", dm_reg_value); return 0; } From eaee7976cf2af79c294ab41d651c0e52488ca868 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 26 Feb 2016 15:13:19 -0800 Subject: [PATCH 186/812] Drivers: hv: util: Pass the channel information during the init call commit b9830d120cbe155863399f25eaef6aa8353e767f upstream. Pass the channel information to the util drivers that need to defer reading the channel while they are processing a request. This would address the following issue reported by Vitaly: Commit 3cace4a61610 ("Drivers: hv: utils: run polling callback always in interrupt context") removed direct *_transaction.state = HVUTIL_READY assignments from *_handle_handshake() functions introducing the following race: if a userspace daemon connects before we get first non-negotiation request from the server hv_poll_channel() won't set transaction state to HVUTIL_READY as (!channel) condition will fail, we set it to non-NULL on the first real request from the server. Signed-off-by: K. Y. Srinivasan Reported-by: Vitaly Kuznetsov Signed-off-by: Dexuan Cui Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 2 +- drivers/hv/hv_kvp.c | 2 +- drivers/hv/hv_snapshot.c | 2 +- drivers/hv/hv_util.c | 1 + include/linux/hyperv.h | 1 + 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 12dcbd8226f2..2cce48d9e903 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -256,7 +256,6 @@ void hv_fcopy_onchannelcallback(void *context) */ fcopy_transaction.recv_len = recvlen; - fcopy_transaction.recv_channel = channel; fcopy_transaction.recv_req_id = requestid; fcopy_transaction.fcopy_msg = fcopy_msg; @@ -323,6 +322,7 @@ static void fcopy_on_reset(void) int hv_fcopy_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; + fcopy_transaction.recv_channel = srv->channel; init_completion(&release_event); /* diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index b97ef3e094fd..cd3fb0151e8a 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -640,7 +640,6 @@ void hv_kvp_onchannelcallback(void *context) */ kvp_transaction.recv_len = recvlen; - kvp_transaction.recv_channel = channel; kvp_transaction.recv_req_id = requestid; kvp_transaction.kvp_msg = kvp_msg; @@ -690,6 +689,7 @@ int hv_kvp_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; + kvp_transaction.recv_channel = srv->channel; init_completion(&release_event); /* diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index c5fb249e34cc..b0feddb17170 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -264,7 +264,6 @@ void hv_vss_onchannelcallback(void *context) */ vss_transaction.recv_len = recvlen; - vss_transaction.recv_channel = channel; vss_transaction.recv_req_id = requestid; vss_transaction.msg = (struct hv_vss_msg *)vss_msg; @@ -340,6 +339,7 @@ hv_vss_init(struct hv_util_service *srv) return -ENOTSUPP; } recv_buffer = srv->recv_buffer; + vss_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 41f5896224bd..9dc63725363d 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -326,6 +326,7 @@ static int util_probe(struct hv_device *dev, srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); if (!srv->recv_buffer) return -ENOMEM; + srv->channel = dev->channel; if (srv->util_init) { ret = srv->util_init(srv); if (ret) { diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index ae6a711dcd1d..281bb007f725 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1179,6 +1179,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, struct hv_util_service { u8 *recv_buffer; + void *channel; void (*util_cb)(void *); int (*util_init)(struct hv_util_service *); void (*util_deinit)(void); From 43fea648af714b0578f20bfadfec18858e67430d Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 30 Apr 2016 19:21:33 -0700 Subject: [PATCH 187/812] Drivers: hv: kvp: fix IP Failover commit 4dbfc2e68004c60edab7e8fd26784383dd3ee9bc upstream. Hyper-V VMs can be replicated to another hosts and there is a feature to set different IP for replicas, it is called 'Failover TCP/IP'. When such guest starts Hyper-V host sends it KVP_OP_SET_IP_INFO message as soon as we finish negotiation procedure. The problem is that it can happen (and it actually happens) before userspace daemon connects and we reply with HV_E_FAIL to the message. As there are no repetitions we fail to set the requested IP. Solve the issue by postponing our reply to the negotiation message till userspace daemon is connected. We can't wait too long as there is a host-side timeout (cca. 75 seconds) and if we fail to reply in this time frame the whole KVP service will become inactive. The solution is not ideal - if it takes userspace daemon more than 60 seconds to connect IP Failover will still fail but I don't see a solution with our current separation between kernel and userspace parts. Other two modules (VSS and FCOPY) don't require such delay, leave them untouched. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Dexuan Cui Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_kvp.c | 31 +++++++++++++++++++++++++++++++ drivers/hv/hyperv_vmbus.h | 5 +++++ 2 files changed, 36 insertions(+) diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index cd3fb0151e8a..ff0a42618b50 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -78,9 +78,11 @@ static void kvp_send_key(struct work_struct *dummy); static void kvp_respond_to_host(struct hv_kvp_msg *msg, int error); static void kvp_timeout_func(struct work_struct *dummy); +static void kvp_host_handshake_func(struct work_struct *dummy); static void kvp_register(int); static DECLARE_DELAYED_WORK(kvp_timeout_work, kvp_timeout_func); +static DECLARE_DELAYED_WORK(kvp_host_handshake_work, kvp_host_handshake_func); static DECLARE_WORK(kvp_sendkey_work, kvp_send_key); static const char kvp_devname[] = "vmbus/hv_kvp"; @@ -131,6 +133,11 @@ static void kvp_timeout_func(struct work_struct *dummy) hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); } +static void kvp_host_handshake_func(struct work_struct *dummy) +{ + hv_poll_channel(kvp_transaction.recv_channel, hv_kvp_onchannelcallback); +} + static int kvp_handle_handshake(struct hv_kvp_msg *msg) { switch (msg->kvp_hdr.operation) { @@ -155,6 +162,12 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg) pr_debug("KVP: userspace daemon ver. %d registered\n", KVP_OP_REGISTER); kvp_register(dm_reg_value); + + /* + * If we're still negotiating with the host cancel the timeout + * work to not poll the channel twice. + */ + cancel_delayed_work_sync(&kvp_host_handshake_work); hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); return 0; @@ -595,7 +608,22 @@ void hv_kvp_onchannelcallback(void *context) struct icmsg_negotiate *negop = NULL; int util_fw_version; int kvp_srv_version; + static enum {NEGO_NOT_STARTED, + NEGO_IN_PROGRESS, + NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED; + if (host_negotiatied == NEGO_NOT_STARTED && + kvp_transaction.state < HVUTIL_READY) { + /* + * If userspace daemon is not connected and host is asking + * us to negotiate we need to delay to not lose messages. + * This is important for Failover IP setting. + */ + host_negotiatied = NEGO_IN_PROGRESS; + schedule_delayed_work(&kvp_host_handshake_work, + HV_UTIL_NEGO_TIMEOUT * HZ); + return; + } if (kvp_transaction.state > HVUTIL_READY) return; @@ -673,6 +701,8 @@ void hv_kvp_onchannelcallback(void *context) vmbus_sendpacket(channel, recv_buffer, recvlen, requestid, VM_PKT_DATA_INBAND, 0); + + host_negotiatied = NEGO_FINISHED; } } @@ -711,6 +741,7 @@ hv_kvp_init(struct hv_util_service *srv) void hv_kvp_deinit(void) { kvp_transaction.state = HVUTIL_DEVICE_DYING; + cancel_delayed_work_sync(&kvp_host_handshake_work); cancel_delayed_work_sync(&kvp_timeout_work); cancel_work_sync(&kvp_sendkey_work); hvutil_transport_destroy(hvt); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 75e383e6d03d..15e06493c53a 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -35,6 +35,11 @@ */ #define HV_UTIL_TIMEOUT 30 +/* + * Timeout for guest-host handshake for services. + */ +#define HV_UTIL_NEGO_TIMEOUT 60 + /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HVCPUID_VERSION_FEATURES). From e34c69f2094a07b0565feb444dff58a53ccf0958 Mon Sep 17 00:00:00 2001 From: Long Li Date: Sun, 30 Apr 2017 16:21:19 -0700 Subject: [PATCH 188/812] HV: properly delay KVP packets when negotiation is in progress commit a3ade8cc474d848676278660e65f5af1e9e094d9 upstream. The host may send multiple negotiation packets (due to timeout) before the KVP user-mode daemon is connected. KVP user-mode daemon is connected. We need to defer processing those packets until the daemon is negotiated and connected. It's okay for guest to respond to all negotiation packets. In addition, the host may send multiple staged KVP requests as soon as negotiation is done. We need to properly process those packets using one tasklet for exclusive access to ring buffer. This patch is based on the work of Nick Meier . The above is the original changelog of a3ade8cc474d ("HV: properly delay KVP packets when negotiation is in progress" Here I re-worked the original patch because the mainline version can't work for the linux-4.4.y branch, on which channel->callback_event doesn't exist yet. In the mainline, channel->callback_event was added by: 631e63a9f346 ("vmbus: change to per channel tasklet"). Here we don't want to backport it to v4.4, as it requires extra supporting changes and fixes, which are unnecessary as to the KVP bug we're trying to resolve. NOTE: before this patch is used, we should cherry-pick the other related 3 patches from the mainline first: The background of this backport request is that: recently Wang Jian reported some KVP issues: https://github.com/LIS/lis-next/issues/593: e.g. the /var/lib/hyperv/.kvp_pool_* files can not be updated, and sometimes if the hv_kvp_daemon doesn't timely start, the host may not be able to query the VM's IP address via KVP. Reported-by: Wang Jian Tested-by: Wang Jian Signed-off-by: Dexuan Cui Signed-off-by: Long Li Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_kvp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index ff0a42618b50..1771a968c3f2 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -612,21 +612,22 @@ void hv_kvp_onchannelcallback(void *context) NEGO_IN_PROGRESS, NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED; - if (host_negotiatied == NEGO_NOT_STARTED && - kvp_transaction.state < HVUTIL_READY) { + if (kvp_transaction.state < HVUTIL_READY) { /* * If userspace daemon is not connected and host is asking * us to negotiate we need to delay to not lose messages. * This is important for Failover IP setting. */ - host_negotiatied = NEGO_IN_PROGRESS; - schedule_delayed_work(&kvp_host_handshake_work, + if (host_negotiatied == NEGO_NOT_STARTED) { + host_negotiatied = NEGO_IN_PROGRESS; + schedule_delayed_work(&kvp_host_handshake_work, HV_UTIL_NEGO_TIMEOUT * HZ); + } return; } if (kvp_transaction.state > HVUTIL_READY) return; - +recheck: vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 4, &recvlen, &requestid); @@ -703,6 +704,8 @@ void hv_kvp_onchannelcallback(void *context) VM_PKT_DATA_INBAND, 0); host_negotiatied = NEGO_FINISHED; + + goto recheck; } } From 24c2342b8e51ab3185e68470709904150a1e3ee0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 20 Oct 2018 09:52:38 +0200 Subject: [PATCH 189/812] Linux 4.4.162 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 57e4ff1a8b96..00ff2dd68ff1 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 161 +SUBLEVEL = 162 EXTRAVERSION = NAME = Blurry Fish Butt From 4518c70d5a4216fd68b4fde56f4f692d1ba91c0f Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 30 Aug 2018 21:33:31 +0800 Subject: [PATCH 190/812] f2fs: add additional sanity check in f2fs_acl_from_disk() Add additinal sanity check for irregular case(e.g. corruption). If size of extended attribution is smaller than size of acl header, then return -EINVAL. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 3f52efa0f94f..c65158a8bb06 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -53,6 +53,9 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); const char *end = value + size; + if (size < sizeof(struct f2fs_acl_header)) + return ERR_PTR(-EINVAL); + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) return ERR_PTR(-EINVAL); From ea89149780009de133f60752939a5bacfb69f61b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 4 Sep 2018 03:52:17 +0800 Subject: [PATCH 191/812] f2fs: fix to avoid NULL pointer dereference on se->discard_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://bugzilla.kernel.org/show_bug.cgi?id=200951 These is a NULL pointer dereference issue reported in bugzilla: Hi, in the setup there is a SATA SSD connected to a SATA-to-USB bridge. The disc is "Samsung SSD 850 PRO 256G" which supports TRIM. There are four partitions: sda1: FAT /boot sda2: F2FS / sda3: F2FS /home sda4: F2FS The bridge is ASMT1153e which uses the "uas" driver. There is no TRIM pass-through, so, when mounting it reports: mounting with "discard" option, but the device does not support discard The USB host is USB3.0 and UASP capable. It is the one on RK3399. Given this everything works fine, except there is no TRIM support. In order to enable TRIM a new UDEV rule is added [1]: /etc/udev/rules.d/10-sata-bridge-trim.rules: ACTION=="add|change", ATTRS{idVendor}=="174c", ATTRS{idProduct}=="55aa", SUBSYSTEM=="scsi_disk", ATTR{provisioning_mode}="unmap" After reboot any F2FS write hangs forever and dmesg reports: Unable to handle kernel NULL pointer dereference Also tested on a x86_64 system: works fine even with TRIM enabled. same disc same bridge different usb host controller different cpu architecture not root filesystem Regards, Vicenç. [1] Post #5 in https://bbs.archlinux.org/viewtopic.php?id=236280 Unable to handle kernel NULL pointer dereference at virtual address 000000000000003e Mem abort info: ESR = 0x96000004 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000626e3122 [000000000000003e] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Modules linked in: overlay snd_soc_hdmi_codec rc_cec dw_hdmi_i2s_audio dw_hdmi_cec snd_soc_simple_card snd_soc_simple_card_utils snd_soc_rockchip_i2s rockchip_rga snd_soc_rockchip_pcm rockchipdrm videobuf2_dma_sg v4l2_mem2mem rtc_rk808 videobuf2_memops analogix_dp videobuf2_v4l2 videobuf2_common dw_hdmi dw_wdt cec rc_core videodev drm_kms_helper media drm rockchip_thermal rockchip_saradc realtek drm_panel_orientation_quirks syscopyarea sysfillrect sysimgblt fb_sys_fops dwmac_rk stmmac_platform stmmac pwm_bl squashfs loop crypto_user gpio_keys hid_kensington CPU: 5 PID: 957 Comm: nvim Not tainted 4.19.0-rc1-1-ARCH #1 Hardware name: Sapphire-RK3399 Board (DT) pstate: 00000005 (nzcv daif -PAN -UAO) pc : update_sit_entry+0x304/0x4b0 lr : update_sit_entry+0x108/0x4b0 sp : ffff00000ca13bd0 x29: ffff00000ca13bd0 x28: 000000000000003e x27: 0000000000000020 x26: 0000000000080000 x25: 0000000000000048 x24: ffff8000ebb85cf8 x23: 0000000000000253 x22: 00000000ffffffff x21: 00000000000535f2 x20: 00000000ffffffdf x19: ffff8000eb9e6800 x18: ffff8000eb9e6be8 x17: 0000000007ce6926 x16: 000000001c83ffa8 x15: 0000000000000000 x14: ffff8000f602df90 x13: 0000000000000006 x12: 0000000000000040 x11: 0000000000000228 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 00000000000535f2 x6 : ffff8000ebff3440 x5 : ffff8000ebff3440 x4 : ffff8000ebe3a6c8 x3 : 00000000ffffffff x2 : 0000000000000020 x1 : 0000000000000000 x0 : ffff8000eb9e5800 Process nvim (pid: 957, stack limit = 0x0000000063a78320) Call trace: update_sit_entry+0x304/0x4b0 f2fs_invalidate_blocks+0x98/0x140 truncate_node+0x90/0x400 f2fs_remove_inode_page+0xe8/0x340 f2fs_evict_inode+0x2b0/0x408 evict+0xe0/0x1e0 iput+0x160/0x260 do_unlinkat+0x214/0x298 __arm64_sys_unlinkat+0x3c/0x68 el0_svc_handler+0x94/0x118 el0_svc+0x8/0xc Code: f9400800 b9488400 36080140 f9400f01 (387c4820) ---[ end trace a0f21a307118c477 ]--- The reason is it is possible to enable discard flag on block queue via UDEV, but during mount, f2fs will initialize se->discard_map only if this flag is set, once the flag is set after mount, f2fs may dereference NULL pointer on se->discard_map. So this patch does below changes to fix this issue: - initialize and update se->discard_map all the time. - don't clear DISCARD option if device has no QUEUE_FLAG_DISCARD flag during mount. - don't issue small discard on zoned block device. - introduce some functions to enhance the readability. Signed-off-by: Chao Yu Tested-by: Vicente Bergas Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 +-- fs/f2fs/f2fs.h | 15 ++++++++--- fs/f2fs/file.c | 2 +- fs/f2fs/segment.c | 65 ++++++++++++++++++++--------------------------- fs/f2fs/super.c | 16 +++--------- 5 files changed, 46 insertions(+), 55 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 214a968962a1..ebe649d9793c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -190,8 +190,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); - if (f2fs_discard_en(sbi)) - si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 787df98db916..bff413119ccb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3484,11 +3484,20 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi, } #endif -static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { - struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + return f2fs_sb_has_blkzoned(sbi->sb); +} - return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb); +static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) +{ + return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)); +} + +static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) +{ + return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) || + f2fs_hw_should_discard(sbi); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8e381b6385e3..09893856ca0d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1978,7 +1978,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!f2fs_hw_support_discard(F2FS_SB(sb))) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fdc17721e41e..17ae3567ef6a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1805,11 +1805,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) + if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi)) return false; if (!force) { - if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -1915,7 +1915,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, dirty_i->nr_dirty[PRE]--; } - if (!test_opt(sbi, DISCARD)) + if (!f2fs_realtime_discard_enable(sbi)) continue; if (force && start >= cpc->trim_start && @@ -2105,8 +2105,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) del = 0; } - if (f2fs_discard_en(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) + if (!f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ @@ -2134,8 +2133,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) del = 0; } - if (f2fs_discard_en(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) + if (f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -2751,7 +2749,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) * discard option. User configuration looks like using runtime discard * or periodic fstrim instead of it. */ - if (test_opt(sbi, DISCARD)) + if (f2fs_realtime_discard_enable(sbi)) goto out; start_block = START_BLOCK(sbi, start_segno); @@ -3843,13 +3841,11 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; #endif - if (f2fs_discard_en(sbi)) { - sit_i->sentries[start].discard_map - = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, - GFP_KERNEL); - if (!sit_i->sentries[start].discard_map) - return -ENOMEM; - } + sit_i->sentries[start].discard_map + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, + GFP_KERNEL); + if (!sit_i->sentries[start].discard_map) + return -ENOMEM; } sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -3997,18 +3993,16 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) total_node_blocks += se->valid_blocks; /* build discard map only one time */ - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - - se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; } if (sbi->segs_per_sec > 1) @@ -4046,16 +4040,13 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (IS_NODESEG(se->type)) total_node_blocks += se->valid_blocks; - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks; - sbi->discard_blks -= se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks; + sbi->discard_blks -= se->valid_blocks; } if (sbi->segs_per_sec > 1) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5e329c022e74..c8034729a7d0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -360,7 +360,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct request_queue *q; substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; @@ -415,14 +414,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; break; case Opt_discard: - q = bdev_get_queue(sb->s_bdev); - if (blk_queue_discard(q)) { - set_opt(sbi, DISCARD); - } else if (!f2fs_sb_has_blkzoned(sb)) { - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } + set_opt(sbi, DISCARD); break; case Opt_nodiscard: if (f2fs_sb_has_blkzoned(sb)) { @@ -1032,7 +1024,8 @@ static void f2fs_put_super(struct super_block *sb) /* be sure to wait for any on-going discard commands */ dropped = f2fs_wait_discard_bios(sbi); - if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) { + if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && + !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; @@ -1400,8 +1393,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev))) - set_opt(sbi, DISCARD); + set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi->sb)) set_opt_mode(sbi, F2FS_MOUNT_LFS); else From 810579fccebf14472c7813381eecaad398231c82 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 31 Aug 2018 15:09:26 +0530 Subject: [PATCH 192/812] f2fs: fix unnecessary periodic wakeup of discard thread when dev is busy When dev is busy, discard thread wake up timeout can be aligned with the exact time that it needs to wait for dev to come out of busy. This helps to avoid unnecessary periodic wakeups and thus save some power. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 17ae3567ef6a..cba6697a9ee2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1644,6 +1644,8 @@ static int issue_discard_thread(void *data) struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; + unsigned long interval = sbi->interval_time[REQ_TIME] * HZ; + long delta; set_freezable(); @@ -1680,7 +1682,11 @@ static int issue_discard_thread(void *data) __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; } else if (issued == -1){ - wait_ms = dpolicy.mid_interval; + delta = (sbi->last_time[REQ_TIME] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); + else + wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; } From 09206dd9ac2ab85a7f3e628de14b410137f68618 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Aug 2018 17:17:47 +0800 Subject: [PATCH 193/812] Revert "f2fs: use printk_ratelimited for f2fs_msg" Don't limit printing log, so that we will not miss any key messages. This reverts commit a36c106dffb616250117efb1cab271c19a8f94ff. In addition, we use printk_ratelimited to avoid too many log prints. - error injection - discard submission failure Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/segment.c | 6 +++--- fs/f2fs/super.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bff413119ccb..631f7a1348ab 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1371,9 +1371,9 @@ struct f2fs_sb_info { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, f2fs_fault_name[type], \ +#define f2fs_show_injection_info(type) \ + printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ + KERN_INFO, f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cba6697a9ee2..8eb01d4c0ce3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -905,9 +905,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, dc->error = 0; if (dc->error) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard(%u, %u, %u) failed, ret: %d", - dc->lstart, dc->start, dc->len, dc->error); + printk_ratelimited( + "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c8034729a7d0..acfe792dd523 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -207,7 +207,7 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); va_end(args); } From 9175c6044f0175531f87046edad266e11eed088e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Aug 2018 21:18:00 -0700 Subject: [PATCH 194/812] f2fs: avoid wrong decrypted data from disk 1. Create a file in an encrypted directory 2. Do GC & drop caches 3. Read stale data before its bio for metapage was not issued yet Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 18 ++++++++++-------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 3 +-- fs/f2fs/segment.c | 6 +++++- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 267d907104a1..fd85f9ede1a8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -563,9 +563,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, ctx->bio = bio; ctx->enabled_steps = post_read_steps; bio->bi_private = ctx; - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_block_writeback(sbi, blkaddr); } return bio; @@ -580,6 +577,9 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, if (IS_ERR(bio)) return PTR_ERR(bio); + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, blkaddr); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; @@ -1555,6 +1555,12 @@ static int f2fs_mpage_readpages(struct address_space *mapping, } } + /* + * If the page is under writeback, we need to wait for + * its completion to see the correct decrypted data. + */ + f2fs_wait_on_block_writeback(inode, block_nr); + if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; @@ -1622,7 +1628,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio) return 0; /* wait for GCed page writeback via META_MAPPING */ - f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); + f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, @@ -2379,10 +2385,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_wait_on_page_writeback(page, DATA, false); - /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, blkaddr); - if (len == PAGE_SIZE || PageUptodate(page)) return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 631f7a1348ab..ead9916582df 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3027,7 +3027,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 09893856ca0d..c774e32d5ebb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -115,8 +115,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_wait_on_page_writeback(page, DATA, false); /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8eb01d4c0ce3..20a68557e4f5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3276,10 +3276,14 @@ void f2fs_wait_on_page_writeback(struct page *page, } } -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *cpage; + if (!f2fs_post_read_required(inode)) + return; + if (!is_valid_data_blkaddr(sbi, blkaddr)) return; From 0da4a7250d0dacb7b7ea022194da6d9c6d2eb4dc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Sep 2018 11:40:12 -0700 Subject: [PATCH 195/812] f2fs: submit bio after shutdown Sometimes, some merged IOs could get a chance to be submitted, resulting in system hang in shutdown test. This issues IOs all the time after shutdown. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index fd85f9ede1a8..ae22394a969c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -531,6 +531,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) if (fio->in_list) goto next; out: + if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) + __submit_merged_bio(io); up_write(&io->io_rwsem); } From 5ad8ed96fd822f9f301628ab2908a7873753ea64 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 26 Jun 2018 13:12:43 +0800 Subject: [PATCH 196/812] f2fs: report error if quota off error during umount Now, we depend on fsck to ensure quota file data is ok, so we scan whole partition if checkpoint without umount flag. It's same for quota off error case, which may make quota file data inconsistent. generic/019 reports below error: __quota_error: 1160 callbacks suppressed Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota VFS: Busy inodes after unmount of zram1. Self-destruct in 5 seconds. Have a nice day... If we failed in below path due to fail to write dquot block, we will miss to release quota inode, fix it. - f2fs_put_super - f2fs_quota_off_umount - f2fs_quota_off - f2fs_quota_sync <-- failed - dquot_quota_off <-- missed to call Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index acfe792dd523..68290ea51660 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1845,7 +1845,9 @@ static int f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - f2fs_quota_sync(sb, type); + err = f2fs_quota_sync(sb, type); + if (err) + goto out_put; err = dquot_quota_off(sb, type); if (err || f2fs_sb_has_quota_ino(sb)) @@ -1864,9 +1866,20 @@ static int f2fs_quota_off(struct super_block *sb, int type) void f2fs_quota_off_umount(struct super_block *sb) { int type; + int err; - for (type = 0; type < MAXQUOTAS; type++) - f2fs_quota_off(sb, type); + for (type = 0; type < MAXQUOTAS; type++) { + err = f2fs_quota_off(sb, type); + if (err) { + int ret = dquot_quota_off(sb, type); + + f2fs_msg(sb, KERN_ERR, + "Fail to turn off disk quota " + "(type: %d, err: %d, ret:%d), Please " + "run fsck to fix it.", type, err, ret); + set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + } + } } #if 0 From 4f36be46ade61d0d32aefd777212adb6770cee10 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Aug 2018 17:11:05 +0800 Subject: [PATCH 197/812] f2fs: fix to flush all dirty inodes recovered in readonly fs generic/417 reported as blow: ------------[ cut here ]------------ kernel BUG at /home/yuchao/git/devf2fs/inode.c:695! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 21697 Comm: umount Tainted: G W O 4.18.0-rc2+ #39 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 EIP: f2fs_evict_inode+0x556/0x580 [f2fs] Call Trace: ? _raw_spin_unlock+0x2c/0x50 evict+0xa8/0x170 dispose_list+0x34/0x40 evict_inodes+0x118/0x120 generic_shutdown_super+0x41/0x100 ? rcu_read_lock_sched_held+0x97/0xa0 kill_block_super+0x22/0x50 kill_f2fs_super+0x6f/0x80 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x81/0xa0 exit_to_usermode_loop+0x59/0xa7 do_fast_syscall_32+0x1f5/0x22c entry_SYSENTER_32+0x53/0x86 EIP: f2fs_evict_inode+0x556/0x580 [f2fs] It can simply reproduced with scripts: Enable quota feature during mkfs. Testcase1: 1. mkfs.f2fs /dev/zram0 2. mount -t f2fs /dev/zram0 /mnt/f2fs 3. xfs_io -f /mnt/f2fs/file -c "pwrite 0 4k" -c "fsync" 4. godown /mnt/f2fs 5. umount /mnt/f2fs 6. mount -t f2fs -o ro /dev/zram0 /mnt/f2fs 7. umount /mnt/f2fs Testcase2: 1. mkfs.f2fs /dev/zram0 2. mount -t f2fs /dev/zram0 /mnt/f2fs 3. touch /mnt/f2fs/file 4. create process[pid = x] do: a) open /mnt/f2fs/file; b) unlink /mnt/f2fs/file 5. godown -f /mnt/f2fs 6. kill process[pid = x] 7. umount /mnt/f2fs 8. mount -t f2fs -o ro /dev/zram0 /mnt/f2fs 9. umount /mnt/f2fs The reason is: during recovery, i_{c,m}time of inode will be updated, then the inode can be set dirty w/o being tracked in sbi->inode_list[DIRTY_META] global list, so later write_checkpoint will not flush such dirty inode into node page. Once umount is called, sync_filesystem() in generic_shutdown_super() will skip syncng dirty inodes due to sb_rdonly check, leaving dirty inodes there. To solve this issue, during umount, add remove SB_RDONLY flag in sb->s_flags, to make sure sync_filesystem() will not be skipped. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/recovery.c | 14 +++++++++----- fs/f2fs/super.c | 3 +++ 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f7cdd3b536e3..646d3497bde7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -697,6 +697,8 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); out: + set_sbi_flag(sbi, SBI_IS_RECOVERED); + #ifdef CONFIG_QUOTA /* Turn quotas off */ if (quota_enabled) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ead9916582df..2ace2b36a2d3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1153,6 +1153,7 @@ enum { SBI_NEED_SB_WRITE, /* need to recover superblock */ SBI_NEED_CP, /* need to checkpoint */ SBI_IS_SHUTDOWN, /* shutdown by ioctl */ + SBI_IS_RECOVERED, /* recovered orphan/data */ }; enum { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 501bb0fdda1b..e34ca1686e3b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -697,11 +697,15 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list); - if (!err && need_writecp) { - struct cp_control cpc = { - .reason = CP_RECOVERY, - }; - err = f2fs_write_checkpoint(sbi, &cpc); + if (need_writecp) { + set_sbi_flag(sbi, SBI_IS_RECOVERED); + + if (!err) { + struct cp_control cpc = { + .reason = CP_RECOVERY, + }; + err = f2fs_write_checkpoint(sbi, &cpc); + } } kmem_cache_destroy(fsync_entry_slab); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 68290ea51660..51888dcabb35 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3187,6 +3187,9 @@ static void kill_f2fs_super(struct super_block *sb) }; f2fs_write_checkpoint(sbi, &cpc); } + + if (is_sbi_flag_set(sbi, SBI_IS_RECOVERED) && f2fs_readonly(sb)) + sb->s_flags &= ~MS_RDONLY; } kill_block_super(sb); } From 41e7c940b30d135a5424ae1fad018945e0e39008 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 31 Aug 2018 22:33:50 +0800 Subject: [PATCH 198/812] f2fs: cache NULL when both default_acl and acl are NULL default_acl and acl of newly created inode will be initiated as ACL_NOT_CACHED in vfs function inode_init_always() and later will be updated by calling xxx_init_acl() in specific filesystems. Howerver, when default_acl and acl are NULL then they keep the value of ACL_NOT_CACHED, this patch tries to cache NULL for acl/default_acl in this case. Signed-off-by: Chengguang Xu Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index c65158a8bb06..409537cbf4d3 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -400,12 +400,16 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); posix_acl_release(default_acl); + } else { + inode->i_default_acl = NULL; } if (acl) { if (!error) error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); + } else { + inode->i_acl = NULL; } return error; From e95ef46b0d135eba8e15f9aee140f71bd83c8714 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Sep 2018 14:54:01 +0800 Subject: [PATCH 199/812] f2fs: fix memory leak of write_io in fill_super() It needs to release memory allocated for sbi->write_io in error path, otherwise, it will cause memory leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 51888dcabb35..c96aa5f3ee80 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2879,7 +2879,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) GFP_KERNEL); if (!sbi->write_io[i]) { err = -ENOMEM; - goto free_options; + goto free_bio_info; } for (j = HOT; j < n; j++) { From 9c051754e9eaf69424f1c0df2c61a9b1a5d0f7c7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Sep 2018 14:54:02 +0800 Subject: [PATCH 200/812] f2fs: fix memory leak of percpu counter in fill_super() In fill_super -> init_percpu_info, we should destroy percpu counter in error path, otherwise memory allcoated for percpu counter will leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c96aa5f3ee80..14ac38a1dbe7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2462,8 +2462,12 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; - return percpu_counter_init(&sbi->total_valid_inode_count, 0, + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); + if (err) + percpu_counter_destroy(&sbi->alloc_valid_block_count); + + return err; } #ifdef CONFIG_BLK_DEV_ZONED From 99255f120022d82457d8d71b526a92b7f37f4dd2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 Sep 2018 20:34:12 +0800 Subject: [PATCH 201/812] f2fs: fix to do sanity check with current segment number https://bugzilla.kernel.org/show_bug.cgi?id=200219 Reproduction way: - mount image - run poc code - umount image F2FS-fs (loop1): Bitmap was wrongly set, blk:15364 ------------[ cut here ]------------ kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 EIP: update_sit_entry+0x459/0x4e0 [f2fs] Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44 EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001 ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282 CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0 Call Trace: f2fs_allocate_data_block+0x124/0x580 [f2fs] do_write_page+0x78/0x150 [f2fs] f2fs_do_write_node_page+0x25/0xa0 [f2fs] __write_node_page+0x2bf/0x550 [f2fs] f2fs_sync_node_pages+0x60e/0x6d0 [f2fs] ? sync_inode_metadata+0x2f/0x40 ? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs] ? up_write+0x1e/0x80 f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs] ? mark_held_locks+0x5d/0x80 ? _raw_spin_unlock_irq+0x27/0x50 kill_f2fs_super+0x68/0x90 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x81/0xa0 exit_to_usermode_loop+0x59/0xa7 do_fast_syscall_32+0x1f5/0x22c entry_SYSENTER_32+0x53/0x86 EIP: 0xb7f95c51 Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000 ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246 Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs] ---[ end trace d423f83982cfcdc5 ]--- The reason is, different log headers using the same segment, once one log's next block address is used by another log, it will cause panic as above. Main area: 24 segs, 24 secs 24 zones - COLD data: 0, 0, 0 - WARM data: 1, 1, 1 - HOT data: 20, 20, 20 - Dir dnode: 22, 22, 22 - File dnode: 22, 22, 22 - Indir nodes: 21, 21, 21 So this patch adds sanity check to detect such condition to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 14ac38a1dbe7..2d50b4f9f1d2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2329,7 +2329,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int segment_count_main; unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count; - int i; + int i, j; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -2370,11 +2370,43 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_node_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Node segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); + return 1; + } + } + } + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u) and Data segment (%u)" + " has the same segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); From cdbab19b219f61f218b62c072cacf460c569e234 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 7 Sep 2018 19:49:07 +0800 Subject: [PATCH 202/812] f2fs: plug readahead IO in readdir() Add a plug to merge readahead IO in readdir(), expecting it can reduce bio count before submitting to block layer. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 086639556705..2f2c29b6f5f9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -784,9 +784,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, struct f2fs_dir_entry *de = NULL; struct fscrypt_str de_name = FSTR_INIT(NULL, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); + struct blk_plug plug; + bool readdir_ra = sbi->readdir_ra == 1; + int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); + if (readdir_ra) + blk_start_plug(&plug); + while (bit_pos < d->max) { bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); if (bit_pos >= d->max) @@ -806,29 +812,33 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; - int err; err = fscrypt_fname_disk_to_usr(d->inode, (u32)de->hash_code, 0, &de_name, fstr); if (err) - return err; + goto out; de_name = *fstr; fstr->len = save_len; } if (!dir_emit(ctx, de_name.name, de_name.len, - le32_to_cpu(de->ino), d_type)) - return 1; + le32_to_cpu(de->ino), d_type)) { + err = 1; + goto out; + } - if (sbi->readdir_ra == 1) + if (readdir_ra) f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); ctx->pos = start_pos + bit_pos; } - return 0; +out: + if (readdir_ra) + blk_finish_plug(&plug); + return err; } static int f2fs_readdir(struct file *file, struct dir_context *ctx) From 3a365d71681a1b389337efefab99f3ae30061ab5 Mon Sep 17 00:00:00 2001 From: Zhikang Zhang Date: Mon, 10 Sep 2018 16:18:25 +0800 Subject: [PATCH 203/812] f2fs: avoid sleeping under spin_lock In the call trace below, we might sleep in function dput(). So in order to avoid sleeping under spin_lock, we remove f2fs_mark_inode_dirty_sync from __try_update_largest_extent && __drop_largest_extent. BUG: sleeping function called from invalid context at fs/dcache.c:796 Call trace: dump_backtrace+0x0/0x3f4 show_stack+0x24/0x30 dump_stack+0xe0/0x138 ___might_sleep+0x2a8/0x2c8 __might_sleep+0x78/0x10c dput+0x7c/0x750 block_dump___mark_inode_dirty+0x120/0x17c __mark_inode_dirty+0x344/0x11f0 f2fs_mark_inode_dirty_sync+0x40/0x50 __insert_extent_tree+0x2e0/0x2f4 f2fs_update_extent_tree_range+0xcf4/0xde8 f2fs_update_extent_cache+0x114/0x12c f2fs_update_data_blkaddr+0x40/0x50 write_data_page+0x150/0x314 do_write_data_page+0x648/0x2318 __write_data_page+0xdb4/0x1640 f2fs_write_cache_pages+0x768/0xafc __f2fs_write_data_pages+0x590/0x1218 f2fs_write_data_pages+0x64/0x74 do_writepages+0x74/0xe4 __writeback_single_inode+0xdc/0x15f0 writeback_sb_inodes+0x574/0xc98 __writeback_inodes_wb+0x190/0x204 wb_writeback+0x730/0xf14 wb_check_old_data_flush+0x1bc/0x1c8 wb_workfn+0x554/0xf74 process_one_work+0x440/0x118c worker_thread+0xac/0x974 kthread+0x1a0/0x1c8 ret_from_fork+0x10/0x1c Signed-off-by: Zhikang Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 51 ++++++++++++++++++++++++++---------------- fs/f2fs/f2fs.h | 7 +++--- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 231b77ef5a53..a70cd2580eae 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -308,14 +308,13 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, return count - atomic_read(&et->node_cnt); } -static void __drop_largest_extent(struct inode *inode, +static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; - - if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { - largest->len = 0; - f2fs_mark_inode_dirty_sync(inode, true); + if (fofs < et->largest.fofs + et->largest.len && + fofs + len > et->largest.fofs) { + et->largest.len = 0; + et->largest_updated = true; } } @@ -416,12 +415,11 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, return ret; } -static struct extent_node *__try_merge_extent_node(struct inode *inode, +static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { @@ -443,7 +441,7 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); spin_lock(&sbi->extent_lock); if (!list_empty(&en->list)) { @@ -454,12 +452,11 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, return en; } -static struct extent_node *__insert_extent_tree(struct inode *inode, +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, struct rb_node *insert_parent) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -476,7 +473,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); /* update in global extent list */ spin_lock(&sbi->extent_lock); @@ -497,6 +494,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, struct rb_node **insert_p = NULL, *insert_parent = NULL; unsigned int end = fofs + len; unsigned int pos = (unsigned int)fofs; + bool updated = false; if (!et) return; @@ -517,7 +515,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, * drop largest extent before lookup, in case it's already * been shrunk from extent tree */ - __drop_largest_extent(inode, fofs, len); + __drop_largest_extent(et, fofs, len); /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, @@ -550,7 +548,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, end, end - dei.fofs + dei.blk, org_end - end); - en1 = __insert_extent_tree(inode, et, &ei, + en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL); next_en = en1; } else { @@ -570,7 +568,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, } if (parts) - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); else __release_extent_node(sbi, et, en); @@ -590,15 +588,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (blkaddr) { set_extent_info(&ei, fofs, blkaddr, len); - if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en)) - __insert_extent_tree(inode, et, &ei, + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { - __drop_largest_extent(inode, 0, UINT_MAX); + et->largest.len = 0; + et->largest_updated = true; set_inode_flag(inode, FI_NO_EXTENT); } } @@ -606,7 +605,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (is_inode_flag_set(inode, FI_NO_EXTENT)) __free_extent_tree(sbi, et); + if (et->largest_updated) { + et->largest_updated = false; + updated = true; + } + write_unlock(&et->lock); + + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) @@ -705,6 +712,7 @@ void f2fs_drop_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + bool updated = false; if (!f2fs_may_extent_tree(inode)) return; @@ -713,8 +721,13 @@ void f2fs_drop_extent_tree(struct inode *inode) write_lock(&et->lock); __free_extent_tree(sbi, et); - __drop_largest_extent(inode, 0, UINT_MAX); + if (et->largest.len) { + et->largest.len = 0; + updated = true; + } write_unlock(&et->lock); + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } void f2fs_destroy_extent_tree(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2ace2b36a2d3..c533a0fd357b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -637,6 +637,7 @@ struct extent_tree { struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ + bool largest_updated; /* largest extent updated */ }; /* @@ -819,12 +820,12 @@ static inline bool __is_front_mergeable(struct extent_info *cur, } extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct inode *inode, - struct extent_tree *et, struct extent_node *en) +static inline void __try_update_largest_extent(struct extent_tree *et, + struct extent_node *en) { if (en->ei.len > et->largest.len) { et->largest = en->ei; - f2fs_mark_inode_dirty_sync(inode, true); + et->largest_updated = true; } } From c4503a88feddedb22232ad48fc15698f4bd087b8 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 12 Sep 2018 13:32:52 +0800 Subject: [PATCH 204/812] f2fs: surround fault_injection related option parsing using CONFIG_F2FS_FAULT_INJECTION It's a little bit strange when fault_injection related options fail with -EINVAL which were already disabled from config, so surround all fault_injection related option parsing code using CONFIG_F2FS_FAULT_INJECTION. Meanwhile, slightly change warning message to keep consistency with option POSIX_ACL and FS_XATTR. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2d50b4f9f1d2..cbc485b5967f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -594,28 +594,31 @@ static int parse_options(struct super_block *sb, char *options) } F2FS_OPTION(sbi).write_io_size_bits = arg; break; +#ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; -#ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); set_opt(sbi, FAULT_INJECTION); -#else - f2fs_msg(sb, KERN_INFO, - "FAULT_INJECTION was not selected"); -#endif break; + case Opt_fault_type: if (args->from && match_int(args, &arg)) return -EINVAL; -#ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, 0, arg); set_opt(sbi, FAULT_INJECTION); -#else - f2fs_msg(sb, KERN_INFO, - "FAULT_INJECTION was not selected"); -#endif break; +#else + case Opt_fault_injection: + f2fs_msg(sb, KERN_INFO, + "fault_injection options not supported"); + break; + + case Opt_fault_type: + f2fs_msg(sb, KERN_INFO, + "fault_type options not supported"); + break; +#endif case Opt_lazytime: sb->s_flags |= MS_LAZYTIME; break; From 5d020af76e6de17167293ded5b1b37bd766228f3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Sep 2018 09:16:07 +0800 Subject: [PATCH 205/812] f2fs: add SPDX license identifiers Remove the verbose license text from f2fs files and replace them with SPDX tags. This does not change the license of any of the code. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 5 +---- fs/f2fs/acl.h | 5 +---- fs/f2fs/checkpoint.c | 5 +---- fs/f2fs/data.c | 5 +---- fs/f2fs/debug.c | 5 +---- fs/f2fs/dir.c | 5 +---- fs/f2fs/extent_cache.c | 5 +---- fs/f2fs/f2fs.h | 5 +---- fs/f2fs/file.c | 5 +---- fs/f2fs/gc.c | 5 +---- fs/f2fs/gc.h | 5 +---- fs/f2fs/hash.c | 5 +---- fs/f2fs/inline.c | 4 +--- fs/f2fs/inode.c | 5 +---- fs/f2fs/namei.c | 5 +---- fs/f2fs/node.c | 5 +---- fs/f2fs/node.h | 5 +---- fs/f2fs/recovery.c | 5 +---- fs/f2fs/segment.c | 5 +---- fs/f2fs/segment.h | 5 +---- fs/f2fs/shrinker.c | 5 +---- fs/f2fs/super.c | 5 +---- fs/f2fs/sysfs.c | 5 +---- fs/f2fs/trace.c | 5 +---- fs/f2fs/trace.h | 5 +---- fs/f2fs/xattr.c | 5 +---- fs/f2fs/xattr.h | 5 +---- include/linux/f2fs_fs.h | 5 +---- 28 files changed, 28 insertions(+), 111 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 409537cbf4d3..f82f916e1f2c 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.c * * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include "f2fs.h" diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 2c685185c24d..b96823c59b15 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.h * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.h * * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_ACL_H__ #define __F2FS_ACL_H__ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 646d3497bde7..5a16a4ddbc01 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/checkpoint.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ae22394a969c..d95a8fa00e44 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/data.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ebe649d9793c..d3c402183e3c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs debugging statistics * @@ -5,10 +6,6 @@ * http://www.samsung.com/ * Copyright (c) 2012 Linux Foundation * Copyright (c) 2012 Greg Kroah-Hartman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2f2c29b6f5f9..a3302e2888dc 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/dir.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index a70cd2580eae..904ad7ba5a45 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs extent cache support * @@ -5,10 +6,6 @@ * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim * Chao Yu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c533a0fd357b..c96766c267fb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/f2fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c774e32d5ebb..ce0b32031294 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/file.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ada8b8056cd0..7e3f323254ef 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index c8619e408009..bbac9d3787bd 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define GC_THREAD_MIN_WB_PAGES 1 /* * a threshold to determine diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index eb2e031ea887..cc82f142f811 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/hash.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext3/hash.c * * Copyright (C) 2002 by Theodore Ts'o - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a628c747e693..8caaab0685c7 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inline.c * Copyright (c) 2013, Intel Corporation * Authors: Huajun Li * Haicheng Li - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 292f787a65e2..63e88eac5db3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inode.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 49cc29df9800..82c1f8824806 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/namei.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1af0805915b4..7d11854028e8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0f4db7a61254..1c73d879a9bc 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ /* start node id of a node block dedicated to the given node id */ #define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e34ca1686e3b..4e9bcdda38df 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/recovery.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 20a68557e4f5..7c61edd8c11f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b3d9e317ff0c..086150028c6d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 36cfd816c160..9e13db994fdf 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs shrinker support * the basic infra was copied from fs/ubifs/shrinker.c * * Copyright (c) 2015 Motorola Mobility * Copyright (c) 2015 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cbc485b5967f..f0ef16eca5e9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/super.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 30fd016afeb3..4c88b0720cdd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs sysfs interface * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ * Copyright (c) 2017 Chao Yu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index a1fcd00bbb2b..ce2a5eb210b6 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 67db24ac1e85..e8075fc5b228 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_TRACE_H__ #define __F2FS_TRACE_H__ diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 152078bb4829..c7c3c2a63a85 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.c * @@ -13,10 +14,6 @@ * suggestion of Luka Renko . * xattr consolidation Copyright (c) 2004 James Morris , * Red Hat Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 08a4840d6d7d..595d2af00a58 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.h * @@ -9,10 +10,6 @@ * On-disk format of extended attributes for the ext2 filesystem. * * (C) 2001 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_XATTR_H__ #define __F2FS_XATTR_H__ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8e6a18582566..8b2b48bb53ff 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /** * include/linux/f2fs_fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_FS_H #define _LINUX_F2FS_FS_H From f1aab15eb34071ada2405437f8b4aa273c0d93fb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Sep 2018 09:22:29 +0800 Subject: [PATCH 206/812] f2fs: split IO error injection according to RW This patch adds to support injecting error for write IO, this can simulate IO error like fail_make_request or dm_flakey does. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 3 ++- fs/f2fs/data.c | 9 +++++++-- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/super.c | 3 ++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 0c8bdd38cefd..c32641476f5e 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -172,9 +172,10 @@ fault_type=%d Support configuring fault injection type, should be FAULT_DIR_DEPTH 0x000000100 FAULT_EVICT_INODE 0x000000200 FAULT_TRUNCATE 0x000000400 - FAULT_IO 0x000000800 + FAULT_READ_IO 0x000000800 FAULT_CHECKPOINT 0x000001000 FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d95a8fa00e44..0d76ff25c437 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -122,8 +122,8 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { - f2fs_show_injection_info(FAULT_IO); + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) { + f2fs_show_injection_info(FAULT_READ_IO); bio->bi_error = -EIO; } @@ -144,6 +144,11 @@ static void f2fs_write_end_io(struct bio *bio) struct bio_vec *bvec; int i; + if (time_to_inject(sbi, FAULT_WRITE_IO)) { + f2fs_show_injection_info(FAULT_WRITE_IO); + bio->bi_error = -EIO; + } + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c96766c267fb..5580f21e35ae 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -52,9 +52,10 @@ enum { FAULT_DIR_DEPTH, FAULT_EVICT_INODE, FAULT_TRUNCATE, - FAULT_IO, + FAULT_READ_IO, FAULT_CHECKPOINT, FAULT_DISCARD, + FAULT_WRITE_IO, FAULT_MAX, }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f0ef16eca5e9..0bd53c8be22d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -50,9 +50,10 @@ char *f2fs_fault_name[FAULT_MAX] = { [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", [FAULT_TRUNCATE] = "truncate fail", - [FAULT_IO] = "IO error", + [FAULT_READ_IO] = "read IO error", [FAULT_CHECKPOINT] = "checkpoint error", [FAULT_DISCARD] = "discard error", + [FAULT_WRITE_IO] = "write IO error", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, From 47c4cfebd60f9e001bb50bb266a6899777a806c3 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 19 Sep 2018 14:18:47 +0530 Subject: [PATCH 207/812] f2fs: add new idle interval timing for discard and gc paths This helps to control the frequency of submission of discard and GC requests independently, based on the need. Suggested-by: Chao Yu Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 17 +++++++++++++- fs/f2fs/f2fs.h | 30 ++++++++++++++++++++++--- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 14 +++++------- fs/f2fs/super.c | 2 ++ fs/f2fs/sysfs.c | 5 +++++ 6 files changed, 56 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 3bbb9fe9548c..6b5e31f470dc 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -121,7 +121,22 @@ What: /sys/fs/f2fs//idle_interval Date: January 2016 Contact: "Jaegeuk Kim" Description: - Controls the idle timing. + Controls the idle timing for all paths other than + discard and gc path. + +What: /sys/fs/f2fs//discard_idle_interval +Date: September 2018 +Contact: "Chao Yu" +Contact: "Sahitya Tummala" +Description: + Controls the idle timing for discard path. + +What: /sys/fs/f2fs//gc_idle_interval +Date: September 2018 +Contact: "Chao Yu" +Contact: "Sahitya Tummala" +Description: + Controls the idle timing for gc path. What: /sys/fs/f2fs//iostat_enable Date: August 2017 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5580f21e35ae..9f4a1a22f0d7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1158,6 +1158,8 @@ enum { enum { CP_TIME, REQ_TIME, + DISCARD_TIME, + GC_TIME, MAX_TIME, }; @@ -1409,7 +1411,15 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { - sbi->last_time[type] = jiffies; + unsigned long now = jiffies; + + sbi->last_time[type] = now; + + /* DISCARD_TIME and GC_TIME are based on REQ_TIME */ + if (type == REQ_TIME) { + sbi->last_time[DISCARD_TIME] = now; + sbi->last_time[GC_TIME] = now; + } } static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) @@ -1419,7 +1429,21 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) return time_after(jiffies, sbi->last_time[type] + interval); } -static inline bool is_idle(struct f2fs_sb_info *sbi) +static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, + int type) +{ + unsigned long interval = sbi->interval_time[type] * HZ; + unsigned int wait_ms = 0; + long delta; + + delta = (sbi->last_time[type] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); + + return wait_ms; +} + +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { struct block_device *bdev = sbi->sb->s_bdev; struct request_queue *q = bdev_get_queue(bdev); @@ -1428,7 +1452,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi) if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) return false; - return f2fs_time_over(sbi, REQ_TIME); + return f2fs_time_over(sbi, type); } /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7e3f323254ef..5c80146a578e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -80,7 +80,7 @@ static int gc_thread_func(void *data) if (!mutex_trylock(&sbi->gc_mutex)) goto next; - if (!is_idle(sbi)) { + if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); goto next; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7c61edd8c11f..384185e3a2f4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -508,7 +508,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) else f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi) && + if (!is_idle(sbi, REQ_TIME) && (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) return; @@ -1388,7 +1388,7 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, if (dc->state != D_PREP) goto next; - if (dpolicy->io_aware && !is_idle(sbi)) { + if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; } @@ -1448,7 +1448,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, dc->state != D_PREP); if (dpolicy->io_aware && i < dpolicy->io_aware_gran && - !is_idle(sbi)) { + !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; } @@ -1641,8 +1641,6 @@ static int issue_discard_thread(void *data) struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; - unsigned long interval = sbi->interval_time[REQ_TIME] * HZ; - long delta; set_freezable(); @@ -1679,10 +1677,8 @@ static int issue_discard_thread(void *data) __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; } else if (issued == -1){ - delta = (sbi->last_time[REQ_TIME] + interval) - jiffies; - if (delta > 0) - wait_ms = jiffies_to_msecs(delta); - else + wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME); + if (!wait_ms) wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0bd53c8be22d..5ddfa0d0b386 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2465,6 +2465,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 4c88b0720cdd..5a252d78d178 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -404,6 +404,9 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, + interval_time[DISCARD_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); @@ -457,6 +460,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), + ATTR_LIST(discard_idle_interval), + ATTR_LIST(gc_idle_interval), ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), From fa0b9799279721b5e8db54ef334fb6dda62563ce Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Sep 2018 15:45:19 -0700 Subject: [PATCH 208/812] f2fs: avoid infinite loop in f2fs_alloc_nid If we have an error in f2fs_build_free_nids, we're able to fall into a loop to find free nids. Suggested-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7d11854028e8..4dc98f5f2e84 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2353,8 +2353,9 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) spin_unlock(&nm_i->nid_list_lock); /* Let's scan nat pages and its caches to get free nids */ - f2fs_build_free_nids(sbi, true, false); - goto retry; + if (!f2fs_build_free_nids(sbi, true, false)) + goto retry; + return false; } /* From 6502c983b93d6b033529259ca3e29896c26863f0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Sep 2018 17:41:30 +0800 Subject: [PATCH 209/812] f2fs: fix to recover inode's uid/gid during POR Step to reproduce this bug: 1. logon as root 2. mount -t f2fs /dev/sdd /mnt; 3. touch /mnt/file; 4. chown system /mnt/file; chgrp system /mnt/file; 5. xfs_io -f /mnt/file -c "fsync"; 6. godown /mnt; 7. umount /mnt; 8. mount -t f2fs /dev/sdd /mnt; After step 8) we will expect file's uid/gid are all system, but during recovery, these two fields were not been recovered, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4e9bcdda38df..a990a49a0723 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -209,6 +209,8 @@ static void recover_inode(struct inode *inode, struct page *page) char *name; inode->i_mode = le16_to_cpu(raw->i_mode); + i_uid_write(inode, le32_to_cpu(raw->i_uid)); + i_gid_write(inode, le32_to_cpu(raw->i_gid)); f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); From bf3a5ea5c868f9a5fa6f313aba46e98c52b54895 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 22 Sep 2018 22:43:09 +0800 Subject: [PATCH 210/812] f2fs: fix remount problem of option io_bits Currently we show mount option "io_bits=%u" as "io_size=%uKB", it will cause option parsing problem(unrecognized mount option) in remount. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5ddfa0d0b386..24c390dbed1a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1331,7 +1331,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) from_kgid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) - seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); + seq_printf(seq, ",io_bits=%u", + F2FS_OPTION(sbi).write_io_size_bits); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", From 80b258f2a6c58bdf444848daaf009c6ddcc983b6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Sep 2018 13:25:04 -0700 Subject: [PATCH 211/812] f2fs: report ENOENT correctly in f2fs_rename This fixes wrong error report in f2fs_rename. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 82c1f8824806..f85241dc8140 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -821,7 +821,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; bool is_old_inline = f2fs_has_inline_dentry(old_dir); - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; @@ -845,6 +845,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -1010,7 +1011,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; @@ -1031,6 +1032,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) From 202de66f20d1b349d0360e89272aa81346785a27 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Sep 2018 15:28:40 -0700 Subject: [PATCH 212/812] f2fs: update i_size after DIO completion This is related to ee70daaba82d ("xfs: update i_size after unwritten conversion in dio completion") If we update i_size during dio_write, dio_read can read out stale data, which breaks xfstests/465. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 15 +++++++-------- fs/f2fs/f2fs.h | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0d76ff25c437..739417e335ae 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -878,7 +878,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) struct f2fs_summary sum; struct node_info ni; block_t old_blkaddr; - pgoff_t fofs; blkcnt_t count = 1; int err; @@ -907,12 +906,10 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) old_blkaddr, old_blkaddr); f2fs_set_data_blkaddr(dn); - /* update i_size */ - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - f2fs_i_size_write(dn->inode, - ((loff_t)(fofs + 1) << PAGE_SHIFT)); + /* + * i_size will be updated by direct_IO. Otherwise, we'll get stale + * data from unwritten block via dio_read. + */ return 0; } @@ -1078,6 +1075,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, last_ofs_in_node = dn.ofs_in_node; } } else { + WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && + flag != F2FS_GET_BLOCK_DIO); err = __allocate_data_block(&dn, map->m_seg_type); if (!err) @@ -1257,7 +1256,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DEFAULT, NULL, + F2FS_GET_BLOCK_DIO, NULL, f2fs_rw_hint_to_seg_type( inode->i_write_hint)); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9f4a1a22f0d7..5850b3f1300f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -664,6 +664,7 @@ enum { F2FS_GET_BLOCK_DEFAULT, F2FS_GET_BLOCK_FIEMAP, F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_DIO, F2FS_GET_BLOCK_PRE_DIO, F2FS_GET_BLOCK_PRE_AIO, F2FS_GET_BLOCK_PRECACHE, From 01ca8a368ff7bd8a74eb2b6c229a7ac2ed42372b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:58 +0800 Subject: [PATCH 213/812] f2fs: fix to recover inode's project id during POR Testcase to reproduce this bug: 1. mkfs.f2fs -O extra_attr -O project_quota /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr -p 1 /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr -p /mnt/f2fs/file 0 -----------------N- /mnt/f2fs/file But actually, we expect the correct result is: 1 -----------------N- /mnt/f2fs/file The reason is we didn't recover inode.i_projid field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index a990a49a0723..7bb254956110 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -211,6 +211,19 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mode = le16_to_cpu(raw->i_mode); i_uid_write(inode, le32_to_cpu(raw->i_uid)); i_gid_write(inode, le32_to_cpu(raw->i_gid)); + + if (raw->i_inline & F2FS_EXTRA_ATTR) { + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), + i_projid)) { + projid_t i_projid; + + i_projid = (projid_t)le32_to_cpu(raw->i_projid); + F2FS_I(inode)->i_projid = + make_kprojid(&init_user_ns, i_projid); + } + } + f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); From a19aff4b0517357a26d0f849a6455c0a946b600c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:59 +0800 Subject: [PATCH 214/812] f2fs: fix to recover inode's i_flags during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +A /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr /mnt/f2fs/file -----------------N- /mnt/f2fs/file But actually, we expect the corrct result is: -------A---------N- /mnt/f2fs/file The reason is we didn't recover inode.i_flags field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7bb254956110..c0d110d26656 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -233,6 +233,7 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); F2FS_I(inode)->i_advise = raw->i_advise; + F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); recover_inline_flags(inode, raw); From 9dd0bfb443446971e63c2f9a1cb4bc9d51081f7d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:00 +0800 Subject: [PATCH 215/812] f2fs: fix to recover inode's i_gc_failures during POR inode.i_gc_failures is used to indicate that skip count of migrating on blocks of inode, we should guarantee it can be recovered in sudden power-off case. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c0d110d26656..14c156a256ca 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -234,6 +234,8 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = + le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); From 2238c3783157606539f7872e6499b847324835fc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:01 +0800 Subject: [PATCH 216/812] f2fs: fix to recover inode's crtime during POR Testcase to reproduce this bug: 1. mkfs.f2fs -O extra_attr -O inode_crtime /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. xfs_io -f /mnt/f2fs/file -c "fsync" 5. godown /mnt/f2fs 6. umount /mnt/f2fs 7. mount -t f2fs /dev/sdd /mnt/f2fs 8. xfs_io -f /mnt/f2fs/file -c "statx -r" stat.btime.tv_sec = 0 stat.btime.tv_nsec = 0 This patch fixes to recover inode creation time fields during mount. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4dc98f5f2e84..0706116c6ea4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2561,6 +2561,13 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; + + if (f2fs_sb_has_inode_crtime(sbi->sb) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_crtime_nsec)) { + dst->i_crtime = src->i_crtime; + dst->i_crtime_nsec = src->i_crtime_nsec; + } } new_ni = old_ni; From 4b4f4610ed661ec3666282acc2eced3a2807756d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:03 +0800 Subject: [PATCH 217/812] f2fs: mark inode dirty explicitly in recover_inode() Mark inode dirty explicitly in the end of recover_inode() to make sure that all recoverable fields can be persisted later. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 14c156a256ca..6682c8d86a51 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -239,6 +239,8 @@ static void recover_inode(struct inode *inode, struct page *page) recover_inline_flags(inode, raw); + f2fs_mark_inode_dirty_sync(inode, true); + if (file_enc_name(inode)) name = ""; else From 614fe66d2715fbb6e3f73e12d89816cf2e7bc3a9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Sep 2018 17:36:06 -0700 Subject: [PATCH 218/812] f2fs: avoid f2fs_bug_on if f2fs_get_meta_page_nofail got EIO This patch avoids BUG_ON when f2fs_get_meta_page_nofail got EIO during xfstests/generic/475. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 10 +++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 12 ++++++++++++ fs/f2fs/node.c | 32 ++++++++++++++++++++++++-------- fs/f2fs/recovery.c | 2 ++ fs/f2fs/segment.c | 3 +++ 6 files changed, 47 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5a16a4ddbc01..6c24f543e2d8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -119,11 +119,8 @@ struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index) if (PTR_ERR(page) == -EIO && ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; - f2fs_stop_checkpoint(sbi, false); - f2fs_bug_on(sbi, 1); } - return page; } @@ -1497,7 +1494,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - f2fs_flush_nat_entries(sbi, cpc); + err = f2fs_flush_nat_entries(sbi, cpc); + if (err) + goto stop; + f2fs_flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ @@ -1506,7 +1506,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_release_discard_addrs(sbi); else f2fs_clear_prefree_segments(sbi, cpc); - +stop: unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5850b3f1300f..c356629f5089 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2996,7 +2996,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int f2fs_build_node_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_node_manager_caches(void); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5c80146a578e..7e8bde2dd279 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1066,6 +1066,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, /* reference all summary page */ while (segno < end_segno) { sum_page = f2fs_get_sum_page(sbi, segno++); + if (IS_ERR(sum_page)) { + int err = PTR_ERR(sum_page); + + end_segno = segno - 1; + for (segno = start_segno; segno < end_segno; segno++) { + sum_page = find_get_page(META_MAPPING(sbi), + GET_SUM_BLOCK(sbi, segno)); + f2fs_put_page(sum_page, 0); + f2fs_put_page(sum_page, 0); + } + return err; + } unlock_page(sum_page); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0706116c6ea4..9a03c2844c93 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -126,6 +126,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_page = get_current_nat_page(sbi, nid); + if (IS_ERR(src_page)) + return src_page; dst_page = f2fs_grab_meta_page(sbi, dst_off); f2fs_bug_on(sbi, PageDirty(src_page)); @@ -2268,15 +2270,19 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, nm_i->nat_block_bitmap)) { struct page *page = get_current_nat_page(sbi, nid); - ret = scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + } else { + ret = scan_nat_page(sbi, page, nid); + f2fs_put_page(page, 1); + } if (ret) { up_read(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, !mount); f2fs_msg(sbi->sb, KERN_ERR, "NAT is corrupt, run fsck to fix it"); - return -EINVAL; + return ret; } } @@ -2711,7 +2717,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, __clear_bit_le(nat_index, nm_i->full_nat_bits); } -static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, +static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct nat_entry_set *set, struct cp_control *cpc) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2735,6 +2741,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, down_write(&curseg->journal_rwsem); } else { page = get_next_nat_page(sbi, start_nid); + if (IS_ERR(page)) + return PTR_ERR(page); + nat_blk = page_address(page); f2fs_bug_on(sbi, !nat_blk); } @@ -2780,12 +2789,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); kmem_cache_free(nat_entry_set_slab, set); } + return 0; } /* * This function is called during the checkpointing process. */ -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2795,6 +2805,7 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int found; nid_t set_idx = 0; LIST_HEAD(sets); + int err = 0; /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ if (enabled_nat_bits(sbi, cpc)) { @@ -2804,7 +2815,7 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } if (!nm_i->dirty_nat_cnt) - return; + return 0; down_write(&nm_i->nat_tree_lock); @@ -2827,11 +2838,16 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* flush dirty nats in nat entry set */ - list_for_each_entry_safe(set, tmp, &sets, set_list) - __flush_nat_entry_set(sbi, set, cpc); + list_for_each_entry_safe(set, tmp, &sets, set_list) { + err = __flush_nat_entry_set(sbi, set, cpc); + if (err) + break; + } up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ + + return err; } static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6682c8d86a51..b069b7d7403d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -375,6 +375,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, } sum_page = f2fs_get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return PTR_ERR(sum_page); sum_node = (struct f2fs_summary_block *)page_address(sum_page); sum = sum_node->entries[blkoff]; f2fs_put_page(sum_page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 384185e3a2f4..2e9c6dd183b6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2509,6 +2509,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) __next_free_blkoff(sbi, curseg, 0); sum_page = f2fs_get_sum_page(sbi, new_segno); + f2fs_bug_on(sbi, IS_ERR(sum_page)); sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); @@ -3984,6 +3985,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se = &sit_i->sentries[start]; page = get_current_sit_page(sbi, start); + if (IS_ERR(page)) + return PTR_ERR(page); sit_blk = (struct f2fs_sit_block *)page_address(page); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); From a3b89f58fc863aab72afee892ab24fc3a2fd550b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Sep 2018 15:25:21 -0700 Subject: [PATCH 219/812] f2fs: return correct errno in f2fs_gc This fixes overriding error number in f2fs_gc. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7e8bde2dd279..ff2708bde48d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1253,7 +1253,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, put_gc_inode(&gc_list); - if (sync) + if (sync && !ret) ret = sec_freed ? 0 : -EAGAIN; return ret; } From 129674a584f4bfcf3e69d4fad6614dfff1906f55 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Sep 2018 22:15:31 -0700 Subject: [PATCH 220/812] f2fs: fix missing up_read This patch fixes missing up_read call. Fixes: c9b60788fc76 ("f2fs: fix to do sanity check with block address in main area") Cc: # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9a03c2844c93..bde71a04cf95 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1541,8 +1541,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, } if (__is_valid_data_blkaddr(ni.blk_addr) && - !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { + up_read(&sbi->node_write); goto redirty_out; + } if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= WRITE_FLUSH_FUA; From 092a13da9c4edd8c0782004febb3af1d40a80800 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Sep 2018 00:24:39 -0700 Subject: [PATCH 221/812] f2fs: keep lazytime on remount This patch fixes losing lazytime when remounting f2fs. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 24c390dbed1a..f30a6b67b7b4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1551,6 +1551,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); limit_reserve_root(sbi); + *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); return 0; restore_gc: if (need_restart_gc) { From fb32c954b6a90f029382e4f35434fb071fdf682f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Sep 2018 18:31:27 +0800 Subject: [PATCH 222/812] f2fs: add to account meta IO This patch supports to account meta IO, it enables to show write IO from f2fs more comprehensively via 'status' debugfs entry. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 13 +++++++++++++ fs/f2fs/f2fs.h | 15 +++++++++++++++ fs/f2fs/segment.c | 1 + 3 files changed, 29 insertions(+) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index d3c402183e3c..da1cabbc4973 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -118,6 +118,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } + for (i = META_CP; i < META_MAX; i++) + si->meta_count[i] = atomic_read(&sbi->meta_count[i]); + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -329,6 +332,13 @@ static int stat_show(struct seq_file *s, void *v) si->prefree_count, si->free_segs, si->free_secs); seq_printf(s, "CP calls: %d (BG: %d)\n", si->cp_count, si->bg_cp_count); + seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]); + seq_printf(s, " - sit blocks : %u\n", + si->meta_count[META_SIT]); + seq_printf(s, " - nat blocks : %u\n", + si->meta_count[META_NAT]); + seq_printf(s, " - ssa blocks : %u\n", + si->meta_count[META_SSA]); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", @@ -441,6 +451,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); if (!si) @@ -466,6 +477,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->inplace_count, 0); + for (i = META_CP; i < META_MAX; i++) + atomic_set(&sbi->meta_count[i], 0); atomic_set(&sbi->aw_cnt, 0); atomic_set(&sbi->vw_cnt, 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c356629f5089..ba94df0c117a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -269,6 +269,7 @@ enum { META_NAT, META_SIT, META_SSA, + META_MAX, META_POR, DATA_GENERIC, META_GENERIC, @@ -1325,6 +1326,7 @@ struct f2fs_sb_info { */ #ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ + atomic_t meta_count[META_MAX]; /* # of meta blocks */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ @@ -3208,6 +3210,7 @@ struct f2fs_stat_info { int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; + unsigned int meta_count[META_MAX]; unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; @@ -3259,6 +3262,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_meta_count(sbi, blkaddr) \ + do { \ + if (blkaddr < SIT_I(sbi)->sit_base_addr) \ + atomic_inc(&(sbi)->meta_count[META_CP]); \ + else if (blkaddr < NM_I(sbi)->nat_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SIT]); \ + else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_NAT]); \ + else if (blkaddr < SM_I(sbi)->main_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SSA]); \ + } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -3346,6 +3360,7 @@ void f2fs_destroy_root_stats(void); #define stat_inc_volatile_write(inode) do { } while (0) #define stat_dec_volatile_write(inode) do { } while (0) #define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_meta_count(sbi, blkaddr) do { } while (0) #define stat_inc_seg_type(sbi, curseg) do { } while (0) #define stat_inc_block_count(sbi, curseg) do { } while (0) #define stat_inc_inplace_blocks(sbi) do { } while (0) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2e9c6dd183b6..841ba43fc02b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3099,6 +3099,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, ClearPageError(page); f2fs_submit_page_write(&fio); + stat_inc_meta_count(sbi, page->index); f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } From c95f10ed9fe0adbb785b2572d47bd3631c7584a5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Sep 2018 18:31:28 +0800 Subject: [PATCH 223/812] f2fs: add to account skip count of background GC This patch adds to account skip count of background GC, and show stat info via 'status' debugfs entry. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++++ fs/f2fs/f2fs.h | 7 +++++++ fs/f2fs/gc.c | 14 +++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index da1cabbc4973..75bc62edc4c1 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -101,6 +101,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->bg_gc = sbi->bg_gc; + si->io_skip_bggc = sbi->io_skip_bggc; + si->other_skip_bggc = sbi->other_skip_bggc; si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC]; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -355,6 +357,8 @@ static int stat_show(struct seq_file *s, void *v) si->skipped_atomic_files[BG_GC] + si->skipped_atomic_files[FG_GC], si->skipped_atomic_files[BG_GC]); + seq_printf(s, "BG skip : IO: %u, Other: %u\n", + si->io_skip_bggc, si->other_skip_bggc); seq_puts(s, "\nExtent Cache:\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", si->hit_largest, si->hit_cached, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ba94df0c117a..0df8903dfa92 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1342,6 +1342,8 @@ struct f2fs_sb_info { atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ int bg_gc; /* background gc calls */ + unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ + unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif spinlock_t stat_lock; /* lock for stat operations */ @@ -3189,6 +3191,7 @@ struct f2fs_stat_info { int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; + unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; int nr_discard_cmd; @@ -3226,6 +3229,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) +#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) #define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) @@ -3342,6 +3347,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_bg_cp_count(si) do { } while (0) #define stat_inc_call_count(si) do { } while (0) #define stat_inc_bggc_count(si) do { } while (0) +#define stat_io_skip_bggc_count(sbi) do { } while (0) +#define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) #define stat_inc_total_hit(sb) do { } while (0) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ff2708bde48d..4db16f7c6858 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -40,13 +40,16 @@ static int gc_thread_func(void *data) if (gc_th->gc_wake) gc_th->gc_wake = 0; - if (try_to_freeze()) + if (try_to_freeze()) { + stat_other_skip_bggc_count(sbi); continue; + } if (kthread_should_stop()) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { increase_sleep_time(gc_th, &wait_ms); + stat_other_skip_bggc_count(sbi); continue; } @@ -55,8 +58,10 @@ static int gc_thread_func(void *data) f2fs_stop_checkpoint(sbi, false); } - if (!sb_start_write_trylock(sbi->sb)) + if (!sb_start_write_trylock(sbi->sb)) { + stat_other_skip_bggc_count(sbi); continue; + } /* * [GC triggering condition] @@ -77,12 +82,15 @@ static int gc_thread_func(void *data) goto do_gc; } - if (!mutex_trylock(&sbi->gc_mutex)) + if (!mutex_trylock(&sbi->gc_mutex)) { + stat_other_skip_bggc_count(sbi); goto next; + } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); + stat_io_skip_bggc_count(sbi); goto next; } From f6a7a0346c35a6477f7b09b3a2f074e93cdafd7d Mon Sep 17 00:00:00 2001 From: Junling Zheng Date: Fri, 28 Sep 2018 20:25:56 +0800 Subject: [PATCH 224/812] f2fs: support superblock checksum Now we support crc32 checksum for superblock. Reviewed-by: Chao Yu Signed-off-by: Junling Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/super.c | 28 ++++++++++++++++++++++++++++ fs/f2fs/sysfs.c | 7 +++++++ include/linux/f2fs_fs.h | 3 ++- 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0df8903dfa92..4810e05d3173 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -150,6 +150,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_FEATURE_LOST_FOUND 0x0200 #define F2FS_FEATURE_VERITY 0x0400 /* reserved */ +#define F2FS_FEATURE_SB_CHKSUM 0x0800 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -3516,6 +3517,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f30a6b67b7b4..cca2ccbf18fe 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2182,6 +2182,26 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, (bh->b_data + F2FS_SUPER_OFFSET); struct super_block *sb = sbi->sb; unsigned int blocksize; + size_t crc_offset = 0; + __u32 crc = 0; + + /* Check checksum_offset and crc in superblock */ + if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) { + crc_offset = le32_to_cpu(raw_super->checksum_offset); + if (crc_offset != + offsetof(struct f2fs_super_block, crc)) { + f2fs_msg(sb, KERN_INFO, + "Invalid SB checksum offset: %zu", + crc_offset); + return 1; + } + crc = le32_to_cpu(raw_super->crc); + if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { + f2fs_msg(sb, KERN_INFO, + "Invalid SB checksum value: %u", crc); + return 1; + } + } if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { f2fs_msg(sb, KERN_INFO, @@ -2639,6 +2659,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *bh; + __u32 crc = 0; int err; if ((recover && f2fs_readonly(sbi->sb)) || @@ -2647,6 +2668,13 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return -EROFS; } + /* we should update superblock crc here */ + if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) { + crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), + offsetof(struct f2fs_super_block, crc)); + F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); + } + /* write back-up superblock first */ bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 5a252d78d178..db89741b219b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -117,6 +117,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_lost_found(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); + if (f2fs_sb_has_sb_chksum(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "sb_checksum"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -334,6 +337,7 @@ enum feat_id { FEAT_QUOTA_INO, FEAT_INODE_CRTIME, FEAT_LOST_FOUND, + FEAT_SB_CHECKSUM, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -350,6 +354,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: case FEAT_LOST_FOUND: + case FEAT_SB_CHECKSUM: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -434,6 +439,7 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); +F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -493,6 +499,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), ATTR_LIST(lost_found), + ATTR_LIST(sb_checksum), NULL, }; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8b2b48bb53ff..a5a5bd94551e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -109,7 +109,8 @@ struct f2fs_super_block { struct f2fs_device devs[MAX_DEVICES]; /* device list */ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ __u8 hot_ext_count; /* # of hot file extension */ - __u8 reserved[314]; /* valid reserved region */ + __u8 reserved[310]; /* valid reserved region */ + __le32 crc; /* checksum of superblock */ } __packed; /* From dbb428fbbb59fae08b01fc14d98d320d448877e0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 23:41:16 +0800 Subject: [PATCH 225/812] Revert: "f2fs: check last page index in cached bio to decide submission" There is one case that we can leave bio in f2fs, result in hanging page writeback waiter. Thread A Thread B - f2fs_write_cache_pages - f2fs_submit_page_write page #0 cached in bio #0 of cold log - f2fs_submit_page_write page #1 cached in bio #1 of warm log - f2fs_write_cache_pages - f2fs_submit_page_write bio is full, submit bio #1 contain page #1 - f2fs_submit_merged_write_cond(, page #1) fail to submit bio #0 due to page #1 is not in any cached bios. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +-- fs/f2fs/data.c | 38 +++++++++++++++++++------------------- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/node.c | 11 +++++------ fs/f2fs/segment.c | 11 +++++------ 5 files changed, 32 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6c24f543e2d8..96ad2358ddb2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -277,8 +277,7 @@ static int __f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, META); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); unlock_page(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 739417e335ae..71d7d929f324 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -319,8 +319,8 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, - struct inode *inode, nid_t ino, pgoff_t idx) +static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, + struct page *page, nid_t ino) { struct bio_vec *bvec; struct page *target; @@ -329,7 +329,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, if (!io->bio) return false; - if (!inode && !ino) + if (!inode && !page && !ino) return true; bio_for_each_segment_all(bvec, io->bio, i) { @@ -339,11 +339,10 @@ static bool __has_merged_page(struct f2fs_bio_info *io, else target = fscrypt_control_page(bvec->bv_page); - if (idx != target->index) - continue; - if (inode && inode == target->mapping->host) return true; + if (page && page == target) + return true; if (ino && ino == ino_of_node(target)) return true; } @@ -352,7 +351,8 @@ static bool __has_merged_page(struct f2fs_bio_info *io, } static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - nid_t ino, pgoff_t idx, enum page_type type) + struct page *page, nid_t ino, + enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); enum temp_type temp; @@ -363,7 +363,7 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, io = sbi->write_io[btype] + temp; down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, ino, idx); + ret = __has_merged_page(io, inode, page, ino); up_read(&io->io_rwsem); /* TODO: use HOT temp only for meta pages now. */ @@ -394,12 +394,12 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, bool force) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type, bool force) { enum temp_type temp; - if (!force && !has_merged_page(sbi, inode, ino, idx, type)) + if (!force && !has_merged_page(sbi, inode, page, ino, type)) return; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { @@ -418,10 +418,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, ino, idx, type, false); + __submit_merged_write_cond(sbi, inode, page, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) @@ -1948,7 +1948,7 @@ static int __write_data_page(struct page *page, bool *submitted, ClearPageUptodate(page); if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); clear_inode_flag(inode, FI_HOT_DATA); f2fs_remove_dirty_inode(inode); submitted = NULL; @@ -2006,10 +2006,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; int tag; + int nwritten = 0; pagevec_init(&pvec, 0); @@ -2112,7 +2112,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, done = 1; break; } else if (submitted) { - last_idx = page->index; + nwritten++; } if (--wbc->nr_to_write <= 0 && @@ -2134,9 +2134,9 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; - if (last_idx != ULONG_MAX) + if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, - 0, last_idx, DATA); + NULL, 0, DATA); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4810e05d3173..493188ef3023 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3117,8 +3117,8 @@ int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type); + struct inode *inode, struct page *page, + nid_t ino, enum page_type type); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); void f2fs_submit_page_write(struct f2fs_io_info *fio); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bde71a04cf95..c1936432154f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1565,8 +1565,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0, - page->index, NODE); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); submitted = NULL; } @@ -1631,13 +1630,13 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, unsigned int *seq_id) { pgoff_t index; - pgoff_t last_idx = ULONG_MAX; struct pagevec pvec; int ret = 0; struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; int nr_pages; + int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1715,7 +1714,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_put_page(last_page, 0); break; } else if (submitted) { - last_idx = page->index; + nwritten++; } if (page == last_page) { @@ -1741,8 +1740,8 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, goto retry; } out: - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE); + if (nwritten) + f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); return ret ? -EIO: 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 841ba43fc02b..5dd7d7cecabf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -371,7 +371,7 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) .io_type = FS_DATA_IO, }; struct list_head revoke_list; - pgoff_t last_idx = ULONG_MAX; + bool submit_bio = false; int err = 0; INIT_LIST_HEAD(&revoke_list); @@ -406,14 +406,14 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) } /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - last_idx = page->index; + submit_bio = true; } unlock_page(page); list_move_tail(&cur->list, &revoke_list); } - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); + if (submit_bio) + f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA); if (err) { /* @@ -3262,8 +3262,7 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, type); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); if (ordered) wait_on_page_writeback(page); else From a84324083292e3c923fad16668f5039f4d4af5c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 18:33:18 +0800 Subject: [PATCH 226/812] f2fs: refactor ->page_mkwrite() flow Thread A Thread B - f2fs_vm_page_mkwrite - f2fs_setattr - down_write(i_mmap_sem) - truncate_setsize - f2fs_truncate - up_write(i_mmap_sem) - f2fs_reserve_block reserve NEW_ADDR - skip dirty page due to truncation 1. we don't need to rserve new block address for a truncated page. 2. dn.data_blkaddr is used out of node page lock coverage. Refactor ->page_mkwrite() flow to fix above issues: - use __do_map_lock() to avoid racing checkpoint() - lock data page in prior to dnode page - cover f2fs_reserve_block with i_mmap_sem lock - wait page writeback before zeroing page Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 45 ++++++++++++++++++++++----------------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 71d7d929f324..586d87a2697a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -969,7 +969,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) return err; } -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 493188ef3023..79295c280c83 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3140,6 +3140,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size); int f2fs_do_write_data_page(struct f2fs_io_info *fio); +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ce0b32031294..d83e8dad12e9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -50,7 +50,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn; + struct dnode_of_data dn = { .node_changed = false }; int err; if (unlikely(f2fs_cp_error(sbi))) { @@ -62,19 +62,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); - /* block allocation */ - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, page->index); - if (err) { - f2fs_unlock_op(sbi); - goto out; - } - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); - - f2fs_balance_fs(sbi, dn.node_changed); - file_update_time(vma->vm_file); down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); @@ -86,11 +73,28 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, goto out_sem; } + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } + + /* fill the page */ + f2fs_wait_on_page_writeback(page, DATA, false); + + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + /* * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto mapped; + goto out_sem; /* page is wholly or partially inside EOF */ if (((loff_t)(page->index + 1) << PAGE_SHIFT) > @@ -107,16 +111,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); trace_f2fs_vm_page_mkwrite(page, DATA); -mapped: - /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); - - /* wait for GCed page writeback via META_MAPPING */ - f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); - out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); -out: + + f2fs_balance_fs(sbi, dn.node_changed); + sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); err: From 511f52d02f05dbefb355deed05632cc6f9ce9a19 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 18:34:52 +0800 Subject: [PATCH 227/812] f2fs: allow out-place-update for direct IO in LFS mode Normally, DIO uses in-pllace-update, but in LFS mode, f2fs doesn't allow triggering any in-place-update writes, so we fallback direct write to buffered write, result in bad performance of large size write. This patch adds to support triggering out-place-update for direct IO to enhance its performance. Note that it needs to exclude direct read IO during direct write, since new data writing to new block address will no be valid until write finished. storage: zram time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync" Before: real 0m13.061s user 0m0.327s sys 0m12.486s After: real 0m6.448s user 0m0.228s sys 0m6.212s Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 44 +++++++++++++++++++++++++++++++++++--------- fs/f2fs/f2fs.h | 45 +++++++++++++++++++++++++++++++++++++++++---- fs/f2fs/file.c | 3 ++- 3 files changed, 78 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 586d87a2697a..41fc5ff29f70 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -890,7 +890,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); - if (dn->data_blkaddr == NEW_ADDR) + if (dn->data_blkaddr != NULL_ADDR) goto alloc; if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) @@ -944,7 +944,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) if (direct_io) { map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); - flag = f2fs_force_buffered_io(inode, WRITE) ? + flag = f2fs_force_buffered_io(inode, iocb, from) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1063,7 +1063,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, goto sync_out; } - if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (is_valid_data_blkaddr(sbi, blkaddr)) { + /* use out-place-update for driect IO under LFS mode */ + if (test_opt(sbi, LFS) && create && + flag == F2FS_GET_BLOCK_DIO) { + err = __allocate_data_block(&dn, map->m_seg_type); + if (!err) + set_inode_flag(inode, FI_APPEND_WRITE); + } + } else { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -2483,35 +2491,52 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); size_t count = iov_iter_count(iter); int rw = iov_iter_rw(iter); int err; enum rw_hint hint = iocb->ki_hint; int whint_mode = F2FS_OPTION(sbi).whint_mode; + bool do_opu; err = check_direct_IO(inode, iter, offset); if (err) return err < 0 ? err : 0; - if (f2fs_force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, iocb, iter)) return 0; + do_opu = allow_outplace_dio(inode, iocb, iter); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); if (rw == WRITE && whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; - if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { - if (iocb->ki_flags & IOCB_NOWAIT) { + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { iocb->ki_hint = hint; err = -EAGAIN; goto out; } - down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { + up_read(&fi->i_gc_rwsem[rw]); + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + } else { + down_read(&fi->i_gc_rwsem[rw]); + if (do_opu) + down_read(&fi->i_gc_rwsem[READ]); } err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); - up_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + + if (do_opu) + up_read(&fi->i_gc_rwsem[READ]); + + up_read(&fi->i_gc_rwsem[rw]); if (rw == WRITE) { if (whint_mode == WHINT_MODE_OFF) @@ -2519,7 +2544,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); - set_inode_flag(inode, FI_UPDATE_WRITE); + if (!do_opu) + set_inode_flag(inode, FI_UPDATE_WRITE); } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 79295c280c83..64be3fa73043 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -8,6 +8,7 @@ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H +#include #include #include #include @@ -3576,11 +3577,47 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } -static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) +static inline int block_unaligned_IO(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) { - return (f2fs_post_read_required(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); + unsigned int i_blkbits = READ_ONCE(inode->i_blkbits); + unsigned int blocksize_mask = (1 << i_blkbits) - 1; + loff_t offset = iocb->ki_pos; + unsigned long align = offset | iov_iter_alignment(iter); + + return align & blocksize_mask; +} + +static inline int allow_outplace_dio(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + return (test_opt(sbi, LFS) && (rw == WRITE) && + !block_unaligned_IO(inode, iocb, iter)); +} + +static inline bool f2fs_force_buffered_io(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + if (f2fs_post_read_required(inode)) + return true; + if (sbi->s_ndevs) + return true; + /* + * for blkzoned device, fallback direct IO to buffered IO, so + * all IOs can be serialized by log-structured write. + */ + if (f2fs_sb_has_blkzoned(sbi->sb)) + return true; + if (test_opt(sbi, LFS) && (rw == WRITE) && + block_unaligned_IO(inode, iocb, iter)) + return true; + return false; } #ifdef CONFIG_F2FS_FAULT_INJECTION diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d83e8dad12e9..9c8d2cb4585e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2795,7 +2795,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!f2fs_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)) || f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, WRITE)) { + f2fs_force_buffered_io(inode, + iocb, from)) { clear_inode_flag(inode, FI_NO_PREALLOC); inode_unlock(inode); From bb423cfd60a4c6f62707e55e213a79e8d0f2bfd8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Sep 2018 13:54:33 -0700 Subject: [PATCH 228/812] f2fs: clear PageError on the read path When running fault injection test, I hit somewhat wrong behavior in f2fs_gc -> gc_data_segment(): 0. fault injection generated some PageError'ed pages 1. gc_data_segment -> f2fs_get_read_data_page(REQ_RAHEAD) 2. move_data_page -> f2fs_get_lock_data_page() -> f2f_get_read_data_page() -> f2fs_submit_page_read() -> submit_bio(READ) -> return EIO due to PageError -> fail to move data Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 41fc5ff29f70..a6daf54c331e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -76,7 +76,8 @@ static void __read_end_io(struct bio *bio) /* PG_error was set if any post_read step failed */ if (bio->bi_error || PageError(page)) { ClearPageUptodate(page); - SetPageError(page); + /* will re-read again later */ + ClearPageError(page); } else { SetPageUptodate(page); } @@ -588,6 +589,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, bio_put(bio); return -EFAULT; } + ClearPageError(page); __submit_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1575,6 +1577,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + ClearPageError(page); last_block_in_bio = block_nr; goto next_page; set_error_page: From 99efe50ef6cf0ba523ee32274fe0d58bc16e175f Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Aug 2018 19:21:43 -0700 Subject: [PATCH 229/812] f2fs: checkpoint disabling Note that, it requires "f2fs: return correct errno in f2fs_gc". This adds a lightweight non-persistent snapshotting scheme to f2fs. To use, mount with the option checkpoint=disable, and to return to normal operation, remount with checkpoint=enable. If the filesystem is shut down before remounting with checkpoint=enable, it will revert back to its apparent state when it was first mounted with checkpoint=disable. This is useful for situations where you wish to be able to roll back the state of the disk in case of some critical failure. Signed-off-by: Daniel Rosenberg [Jaegeuk Kim: use SB_RDONLY instead of MS_RDONLY] Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 5 ++ fs/f2fs/checkpoint.c | 12 +++ fs/f2fs/data.c | 14 +++- fs/f2fs/debug.c | 3 +- fs/f2fs/f2fs.h | 18 ++++- fs/f2fs/file.c | 12 ++- fs/f2fs/gc.c | 9 ++- fs/f2fs/inode.c | 6 +- fs/f2fs/namei.c | 19 +++++ fs/f2fs/segment.c | 98 ++++++++++++++++++++++- fs/f2fs/segment.h | 15 ++++ fs/f2fs/super.c | 124 ++++++++++++++++++++++++++++- include/linux/f2fs_fs.h | 1 + 13 files changed, 323 insertions(+), 13 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index c32641476f5e..348d80440433 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -212,6 +212,11 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix", non-atomic files likewise "nobarrier" mount option. test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt context. The fake fscrypt context is used by xfstests. +checkpoint=%s Set to "disable" to turn off checkpointing. Set to "enable" + to reenable checkpointing. Is enabled by default. While + disabled, any unmounting or unexpected shutdowns will cause + the filesystem contents to appear as they did when the + filesystem was mounted with that option. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 96ad2358ddb2..598424c1c4b1 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1211,6 +1211,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); @@ -1418,6 +1423,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + sbi->unusable_block_count = 0; __set_cp_next_pack(sbi); /* @@ -1442,6 +1448,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long long ckpt_ver; int err = 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (cpc->reason != CP_PAUSE) + return 0; + f2fs_msg(sbi->sb, KERN_WARNING, + "Start checkpoint disabled!"); + } mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a6daf54c331e..cdfdd6919dcc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -534,7 +534,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) if (fio->in_list) goto next; out: - if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) + if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || + f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); up_write(&io->io_rwsem); } @@ -1699,6 +1700,10 @@ static inline bool check_inplace_update_policy(struct inode *inode, is_inode_flag_set(inode, FI_NEED_IPU)) return true; + if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; + return false; } @@ -1729,6 +1734,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; } return false; } @@ -2349,6 +2357,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); + err = f2fs_is_checkpoint_ready(sbi); + if (err) + goto fail; + if ((f2fs_is_atomic_file(inode) && !f2fs_available_free_memory(sbi, INMEM_PAGES)) || is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 75bc62edc4c1..026e10f30889 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -272,7 +272,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n", si->sbi->sb->s_bdev, i++, f2fs_readonly(si->sbi->sb) ? "RO": "RW", - f2fs_cp_error(si->sbi) ? "Error": "Good"); + is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ? + "Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good")); seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", si->sit_area_segs, si->nat_area_segs); seq_printf(s, "[SSA: %d] [MAIN: %d", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 64be3fa73043..646012a70f4e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -101,6 +101,7 @@ extern char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_QUOTA 0x00400000 #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 +#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -246,6 +247,7 @@ enum { #define CP_RECOVERY 0x00000008 #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 +#define CP_PAUSE 0x00000040 #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ @@ -255,6 +257,7 @@ enum { #define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_INTERVAL 5 /* 5 secs */ struct cp_control { int reason; @@ -1157,6 +1160,7 @@ enum { SBI_NEED_CP, /* need to checkpoint */ SBI_IS_SHUTDOWN, /* shutdown by ioctl */ SBI_IS_RECOVERED, /* recovered orphan/data */ + SBI_CP_DISABLED, /* CP was disabled last mount */ }; enum { @@ -1164,6 +1168,7 @@ enum { REQ_TIME, DISCARD_TIME, GC_TIME, + DISABLE_TIME, MAX_TIME, }; @@ -1290,6 +1295,9 @@ struct f2fs_sb_info { block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ + /* Additional tracking for no checkpoint mode */ + block_t unusable_block_count; /* # of blocks saved by last cp */ + unsigned int nquota_files; /* # of quota sysfile */ u32 s_next_generation; /* for NFS support */ @@ -1800,7 +1808,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + avail_user_block_count -= sbi->unusable_block_count; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) @@ -2007,6 +2016,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + valid_block_count += sbi->unusable_block_count; if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); @@ -3030,6 +3041,8 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); @@ -3617,6 +3630,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) return true; + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) + return true; + return false; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9c8d2cb4585e..834f87e75dff 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -213,7 +213,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, }; unsigned int seq_id = 0; - if (unlikely(f2fs_readonly(inode->i_sb))) + if (unlikely(f2fs_readonly(inode->i_sb) || + is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; trace_f2fs_sync_file_enter(inode); @@ -2157,6 +2158,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + f2fs_msg(sbi->sb, KERN_INFO, + "Skipping Checkpoint. Checkpoints currently disabled."); + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -2528,6 +2535,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return -EINVAL; + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4db16f7c6858..6cdcd79baeb7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -370,6 +370,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (sec_usage_check(sbi, secno)) goto next; + /* Don't touch checkpointed data */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + get_ckpt_valid_blocks(sbi, segno))) + goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; @@ -1189,7 +1193,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, * threshold, we can make them free by checkpoint. Then, we * secure free segments which doesn't need fggc any more. */ - if (prefree_segments(sbi)) { + if (prefree_segments(sbi) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; @@ -1241,7 +1246,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, segno = NULL_SEGNO; goto gc_more; } - if (gc_type == FG_GC) + if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) ret = f2fs_write_checkpoint(sbi, &cpc); } stop: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 63e88eac5db3..ab845b558dd5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -607,6 +607,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; + if (f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; + /* * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. @@ -688,7 +691,8 @@ void f2fs_evict_inode(struct inode *inode) stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))) + if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); else f2fs_inode_synced(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f85241dc8140..b2cc6c315f62 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -16,6 +16,7 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include "xattr.h" #include "acl.h" #include @@ -269,6 +270,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -316,6 +320,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = fscrypt_prepare_link(old_dentry, dir, dentry); if (err) @@ -559,6 +566,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, &disk_link); @@ -690,6 +700,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -825,6 +838,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, @@ -1015,6 +1031,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5dd7d7cecabf..1b2ebd9b986d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -176,6 +176,8 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) return false; if (sbi->gc_mode == GC_URGENT) return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); @@ -480,6 +482,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); + if (f2fs_is_checkpoint_ready(sbi)) + return; + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. @@ -796,7 +801,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned short valid_blocks; + unsigned short valid_blocks, ckpt_valid_blocks; if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) return; @@ -804,8 +809,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); valid_blocks = get_valid_blocks(sbi, segno, false); + ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno); - if (valid_blocks == 0) { + if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) || + ckpt_valid_blocks == sbi->blocks_per_seg)) { __locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); } else if (valid_blocks < sbi->blocks_per_seg) { @@ -818,6 +825,66 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */ +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (IS_CURSEG(sbi, segno)) + continue; + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + block_t holes[2] = {0, 0}; /* DATA and NODE */ + struct seg_entry *se; + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + se = get_seg_entry(sbi, segno); + if (IS_NODESEG(se->type)) + holes[NODE] += sbi->blocks_per_seg - se->valid_blocks; + else + holes[DATA] += sbi->blocks_per_seg - se->valid_blocks; + } + mutex_unlock(&dirty_i->seglist_lock); + + if (holes[DATA] > ovp || holes[NODE] > ovp) + return -EAGAIN; + return 0; +} + +/* This is only used by SBI_CP_DISABLED */ +static unsigned int get_free_segment(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = 0; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (get_ckpt_valid_blocks(sbi, segno)) + continue; + mutex_unlock(&dirty_i->seglist_lock); + return segno; + } + mutex_unlock(&dirty_i->seglist_lock); + return NULL_SEGNO; +} + static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len) @@ -2108,7 +2175,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (IS_NODESEG(se->type)) { + if (IS_NODESEG(se->type) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -2130,6 +2198,15 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. If it was used + * before, we must track that to know how much space we + * really have. + */ + if (f2fs_test_bit(offset, se->ckpt_valid_map)) + sbi->unusable_block_count++; } if (f2fs_test_and_clear_bit(offset, se->discard_map)) @@ -2412,6 +2489,9 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (sbi->segs_per_sec != 1) return CURSEG_I(sbi, type)->segno; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; @@ -2556,6 +2636,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) return 1; } } + + /* find valid_blocks=0 in dirty list */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + segno = get_free_segment(sbi); + if (segno != NULL_SEGNO) { + curseg->next_segno = segno; + return 1; + } + } return 0; } @@ -2573,7 +2662,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) && + likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) new_curseg(sbi, type, false); else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 086150028c6d..ab3465faddf1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -339,6 +339,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, return get_seg_entry(sbi, segno)->valid_blocks; } +static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; +} + static inline void seg_info_from_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { @@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, reserved_sections(sbi) + needed); } +static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) +{ + if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (likely(!has_not_enough_free_secs(sbi, 0, 0))) + return 0; + return -ENOSPC; +} + static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) { return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cca2ccbf18fe..41e0609e7e30 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -136,6 +136,7 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, + Opt_checkpoint, Opt_err, }; @@ -194,6 +195,7 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_checkpoint, "checkpoint=%s"}, {Opt_err, NULL}, }; @@ -769,6 +771,23 @@ static int parse_options(struct super_block *sb, char *options) "Test dummy encryption mount option ignored"); #endif break; + case Opt_checkpoint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 6 && + !strncmp(name, "enable", 6)) { + clear_opt(sbi, DISABLE_CHECKPOINT); + } else if (strlen(name) == 7 && + !strncmp(name, "disable", 7)) { + set_opt(sbi, DISABLE_CHECKPOINT); + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -827,6 +846,12 @@ static int parse_options(struct super_block *sb, char *options) } } + if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + f2fs_msg(sb, KERN_ERR, + "LFS not compatible with checkpoint=disable\n"); + return -EINVAL; + } + /* Not pass down write hints if the number of active logs is lesser * than NR_CURSEG_TYPE. */ @@ -1014,8 +1039,8 @@ static void f2fs_put_super(struct super_block *sb) * But, the previous checkpoint was not done by umount, it needs to do * clean checkpoint again. */ - if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || - !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) { struct cp_control cpc = { .reason = CP_UMOUNT, }; @@ -1088,6 +1113,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (unlikely(f2fs_cp_error(sbi))) return 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; trace_f2fs_sync_fs(sb, sync); @@ -1187,6 +1214,11 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; + if (unlikely(buf->f_bfree <= sbi->unusable_block_count)) + buf->f_bfree = 0; + else + buf->f_bfree -= sbi->unusable_block_count; + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) buf->f_bavail = buf->f_bfree - F2FS_OPTION(sbi).root_reserved_blocks; @@ -1366,6 +1398,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) seq_printf(seq, ",alloc_mode=%s", "reuse"); + if (test_opt(sbi, DISABLE_CHECKPOINT)) + seq_puts(seq, ",checkpoint=disable"); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) seq_printf(seq, ",fsync_mode=%s", "posix"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) @@ -1394,6 +1429,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; + clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi->sb)) @@ -1414,6 +1450,57 @@ static void default_options(struct f2fs_sb_info *sbi) #ifdef CONFIG_QUOTA static int f2fs_enable_quotas(struct super_block *sb); #endif + +static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) +{ + struct cp_control cpc; + int err; + + sbi->sb->s_flags |= MS_ACTIVE; + + mutex_lock(&sbi->gc_mutex); + f2fs_update_time(sbi, DISABLE_TIME); + + while (!f2fs_time_over(sbi, DISABLE_TIME)) { + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err == -ENODATA) + break; + if (err && err != -EAGAIN) { + mutex_unlock(&sbi->gc_mutex); + return err; + } + } + mutex_unlock(&sbi->gc_mutex); + + err = sync_filesystem(sbi->sb); + if (err) + return err; + + if (f2fs_disable_cp_again(sbi)) + return -EAGAIN; + + mutex_lock(&sbi->gc_mutex); + cpc.reason = CP_PAUSE; + set_sbi_flag(sbi, SBI_CP_DISABLED); + f2fs_write_checkpoint(sbi, &cpc); + + sbi->unusable_block_count = 0; + mutex_unlock(&sbi->gc_mutex); + return 0; +} + +static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->gc_mutex); + f2fs_dirty_to_prefree(sbi); + + clear_sbi_flag(sbi, SBI_CP_DISABLED); + set_sbi_flag(sbi, SBI_IS_DIRTY); + mutex_unlock(&sbi->gc_mutex); + + f2fs_sync_fs(sbi->sb, 1); +} + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -1423,6 +1510,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); + bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); + bool checkpoint_changed; #ifdef CONFIG_QUOTA int i, j; #endif @@ -1467,6 +1556,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err = parse_options(sb, data); if (err) goto restore_opts; + checkpoint_changed = + disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT); /* * Previous and new state of filesystem is RO, @@ -1500,6 +1591,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if ((*flags & MS_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + err = -EINVAL; + f2fs_msg(sbi->sb, KERN_WARNING, + "disabling checkpoint not compatible with read-only"); + goto restore_opts; + } + /* * We stop the GC thread if FS is mounted as RO * or if background_gc = off is passed in mount @@ -1528,6 +1626,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_IS_CLOSE); } + if (checkpoint_changed) { + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto restore_gc; + } else { + f2fs_enable_checkpoint(sbi); + } + } + /* * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. @@ -2489,6 +2597,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) @@ -3100,6 +3209,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_meta; + if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) + goto skip_recovery; + /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { /* @@ -3139,6 +3251,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto free_meta; + } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { + f2fs_enable_checkpoint(sbi); + } + /* * If filesystem is not mounted as read-only then * do start the gc_thread. diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a5a5bd94551e..6c2786db5154 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -116,6 +116,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_DISABLED_FLAG 0x00001000 #define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 From 4bb9f775d5ec065c2ac04931839466a6eb34c05f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 13 Sep 2018 07:40:53 +0800 Subject: [PATCH 230/812] f2fs: submit cached bio to avoid endless PageWriteback When migrating encrypted block from background GC thread, we only add them into f2fs inner bio cache, but forget to submit the cached bio, it may cause potential deadlock when we are waiting page writebacked, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 71 +++++++++++++++++++++++++++++++++++--------------- fs/f2fs/node.c | 13 ++++++--- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 646012a70f4e..eb83794e9b38 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2996,7 +2996,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs); void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); struct page *f2fs_get_node_page_ra(struct page *parent, int start); -void f2fs_move_node_page(struct page *node_page, int gc_type); +int f2fs_move_node_page(struct page *node_page, int gc_type); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6cdcd79baeb7..843430b55a73 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -473,7 +473,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi, * On validity, copy that node with cold status, otherwise (invalid node) * ignore that. */ -static void gc_node_segment(struct f2fs_sb_info *sbi, +static int gc_node_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, unsigned int segno, int gc_type) { struct f2fs_summary *entry; @@ -481,6 +481,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, int off; int phase = 0; bool fggc = (gc_type == FG_GC); + int submitted = 0; start_addr = START_BLOCK(sbi, segno); @@ -494,10 +495,11 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, nid_t nid = le32_to_cpu(entry->nid); struct page *node_page; struct node_info ni; + int err; /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; @@ -534,7 +536,9 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, continue; } - f2fs_move_node_page(node_page, gc_type); + err = f2fs_move_node_page(node_page, gc_type); + if (!err && gc_type == FG_GC) + submitted++; stat_inc_node_blk_count(sbi, 1, gc_type); } @@ -543,6 +547,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, if (fggc) atomic_dec(&sbi->wb_sync_req[NODE]); + return submitted; } /* @@ -678,7 +683,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) * Move data block via META_MAPPING while keeping locked data page. * This can be used to move blocks, aka LBAs, directly on disk. */ -static void move_data_block(struct inode *inode, block_t bidx, +static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { struct f2fs_io_info fio = { @@ -697,25 +702,29 @@ static void move_data_block(struct inode *inode, block_t bidx, struct node_info ni; struct page *page, *mpage; block_t newaddr; - int err; + int err = 0; bool lfs_mode = test_opt(fio.sbi, LFS); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); if (!page) - return; + return -ENOMEM; - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } if (f2fs_is_atomic_file(inode)) { F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; } if (f2fs_is_pinned_file(inode)) { f2fs_pin_file_control(inode, true); + err = -EAGAIN; goto out; } @@ -726,6 +735,7 @@ static void move_data_block(struct inode *inode, block_t bidx, if (unlikely(dn.data_blkaddr == NULL_ADDR)) { ClearPageUptodate(page); + err = -ENOENT; goto put_out; } @@ -808,6 +818,7 @@ static void move_data_block(struct inode *inode, block_t bidx, fio.new_blkaddr = newaddr; f2fs_submit_page_write(&fio); if (fio.retry) { + err = -EAGAIN; if (PageWriteback(fio.encrypted_page)) end_page_writeback(fio.encrypted_page); goto put_page_out; @@ -831,34 +842,42 @@ static void move_data_block(struct inode *inode, block_t bidx, f2fs_put_dnode(&dn); out: f2fs_put_page(page, 1); + return err; } -static void move_data_page(struct inode *inode, block_t bidx, int gc_type, +static int move_data_page(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { struct page *page; + int err = 0; page = f2fs_get_lock_data_page(inode, bidx, true); if (IS_ERR(page)) - return; + return PTR_ERR(page); - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } if (f2fs_is_atomic_file(inode)) { F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; } if (f2fs_is_pinned_file(inode)) { if (gc_type == FG_GC) f2fs_pin_file_control(inode, true); + err = -EAGAIN; goto out; } if (gc_type == BG_GC) { - if (PageWriteback(page)) + if (PageWriteback(page)) { + err = -EAGAIN; goto out; + } set_page_dirty(page); set_cold_data(page); } else { @@ -876,7 +895,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .io_type = FS_GC_DATA_IO, }; bool is_dirty = PageDirty(page); - int err; retry: set_page_dirty(page); @@ -901,6 +919,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, } out: f2fs_put_page(page, 1); + return err; } /* @@ -910,7 +929,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, * If the parent node is not valid or the data block address is different, * the victim data block is ignored. */ -static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; @@ -918,6 +937,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t start_addr; int off; int phase = 0; + int submitted = 0; start_addr = START_BLOCK(sbi, segno); @@ -934,7 +954,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; @@ -1006,6 +1026,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (inode) { struct f2fs_inode_info *fi = F2FS_I(inode); bool locked = false; + int err; if (S_ISREG(inode->i_mode)) { if (!down_write_trylock(&fi->i_gc_rwsem[READ])) @@ -1025,12 +1046,16 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_post_read_required(inode)) - move_data_block(inode, start_bidx, gc_type, - segno, off); + err = move_data_block(inode, start_bidx, + gc_type, segno, off); else - move_data_page(inode, start_bidx, gc_type, + err = move_data_page(inode, start_bidx, gc_type, segno, off); + if (!err && (gc_type == FG_GC || + f2fs_post_read_required(inode))) + submitted++; + if (locked) { up_write(&fi->i_gc_rwsem[WRITE]); up_write(&fi->i_gc_rwsem[READ]); @@ -1042,6 +1067,8 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (++phase < 5) goto next_step; + + return submitted; } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, @@ -1069,6 +1096,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, int seg_freed = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; + int submitted = 0; /* readahead multi ssa blocks those have contiguous address */ if (sbi->segs_per_sec > 1) @@ -1124,10 +1152,11 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, * - lock_page(sum_page) */ if (type == SUM_TYPE_NODE) - gc_node_segment(sbi, sum->entries, segno, gc_type); - else - gc_data_segment(sbi, sum->entries, gc_list, segno, + submitted += gc_node_segment(sbi, sum->entries, segno, gc_type); + else + submitted += gc_data_segment(sbi, sum->entries, gc_list, + segno, gc_type); stat_inc_seg_count(sbi, type, gc_type); @@ -1138,7 +1167,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, f2fs_put_page(sum_page, 0); } - if (gc_type == FG_GC) + if (submitted) f2fs_submit_merged_write(sbi, (type == SUM_TYPE_NODE) ? NODE : DATA); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c1936432154f..b76dd5a06c29 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1587,8 +1587,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return AOP_WRITEPAGE_ACTIVATE; } -void f2fs_move_node_page(struct page *node_page, int gc_type) +int f2fs_move_node_page(struct page *node_page, int gc_type) { + int err = 0; + if (gc_type == FG_GC) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -1600,12 +1602,16 @@ void f2fs_move_node_page(struct page *node_page, int gc_type) f2fs_wait_on_page_writeback(node_page, NODE, true); f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); - if (!clear_page_dirty_for_io(node_page)) + if (!clear_page_dirty_for_io(node_page)) { + err = -EAGAIN; goto out_page; + } if (__write_node_page(node_page, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) + &wbc, false, FS_GC_NODE_IO, NULL)) { + err = -EAGAIN; unlock_page(node_page); + } goto release_page; } else { /* set page dirty and write it */ @@ -1616,6 +1622,7 @@ void f2fs_move_node_page(struct page *node_page, int gc_type) unlock_page(node_page); release_page: f2fs_put_page(node_page, 0); + return err; } static int f2fs_write_node_page(struct page *page, From 329676473320d309bb267267645b1717ef4bfa4e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 3 Oct 2018 22:32:44 +0800 Subject: [PATCH 231/812] f2fs: fix to recover cold bit of inode block during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +A /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. chattr -A /mnt/f2fs/file 11. xfs_io -f /mnt/f2fs/file -c "fsync" 12. umount /mnt/f2fs 13. mount -t f2fs /dev/sdd /mnt/f2fs 14. lsattr /mnt/f2fs/file -----------------N- /mnt/f2fs/file But actually, we expect the corrct result is: -------A---------N- /mnt/f2fs/file The reason is in step 9) we missed to recover cold bit flag in inode block, so later, in fsync, we will skip write inode block due to below condition check, result in lossing data in another SPOR. f2fs_fsync_node_pages() if (!IS_DNODE(page) || !is_cold_node(page)) continue; Note that, I guess that some non-dir inode has already lost cold bit during POR, so in order to reenable recovery for those inode, let's try to recover cold bit in f2fs_iget() to save more fsynced data. Fixes: c56675750d7c ("f2fs: remove unneeded set_cold_node()") Cc: 4.17+ Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 6 ++++++ fs/f2fs/node.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ab845b558dd5..4408feee9488 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -365,6 +365,12 @@ static int do_read_inode(struct inode *inode) if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); + /* try to recover cold bit for non-dir inode */ + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { + set_cold_node(node_page, false); + set_page_dirty(node_page); + } + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b76dd5a06c29..adb6f8ee686c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2552,7 +2552,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); - set_cold_node(page, false); + set_cold_node(ipage, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); From 9f2917f2bb5d8a77e79a356128b2e65fb98b5e18 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:18 +0800 Subject: [PATCH 232/812] f2fs: shrink sbi->sb_lock coverage in set_file_temperature() file_set_{cold,hot} doesn't need holding sbi->sb_lock, so moving them out of the lock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b2cc6c315f62..7b522c85a1a3 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -182,16 +182,19 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * hot_count = sbi->raw_super->hot_ext_count; for (i = 0; i < cold_count + hot_count; i++) { - if (!is_extension_exist(name, extlist[i])) - continue; - if (i < cold_count) - file_set_cold(inode); - else - file_set_hot(inode); - break; + if (is_extension_exist(name, extlist[i])) + break; } up_read(&sbi->sb_lock); + + if (i == cold_count + hot_count) + return; + + if (i < cold_count) + file_set_cold(inode); + else + file_set_hot(inode); } int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, From 3dd704fff9483e9aa682ebd6487928ee9c0cf76c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:19 +0800 Subject: [PATCH 233/812] f2fs: remove unused sbi->trigger_ssr_threshold Commit a2a12b679f36 ("f2fs: export SSR allocation threshold") introduced two threshold .min_ssr_sections and .trigger_ssr_threshold, but only .min_ssr_sections is used, so just remove redundant one for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb83794e9b38..2a5271717dc7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1285,7 +1285,6 @@ struct f2fs_sb_info { unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ int dir_level; /* directory level */ - unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ int readdir_ra; /* readahead inode in readdir */ block_t user_block_count; /* # of user blocks */ From 2dc22029a7e51dade5cddc06acc29bcec60263d2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:20 +0800 Subject: [PATCH 234/812] f2fs: remove unneeded disable_nat_bits() Commit 7735730d39d7 ("f2fs: fix to propagate error from __get_meta_page()") added disable_nat_bits() in error path of __get_nat_bitmaps(), but it's unneeded, beause we will fail mount, we won't have chance to change nid usage status w/o nat full/empty bitmaps. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index adb6f8ee686c..08edc0930831 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2882,10 +2882,8 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) struct page *page; page = f2fs_get_meta_page(sbi, nat_bits_addr++); - if (IS_ERR(page)) { - disable_nat_bits(sbi, true); + if (IS_ERR(page)) return PTR_ERR(page); - } memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), page_address(page), F2FS_BLKSIZE); From 4aa5ef7fb109be395e9ebe06804d3d77c4bc8b68 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 5 Oct 2018 10:47:39 +0530 Subject: [PATCH 235/812] f2fs: do not update REQ_TIME in case of error conditions The REQ_TIME should be updated only in case of success cases as followed at all other places in the file system. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a3302e2888dc..317820239111 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -655,9 +655,9 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) f2fs_put_page(page, 1); clear_inode_flag(inode, FI_NEW_INODE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: up_write(&F2FS_I(inode)->i_sem); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 834f87e75dff..040c9667347e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -109,6 +109,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, SetPageUptodate(page); f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + f2fs_update_time(sbi, REQ_TIME); trace_f2fs_vm_page_mkwrite(page, DATA); out_sem: @@ -117,7 +118,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_balance_fs(sbi, dn.node_changed); sb_end_pagefault(inode->i_sb); - f2fs_update_time(sbi, REQ_TIME); err: return block_page_mkwrite_return(err); } From fe2b3bc0fc9f37f6e672cce4d969c8b5020f4380 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 5 Oct 2018 10:47:40 +0530 Subject: [PATCH 236/812] f2fs: update REQ_TIME in f2fs_cross_rename() Update REQ_TIME in the missing path - f2fs_cross_rename(). Signed-off-by: Sahitya Tummala [Jaegeuk Kim: add it in f2fs_rename()] Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7b522c85a1a3..2d2924696174 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1001,6 +1001,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; put_out_dir: @@ -1158,6 +1160,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; out_new_dir: if (new_dir_entry) { From 05d4dcf63d564bf54f3bb2f310c1eff31ba5de02 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Oct 2018 17:20:58 -0700 Subject: [PATCH 237/812] f2fs: allow to mount, if quota is failed Since we can use the filesystem without quotas till next boot. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41e0609e7e30..53d549f0145a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3197,11 +3197,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* Enable quota usage during mount */ if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); - if (err) { + if (err) f2fs_msg(sb, KERN_ERR, "Cannot turn on quotas: error %d", err); - goto free_sysfs; - } } #endif /* if there are nt orphan nodes free them */ @@ -3302,9 +3300,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * falls into an infinite loop in f2fs_sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); -#ifdef CONFIG_QUOTA -free_sysfs: -#endif f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); From c4b87d0c1ca217e4f925034b213e57995e2bd986 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 16 Oct 2018 08:34:50 -0600 Subject: [PATCH 238/812] f2fs: remove request_list check in is_idle() This doesn't work on stacked devices, and it doesn't work on blk-mq devices. The request_list is only used on legacy, which we don't have much of anymore, and soon won't have any of. Kill the check. Cc: Jaegeuk Kim Cc: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Jens Axboe Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2a5271717dc7..90c99b7013bd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1459,13 +1459,6 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->root_rl; - - if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) - return false; - return f2fs_time_over(sbi, type); } From fb363c5db4200af3eaaa1665aa2290f8a4121b24 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Oct 2018 09:12:51 +0800 Subject: [PATCH 239/812] f2fs: fix to account IO correctly Below race can cause reversed reference on dirty count, fix it by relocating __submit_bio() and inc_page_count(). Thread A Thread B - f2fs_inplace_write_data - f2fs_submit_page_bio - __submit_bio - f2fs_write_end_io - dec_page_count - inc_page_count Cc: Fixes: d1b3e72d5490 ("f2fs: submit bio of in-place-update pages") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdfdd6919dcc..49cfa4859291 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -459,10 +459,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, bio, fio->type); - if (!is_read_io(fio->op)) inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + + __submit_bio(fio->sbi, bio, fio->type); return 0; } From 9204be8a482b55f71abce48baddb24c8a8f0e424 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Oct 2018 23:24:28 +0800 Subject: [PATCH 240/812] f2fs: fix to account IO correctly for cgroup writeback Now, we have supported cgroup writeback, it depends on correctly IO account of specified filesystem. But in commit d1b3e72d5490 ("f2fs: submit bio of in-place-update pages"), we split write paths from f2fs_submit_page_mbio() to two: - f2fs_submit_page_bio() for IPU path - f2fs_submit_page_bio() for OPU path But still we account write IO only in f2fs_submit_page_mbio(), result in incorrect IO account, fix it by adding missing IO account in IPU path. Fixes: d1b3e72d5490 ("f2fs: submit bio of in-place-update pages") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 49cfa4859291..326b6955bd2a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -457,6 +457,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_put(bio); return -EFAULT; } + + if (fio->io_wbc && !is_read_io(fio->op)) + wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + bio_set_op_attrs(bio, fio->op, fio->op_flags); if (!is_read_io(fio->op)) From 003e915c0196b0f6a913947c3f9cf51ef54a26f7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Oct 2018 10:20:53 -0700 Subject: [PATCH 241/812] f2fs: account read IOs and use IO counts for is_idle This patch adds issued read IO counts which is under block layer. Chao modified a bit, since: Below race can cause reversed reference on F2FS_RD_DATA, there is the same issue in f2fs_submit_page_bio(), fix them by relocate __submit_bio() and inc_page_count. Thread A Thread B - f2fs_write_begin - f2fs_submit_page_read - __submit_bio - f2fs_read_end_io - __read_end_io - dec_page_count(, F2FS_RD_DATA) - inc_page_count(, F2FS_RD_DATA) Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 24 ++++++++++++++++++++++-- fs/f2fs/debug.c | 7 ++++++- fs/f2fs/f2fs.h | 22 ++++++++++++++++------ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 326b6955bd2a..a762d4e5d2c6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -51,6 +51,23 @@ static bool __is_cp_guaranteed(struct page *page) return false; } +static enum count_type __read_io_type(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (mapping) { + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_META_INO(sbi)) + return F2FS_RD_META; + + if (inode->i_ino == F2FS_NODE_INO(sbi)) + return F2FS_RD_NODE; + } + return F2FS_RD_DATA; +} + /* postprocessing steps for read bios */ enum bio_post_read_step { STEP_INITIAL = 0, @@ -81,6 +98,7 @@ static void __read_end_io(struct bio *bio) } else { SetPageUptodate(page); } + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } if (bio->bi_private) @@ -463,8 +481,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_set_op_attrs(bio, fio->op, fio->op_flags); - if (!is_read_io(fio->op)) - inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + inc_page_count(fio->sbi, is_read_io(fio->op) ? + __read_io_type(page): WB_DATA_TYPE(fio->page)); __submit_bio(fio->sbi, bio, fio->type); return 0; @@ -595,6 +613,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return -EFAULT; } ClearPageError(page); + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); __submit_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1582,6 +1601,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); ClearPageError(page); last_block_in_bio = block_nr; goto next_page; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 026e10f30889..139b4d5c83d5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -55,6 +55,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); + si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); + si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE); + si->nr_rd_meta = get_pages(sbi, F2FS_RD_META); if (SM_I(sbi) && SM_I(sbi)->fcc_info) { si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); @@ -371,7 +374,9 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " + seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", + si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); + seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 90c99b7013bd..07fa071a66f5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1015,6 +1015,9 @@ enum count_type { F2FS_DIRTY_IMETA, F2FS_WB_CP_DATA, F2FS_WB_DATA, + F2FS_RD_DATA, + F2FS_RD_NODE, + F2FS_RD_META, NR_COUNT_TYPE, }; @@ -1457,11 +1460,6 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, return wait_ms; } -static inline bool is_idle(struct f2fs_sb_info *sbi, int type) -{ - return f2fs_time_over(sbi, type); -} - /* * Inline functions */ @@ -1852,7 +1850,9 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) atomic_inc(&sbi->nr_pages[count_type]); if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA) + count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA || + count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE || + count_type == F2FS_RD_META) return; set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2189,6 +2189,15 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, return bio_alloc(GFP_KERNEL, npages); } +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || + get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || + get_pages(sbi, F2FS_WB_CP_DATA)) + return false; + return f2fs_time_over(sbi, type); +} + static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item) { @@ -3199,6 +3208,7 @@ struct f2fs_stat_info { int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_rd_data, nr_rd_node, nr_rd_meta; unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; From 0246fe9d2e9ad87b5ec7186a09566b8ce96b19cf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Oct 2018 19:30:13 -0700 Subject: [PATCH 242/812] Revert "f2fs: fix to clear PG_checked flag in set_page_dirty()" This reverts commit 66110abc4c931f879d70e83e1281f891699364bf. If we clear the cold data flag out of the writeback flow, we can miscount -1 by end_io, which incurs a deadlock caused by all I/Os being blocked during heavy GC. Balancing F2FS Async: - IO (CP: 1, Data: -1, Flush: ( 0 0 1), Discard: ( ... GC thread: IRQ - move_data_page() - set_page_dirty() - clear_cold_data() - f2fs_write_end_io() - type = WB_DATA_TYPE(page); here, we get wrong type - dec_page_count(sbi, type); - f2fs_wait_on_page_writeback() Cc: Reported-and-Tested-by: Park Ju Hyung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a762d4e5d2c6..dacfc4aa7e38 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2650,10 +2650,6 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - /* don't remain PG_checked flag which was set during GC */ - if (is_cold_data(page)) - clear_cold_data(page); - if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { f2fs_register_inmem_page(inode, page); From fbfc2e102ca7fca46eecc8a73d2b7d2a5d81c9c7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jul 2018 18:15:16 +0800 Subject: [PATCH 243/812] f2fs: fix to spread clear_cold_data() We need to drop PG_checked flag on page as well when we clear PG_uptodate flag, in order to avoid treating the page as GCing one later. Signed-off-by: Weichao Guo Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 +++++++- fs/f2fs/dir.c | 1 + fs/f2fs/segment.c | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index dacfc4aa7e38..231876a9d647 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1812,6 +1812,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { ClearPageUptodate(page); + clear_cold_data(page); goto out_writepage; } got_it: @@ -1987,8 +1988,10 @@ static int __write_data_page(struct page *page, bool *submitted, out: inode_dec_dirty_pages(inode); - if (err) + if (err) { ClearPageUptodate(page); + clear_cold_data(page); + } if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); @@ -2617,6 +2620,8 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, } } + clear_cold_data(page); + /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) return f2fs_drop_inmem_page(inode, page); @@ -2635,6 +2640,7 @@ int f2fs_release_page(struct page *page, gfp_t wait) if (IS_ATOMIC_WRITTEN_PAGE(page)) return 0; + clear_cold_data(page); set_page_private(page, 0); ClearPagePrivate(page); return 1; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 317820239111..eedc07b46a52 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -730,6 +730,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, clear_page_dirty_for_io(page); ClearPagePrivate(page); ClearPageUptodate(page); + clear_cold_data(page); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1b2ebd9b986d..e4305fcb6265 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -266,8 +266,10 @@ static int __revoke_inmem_pages(struct inode *inode, } next: /* we don't need to invalidate this in the sccessful status */ - if (drop || recover) + if (drop || recover) { ClearPageUptodate(page); + clear_cold_data(page); + } set_page_private(page, 0); ClearPagePrivate(page); f2fs_put_page(page, 1); From 6a23a85581000e723c45ff2541237adc47822702 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 7 Oct 2018 19:06:15 +0800 Subject: [PATCH 244/812] f2fs: spread f2fs_set_inode_flags() This patch changes codes as below: - use f2fs_set_inode_flags() to update i_flags atomically to avoid potential race. - synchronize F2FS_I(inode)->i_flags to inode->i_flags in f2fs_new_inode(). - use f2fs_set_inode_flags() to simply codes in f2fs_quota_{on,off}. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/namei.c | 2 ++ fs/f2fs/super.c | 5 ++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 07fa071a66f5..efe56091b07c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3506,7 +3506,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION file_set_encrypt(inode); - inode->i_flags |= S_ENCRYPTED; + f2fs_set_inode_flags(inode); #endif } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2d2924696174..dd1748c5e7f0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -122,6 +122,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); + f2fs_set_inode_flags(inode); + trace_f2fs_new_inode(inode, 0); return inode; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 53d549f0145a..4693e4c6d8b6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1940,8 +1940,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, inode_lock(inode); F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; - inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, - S_NOATIME | S_IMMUTABLE); + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); @@ -1966,7 +1965,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) inode_lock(inode); F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL); - inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); out_put: From 797d94b07ea05f1b58c8056ca80e0712591fee04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 7 Oct 2018 03:03:38 +0800 Subject: [PATCH 245/812] f2fs: fix to recover inode->i_flags of inode block during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +a /mnt/f2fs/file 6. xfs_io -a /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. xfs_io /mnt/f2fs/file There is no error when opening this file w/o O_APPEND, but actually, we expect the correct result should be: /mnt/f2fs/file: Operation not permitted The root cause is, in recover_inode(), we recover inode->i_flags more than F2FS_I(inode)->i_flags, so fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b069b7d7403d..7899a9e8a6e0 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -234,6 +234,7 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + f2fs_set_inode_flags(inode); F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = le16_to_cpu(raw->i_gc_failures); From f8a7408381a446f046f7e4f01f247aeb39511050 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 10 Oct 2018 10:56:22 +0530 Subject: [PATCH 246/812] f2fs: fix data corruption issue with hardware encryption Direct IO can be used in case of hardware encryption. The following scenario results into data corruption issue in this path - Thread A - Thread B- -> write file#1 in direct IO -> GC gets kicked in -> GC submitted bio on meta mapping for file#1, but pending completion -> write file#1 again with new data in direct IO -> GC bio gets completed now -> GC writes old data to the new location and thus file#1 is corrupted. Fix this by submitting and waiting for pending io on meta mapping for direct IO case in f2fs_map_blocks(). Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 9 +++++++++ 3 files changed, 22 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 231876a9d647..47fdb3aa4b21 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1050,6 +1050,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, map->m_flags = F2FS_MAP_MAPPED; if (map->m_next_extent) *map->m_next_extent = pgofs + map->m_len; + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); goto out; } @@ -1208,6 +1213,12 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, goto next_dnode; sync_out: + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); + if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index efe56091b07c..b1c2d097073b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3073,6 +3073,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len); void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e4305fcb6265..fb4be4629add 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3380,6 +3380,15 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) } } +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len) +{ + block_t i; + + for (i = 0; i < len; i++) + f2fs_wait_on_block_writeback(inode, blkaddr + i); +} + static int read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); From ba406b630883486ac915f6d2261873dacd648f7d Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Fri, 12 Oct 2018 18:49:26 +0800 Subject: [PATCH 247/812] f2fs: cleanup dirty pages if recover failed During recover, we will try to create new dentries for inodes with dentry_mark. But if the parent is missing (e.g. killed by fsck), recover will break. But those recovered dirty pages are not cleanup. This will hit f2fs_bug_on: [ 53.519566] F2FS-fs (loop0): Found nat_bits in checkpoint [ 53.539354] F2FS-fs (loop0): recover_inode: ino = 5, name = file, inline = 3 [ 53.539402] F2FS-fs (loop0): recover_dentry: ino = 5, name = file, dir = 0, err = -2 [ 53.545760] F2FS-fs (loop0): Cannot recover all fsync data errno=-2 [ 53.546105] F2FS-fs (loop0): access invalid blkaddr:4294967295 [ 53.546171] WARNING: CPU: 1 PID: 1798 at fs/f2fs/checkpoint.c:163 f2fs_is_valid_blkaddr+0x26c/0x320 [ 53.546174] Modules linked in: [ 53.546183] CPU: 1 PID: 1798 Comm: mount Not tainted 4.19.0-rc2+ #1 [ 53.546186] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 53.546191] RIP: 0010:f2fs_is_valid_blkaddr+0x26c/0x320 [ 53.546195] Code: 85 bb 00 00 00 48 89 df 88 44 24 07 e8 ad a8 db ff 48 8b 3b 44 89 e1 48 c7 c2 40 03 72 a9 48 c7 c6 e0 01 72 a9 e8 84 3c ff ff <0f> 0b 0f b6 44 24 07 e9 8a 00 00 00 48 8d bf 38 01 00 00 e8 7c a8 [ 53.546201] RSP: 0018:ffff88006c067768 EFLAGS: 00010282 [ 53.546208] RAX: 0000000000000000 RBX: ffff880068844200 RCX: ffffffffa83e1a33 [ 53.546211] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88006d51e590 [ 53.546215] RBP: 0000000000000005 R08: ffffed000daa3cb3 R09: ffffed000daa3cb3 [ 53.546218] R10: 0000000000000001 R11: ffffed000daa3cb2 R12: 00000000ffffffff [ 53.546221] R13: ffff88006a1f8000 R14: 0000000000000200 R15: 0000000000000009 [ 53.546226] FS: 00007fb2f3646840(0000) GS:ffff88006d500000(0000) knlGS:0000000000000000 [ 53.546229] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.546234] CR2: 00007f0fd77f0008 CR3: 00000000687e6002 CR4: 00000000000206e0 [ 53.546237] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 53.546240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 53.546242] Call Trace: [ 53.546248] f2fs_submit_page_bio+0x95/0x740 [ 53.546253] read_node_page+0x161/0x1e0 [ 53.546271] ? truncate_node+0x650/0x650 [ 53.546283] ? add_to_page_cache_lru+0x12c/0x170 [ 53.546288] ? pagecache_get_page+0x262/0x2d0 [ 53.546292] __get_node_page+0x200/0x660 [ 53.546302] f2fs_update_inode_page+0x4a/0x160 [ 53.546306] f2fs_write_inode+0x86/0xb0 [ 53.546317] __writeback_single_inode+0x49c/0x620 [ 53.546322] writeback_single_inode+0xe4/0x1e0 [ 53.546326] sync_inode_metadata+0x93/0xd0 [ 53.546330] ? sync_inode+0x10/0x10 [ 53.546342] ? do_raw_spin_unlock+0xed/0x100 [ 53.546347] f2fs_sync_inode_meta+0xe0/0x130 [ 53.546351] f2fs_fill_super+0x287d/0x2d10 [ 53.546367] ? vsnprintf+0x742/0x7a0 [ 53.546372] ? f2fs_commit_super+0x180/0x180 [ 53.546379] ? up_write+0x20/0x40 [ 53.546385] ? set_blocksize+0x5f/0x140 [ 53.546391] ? f2fs_commit_super+0x180/0x180 [ 53.546402] mount_bdev+0x181/0x200 [ 53.546406] mount_fs+0x94/0x180 [ 53.546411] vfs_kern_mount+0x6c/0x1e0 [ 53.546415] do_mount+0xe5e/0x1510 [ 53.546420] ? fs_reclaim_release+0x9/0x30 [ 53.546424] ? copy_mount_string+0x20/0x20 [ 53.546428] ? fs_reclaim_acquire+0xd/0x30 [ 53.546435] ? __might_sleep+0x2c/0xc0 [ 53.546440] ? ___might_sleep+0x53/0x170 [ 53.546453] ? __might_fault+0x4c/0x60 [ 53.546468] ? _copy_from_user+0x95/0xa0 [ 53.546474] ? memdup_user+0x39/0x60 [ 53.546478] ksys_mount+0x88/0xb0 [ 53.546482] __x64_sys_mount+0x5d/0x70 [ 53.546495] do_syscall_64+0x65/0x130 [ 53.546503] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 53.547639] ---[ end trace b804d1ea2fec893e ]--- So if recover fails, we need to drop all recovered data. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 32 +++++++++++++++++++++----------- fs/f2fs/super.c | 14 +++++++++++++- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7899a9e8a6e0..b7173ae30c75 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -96,8 +96,12 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, return ERR_PTR(err); } -static void del_fsync_inode(struct fsync_inode_entry *entry) +static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) { + if (drop) { + /* inode should not be recovered, drop it */ + f2fs_inode_synced(entry->inode); + } iput(entry->inode); list_del(&entry->list); kmem_cache_free(fsync_entry_slab, entry); @@ -338,12 +342,12 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, return err; } -static void destroy_fsync_dnodes(struct list_head *head) +static void destroy_fsync_dnodes(struct list_head *head, int drop) { struct fsync_inode_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, head, list) - del_fsync_inode(entry); + del_fsync_inode(entry, drop); } static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, @@ -580,7 +584,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, - struct list_head *dir_list) + struct list_head *tmp_inode_list, struct list_head *dir_list) { struct curseg_info *curseg; struct page *page = NULL; @@ -634,7 +638,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, } if (entry->blkaddr == blkaddr) - del_fsync_inode(entry); + list_move_tail(&entry->list, tmp_inode_list); next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); @@ -647,7 +651,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct list_head inode_list; + struct list_head inode_list, tmp_inode_list; struct list_head dir_list; int err; int ret = 0; @@ -678,6 +682,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) } INIT_LIST_HEAD(&inode_list); + INIT_LIST_HEAD(&tmp_inode_list); INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ @@ -696,11 +701,16 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) need_writecp = true; /* step #2: recover data */ - err = recover_data(sbi, &inode_list, &dir_list); + err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); + else { + /* restore s_flags to let iput() trash data */ + sbi->sb->s_flags = s_flags; + } skip: - destroy_fsync_dnodes(&inode_list); + destroy_fsync_dnodes(&inode_list, err); + destroy_fsync_dnodes(&tmp_inode_list, err); /* truncate meta pages to be used by the recovery */ truncate_inode_pages_range(META_MAPPING(sbi), @@ -709,13 +719,13 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); + } else { + clear_sbi_flag(sbi, SBI_POR_DOING); } - - clear_sbi_flag(sbi, SBI_POR_DOING); mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ - destroy_fsync_dnodes(&dir_list); + destroy_fsync_dnodes(&dir_list, err); if (need_writecp) { set_sbi_flag(sbi, SBI_IS_RECOVERED); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4693e4c6d8b6..64d2bc35225e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1992,6 +1992,18 @@ void f2fs_quota_off_umount(struct super_block *sb) } } +static void f2fs_truncate_quota_inode_pages(struct super_block *sb) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int type; + + for (type = 0; type < MAXQUOTAS; type++) { + if (!dqopt->files[type]) + continue; + f2fs_inode_synced(dqopt->files[type]); + } +} + #if 0 static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) { @@ -3288,10 +3300,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) free_meta: #ifdef CONFIG_QUOTA + f2fs_truncate_quota_inode_pages(sb); if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif - f2fs_sync_inode_meta(sbi); /* * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes() * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg() From 0eea3276eebca7f423de34093d04a2e3fc541212 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Sep 2018 20:05:00 +0800 Subject: [PATCH 248/812] f2fs: guarantee journalled quota data by checkpoint For journalled quota mode, let checkpoint to flush dquot dirty data and quota file data to guarntee persistence of all quota sysfile in last checkpoint, by this way, we can avoid corrupting quota sysfile when encountering SPO. The implementation is as below: 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is cached dquot metadata changes in quota subsystem, and later checkpoint should: a) flush dquot metadata into quota file. b) flush quota file to storage to keep file usage be consistent. 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota operation failed due to -EIO or -ENOSPC, so later, a) checkpoint will skip syncing dquot metadata. b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a hint for fsck repairing. 3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota data updating is very heavy, it may cause hungtask in block_operation(). To avoid this, if our retry time exceed threshold, let's just skip flushing and retry in next checkpoint(). Signed-off-by: Weichao Guo Signed-off-by: Chao Yu [Jaegeuk Kim: avoid warnings and set fsck flag] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 62 +++++++++++++++++++-- fs/f2fs/data.c | 16 ++++-- fs/f2fs/f2fs.h | 49 +++++++++++++--- fs/f2fs/file.c | 31 ++++++++--- fs/f2fs/inline.c | 4 +- fs/f2fs/inode.c | 11 +++- fs/f2fs/namei.c | 4 -- fs/f2fs/recovery.c | 43 +++++++++++++- fs/f2fs/super.c | 120 ++++++++++++++++++++++++++++++++++++---- include/linux/f2fs_fs.h | 1 + 10 files changed, 294 insertions(+), 47 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 598424c1c4b1..4ed2d2a0bf02 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1080,6 +1080,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->next_free_nid = cpu_to_le32(last_nid); } +static bool __need_flush_quota(struct f2fs_sb_info *sbi) +{ + if (!is_journalled_quota(sbi)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) + return true; + if (get_pages(sbi, F2FS_DIRTY_QDATA)) + return true; + return false; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -1091,12 +1106,36 @@ static int block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; - int err = 0; + int err = 0, cnt = 0; blk_start_plug(&plug); -retry_flush_dents: +retry_flush_quotas: + if (__need_flush_quota(sbi)) { + int locked; + + if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { + set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + f2fs_lock_all(sbi); + goto retry_flush_dents; + } + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + /* only failed during mount/umount/freeze/quotactl */ + locked = down_read_trylock(&sbi->sb->s_umount); + f2fs_quota_sync(sbi->sb, -1); + if (locked) + up_read(&sbi->sb->s_umount); + } + f2fs_lock_all(sbi); + if (__need_flush_quota(sbi)) { + f2fs_unlock_all(sbi); + cond_resched(); + goto retry_flush_quotas; + } + +retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); @@ -1104,7 +1143,7 @@ static int block_operations(struct f2fs_sb_info *sbi) if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } /* @@ -1113,6 +1152,12 @@ static int block_operations(struct f2fs_sb_info *sbi) */ down_write(&sbi->node_change); + if (__need_flush_quota(sbi)) { + up_write(&sbi->node_change); + f2fs_unlock_all(sbi); + goto retry_flush_quotas; + } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1120,7 +1165,7 @@ static int block_operations(struct f2fs_sb_info *sbi) if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } retry_flush_nodes: @@ -1216,6 +1261,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); @@ -1423,6 +1476,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); sbi->unusable_block_count = 0; __set_cp_next_pack(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 47fdb3aa4b21..ac25667e9999 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -45,7 +45,7 @@ static bool __is_cp_guaranteed(struct page *page) inode->i_ino == F2FS_NODE_INO(sbi) || S_ISDIR(inode->i_mode) || (S_ISREG(inode->i_mode) && - is_inode_flag_set(inode, FI_ATOMIC_FILE)) || + (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || is_cold_data(page)) return true; return false; @@ -1762,6 +1762,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (S_ISDIR(inode->i_mode)) return true; + if (IS_NOQUOTA(inode)) + return true; if (f2fs_is_atomic_file(inode)) return true; if (fio) { @@ -2012,7 +2014,7 @@ static int __write_data_page(struct page *page, bool *submitted, } unlock_page(page); - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -2203,6 +2205,8 @@ static inline bool __should_serialize_io(struct inode *inode, { if (!S_ISREG(inode->i_mode)) return false; + if (IS_NOQUOTA(inode)) + return false; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) @@ -2232,7 +2236,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto skip_write; - if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && + if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && + wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; @@ -2297,7 +2302,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); - f2fs_truncate_blocks(inode, i_size, true); + f2fs_truncate_blocks(inode, i_size, true, true); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2436,7 +2441,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (err) goto fail; - if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) { + if (need_balance && !IS_NOQUOTA(inode) && + has_not_enough_free_secs(sbi, 0, 0)) { unlock_page(page); f2fs_balance_fs(sbi, true); lock_page(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b1c2d097073b..ac0c10990bbd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -596,6 +596,9 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ +/* maximum retry quota flush count */ +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ @@ -1164,6 +1167,9 @@ enum { SBI_IS_SHUTDOWN, /* shutdown by ioctl */ SBI_IS_RECOVERED, /* recovered orphan/data */ SBI_CP_DISABLED, /* CP was disabled last mount */ + SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ + SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ + SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ }; enum { @@ -1988,12 +1994,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, { block_t valid_block_count; unsigned int valid_node_count; - bool quota = inode && !is_inode; + int err; - if (quota) { - int ret = dquot_reserve_block(inode, 1); - if (ret) - return ret; + if (is_inode) { + if (inode) { + err = dquot_alloc_inode(inode); + if (err) + return err; + } + } else { + err = dquot_reserve_block(inode, 1); + if (err) + return err; } if (time_to_inject(sbi, FAULT_BLOCK)) { @@ -2037,8 +2049,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return 0; enospc: - if (quota) + if (is_inode) { + if (inode) + dquot_free_inode(inode); + } else { dquot_release_reservation_block(inode, 1); + } return -ENOSPC; } @@ -2059,7 +2075,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, spin_unlock(&sbi->stat_lock); - if (!is_inode) + if (is_inode) + dquot_free_inode(inode); + else f2fs_i_blocks_write(inode, 1, false, true); } @@ -2867,7 +2885,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void f2fs_truncate_data_blocks(struct dnode_of_data *dn); -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, + bool buf_write); int f2fs_truncate(struct inode *inode); int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); @@ -2955,6 +2974,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); +int f2fs_quota_sync(struct super_block *sb, int type); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); @@ -3648,3 +3668,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, #endif #endif + +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sbi->sb)) + return true; + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + return true; +#endif + return false; +} diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 040c9667347e..3c61f9619be1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -589,7 +589,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, + bool buf_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -597,6 +598,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) int count = 0, err = 0; struct page *ipage; bool truncate_page = false; + int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; trace_f2fs_truncate_blocks_enter(inode, from); @@ -606,7 +608,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) goto free_partial; if (lock) - f2fs_lock_op(sbi); + __do_map_lock(sbi, flag, true); ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { @@ -644,7 +646,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) err = f2fs_truncate_inode_blocks(inode, free_from); out: if (lock) - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); free_partial: /* lastly zero out the first data page */ if (!err) @@ -679,7 +681,7 @@ int f2fs_truncate(struct inode *inode) return err; } - err = f2fs_truncate_blocks(inode, i_size_read(inode), true); + err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); if (err) return err; @@ -789,9 +791,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) !uid_eq(attr->ia_uid, inode->i_uid)) || (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { + f2fs_lock_op(F2FS_I_SB(inode)); err = dquot_transfer(inode, attr); - if (err) + if (err) { + set_sbi_flag(F2FS_I_SB(inode), + SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(F2FS_I_SB(inode)); return err; + } + /* + * update uid/gid under lock_op(), so that dquot and inode can + * be updated atomically. + */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + f2fs_mark_inode_dirty_sync(inode, true); + f2fs_unlock_op(F2FS_I_SB(inode)); } if (attr->ia_valid & ATTR_SIZE) { @@ -1249,7 +1266,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) new_size = i_size_read(inode) - len; truncate_pagecache(inode, new_size); - ret = f2fs_truncate_blocks(inode, new_size, true); + ret = f2fs_truncate_blocks(inode, new_size, true, false); up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); @@ -1434,7 +1451,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); down_write(&F2FS_I(inode)->i_mmap_sem); - ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); + ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) return ret; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8caaab0685c7..15269b971e59 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage) clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (f2fs_truncate_blocks(inode, 0, false)) + if (f2fs_truncate_blocks(inode, 0, false, false)) return false; goto process_inline; } @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) return 0; punch_dentry_pages: truncate_inode_pages(&dir->i_data, 0); - f2fs_truncate_blocks(dir, 0, false); + f2fs_truncate_blocks(dir, 0, false, false); f2fs_remove_dirty_inode(dir); return err; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 4408feee9488..1b1f22faff22 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -654,7 +654,11 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) { + err = 0; + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); @@ -686,9 +690,10 @@ void f2fs_evict_inode(struct inode *inode) goto retry; } - if (err) + if (err) { f2fs_update_inode_page(inode); - dquot_free_inode(inode); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index dd1748c5e7f0..3ae6f5ae597e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -72,10 +72,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (err) goto fail_drop; - err = dquot_alloc_inode(inode); - if (err) - goto fail_drop; - set_inode_flag(inode, FI_NEW_INODE); /* If the directory encrypted, then we should encrypt the inode. */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b7173ae30c75..336273a81fba 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -195,6 +195,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage, return err; } +static int recover_quota_data(struct inode *inode, struct page *page) +{ + struct f2fs_inode *raw = F2FS_INODE(page); + struct iattr attr; + uid_t i_uid = le32_to_cpu(raw->i_uid); + gid_t i_gid = le32_to_cpu(raw->i_gid); + int err; + + memset(&attr, 0, sizeof(attr)); + + attr.ia_uid = make_kuid(&init_user_ns, i_uid); + attr.ia_gid = make_kgid(&init_user_ns, i_gid); + + if (!uid_eq(attr.ia_uid, inode->i_uid)) + attr.ia_valid |= ATTR_UID; + if (!gid_eq(attr.ia_gid, inode->i_gid)) + attr.ia_valid |= ATTR_GID; + + if (!attr.ia_valid) + return 0; + + err = dquot_transfer(inode, &attr); + if (err) + set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR); + return err; +} + static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) { if (ri->i_inline & F2FS_PIN_FILE) @@ -207,12 +234,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) clear_inode_flag(inode, FI_DATA_EXIST); } -static void recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); char *name; + int err; inode->i_mode = le16_to_cpu(raw->i_mode); + + err = recover_quota_data(inode, page); + if (err) + return err; + i_uid_write(inode, le32_to_cpu(raw->i_uid)); i_gid_write(inode, le32_to_cpu(raw->i_gid)); @@ -254,6 +287,7 @@ static void recover_inode(struct inode *inode, struct page *page) f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s, inline = %x", ino_of_node(page), name, raw->i_inline); + return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, @@ -622,8 +656,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(page)) - recover_inode(entry->inode, page); + if (IS_INODE(page)) { + err = recover_inode(entry->inode, page); + if (err) + break; + } if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, page, dir_list); if (err) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 64d2bc35225e..c551d5336473 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1712,6 +1712,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, congestion_wait(BLK_RW_ASYNC, HZ/50); goto repeat; } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return PTR_ERR(page); } @@ -1723,6 +1724,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, } if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return -EIO; } @@ -1764,6 +1766,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); break; } @@ -1800,6 +1803,12 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { + if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_msg(sbi->sb, KERN_ERR, + "quota sysfile may be corrupted, skip loading it"); + return 0; + } + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], F2FS_OPTION(sbi).s_jquota_fmt, type); } @@ -1870,7 +1879,14 @@ static int f2fs_enable_quotas(struct super_block *sb) test_opt(F2FS_SB(sb), PRJQUOTA), }; + if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_msg(sb, KERN_ERR, + "quota file may be corrupted, skip loading it"); + return 0; + } + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; + for (type = 0; type < MAXQUOTAS; type++) { qf_inum = f2fs_qf_ino(sb, type); if (qf_inum) { @@ -1884,6 +1900,8 @@ static int f2fs_enable_quotas(struct super_block *sb) "fsck to fix.", type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); + set_sbi_flag(F2FS_SB(sb), + SBI_QUOTA_NEED_REPAIR); return err; } } @@ -1891,35 +1909,51 @@ static int f2fs_enable_quotas(struct super_block *sb) return 0; } -static int f2fs_quota_sync(struct super_block *sb, int type) +int f2fs_quota_sync(struct super_block *sb, int type) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; int ret; ret = dquot_writeback_dquots(sb, type); if (ret) - return ret; + goto out; /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct address_space *mapping; + if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; - ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping); + mapping = dqopt->files[cnt]->i_mapping; + + ret = filemap_fdatawrite(mapping); if (ret) - return ret; + goto out; + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + continue; + + ret = filemap_fdatawait(mapping); + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } - return 0; +out: + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + return ret; } static int f2fs_quota_on(struct super_block *sb, int type, int format_id, @@ -1987,7 +2021,7 @@ void f2fs_quota_off_umount(struct super_block *sb) "Fail to turn off disk quota " "(type: %d, err: %d, ret:%d), Please " "run fsck to fix it.", type, err, ret); - set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } } @@ -2004,6 +2038,62 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb) } } +static int f2fs_dquot_commit(struct dquot *dquot) +{ + int ret; + + ret = dquot_commit(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_dquot_acquire(struct dquot *dquot) +{ + int ret; + + ret = dquot_acquire(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + + return ret; +} + +static int f2fs_dquot_release(struct dquot *dquot) +{ + int ret; + + ret = dquot_release(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) +{ + struct super_block *sb = dquot->dq_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int ret; + + ret = dquot_mark_dquot_dirty(dquot); + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + return ret; +} + +static int f2fs_dquot_commit_info(struct super_block *sb, int type) +{ + int ret; + + ret = dquot_commit_info(sb, type); + if (ret < 0) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + #if 0 static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) { @@ -2014,11 +2104,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) static const struct dquot_operations f2fs_quota_operations = { .get_reserved_space = f2fs_get_reserved_space, - .write_dquot = dquot_commit, - .acquire_dquot = dquot_acquire, - .release_dquot = dquot_release, - .mark_dirty = dquot_mark_dquot_dirty, - .write_info = dquot_commit_info, + .write_dquot = f2fs_dquot_commit, + .acquire_dquot = f2fs_dquot_acquire, + .release_dquot = f2fs_dquot_release, + .mark_dirty = f2fs_dquot_mark_dquot_dirty, + .write_info = f2fs_dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, #if 0 @@ -2037,6 +2127,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = { .set_dqblk = dquot_set_dqblk, }; #else +int f2fs_quota_sync(struct super_block *sb, int type) +{ + return 0; +} + void f2fs_quota_off_umount(struct super_block *sb) { } @@ -3111,6 +3206,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_meta_inode; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 6c2786db5154..162f83358abb 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -117,6 +117,7 @@ struct f2fs_super_block { * For checkpoint */ #define CP_DISABLED_FLAG 0x00001000 +#define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 #define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 From 5ce93bd4cc02ef3c1b25587ce65eaa84f6b4172a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 1 Aug 2018 13:45:11 +0200 Subject: [PATCH 249/812] xfrm: Validate address prefix lengths in the xfrm selector. [ Upstream commit 07bf7908950a8b14e81aa1807e3c667eab39287a ] We don't validate the address prefix lengths in the xfrm selector we got from userspace. This can lead to undefined behaviour in the address matching functions if the prefix is too big for the given address family. Fix this by checking the prefixes and refuse SA/policy insertation when a prefix is invalid. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Air Icy Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a9b4491a3cc4..c2e98dcba9fe 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + goto out; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + goto out; + break; #else err = -EAFNOSUPPORT; @@ -1312,10 +1318,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) switch (p->sel.family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + return -EINVAL; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + return -EINVAL; + break; #else return -EAFNOSUPPORT; From 4e16c61e87a1c3fc79253fc07cf043533b33b661 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 31 Aug 2018 08:38:49 -0300 Subject: [PATCH 250/812] xfrm6: call kfree_skb when skb is toobig [ Upstream commit 215ab0f021c9fea3c18b75e7d522400ee6a49990 ] After commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ("vti6: fix PMTU caching and reporting on xmit"), some too big skbs might be potentially passed down to __xfrm6_output, causing it to fail to transmit but not free the skb, causing a leak of skb, and consequentially a leak of dst references. After running pmtu.sh, that shows as failure to unregister devices in a namespace: [ 311.397671] unregister_netdevice: waiting for veth_b to become free. Usage count = 1 The fix is to call kfree_skb in case of transmit failures. Fixes: dd767856a36e ("xfrm6: Don't call icmpv6_send on local error") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv6/xfrm6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 4d09ce6fa90e..64862c5084ee 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -165,9 +165,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } From 7402bc9ca983a6a4c1545a4048e8b1a859b882ba Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:13 +0300 Subject: [PATCH 251/812] mac80211: Always report TX status [ Upstream commit 8682250b3c1b75a45feb7452bc413d004cfe3778 ] If a frame is dropped for any reason, mac80211 wouldn't report the TX status back to user space. As the user space may rely on the TX_STATUS to kick its state machines, resends etc, it's better to just report this frame as not acked instead. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/status.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 45fb1abdb265..2731cf5bf052 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -466,11 +466,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, if (!skb) return; - if (dropped) { - dev_kfree_skb_any(skb); - return; - } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; @@ -491,6 +486,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } rcu_read_unlock(); + dev_kfree_skb_any(skb); + } else if (dropped) { dev_kfree_skb_any(skb); } else { /* consumes skb */ From db420bc4b798c1a5c91ce87e40011c89c118aef7 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:12 +0300 Subject: [PATCH 252/812] cfg80211: reg: Init wiphy_idx in regulatory_hint_core() [ Upstream commit 24f33e64fcd0d50a4b1a8e5b41bd0257aa66b0e8 ] Core regulatory hints didn't set wiphy_idx to WIPHY_IDX_INVALID. Since the regulatory request is zeroed, wiphy_idx was always implicitly set to 0. This resulted in updating only phy #0. Fix that. Fixes: 806a9e39670b ("cfg80211: make regulatory_request use wiphy_idx instead of wiphy") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho [add fixes tag] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 06d050da0d94..50dffd183cc6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2367,6 +2367,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; + request->wiphy_idx = WIPHY_IDX_INVALID; queue_regulatory_request(request); From f4c24fd3b6919a7fb0fa5e858381299e593daf44 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 13 Sep 2018 16:48:08 +0100 Subject: [PATCH 253/812] ARM: 8799/1: mm: fix pci_ioremap_io() offset check [ Upstream commit 3a58ac65e2d7969bcdf1b6acb70fa4d12a88e53e ] IO_SPACE_LIMIT is the ending address of the PCI IO space, i.e something like 0xfffff (and not 0x100000). Therefore, when offset = 0xf0000 is passed as argument, this function fails even though the offset + SZ_64K fits below the IO_SPACE_LIMIT. This makes the last chunk of 64 KB of the I/O space not usable as it cannot be mapped. This patch fixes that by substracing 1 to offset + SZ_64K, so that we compare the addrss of the last byte of the I/O space against IO_SPACE_LIMIT instead of the address of the first byte of what is after the I/O space. Fixes: c2794437091a4 ("ARM: Add fixed PCI i/o mapping") Signed-off-by: Thomas Petazzoni Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 0c81056c1dd7..2a3feb73de0b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -460,7 +460,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, From 5217bec5a6e7bd3c95d1229fc6029d956a470060 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Wed, 19 Sep 2018 13:54:56 -0600 Subject: [PATCH 254/812] xfrm: validate template mode [ Upstream commit 32bf94fb5c2ec4ec842152d0e5937cd4bb6738fa ] XFRM mode parameters passed as part of the user templates in the IP_XFRM_POLICY are never properly validated. Passing values other than valid XFRM modes can cause stack-out-of-bounds reads to occur later in the XFRM processing: [ 140.535608] ================================================================ [ 140.543058] BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x17e4/0x1cc4 [ 140.550306] Read of size 4 at addr ffffffc0238a7a58 by task repro/5148 [ 140.557369] [ 140.558927] Call trace: [ 140.558936] dump_backtrace+0x0/0x388 [ 140.558940] show_stack+0x24/0x30 [ 140.558946] __dump_stack+0x24/0x2c [ 140.558949] dump_stack+0x8c/0xd0 [ 140.558956] print_address_description+0x74/0x234 [ 140.558960] kasan_report+0x240/0x264 [ 140.558963] __asan_report_load4_noabort+0x2c/0x38 [ 140.558967] xfrm_state_find+0x17e4/0x1cc4 [ 140.558971] xfrm_resolve_and_create_bundle+0x40c/0x1fb8 [ 140.558975] xfrm_lookup+0x238/0x1444 [ 140.558977] xfrm_lookup_route+0x48/0x11c [ 140.558984] ip_route_output_flow+0x88/0xc4 [ 140.558991] raw_sendmsg+0xa74/0x266c [ 140.558996] inet_sendmsg+0x258/0x3b0 [ 140.559002] sock_sendmsg+0xbc/0xec [ 140.559005] SyS_sendto+0x3a8/0x5a8 [ 140.559008] el0_svc_naked+0x34/0x38 [ 140.559009] [ 140.592245] page dumped because: kasan: bad access detected [ 140.597981] page_owner info is not active (free page?) [ 140.603267] [ 140.653503] ================================================================ Signed-off-by: Sean Tranchetti Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c2e98dcba9fe..476f1fc6d655 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1408,6 +1408,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) (ut[i].family != prev_family)) return -EINVAL; + if (ut[i].mode >= XFRM_MODE_MAX) + return -EINVAL; + prev_family = ut[i].family; switch (ut[i].family) { From 6879c047ae6e9b6b43e54e8a34bebb7a52467198 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:51:02 +0200 Subject: [PATCH 255/812] mac80211_hwsim: do not omit multicast announce of first added radio [ Upstream commit 28ef8b49a338dc1844e86b7954cfffc7dfa2660a ] The allocation of hwsim radio identifiers uses a post-increment from 0, so the first radio has idx 0. This idx is explicitly excluded from multicast announcements ever since, but it is unclear why. Drop that idx check and announce the first radio as well. This makes userspace happy if it relies on these events. Signed-off-by: Martin Willi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index c98cb962b454..05413176a5d6 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2547,8 +2547,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, list_add_tail(&data->list, &hwsim_radios); spin_unlock_bh(&hwsim_radio_lock); - if (idx > 0) - hwsim_mcast_new_radio(idx, info, param); + hwsim_mcast_new_radio(idx, info, param); return idx; From b0c52fbff860b5dd23df44fd36f48e6a52ba8bfa Mon Sep 17 00:00:00 2001 From: Matias Karhumaa Date: Wed, 26 Sep 2018 09:13:46 +0300 Subject: [PATCH 256/812] Bluetooth: SMP: fix crash in unpairing [ Upstream commit cb28c306b93b71f2741ce1a5a66289db26715f4d ] In case unpair_device() was called through mgmt interface at the same time when pairing was in progress, Bluetooth kernel module crash was seen. [ 600.351225] general protection fault: 0000 [#1] SMP PTI [ 600.351235] CPU: 1 PID: 11096 Comm: btmgmt Tainted: G OE 4.19.0-rc1+ #1 [ 600.351238] Hardware name: Dell Inc. Latitude E5440/08RCYC, BIOS A18 05/14/2017 [ 600.351272] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351276] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351279] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351282] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351285] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351287] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351290] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351292] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351295] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351298] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351300] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 [ 600.351302] Call Trace: [ 600.351325] smp_failure+0x4f/0x70 [bluetooth] [ 600.351345] smp_cancel_pairing+0x74/0x80 [bluetooth] [ 600.351370] unpair_device+0x1c1/0x330 [bluetooth] [ 600.351399] hci_sock_sendmsg+0x960/0x9f0 [bluetooth] [ 600.351409] ? apparmor_socket_sendmsg+0x1e/0x20 [ 600.351417] sock_sendmsg+0x3e/0x50 [ 600.351422] sock_write_iter+0x85/0xf0 [ 600.351429] do_iter_readv_writev+0x12b/0x1b0 [ 600.351434] do_iter_write+0x87/0x1a0 [ 600.351439] vfs_writev+0x98/0x110 [ 600.351443] ? ep_poll+0x16d/0x3d0 [ 600.351447] ? ep_modify+0x73/0x170 [ 600.351451] do_writev+0x61/0xf0 [ 600.351455] ? do_writev+0x61/0xf0 [ 600.351460] __x64_sys_writev+0x1c/0x20 [ 600.351465] do_syscall_64+0x5a/0x110 [ 600.351471] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 600.351474] RIP: 0033:0x7fb2bdb62fe0 [ 600.351477] Code: 73 01 c3 48 8b 0d b8 6e 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 69 c7 2c 00 00 75 10 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 de 80 01 00 48 89 04 24 [ 600.351479] RSP: 002b:00007ffe062cb8f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 [ 600.351484] RAX: ffffffffffffffda RBX: 000000000255b3d0 RCX: 00007fb2bdb62fe0 [ 600.351487] RDX: 0000000000000001 RSI: 00007ffe062cb920 RDI: 0000000000000004 [ 600.351490] RBP: 00007ffe062cb920 R08: 000000000255bd80 R09: 0000000000000000 [ 600.351494] R10: 0000000000000353 R11: 0000000000000246 R12: 0000000000000001 [ 600.351497] R13: 00007ffe062cbbe0 R14: 0000000000000000 R15: 0000000000000000 [ 600.351501] Modules linked in: algif_hash algif_skcipher af_alg cmac ipt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo iptable_nat nf_nat_ipv4 xt_addrtype iptable_filter ip_tables xt_conntrack x_tables nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c br_netfilter bridge stp llc overlay arc4 nls_iso8859_1 dm_crypt intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp dell_laptop kvm_intel crct10dif_pclmul dell_smm_hwmon crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper intel_cstate intel_rapl_perf uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common videodev media hid_multitouch input_leds joydev serio_raw dell_wmi snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic dell_smbios dcdbas sparse_keymap [ 600.351569] snd_hda_intel btusb snd_hda_codec btrtl btbcm btintel snd_hda_core bluetooth(OE) snd_hwdep snd_pcm iwlmvm ecdh_generic wmi_bmof dell_wmi_descriptor snd_seq_midi mac80211 snd_seq_midi_event lpc_ich iwlwifi snd_rawmidi snd_seq snd_seq_device snd_timer cfg80211 snd soundcore mei_me mei dell_rbtn dell_smo8800 mac_hid parport_pc ppdev lp parport autofs4 hid_generic usbhid hid i915 nouveau kvmgt vfio_mdev mdev vfio_iommu_type1 vfio kvm irqbypass i2c_algo_bit ttm drm_kms_helper syscopyarea sysfillrect sysimgblt mxm_wmi psmouse ahci sdhci_pci cqhci libahci fb_sys_fops sdhci drm e1000e video wmi [ 600.351637] ---[ end trace e49e9f1df09c94fb ]--- [ 600.351664] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351666] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351669] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351672] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351674] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351676] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351679] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351681] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351684] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351689] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 Crash happened because list_del_rcu() was called twice for smp->ltk. This was possible if unpair_device was called right after ltk was generated but before keys were distributed. In this commit smp_cancel_pairing was refactored to cancel pairing if it is in progress and otherwise just removes keys. Once keys are removed from rcu list, pointers to smp context's keys are set to NULL to make sure removed list items are not accessed later. This commit also adjusts the functionality of mgmt unpair_device() little bit. Previously pairing was canceled only if pairing was in state that keys were already generated. With this commit unpair_device() cancels pairing already in earlier states. Bug was found by fuzzing kernel SMP implementation using Synopsys Defensics. Reported-by: Pekka Oikarainen Signed-off-by: Matias Karhumaa Signed-off-by: Johan Hedberg Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 7 ++----- net/bluetooth/smp.c | 29 +++++++++++++++++++++++++---- net/bluetooth/smp.h | 3 ++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b1b0a1c0bd8d..ecc3da6a14a1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3083,9 +3083,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, /* LE address type */ addr_type = le_addr_type(cp->addr.type); - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); - - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); + /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ + err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -3099,8 +3098,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } - /* Abort any ongoing SMP pairing */ - smp_cancel_pairing(conn); /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 0dc27d2e8f18..bedfaef2c59e 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2371,30 +2371,51 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) return ret; } -void smp_cancel_pairing(struct hci_conn *hcon) +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_conn *hcon; + struct l2cap_conn *conn; struct l2cap_chan *chan; struct smp_chan *smp; + int err; + err = hci_remove_ltk(hdev, bdaddr, addr_type); + hci_remove_irk(hdev, bdaddr, addr_type); + + hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type); + if (!hcon) + goto done; + + conn = hcon->l2cap_data; if (!conn) - return; + goto done; chan = conn->smp; if (!chan) - return; + goto done; l2cap_chan_lock(chan); smp = chan->data; if (smp) { + /* Set keys to NULL to make sure smp_failure() does not try to + * remove and free already invalidated rcu list entries. */ + smp->ltk = NULL; + smp->slave_ltk = NULL; + smp->remote_irk = NULL; + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) smp_failure(conn, 0); else smp_failure(conn, SMP_UNSPECIFIED); + err = 0; } l2cap_chan_unlock(chan); + +done: + return err; } static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index ffcc70b6b199..993cbd7bcfe7 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -180,7 +180,8 @@ enum smp_key_pref { }; /* SMP Commands */ -void smp_cancel_pairing(struct hci_conn *hcon); +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); From df2d090bc5df33e92b43b2134d118fc60e2e0c97 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 26 Sep 2018 18:11:22 +0200 Subject: [PATCH 257/812] pxa168fb: prepare the clock [ Upstream commit d85536cde91fcfed6fb8d983783bd2b92c843939 ] Add missing prepare/unprepare operations for fbi->clk, this fixes following kernel warning: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:874 clk_core_enable+0x2c/0x1b0 Enabling unprepared disp0_clk Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.18.0-rc8-00032-g02b43ddd4f21-dirty #25 Hardware name: Marvell MMP2 (Device Tree Support) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__warn+0xd8/0xf0) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (clk_core_enable+0x2c/0x1b0) [] (clk_core_enable) from [] (clk_core_enable_lock+0x18/0x2c) [] (clk_core_enable_lock) from [] (pxa168fb_probe+0x464/0x6ac) [] (pxa168fb_probe) from [] (platform_drv_probe+0x48/0x94) [] (platform_drv_probe) from [] (driver_probe_device+0x328/0x470) [] (driver_probe_device) from [] (__driver_attach+0xb0/0x124) [] (__driver_attach) from [] (bus_for_each_dev+0x64/0xa0) [] (bus_for_each_dev) from [] (bus_add_driver+0x1b8/0x230) [] (bus_add_driver) from [] (driver_register+0xac/0xf0) [] (driver_register) from [] (do_one_initcall+0xb8/0x1f0) [] (do_one_initcall) from [] (kernel_init_freeable+0x294/0x2e0) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x10c) [] (kernel_init) from [] (ret_from_fork+0x14/0x2c) Exception stack(0xd008bfb0 to 0xd008bff8) bfa0: 00000000 00000000 00000000 00000000 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 ---[ end trace c0af40f9e2ed7cb4 ]--- Signed-off-by: Lubomir Rintel [b.zolnierkie: enhance patch description a bit] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/pxa168fb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index efb57c059997..5190b1749e2a 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -712,7 +712,7 @@ static int pxa168fb_probe(struct platform_device *pdev) /* * enable controller clock */ - clk_enable(fbi->clk); + clk_prepare_enable(fbi->clk); pxa168fb_set_par(info); @@ -767,7 +767,7 @@ static int pxa168fb_probe(struct platform_device *pdev) failed_free_cmap: fb_dealloc_cmap(&info->cmap); failed_free_clk: - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); failed_free_fbmem: dma_free_coherent(fbi->dev, info->fix.smem_len, info->screen_base, fbi->fb_start_dma); @@ -807,7 +807,7 @@ static int pxa168fb_remove(struct platform_device *pdev) dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len), info->screen_base, info->fix.smem_start); - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); framebuffer_release(info); From 644d1918531ba9d6efa74e2228df8f0dd7002b53 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:50 -0700 Subject: [PATCH 258/812] asix: Check for supported Wake-on-LAN modes [ Upstream commit c4ce446e33d7a0e978256ac6fea4c80e59d9de5f ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/asix_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 7fbd8f044207..2092ef6431f2 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -449,6 +449,9 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) From f3d71a323f51d033929ed70b53bd4bca27c7da3c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:51 -0700 Subject: [PATCH 259/812] ax88179_178a: Check for supported Wake-on-LAN modes [ Upstream commit 5ba6b4aa9a410c5e2c6417df52b5e2118ea9b467 ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/ax88179_178a.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index e6338c16081a..e3f2e6098db4 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -566,6 +566,9 @@ ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_MODE_RWLC; if (wolinfo->wolopts & WAKE_MAGIC) From 04d846cddb4a941b5dc5b49b677736eb0c9e8431 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:52 -0700 Subject: [PATCH 260/812] lan78xx: Check for supported Wake-on-LAN modes [ Upstream commit eb9ad088f96653a26b340f7c447c44cf023d5cdc ] The driver supports a fair amount of Wake-on-LAN modes, but is not checking that the user specified one that is supported. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Florian Fainelli Reviewed-by: Woojung Huh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 1aede726052c..45a6a7cae4bf 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1051,19 +1051,10 @@ static int lan78xx_set_wol(struct net_device *netdev, if (ret < 0) return ret; - pdata->wol = 0; - if (wol->wolopts & WAKE_UCAST) - pdata->wol |= WAKE_UCAST; - if (wol->wolopts & WAKE_MCAST) - pdata->wol |= WAKE_MCAST; - if (wol->wolopts & WAKE_BCAST) - pdata->wol |= WAKE_BCAST; - if (wol->wolopts & WAKE_MAGIC) - pdata->wol |= WAKE_MAGIC; - if (wol->wolopts & WAKE_PHY) - pdata->wol |= WAKE_PHY; - if (wol->wolopts & WAKE_ARP) - pdata->wol |= WAKE_ARP; + if (wol->wolopts & ~WAKE_ALL) + return -EINVAL; + + pdata->wol = wol->wolopts; device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); From 24665cbd768ad4b923cce0347d9905db504048fd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:53 -0700 Subject: [PATCH 261/812] sr9800: Check for supported Wake-on-LAN modes [ Upstream commit c5cb93e994ffb43b7b3b1ff10b9f928f54574a36 ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 19a38d8e0aa3 ("USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/sr9800.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index a50df0d8fb9a..004c955c1fd1 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -421,6 +421,9 @@ sr_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= SR_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) From 2bb181d8a8ec755e5f977fe8e74fc81e86f7af4d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:54 -0700 Subject: [PATCH 262/812] r8152: Check for supported Wake-on-LAN Modes [ Upstream commit f2750df1548bd8a2b060eb609fc43ca82811af4c ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 21ff2e8976b1 ("r8152: support WOL") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2bb336cb13ee..2d83689374bb 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3663,6 +3663,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (!rtl_can_wakeup(tp)) return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_ANY) + return -EINVAL; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; From 66e43f427d5d3ee003a3a5f38a8673b8c94e2b84 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:55 -0700 Subject: [PATCH 263/812] smsc75xx: Check for Wake-on-LAN modes [ Upstream commit 9c734b2769a73eea2e9e9767c0e0bf839ff23679 ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 6c636503260d ("smsc75xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc75xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 8dbe086e0a96..234febc6e1d9 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -728,6 +728,9 @@ static int smsc75xx_ethtool_set_wol(struct net_device *net, struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); From 863fdd166500af2613aad12552cf4eb503845e47 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:56 -0700 Subject: [PATCH 264/812] smsc95xx: Check for Wake-on-LAN modes [ Upstream commit c530c471ba37bdd9fe1c7185b01455c00ae606fb ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: e0e474a83c18 ("smsc95xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc95xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 66b3ab9f614e..7cee7777d13f 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -727,6 +727,9 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); From 4f0336a8a0084e3bbddf71af87869aa9c612669a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 23 Sep 2018 18:13:43 +0200 Subject: [PATCH 265/812] perf/ring_buffer: Prevent concurent ring buffer access [ Upstream commit cd6fb677ce7e460c25bdd66f689734102ec7d642 ] Some of the scheduling tracepoints allow the perf_tp_event code to write to ring buffer under different cpu than the code is running on. This results in corrupted ring buffer data demonstrated in following perf commands: # perf record -e 'sched:sched_switch,sched:sched_wakeup' perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.383 [sec] [ perf record: Woken up 8 times to write data ] 0x42b890 [0]: failed to process type: -1765585640 [ perf record: Captured and wrote 4.825 MB perf.data (29669 samples) ] # perf report --stdio 0x42b890 [0]: failed to process type: -1765585640 The reason for the corruption are some of the scheduling tracepoints, that have __perf_task dfined and thus allow to store data to another cpu ring buffer: sched_waking sched_wakeup sched_wakeup_new sched_stat_wait sched_stat_sleep sched_stat_iowait sched_stat_blocked The perf_tp_event function first store samples for current cpu related events defined for tracepoint: hlist_for_each_entry_rcu(event, head, hlist_entry) perf_swevent_event(event, count, &data, regs); And then iterates events of the 'task' and store the sample for any task's event that passes tracepoint checks: ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) continue; perf_swevent_event(event, count, &data, regs); } Above code can race with same code running on another cpu, ending up with 2 cpus trying to store under the same ring buffer, which is specifically not allowed. This patch prevents the problem, by allowing only events with the same current cpu to receive the event. NOTE: this requires the use of (per-task-)per-cpu buffers for this feature to work; perf-record does this. Signed-off-by: Jiri Olsa [peterz: small edits to Changelog] Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andrew Vagin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: e6dab5ffab59 ("perf/trace: Add ability to set a target task for events") Link: http://lkml.kernel.org/r/20180923161343.GB15054@krava Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 990ac41d8a5f..330fcd1b1822 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7018,6 +7018,8 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, goto unlock; list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (event->cpu != smp_processor_id()) + continue; if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) From 6596d0ab8c602b6bb9f4a8423ddd7fc3acb2418f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 5 Oct 2018 08:48:27 -0500 Subject: [PATCH 266/812] net: cxgb3_main: fix a missing-check bug [ Upstream commit 2c05d88818ab6571816b93edce4d53703870d7ae ] In cxgb_extension_ioctl(), the command of the ioctl is firstly copied from the user-space buffer 'useraddr' to 'cmd' and checked through the switch statement. If the command is not as expected, an error code EOPNOTSUPP is returned. In the following execution, i.e., the cases of the switch statement, the whole buffer of 'useraddr' is copied again to a specific data structure, according to what kind of command is requested. However, after the second copy, there is no re-check on the newly-copied command. Given that the buffer 'useraddr' is in the user space, a malicious user can race to change the command between the two copies. By doing so, the attacker can supply malicious data to the kernel and cause undefined behavior. This patch adds a re-check in each case of the switch statement if there is a second copy in that case, to re-check whether the command obtained in the second copy is the same as the one in the first copy. If not, an error code EINVAL is returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 7ae8374bff13..3dd4c39640dc 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2147,6 +2147,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_QSET_PARAMS) + return -EINVAL; if (t.qset_idx >= SGE_QSETS) return -EINVAL; if (!in_range(t.intr_lat, 0, M_NEWTIMER) || @@ -2246,6 +2248,9 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_QSET_PARAMS) + return -EINVAL; + /* Display qsets for all ports when offload enabled */ if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) { q1 = 0; @@ -2291,6 +2296,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; + if (edata.cmd != CHELSIO_SET_QSET_NUM) + return -EINVAL; if (edata.val < 1 || (edata.val > 1 && !(adapter->flags & USING_MSIX))) return -EINVAL; @@ -2331,6 +2338,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_LOAD_FW) + return -EINVAL; /* Check t.len sanity ? */ fw_data = memdup_user(useraddr + sizeof(t), t.len); if (IS_ERR(fw_data)) @@ -2354,6 +2363,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SETMTUTAB) + return -EINVAL; if (m.nmtus != NMTUS) return -EINVAL; if (m.mtus[0] < 81) /* accommodate SACK */ @@ -2395,6 +2406,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SET_PM) + return -EINVAL; if (!is_power_of_2(m.rx_pg_sz) || !is_power_of_2(m.tx_pg_sz)) return -EINVAL; /* not power of 2 */ @@ -2428,6 +2441,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_MEM) + return -EINVAL; if ((t.addr & 7) || (t.len & 7)) return -EINVAL; if (t.mem_id == MEM_CM) @@ -2480,6 +2495,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EAGAIN; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_TRACE_FILTER) + return -EINVAL; tp = (const struct trace_params *)&t.sip; if (t.config_tx) From 9aae17f851e2828de33fa4330cf519397c9aa2bf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Jun 2017 14:48:03 +0100 Subject: [PATCH 267/812] KEYS: put keyring if install_session_keyring_to_cred() fails [ Upstream commit d636bd9f12a66ea3775c9fabbf3f8e118253467a ] In join_session_keyring(), if install_session_keyring_to_cred() were to fail, we would leak the keyring reference, just like in the bug fixed by commit 23567fd052a9 ("KEYS: Fix keyring ref leak in join_session_keyring()"). Fortunately this cannot happen currently, but we really should be more careful. Do this by adding and using a new error label at which the keyring reference is dropped. Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Sasha Levin --- security/keys/process_keys.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index ac1d5b2b1626..a7095372701e 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -808,15 +808,14 @@ long join_session_keyring(const char *name) ret = PTR_ERR(keyring); goto error2; } else if (keyring == new->session_keyring) { - key_put(keyring); ret = 0; - goto error2; + goto error3; } /* we've got a keyring - now to install it */ ret = install_session_keyring_to_cred(new, keyring); if (ret < 0) - goto error2; + goto error3; commit_creds(new); mutex_unlock(&key_session_mutex); @@ -826,6 +825,8 @@ long join_session_keyring(const char *name) okay: return ret; +error3: + key_put(keyring); error2: mutex_unlock(&key_session_mutex); error: From 997a4944ce4fdfc5a0ce3adee367ca5daff48244 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Aug 2016 07:48:21 +0200 Subject: [PATCH 268/812] ipv6: suppress sparse warnings in IP6_ECN_set_ce() [ Upstream commit c15c0ab12fd62f2b19181d05c62d24bc9fa55a42 ] Pass the correct type __wsum to csum_sub() and csum_add(). This doesn't really change anything since __wsum really *is* __be32, but removes the address space warnings from sparse. Cc: Eric Dumazet Fixes: 34ae6a1aa054 ("ipv6: update skb->csum when CE mark is propagated") Signed-off-by: Johannes Berg Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/inet_ecn.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 0dc0a51da38f..dce2d586d9ce 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -128,7 +128,8 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(csum_sub(skb->csum, from), to); + skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), + (__force __wsum)to); return 1; } From ca4a744b1828d8f9c3b62ba84305b69361b515e6 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Tue, 2 Feb 2016 18:17:54 +0100 Subject: [PATCH 269/812] net: drop write-only stack variable [ Upstream commit 3575dbf2cbbc8e598f17ec441aed526dbea0e1bd ] Remove a write-only stack variable from unix_attach_fds(). This is a left-over from the security fix in: commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 Author: willy tarreau Date: Sun Jan 10 07:54:56 2016 +0100 unix: properly account for FDs passed over unix sockets Signed-off-by: David Herrmann Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e05ec54ac53f..c6b1eec94911 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1531,7 +1531,6 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; unsigned char max_level = 0; - int unix_sock_count = 0; if (too_many_unix_fds(current)) return -ETOOMANYREFS; @@ -1539,11 +1538,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); - if (sk) { - unix_sock_count++; + if (sk) max_level = max(max_level, unix_sk(sk)->recursion_level); - } } if (unlikely(max_level > MAX_RECURSION_LEVEL)) return -ETOOMANYREFS; From 6b3d1619c1c8968bf890589af3a2ceabb672cf84 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 18 Feb 2016 21:29:08 +0100 Subject: [PATCH 270/812] ser_gigaset: use container_of() instead of detour [ Upstream commit 8d2c3ab4445640957d136caa3629857d63544a2a ] The purpose of gigaset_device_release() is to kfree() the struct ser_cardstate that contains our struct device. This is done via a bit of a detour. First we make our struct device's driver_data point to the container of our struct ser_cardstate (which is a struct cardstate). In gigaset_device_release() we then retrieve that driver_data again. And after that we finally kfree() the struct ser_cardstate that was saved in the struct cardstate. All of this can be achieved much easier by using container_of() to get from our struct device to its container, struct ser_cardstate. Do so. Note that at the time the detour was implemented commit b8b2c7d845d5 ("base/platform: assert that dev_pm_domain callbacks are called unconditionally") had just entered the tree. That commit disconnected our platform_device and our platform_driver. These were reconnected again in v4.5-rc2 through commit 25cad69f21f5 ("base/platform: Fix platform drivers with no probe callback"). And one of the consequences of that fix was that it broke the detour via driver_data. That's because it made __device_release_driver() stop being a NOP for our struct device and actually do stuff again. One of the things it now does, is setting our driver_data to NULL. That, in turn, makes it impossible for gigaset_device_release() to get to our struct cardstate. Which has the net effect of leaking a struct ser_cardstate at every call of this driver's tty close() operation. So using container_of() has the additional benefit of actually working. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Paul Bolle Acked-by: Tilman Schmidt Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/gigaset/ser-gigaset.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 74bf1a17ae7c..b90776ef56ec 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -373,13 +373,7 @@ static void gigaset_freecshw(struct cardstate *cs) static void gigaset_device_release(struct device *dev) { - struct cardstate *cs = dev_get_drvdata(dev); - - if (!cs) - return; - dev_set_drvdata(dev, NULL); - kfree(cs->hw.ser); - cs->hw.ser = NULL; + kfree(container_of(dev, struct ser_cardstate, dev.dev)); } /* @@ -408,7 +402,6 @@ static int gigaset_initcshw(struct cardstate *cs) cs->hw.ser = NULL; return rc; } - dev_set_drvdata(&cs->hw.ser->dev.dev, cs); tasklet_init(&cs->write_tasklet, gigaset_modem_fill, (unsigned long) cs); From 70b3d6c5aa7172eb2c86128ca19f038ae61a7fb7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 23 Jun 2016 14:03:47 -0400 Subject: [PATCH 271/812] tracing: Skip more functions when doing stack tracing of events [ Upstream commit be54f69c26193de31053190761e521903b89d098 ] # echo 1 > options/stacktrace # echo 1 > events/sched/sched_switch/enable # cat trace -0 [002] d..2 1982.525169: => save_stack_trace => __ftrace_trace_stack => trace_buffer_unlock_commit_regs => event_trigger_unlock_commit => trace_event_buffer_commit => trace_event_raw_event_sched_switch => __schedule => schedule => schedule_preempt_disabled => cpu_startup_entry => start_secondary The above shows that we are seeing 6 functions before ever making it to the caller of the sched_switch event. # echo stacktrace > events/sched/sched_switch/trigger # cat trace -0 [002] d..3 2146.335208: => trace_event_buffer_commit => trace_event_raw_event_sched_switch => __schedule => schedule => schedule_preempt_disabled => cpu_startup_entry => start_secondary The stacktrace trigger isn't as bad, because it adds its own skip to the stacktracing, but still has two events extra. One issue is that if the stacktrace passes its own "regs" then there should be no addition to the skip, as the regs will not include the functions being called. This was an issue that was fixed by commit 7717c6be6999 ("tracing: Fix stacktrace skip depth in trace_buffer_unlock_commit_regs()" as adding the skip number for kprobes made the probes not have any stack at all. But since this is only an issue when regs is being used, a skip should be added if regs is NULL. Now we have: # echo 1 > options/stacktrace # echo 1 > events/sched/sched_switch/enable # cat trace -0 [000] d..2 1297.676333: => __schedule => schedule => schedule_preempt_disabled => cpu_startup_entry => rest_init => start_kernel => x86_64_start_reservations => x86_64_start_kernel # echo stacktrace > events/sched/sched_switch/trigger # cat trace -0 [002] d..3 1370.759745: => __schedule => schedule => schedule_preempt_disabled => cpu_startup_entry => start_secondary And kprobes are not touched. Reported-by: Peter Zijlstra Signed-off-by: Steven Rostedt Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e409ddce8754..1a47a64d623f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1757,7 +1757,17 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, flags, 0, pc, regs); + /* + * If regs is not set, then skip the following callers: + * trace_buffer_unlock_commit_regs + * event_trigger_unlock_commit + * trace_event_buffer_commit + * trace_event_raw_event_sched_switch + * Note, we can still get here via blktrace, wakeup tracer + * and mmiotrace, but that's ok if they lose a function or + * two. They are that meaningful. + */ + ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); @@ -1815,6 +1825,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, trace.nr_entries = 0; trace.skip = skip; + /* + * Add two, for this function and the call to save_stack_trace() + * If regs is set, then these functions will not be in the way. + */ + if (!regs) + trace.skip += 2; + /* * Since events can happen in NMIs there's no safe way to * use the per cpu ftrace_stacks. We reserve it and if an interrupt From 011859fdb4764ccf46e6e36dbdfeb3442a17a71e Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 1 Apr 2016 08:52:58 +0100 Subject: [PATCH 272/812] ARM: dts: apq8064: add ahci ports-implemented mask [ Upstream commit bb4add2ce991e4ec891b5a0287fd1ab77b631979 ] This patch adds new ports-implemented mask, which is required to get achi working on the mainline. Without this patch value read from PORTS_IMPL register which is zero would not enable any ports for software to use. Fixes: 566d1827df2e ("libata: disable forced PORTS_IMPL for >= AHCI 1.3") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Srinivas Kandagatla Reviewed-by: Andy Gross Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-apq8064.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index e00d50ef678f..3ff5ea16ebb3 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -577,7 +577,7 @@ sata_phy0: phy@1b400000 { }; sata0: sata@29000000 { - compatible = "generic-ahci"; + compatible = "qcom,apq8064-ahci", "generic-ahci"; status = "disabled"; reg = <0x29000000 0x180>; interrupts = ; @@ -599,6 +599,7 @@ sata0: sata@29000000 { phys = <&sata_phy0>; phy-names = "sata-phy"; + ports-implemented = <0x1>; }; /* Temporary fixed regulator */ From 9903f3abf2a3e088d485551726814e90a5c37f68 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 20 Sep 2016 14:26:21 +0100 Subject: [PATCH 273/812] x86/mm/pat: Prevent hang during boot when mapping pages [ Upstream commit e535ec0899d1fe52ec3a84c9bc03457ac67ad6f7 ] There's a mixture of signed 32-bit and unsigned 32-bit and 64-bit data types used for keeping track of how many pages have been mapped. This leads to hangs during boot when mapping large numbers of pages (multiple terabytes, as reported by Waiman) because those values are interpreted as being negative. commit 742563777e8d ("x86/mm/pat: Avoid truncation when converting cpa->numpages to address") fixed one of those bugs, but there is another lurking in __change_page_attr_set_clr(). Additionally, the return value type for the populate_*() functions can return negative values when a large number of pages have been mapped, triggering the error paths even though no error occurred. Consistently use 64-bit types on 64-bit platforms when counting pages. Even in the signed case this gives us room for regions 8PiB (pebibytes) in size whilst still allowing the usual negative value error checking idiom. Reported-by: Waiman Long Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds CC: Theodore Ts'o Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Scott J Norton Cc: Douglas Hatch Signed-off-by: Matt Fleming Signed-off-by: Sasha Levin --- arch/x86/mm/pageattr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 0e1dd7d47f05..26598e08666c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -955,11 +955,11 @@ static void populate_pte(struct cpa_data *cpa, } } -static int populate_pmd(struct cpa_data *cpa, - unsigned long start, unsigned long end, - unsigned num_pages, pud_t *pud, pgprot_t pgprot) +static long populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) { - unsigned int cur_pages = 0; + long cur_pages = 0; pmd_t *pmd; pgprot_t pmd_pgprot; @@ -1029,12 +1029,12 @@ static int populate_pmd(struct cpa_data *cpa, return num_pages; } -static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, - pgprot_t pgprot) +static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + pgprot_t pgprot) { pud_t *pud; unsigned long end; - int cur_pages = 0; + long cur_pages = 0; pgprot_t pud_pgprot; end = start + (cpa->numpages << PAGE_SHIFT); @@ -1090,7 +1090,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* Map trailing leftover */ if (start < end) { - int tmp; + long tmp; pud = pud_offset(pgd, start); if (pud_none(*pud)) @@ -1116,7 +1116,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pgprot_t pgprot = __pgprot(_KERNPG_TABLE); pud_t *pud = NULL; /* shut up gcc */ pgd_t *pgd_entry; - int ret; + long ret; pgd_entry = cpa->pgd + pgd_index(addr); @@ -1351,7 +1351,8 @@ static int cpa_process_alias(struct cpa_data *cpa) static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) { - int ret, numpages = cpa->numpages; + unsigned long numpages = cpa->numpages; + int ret; while (numpages) { /* From 11eea056980680aa4a24870165da340f4b32d06d Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 20 Jul 2016 15:45:00 -0700 Subject: [PATCH 274/812] radix-tree: fix radix_tree_iter_retry() for tagged iterators. [ Upstream commit 3cb9185c67304b2a7ea9be73e7d13df6fb2793a1 ] radix_tree_iter_retry() resets slot to NULL, but it doesn't reset tags. Then NULL slot and non-zero iter.tags passed to radix_tree_next_slot() leading to crash: RIP: radix_tree_next_slot include/linux/radix-tree.h:473 find_get_pages_tag+0x334/0x930 mm/filemap.c:1452 .... Call Trace: pagevec_lookup_tag+0x3a/0x80 mm/swap.c:960 mpage_prepare_extent_to_map+0x321/0xa90 fs/ext4/inode.c:2516 ext4_writepages+0x10be/0x2b20 fs/ext4/inode.c:2736 do_writepages+0x97/0x100 mm/page-writeback.c:2364 __filemap_fdatawrite_range+0x248/0x2e0 mm/filemap.c:300 filemap_write_and_wait_range+0x121/0x1b0 mm/filemap.c:490 ext4_sync_file+0x34d/0xdb0 fs/ext4/fsync.c:115 vfs_fsync_range+0x10a/0x250 fs/sync.c:195 vfs_fsync fs/sync.c:209 do_fsync+0x42/0x70 fs/sync.c:219 SYSC_fdatasync fs/sync.c:232 SyS_fdatasync+0x19/0x20 fs/sync.c:230 entry_SYSCALL_64_fastpath+0x23/0xc1 arch/x86/entry/entry_64.S:207 We must reset iterator's tags to bail out from radix_tree_next_slot() and go to the slow-path in radix_tree_next_chunk(). Fixes: 46437f9a554f ("radix-tree: fix race in gang lookup") Link: http://lkml.kernel.org/r/1468495196-10604-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Dmitry Vyukov Acked-by: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: Hugh Dickins Cc: Ross Zwisler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/radix-tree.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 5d5174b59802..673dee29a9b9 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -382,6 +382,7 @@ static inline __must_check void **radix_tree_iter_retry(struct radix_tree_iter *iter) { iter->next_index = iter->index; + iter->tags = 0; return NULL; } From 80176161f4514af9064f75a52a5dece6084128b5 Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Fri, 23 Jun 2017 19:32:28 +0200 Subject: [PATCH 275/812] af_iucv: Move sockaddr length checks to before accessing sa_family in bind and connect handlers [ Upstream commit e3c42b61ff813921ba58cfc0019e3fd63f651190 ] Verify that the caller-provided sockaddr structure is large enough to contain the sa_family field, before accessing it in bind() and connect() handlers of the AF_IUCV socket. Since neither syscall enforces a minimum size of the corresponding memory region, very short sockaddrs (zero or one byte long) result in operating on uninitialized memory while referencing .sa_family. Fixes: 52a82e23b9f2 ("af_iucv: Validate socket address length in iucv_sock_bind()") Signed-off-by: Mateusz Jurczyk [jwi: removed unneeded null-check for addr] Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/iucv/af_iucv.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index aeffb65181f5..5984cc35d508 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -705,10 +705,8 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, char uid[9]; /* Verify the input sockaddr */ - if (!addr || addr->sa_family != AF_IUCV) - return -EINVAL; - - if (addr_len < sizeof(struct sockaddr_iucv)) + if (addr_len < sizeof(struct sockaddr_iucv) || + addr->sa_family != AF_IUCV) return -EINVAL; lock_sock(sk); @@ -852,7 +850,7 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, struct iucv_sock *iucv = iucv_sk(sk); int err; - if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) + if (alen < sizeof(struct sockaddr_iucv) || addr->sa_family != AF_IUCV) return -EINVAL; if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) From 61918dbc7efef25294f32431869720d99772b4ca Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 27 Oct 2016 16:27:16 +0300 Subject: [PATCH 276/812] net/mlx4_en: Resolve dividing by zero in 32-bit system [ Upstream commit 4850cf4581578216468b7b3c3d06cc5abb0a697d ] When doing roundup_pow_of_two for large enough number with bit 31, an overflow will occur and a value equal to 1 will be returned. In this case 1 will be subtracted from the return value and division by zero will be reached. Fixes: 31c128b66e5b ("net/mlx4_en: Choose time-stamping shift value according to HW frequency") Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 4dccf7287f0f..52e4ed2f639d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -251,8 +251,11 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 tmp_rounded = + roundup_pow_of_two(max_val_cycles) > max_val_cycles ? + roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + max_val_cycles : tmp_rounded; /* calculate max possible multiplier in order to fit in 64bit */ u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); From 274337f8da92f188edb798e0fcdccb80618bcf79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:45:06 -0800 Subject: [PATCH 277/812] ipv6: orphan skbs in reassembly unit [ Upstream commit 48cac18ecf1de82f76259a54402c3adb7839ad01 ] Andrey reported a use-after-free in IPv6 stack. Issue here is that we free the socket while it still has skb in TX path and in some queues. It happens here because IPv6 reassembly unit messes skb->truesize, breaking skb_set_owner_w() badly. We fixed a similar issue for IPV4 in commit 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") Acked-by: Joe Stringer ================================================================== BUG: KASAN: use-after-free in sock_wfree+0x118/0x120 Read of size 8 at addr ffff880062da0060 by task a.out/4140 page:ffffea00018b6800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013 raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000 page dumped because: kasan: bad access detected CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 dump_stack+0x292/0x398 lib/dump_stack.c:51 describe_address mm/kasan/report.c:262 kasan_report_error+0x121/0x560 mm/kasan/report.c:370 kasan_report mm/kasan/report.c:392 __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413 sock_flag ./arch/x86/include/asm/bitops.h:324 sock_wfree+0x118/0x120 net/core/sock.c:1631 skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put ./include/net/inet_frag.h:133 nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn ./include/linux/netfilter.h:102 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook ./include/linux/netfilter.h:212 __ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x620 net/socket.c:848 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x187/0x530 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 RIP: 0033:0x7ff26e6f5b79 RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79 RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003 RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003 The buggy address belongs to the object at ffff880062da0000 which belongs to the cache RAWv6 of size 1504 The buggy address ffff880062da0060 is located 96 bytes inside of 1504-byte region [ffff880062da0000, ffff880062da05e0) Freed by task 4113: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1352 slab_free_freelist_hook mm/slub.c:1374 slab_free mm/slub.c:2951 kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973 sk_prot_free net/core/sock.c:1377 __sk_destruct+0x49c/0x6e0 net/core/sock.c:1452 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sk_free+0x23/0x30 net/core/sock.c:1479 sock_put ./include/net/sock.h:1638 sk_common_release+0x31e/0x4e0 net/core/sock.c:2782 rawv6_close+0x54/0x80 net/ipv6/raw.c:1214 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431 sock_release+0x8d/0x1e0 net/socket.c:599 sock_close+0x16/0x20 net/socket.c:1063 __fput+0x332/0x7f0 fs/file_table.c:208 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x19b/0x270 kernel/task_work.c:116 exit_task_work ./include/linux/task_work.h:21 do_exit+0x186b/0x2800 kernel/exit.c:839 do_group_exit+0x149/0x420 kernel/exit.c:943 SYSC_exit_group kernel/exit.c:954 SyS_exit_group+0x1d/0x20 kernel/exit.c:952 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:432 slab_alloc_node mm/slub.c:2708 slab_alloc mm/slub.c:2716 kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721 sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334 sk_alloc+0x105/0x1010 net/core/sock.c:1396 inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183 __sock_create+0x4f6/0x880 net/socket.c:1199 sock_create net/socket.c:1239 SYSC_socket net/socket.c:1269 SyS_socket+0xf9/0x230 net/socket.c:1249 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Memory state around the buggy address: ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 838b65a59a73..5a9ae56e7868 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -601,6 +601,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { From 53025e7f5667cc7b5915ead628867fd1db983a76 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 23 May 2017 17:32:31 -0700 Subject: [PATCH 278/812] um: Avoid longjmp/setjmp symbol clashes with libpthread.a [ Upstream commit f44f1e7da7c8e3f4575d5d61c4df978496903fcc ] Building a statically linked UML kernel on a Centos 6.9 host resulted in the following linking failure (GCC 4.4, glibc-2.12): /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../lib64/libpthread.a(libpthread.o): In function `siglongjmp': (.text+0x8490): multiple definition of `longjmp' arch/x86/um/built-in.o:/local/users/fainelli/openwrt/trunk/build_dir/target-x86_64_musl/linux-uml/linux-4.4.69/arch/x86/um/setjmp_64.S:44: first defined here /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../lib64/libpthread.a(libpthread.o): In function `sem_open': (.text+0x77cd): warning: the use of `mktemp' is dangerous, better use `mkstemp' collect2: ld returned 1 exit status make[4]: *** [vmlinux] Error 1 Adopt a solution similar to the one done for vmap where we define longjmp/setjmp to be kernel_longjmp/setjmp. In the process, make sure we do rename the functions in arch/x86/um/setjmp_*.S accordingly. Fixes: a7df4716d195 ("um: link with -lpthread") Signed-off-by: Florian Fainelli Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/Makefile | 4 ++++ arch/x86/um/setjmp_32.S | 16 ++++++++-------- arch/x86/um/setjmp_64.S | 16 ++++++++-------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 9ccf462131c4..d9cd7ed27834 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -59,10 +59,14 @@ KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um # Same things for in6addr_loopback and mktime - found in libc. For these two we # only get link-time error, luckily. # +# -Dlongjmp=kernel_longjmp prevents anything from referencing the libpthread.a +# embedded copy of longjmp, same thing for setjmp. +# # These apply to USER_CFLAGS to. KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ + -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S index b766792c9933..39053192918d 100644 --- a/arch/x86/um/setjmp_32.S +++ b/arch/x86/um/setjmp_32.S @@ -16,9 +16,9 @@ .text .align 4 - .globl setjmp - .type setjmp, @function -setjmp: + .globl kernel_setjmp + .type kernel_setjmp, @function +kernel_setjmp: #ifdef _REGPARM movl %eax,%edx #else @@ -35,13 +35,13 @@ setjmp: movl %ecx,20(%edx) # Return address ret - .size setjmp,.-setjmp + .size kernel_setjmp,.-kernel_setjmp .text .align 4 - .globl longjmp - .type longjmp, @function -longjmp: + .globl kernel_longjmp + .type kernel_longjmp, @function +kernel_longjmp: #ifdef _REGPARM xchgl %eax,%edx #else @@ -55,4 +55,4 @@ longjmp: movl 16(%edx),%edi jmp *20(%edx) - .size longjmp,.-longjmp + .size kernel_longjmp,.-kernel_longjmp diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S index 45f547b4043e..c56942e1a38c 100644 --- a/arch/x86/um/setjmp_64.S +++ b/arch/x86/um/setjmp_64.S @@ -18,9 +18,9 @@ .text .align 4 - .globl setjmp - .type setjmp, @function -setjmp: + .globl kernel_setjmp + .type kernel_setjmp, @function +kernel_setjmp: pop %rsi # Return address, and adjust the stack xorl %eax,%eax # Return value movq %rbx,(%rdi) @@ -34,13 +34,13 @@ setjmp: movq %rsi,56(%rdi) # Return address ret - .size setjmp,.-setjmp + .size kernel_setjmp,.-kernel_setjmp .text .align 4 - .globl longjmp - .type longjmp, @function -longjmp: + .globl kernel_longjmp + .type kernel_longjmp, @function +kernel_longjmp: movl %esi,%eax # Return value (int) movq (%rdi),%rbx movq 8(%rdi),%rsp @@ -51,4 +51,4 @@ longjmp: movq 48(%rdi),%r15 jmp *56(%rdi) - .size longjmp,.-longjmp + .size kernel_longjmp,.-kernel_longjmp From 137b1ce318f0bd9847f68520cfd68c24e79ea258 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Jan 2016 22:24:16 +0100 Subject: [PATCH 279/812] sched/cgroup: Fix cgroup entity load tracking tear-down [ Upstream commit 6fe1f348b3dd1f700f9630562b7d38afd6949568 ] When a cgroup's CPU runqueue is destroyed, it should remove its remaining load accounting from its parent cgroup. The current site for doing so it unsuited because its far too late and unordered against other cgroup removal (->css_free() will be, but we're also in an RCU callback). Put it in the ->css_offline() callback, which is the start of cgroup destruction, right after the group has been made unavailable to userspace. The ->css_offline() callbacks are called in hierarchical order after the following v4.4 commit: aa226ff4a1ce ("cgroup: make sure a parent css isn't offlined before its children") Signed-off-by: Peter Zijlstra (Intel) Cc: Christian Borntraeger Cc: Johannes Weiner Cc: Li Zefan Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160121212416.GL6357@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/core.c | 4 +--- kernel/sched/fair.c | 37 +++++++++++++++++++++---------------- kernel/sched/sched.h | 2 +- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 65ed3501c2ca..4743e1f2a3d1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7817,11 +7817,9 @@ void sched_destroy_group(struct task_group *tg) void sched_offline_group(struct task_group *tg) { unsigned long flags; - int i; /* end participation in shares distribution */ - for_each_possible_cpu(i) - unregister_fair_sched_group(tg, i); + unregister_fair_sched_group(tg); spin_lock_irqsave(&task_group_lock, flags); list_del_rcu(&tg->list); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b136fb4422c..a0c5bb93a3ab 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8154,11 +8154,8 @@ void free_fair_sched_group(struct task_group *tg) for_each_possible_cpu(i) { if (tg->cfs_rq) kfree(tg->cfs_rq[i]); - if (tg->se) { - if (tg->se[i]) - remove_entity_load_avg(tg->se[i]); + if (tg->se) kfree(tg->se[i]); - } } kfree(tg->cfs_rq); @@ -8206,21 +8203,29 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) return 0; } -void unregister_fair_sched_group(struct task_group *tg, int cpu) +void unregister_fair_sched_group(struct task_group *tg) { - struct rq *rq = cpu_rq(cpu); unsigned long flags; + struct rq *rq; + int cpu; - /* - * Only empty task groups can be destroyed; so we can speculatively - * check on_list without danger of it being re-added. - */ - if (!tg->cfs_rq[cpu]->on_list) - return; + for_each_possible_cpu(cpu) { + if (tg->se[cpu]) + remove_entity_load_avg(tg->se[cpu]); - raw_spin_lock_irqsave(&rq->lock, flags); - list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); - raw_spin_unlock_irqrestore(&rq->lock, flags); + /* + * Only empty task groups can be destroyed; so we can speculatively + * check on_list without danger of it being re-added. + */ + if (!tg->cfs_rq[cpu]->on_list) + continue; + + rq = cpu_rq(cpu); + + raw_spin_lock_irqsave(&rq->lock, flags); + list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); + raw_spin_unlock_irqrestore(&rq->lock, flags); + } } void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, @@ -8302,7 +8307,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) return 1; } -void unregister_fair_sched_group(struct task_group *tg, int cpu) { } +void unregister_fair_sched_group(struct task_group *tg) { } #endif /* CONFIG_FAIR_GROUP_SCHED */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0c9ebd82a684..af8d8c3eb8ab 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -308,7 +308,7 @@ extern int tg_nop(struct task_group *tg, void *data); extern void free_fair_sched_group(struct task_group *tg); extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent); -extern void unregister_fair_sched_group(struct task_group *tg, int cpu); +extern void unregister_fair_sched_group(struct task_group *tg); extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, struct sched_entity *se, int cpu, struct sched_entity *parent); From c1504091b9805d131abb36a1f01af7a5c99ac9fc Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 17 Aug 2016 21:58:33 -0400 Subject: [PATCH 280/812] btrfs: don't create or leak aliased root while cleaning up orphans [ Upstream commit 35bbb97fc898aeb874cb7c8b746f091caa359994 ] commit 909c3a22da3 (Btrfs: fix loading of orphan roots leading to BUG_ON) avoids the BUG_ON but can add an aliased root to the dead_roots list or leak the root. Since we've already been loading roots into the radix tree, we should use it before looking the root up on disk. Cc: # 4.5 Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/root-tree.c | 27 ++++++++++++++++++--------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ae6e3a30e61e..8dbb00fbb00b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1608,8 +1608,8 @@ int btrfs_init_fs_root(struct btrfs_root *root) return ret; } -static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index adeb31830b9c..3c9819403487 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, struct btrfs_key *location); int btrfs_init_fs_root(struct btrfs_root *root); +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 2c849b08a91b..6a6efb26d52f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) root_key.objectid = key.offset; key.offset++; + /* + * The root might have been inserted already, as before we look + * for orphan roots, log replay might have happened, which + * triggers a transaction commit and qgroup accounting, which + * in turn reads and inserts fs roots while doing backref + * walking. + */ + root = btrfs_lookup_fs_root(tree_root->fs_info, + root_key.objectid); + if (root) { + WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, + &root->state)); + if (btrfs_root_refs(&root->root_item) == 0) + btrfs_add_dead_root(root); + continue; + } + root = btrfs_read_fs_root(tree_root, &root_key); err = PTR_ERR_OR_ZERO(root); if (err && err != -ENOENT) { @@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); err = btrfs_insert_fs_root(root->fs_info, root); - /* - * The root might have been inserted already, as before we look - * for orphan roots, log replay might have happened, which - * triggers a transaction commit and qgroup accounting, which - * in turn reads and inserts fs roots while doing backref - * walking. - */ - if (err == -EEXIST) - err = 0; if (err) { + BUG_ON(err == -EEXIST); btrfs_free_fs_root(root); break; } From 90f9ed93549e68cd5e349d7c3a8ce0983728f1fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2016 17:44:11 +0100 Subject: [PATCH 281/812] thermal: allow spear-thermal driver to be a module [ Upstream commit 4d2f1794c07aae55b8f25f4d8aebcafc0d3e501d ] When the thermal subsystem is a loadable module, the spear driver fails to build: drivers/thermal/built-in.o: In function `spear_thermal_exit': spear_thermal.c:(.text+0xf8): undefined reference to `thermal_zone_device_unregister' drivers/thermal/built-in.o: In function `spear_thermal_probe': spear_thermal.c:(.text+0x230): undefined reference to `thermal_zone_device_register' This changes the symbol to a tristate, so Kconfig can track the dependency correctly. Signed-off-by: Arnd Bergmann Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/thermal/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 4b660b5beb98..b556fa8250fc 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -195,7 +195,7 @@ config IMX_THERMAL passive trip is crossed. config SPEAR_THERMAL - bool "SPEAr thermal sensor driver" + tristate "SPEAr thermal sensor driver" depends on PLAT_SPEAR || COMPILE_TEST depends on OF help From c8a5f83fa43e8464f7bef12a8dc22f4e778bd78b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2016 17:44:12 +0100 Subject: [PATCH 282/812] thermal: allow u8500-thermal driver to be a module [ Upstream commit 26716ce124fce88f288f07738ef685d5dfe5c13f ] When the thermal subsystem is a loadable module, the u8500 driver fails to build: drivers/thermal/built-in.o: In function `db8500_thermal_probe': db8500_thermal.c:(.text+0x96c): undefined reference to `thermal_zone_device_register' drivers/thermal/built-in.o: In function `db8500_thermal_work': db8500_thermal.c:(.text+0xab4): undefined reference to `thermal_zone_device_update' This changes the symbol to a tristate, so Kconfig can track the dependency correctly. Signed-off-by: Arnd Bergmann Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/mfd/db8500-prcmu.c | 3 +++ drivers/thermal/Kconfig | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index e6e4bacb09ee..12099b09a9a7 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -2048,6 +2048,7 @@ int db8500_prcmu_config_hotmon(u8 low, u8 high) return 0; } +EXPORT_SYMBOL_GPL(db8500_prcmu_config_hotmon); static int config_hot_period(u16 val) { @@ -2074,11 +2075,13 @@ int db8500_prcmu_start_temp_sense(u16 cycles32k) return config_hot_period(cycles32k); } +EXPORT_SYMBOL_GPL(db8500_prcmu_start_temp_sense); int db8500_prcmu_stop_temp_sense(void) { return config_hot_period(0xFFFF); } +EXPORT_SYMBOL_GPL(db8500_prcmu_stop_temp_sense); static int prcmu_a9wdog(u8 cmd, u8 d0, u8 d1, u8 d2, u8 d3) { diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index b556fa8250fc..1def65d2f0b5 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -237,8 +237,8 @@ config DOVE_THERMAL framework. config DB8500_THERMAL - bool "DB8500 thermal management" - depends on ARCH_U8500 + tristate "DB8500 thermal management" + depends on MFD_DB8500_PRCMU default y help Adds DB8500 thermal management implementation according to the thermal From c447410b6dfacf132865e79af7bc26fa977f24e5 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sat, 13 Feb 2016 11:58:16 +0200 Subject: [PATCH 283/812] tpm: fix: return rc when devm_add_action() fails [ Upstream commit 4f3b193dee4423d8c89c9a3e8e05f9197ea459a4 ] Call put_device() and return error code if devm_add_action() fails. Signed-off-by: Jarkko Sakkinen Reported-by: Jason Gunthorpe Fixes: 8e0ee3c9faed ("tpm: fix the cleanup of struct tpm_chip") Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm-chip.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index e759100e41a7..28894878dcd5 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -230,7 +230,11 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, chip->cdev.owner = dev->driver->owner; chip->cdev.kobj.parent = &chip->dev.kobj; - devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev); + rc = devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev); + if (rc) { + put_device(&chip->dev); + return ERR_PTR(rc); + } return chip; } From 92fe37c0f09c48ba42300d7ac79c0750e48d7a3f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 11 May 2016 12:27:15 -0400 Subject: [PATCH 284/812] x86/PCI: Mark Broadwell-EP Home Agent 1 as having non-compliant BARs [ Upstream commit da77b67195de1c65bef4908fa29967c4d0af2da2 ] Commit b894157145e4 ("x86/PCI: Mark Broadwell-EP Home Agent & PCU as having non-compliant BARs") marked Home Agent 0 & PCU has having non-compliant BARs. Home Agent 1 also has non-compliant BARs. Mark Home Agent 1 as having non-compliant BARs so the PCI core doesn't touch them. The problem with these devices is documented in the Xeon v4 specification update: BDF2 PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration Problem: During system initialization the Operating System may access the standard PCI BARs (Base Address Registers). Due to this erratum, accesses to the Home Agent BAR registers (Bus 1; Device 18; Function 0,4; Offsets (0x14-0x24) will return non-zero values. Implication: The operating system may issue a warning. Intel has not observed any functional failures due to this erratum. Link: http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html Fixes: b894157145e4 ("x86/PCI: Mark Broadwell-EP Home Agent & PCU as having non-compliant BARs") Signed-off-by: Prarit Bhargava Signed-off-by: Bjorn Helgaas CC: Thomas Gleixner CC: Ingo Molnar CC: "H. Peter Anvin" CC: Andi Kleen Signed-off-by: Sasha Levin --- arch/x86/pci/fixup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 0ae7e9fa348d..89f90549c6a8 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -541,9 +541,16 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); +/* + * Broadwell EP Home Agent BARs erroneously return non-zero values when read. + * + * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html + * entry BDF2. + */ static void pci_bdwep_bar(struct pci_dev *dev) { dev->non_compliant_bars = 1; } +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar); From 93e8e691675a33960b607d9fd0858d5a35f0e419 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Mon, 25 Apr 2016 23:31:12 -0700 Subject: [PATCH 285/812] aacraid: Start adapter after updating number of MSIX vectors [ Upstream commit 116d77fea02e2a5aded7d29ba4c692774cb339f1 ] The adapter has to be started after updating the number of MSIX Vectors Fixes: ecc479e00db8 (aacraid: Set correct MSIX count for EEH recovery) Cc: stable@vger.kernel.org Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aacraid/linit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 8da8b46da722..1c447405ebbf 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1416,8 +1416,8 @@ static int aac_acquire_resources(struct aac_dev *dev) /* After EEH recovery or suspend resume, max_msix count * may change, therfore updating in init as well. */ - aac_adapter_start(dev); dev->init->Sa_MSIXVectors = cpu_to_le32(dev->max_msix); + aac_adapter_start(dev); } return 0; From 5796c70e5b1d62016bdc3b678346ffbd433d030a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 21 Mar 2016 10:02:42 +0200 Subject: [PATCH 286/812] perf/core: Don't leak event in the syscall error path [ Upstream commit 201c2f85bd0bc13b712d9c0b3d11251b182e06ae ] In the error path, event_file not being NULL is used to determine whether the event itself still needs to be free'd, so fix it up to avoid leaking. Reported-by: Leon Yu Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 130056275ade ("perf: Do not double free") Link: http://lkml.kernel.org/r/87twk06yxp.fsf@ashishki-desk.ger.corp.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 330fcd1b1822..19519ec39d51 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8532,6 +8532,7 @@ SYSCALL_DEFINE5(perf_event_open, f_flags); if (IS_ERR(event_file)) { err = PTR_ERR(event_file); + event_file = NULL; goto err_context; } From af521c7f828f49ec73324c47bef073d39e0d93ba Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 31 Jan 2016 14:14:52 -0200 Subject: [PATCH 287/812] usbvision: revert commit 588afcc1 [ Upstream commit d5468d7afaa9c9e961e150f0455a14a9f4872a98 ] Commit 588afcc1c0e4 ("[media] usbvision fix overflow of interfaces array")' should be reverted, because: * "!dev->actconfig->interface[ifnum]" won't catch a case where the value is not NULL but some garbage. This way the system may crash later with GPF. * "(ifnum >= USB_MAXINTERFACES)" does not cover all the error conditions. "ifnum" should be compared to "dev->actconfig-> desc.bNumInterfaces", i.e. compared to the number of "struct usb_interface" kzalloc()-ed, not to USB_MAXINTERFACES. * There is a "struct usb_device" leak in this error path, as there is usb_get_dev(), but no usb_put_dev() on this path. * There is a bug of the same type several lines below with number of endpoints. The code is accessing hard-coded second endpoint ("interface->endpoint[1].desc") which may not exist. It would be great to handle this in the same patch too. * All the concerns above are resolved by already-accepted commit fa52bd50 ("[media] usbvision: fix crash on detecting device with invalid configuration") * Mailing list message: http://www.spinics.net/lists/linux-media/msg94832.html Signed-off-by: Vladis Dronov Signed-off-by: Hans Verkuil Cc: # for v4.5 Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/usbvision/usbvision-video.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index cafc34938a79..91d709efef7a 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -1461,13 +1461,6 @@ static int usbvision_probe(struct usb_interface *intf, printk(KERN_INFO "%s: %s found\n", __func__, usbvision_device_data[model].model_string); - /* - * this is a security check. - * an exploit using an incorrect bInterfaceNumber is known - */ - if (ifnum >= USB_MAXINTERFACES || !dev->actconfig->interface[ifnum]) - return -ENODEV; - if (usbvision_device_data[model].interface >= 0) interface = &dev->actconfig->interface[usbvision_device_data[model].interface]->altsetting[0]; else if (ifnum < dev->actconfig->desc.bNumInterfaces) From 1c84d14eb7f4cefe455facae6b1ac06e7841db56 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 28 Oct 2016 08:21:03 +0100 Subject: [PATCH 288/812] MIPS: Fix FCSR Cause bit handling for correct SIGFPE issue [ Upstream commit 5a1aca4469fdccd5b74ba0b4e490173b2b447895 ] Sanitize FCSR Cause bit handling, following a trail of past attempts: * commit 4249548454f7 ("MIPS: ptrace: Fix FP context restoration FCSR regression"), * commit 443c44032a54 ("MIPS: Always clear FCSR cause bits after emulation"), * commit 64bedffe4968 ("MIPS: Clear [MSA]FPE CSR.Cause after notify_die()"), * commit b1442d39fac2 ("MIPS: Prevent user from setting FCSR cause bits"), * commit b54d2901517d ("Properly handle branch delay slots in connection with signals."). Specifically do not mask these bits out in ptrace(2) processing and send a SIGFPE signal instead whenever a matching pair of an FCSR Cause and Enable bit is seen as execution of an affected context is about to resume. Only then clear Cause bits, and even then do not clear any bits that are set but masked with the respective Enable bits. Adjust Cause bit clearing throughout code likewise, except within the FPU emulator proper where they are set according to IEEE 754 exceptions raised as the operation emulated executed. Do so so that any IEEE 754 exceptions subject to their default handling are recorded like with operations executed by FPU hardware. Signed-off-by: Maciej W. Rozycki Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14460/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/include/asm/fpu_emulator.h | 13 +++++ arch/mips/include/asm/switch_to.h | 18 +++++++ arch/mips/kernel/mips-r2-to-r6-emul.c | 10 ++-- arch/mips/kernel/ptrace.c | 7 ++- arch/mips/kernel/traps.c | 72 +++++++++++++++------------ 5 files changed, 78 insertions(+), 42 deletions(-) diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index 2f021cdfba4f..742223716fc8 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -66,6 +66,8 @@ extern int do_dsemulret(struct pt_regs *xcp); extern int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr); +void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, + struct task_struct *tsk); int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31); int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, @@ -92,4 +94,15 @@ static inline void fpu_emulator_init_fpu(void) set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN); } +/* + * Mask the FCSR Cause bits according to the Enable bits, observing + * that Unimplemented is always enabled. + */ +static inline unsigned long mask_fcr31_x(unsigned long fcr31) +{ + return fcr31 & (FPU_CSR_UNI_X | + ((fcr31 & FPU_CSR_ALL_E) << + (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E)))); +} + #endif /* _ASM_FPU_EMULATOR_H */ diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index ebb5c0f2f90d..c0ae27971e31 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -75,6 +75,22 @@ do { if (cpu_has_rw_llb) { \ } \ } while (0) +/* + * Check FCSR for any unmasked exceptions pending set with `ptrace', + * clear them and send a signal. + */ +#define __sanitize_fcr31(next) \ +do { \ + unsigned long fcr31 = mask_fcr31_x(next->thread.fpu.fcr31); \ + void __user *pc; \ + \ + if (unlikely(fcr31)) { \ + pc = (void __user *)task_pt_regs(next)->cp0_epc; \ + next->thread.fpu.fcr31 &= ~fcr31; \ + force_fcr31_sig(fcr31, pc, next); \ + } \ +} while (0) + /* * For newly created kernel threads switch_to() will return to * ret_from_kernel_thread, newly created user threads to ret_from_fork. @@ -85,6 +101,8 @@ do { if (cpu_has_rw_llb) { \ do { \ __mips_mt_fpaff_switch_to(prev); \ lose_fpu_inatomic(1, prev); \ + if (tsk_used_math(next)) \ + __sanitize_fcr31(next); \ if (cpu_has_dsp) { \ __save_dsp(prev); \ __restore_dsp(next); \ diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index cbe0f025856d..7b887027dca2 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -900,7 +900,7 @@ static inline int mipsr2_find_op_func(struct pt_regs *regs, u32 inst, * mipsr2_decoder: Decode and emulate a MIPS R2 instruction * @regs: Process register set * @inst: Instruction to decode and emulate - * @fcr31: Floating Point Control and Status Register returned + * @fcr31: Floating Point Control and Status Register Cause bits returned */ int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31) { @@ -1183,13 +1183,13 @@ int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31) err = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 0, &fault_addr); - *fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + *fcr31 = res = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~res; /* * this is a tricky issue - lose_fpu() uses LL/SC atomics diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 5a869515b393..9d04392f7ef0 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -79,16 +79,15 @@ void ptrace_disable(struct task_struct *child) } /* - * Poke at FCSR according to its mask. Don't set the cause bits as - * this is currently not handled correctly in FP context restoration - * and will cause an oops if a corresponding enable bit is set. + * Poke at FCSR according to its mask. Set the Cause bits even + * if a corresponding Enable bit is set. This will be noticed at + * the time the thread is switched to and SIGFPE thrown accordingly. */ static void ptrace_setfcr31(struct task_struct *child, u32 value) { u32 fcr31; u32 mask; - value &= ~FPU_CSR_ALL_X; fcr31 = child->thread.fpu.fcr31; mask = boot_cpu_data.fpu_msk31; child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 1b901218e3ae..6abd6b41c13d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -706,6 +706,32 @@ asmlinkage void do_ov(struct pt_regs *regs) exception_exit(prev_state); } +/* + * Send SIGFPE according to FCSR Cause bits, which must have already + * been masked against Enable bits. This is impotant as Inexact can + * happen together with Overflow or Underflow, and `ptrace' can set + * any bits. + */ +void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, + struct task_struct *tsk) +{ + struct siginfo si = { .si_addr = fault_addr, .si_signo = SIGFPE }; + + if (fcr31 & FPU_CSR_INV_X) + si.si_code = FPE_FLTINV; + else if (fcr31 & FPU_CSR_DIV_X) + si.si_code = FPE_FLTDIV; + else if (fcr31 & FPU_CSR_OVF_X) + si.si_code = FPE_FLTOVF; + else if (fcr31 & FPU_CSR_UDF_X) + si.si_code = FPE_FLTUND; + else if (fcr31 & FPU_CSR_INE_X) + si.si_code = FPE_FLTRES; + else + si.si_code = __SI_FAULT; + force_sig_info(SIGFPE, &si, tsk); +} + int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) { struct siginfo si = { 0 }; @@ -715,27 +741,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) return 0; case SIGFPE: - si.si_addr = fault_addr; - si.si_signo = sig; - /* - * Inexact can happen together with Overflow or Underflow. - * Respect the mask to deliver the correct exception. - */ - fcr31 &= (fcr31 & FPU_CSR_ALL_E) << - (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E)); - if (fcr31 & FPU_CSR_INV_X) - si.si_code = FPE_FLTINV; - else if (fcr31 & FPU_CSR_DIV_X) - si.si_code = FPE_FLTDIV; - else if (fcr31 & FPU_CSR_OVF_X) - si.si_code = FPE_FLTOVF; - else if (fcr31 & FPU_CSR_UDF_X) - si.si_code = FPE_FLTUND; - else if (fcr31 & FPU_CSR_INE_X) - si.si_code = FPE_FLTRES; - else - si.si_code = __SI_FAULT; - force_sig_info(sig, &si, current); + force_fcr31_sig(fcr31, fault_addr, current); return 1; case SIGBUS: @@ -798,13 +804,13 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode, /* Run the emulator */ sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Restore the hardware register state */ own_fpu(1); @@ -830,7 +836,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) goto out; /* Clear FCSR.Cause before enabling interrupts */ - write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X); + write_32bit_cp1_register(CP1_STATUS, fcr31 & ~mask_fcr31_x(fcr31)); local_irq_enable(); die_if_kernel("FP exception in kernel code", regs); @@ -852,13 +858,13 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) /* Run the emulator */ sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Restore the hardware register state */ own_fpu(1); /* Using the FPU again. */ @@ -1431,13 +1437,13 @@ asmlinkage void do_cpu(struct pt_regs *regs) sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 0, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* * We can't allow the emulated instruction to leave - * any of the cause bits set in $fcr31. + * any enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Send a signal if required. */ if (!process_fpemu_return(sig, fault_addr, fcr31) && !err) From f629104427a6dc9ab20dc10424896777a2bbe506 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Jun 2016 14:34:30 +0200 Subject: [PATCH 289/812] ASoC: ak4613: Enable cache usage to fix crashes on resume [ Upstream commit dcd2d1f78664fdc75eadaaf65257834e24383d01 ] During system resume: kernel BUG at drivers/base/regmap/regcache.c:347! ... PC is at regcache_sync+0x1c/0x128 LR is at ak4613_resume+0x28/0x34 The ak4613 driver is using a regmap cache sync to restore the configuration of the chip on resume but does not actually define a register cache which means that the resume is never going to work and we trigger asserts in regmap. Fix this by enabling caching. Based on commit d3030d11961a8c10 ("ASoC: ak4642: Enable cache usage to fix crashes on resume") by Mark Brown . Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/ak4613.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c index 07a266460ec3..b4b36cc92ffe 100644 --- a/sound/soc/codecs/ak4613.c +++ b/sound/soc/codecs/ak4613.c @@ -143,6 +143,7 @@ static const struct regmap_config ak4613_regmap_cfg = { .max_register = 0x16, .reg_defaults = ak4613_reg, .num_reg_defaults = ARRAY_SIZE(ak4613_reg), + .cache_type = REGCACHE_RBTREE, }; static const struct of_device_id ak4613_of_match[] = { From 796c9c018b9a11399c3b7fb54f4d37485ad9fc6e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Jun 2016 14:34:32 +0200 Subject: [PATCH 290/812] ASoC: wm8940: Enable cache usage to fix crashes on resume [ Upstream commit 50c7a0ef2d97e56c7ce2f1ea5fe1d8e25aadc1bb ] The wm8940 driver is using a regmap cache sync to restore the configuration of the chip when switching from OFF to STANDBY, but does not actually define a register cache which means that the restore is never going to work and we trigger asserts in regmap. Fix this by enabling caching. Based on commit d3030d11961a8c10 ("ASoC: ak4642: Enable cache usage to fix crashes on resume") by Mark Brown . Signed-off-by: Geert Uytterhoeven Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8940.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c index f6f9395ea38e..1c600819f768 100644 --- a/sound/soc/codecs/wm8940.c +++ b/sound/soc/codecs/wm8940.c @@ -743,6 +743,7 @@ static const struct regmap_config wm8940_regmap = { .max_register = WM8940_MONOMIX, .reg_defaults = wm8940_reg_defaults, .num_reg_defaults = ARRAY_SIZE(wm8940_reg_defaults), + .cache_type = REGCACHE_RBTREE, .readable_reg = wm8940_readable_register, .volatile_reg = wm8940_volatile_register, From 22979776093abc47dcbc6a06eab41d24800ec32d Mon Sep 17 00:00:00 2001 From: Mark Syms Date: Tue, 29 Nov 2016 11:36:46 +0000 Subject: [PATCH 291/812] CIFS: handle guest access errors to Windows shares [ Upstream commit 40920c2bb119fd49ba03e2f97a172171781be442 ] Commit 1a967d6c9b39c226be1b45f13acd4d8a5ab3dc44 ("correctly to anonymous authentication for the NTLM(v2) authentication") introduces a regression in handling errors related to attempting a guest connection to a Windows share which requires authentication. This should result in a permission denied error but actually causes the kernel module to enter a never-ending loop trying to follow a DFS referal which doesn't exist. The base cause of this is the failure now occurs later in the process during tree connect and not at the session setup setup and all errors in tree connect are interpreted as needing to follow the DFS paths which isn't in this case correct. So, check the returned error against EACCES and fail if this is returned error. Feedback from Aurelien: PS> net user guest /activate:no PS> mkdir C:\guestshare PS> icacls C:\guestshare /grant 'Everyone:(OI)(CI)F' PS> new-smbshare -name guestshare -path C:\guestshare -fullaccess Everyone I've tested v3.10, v4.4, master, master+your patch using default options (empty or no user "NU") and user=abc (U). NT_LOGON_FAILURE in session setup: LF This is what you seem to have in 3.10. NT_ACCESS_DENIED in tree connect to the share: AD This is what you get before your infinite loop. | NU U -------------------------------- 3.10 | LF LF 4.4 | LF LF master | AD LF master+patch | AD LF No infinite DFS loop :( All these issues result in mount failing very fast with permission denied. I guess it could be from either the Windows version or the share/folder ACL. A deeper analysis of the packets might reveal more. In any case I did not notice any issues for on a basic DFS setup with the patch so I don't think it introduced any regressions, which is probably all that matters. It still bothers me a little I couldn't hit the bug. I've included kernel output w/ debugging output and network capture of my tests if anyone want to have a look at it. (master+patch = ml-guestfix). Signed-off-by: Mark Syms Reviewed-by: Aurelien Aptel Tested-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 077ad3a06c9a..1eeb4780c3ed 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3674,6 +3674,9 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) if (IS_ERR(tcon)) { rc = PTR_ERR(tcon); tcon = NULL; + if (rc == -EACCES) + goto mount_fail_check; + goto remote_path_check; } From db769a03103b7cd1d419c76d3840e2d3a64c0df8 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 25 Jul 2017 14:53:03 +0100 Subject: [PATCH 292/812] arm64: Fix potential race with hardware DBM in ptep_set_access_flags() [ Upstream commit 6d332747fa5f0a6843b56b5b129168ba909336d1 ] In a system with DBM (dirty bit management) capable agents there is a possible race between a CPU executing ptep_set_access_flags() (maybe non-DBM capable) and a hardware update of the dirty state (clearing of PTE_RDONLY). The scenario: a) the pte is writable (PTE_WRITE set), clean (PTE_RDONLY set) and old (PTE_AF clear) b) ptep_set_access_flags() is called as a result of a read access and it needs to set the pte to writable, clean and young (PTE_AF set) c) a DBM-capable agent, as a result of a different write access, is marking the entry as young (setting PTE_AF) and dirty (clearing PTE_RDONLY) The current ptep_set_access_flags() implementation would set the PTE_RDONLY bit in the resulting value overriding the DBM update and losing the dirty state. This patch fixes such race by setting PTE_RDONLY to the most permissive (lowest value) of the current entry and the new one. Fixes: 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") Cc: Will Deacon Acked-by: Mark Rutland Acked-by: Steve Capper Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/mm/fault.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 86485415c5f0..be7f8416809f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -107,26 +107,27 @@ int ptep_set_access_flags(struct vm_area_struct *vma, /* only preserve the access flags and write permission */ pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; - /* - * PTE_RDONLY is cleared by default in the asm below, so set it in - * back if necessary (read-only or clean PTE). - */ + /* set PTE_RDONLY if actual read-only or clean PTE */ if (!pte_write(entry) || !pte_sw_dirty(entry)) pte_val(entry) |= PTE_RDONLY; /* * Setting the flags must be done atomically to avoid racing with the - * hardware update of the access/dirty state. + * hardware update of the access/dirty state. The PTE_RDONLY bit must + * be set to the most permissive (lowest value) of *ptep and entry + * (calculated as: a & b == ~(~a | ~b)). */ + pte_val(entry) ^= PTE_RDONLY; asm volatile("// ptep_set_access_flags\n" " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" - " and %0, %0, %3 // clear PTE_RDONLY\n" + " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n" " orr %0, %0, %4 // set flags\n" + " eor %0, %0, %3 // negate final PTE_RDONLY\n" " stxr %w1, %0, %2\n" " cbnz %w1, 1b\n" : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "L" (~PTE_RDONLY), "r" (pte_val(entry))); + : "L" (PTE_RDONLY), "r" (pte_val(entry))); flush_tlb_fix_spurious_fault(vma, address); return 1; From 9e9fe58a92a46c6d154d2901735bf230d91b8507 Mon Sep 17 00:00:00 2001 From: Jonathan Basseri Date: Wed, 25 Oct 2017 09:52:27 -0700 Subject: [PATCH 293/812] xfrm: Clear sk_dst_cache when applying per-socket policy. [ Upstream commit 2b06cdf3e688b98fcc9945873b5d42792bd4eee0 ] If a socket has a valid dst cache, then xfrm_lookup_route will get skipped. However, the cache is not invalidated when applying policy to a socket (i.e. IPV6_XFRM_POLICY). The result is that new policies are sometimes ignored on those sockets. (Note: This was broken for IPv4 and IPv6 at different times.) This can be demonstrated like so, 1. Create UDP socket. 2. connect() the socket. 3. Apply an outbound XFRM policy to the socket. (setsockopt) 4. send() data on the socket. Packets will continue to be sent in the clear instead of matching an xfrm or returning a no-match error (EAGAIN). This affects calls to send() and not sendto(). Invalidating the sk_dst_cache is necessary to correctly apply xfrm policies. Since we do this in xfrm_user_policy(), the sk_lock was already acquired in either do_ip_setsockopt() or do_ipv6_setsockopt(), and we may call __sk_dst_reset(). Performance impact should be negligible, since this code is only called when changing xfrm policy, and only affects the socket in question. Fixes: 00bc0ef5880d ("ipv6: Skip XFRM lookup if dst_entry in socket cache is valid") Tested: https://android-review.googlesource.com/517555 Tested: https://android-review.googlesource.com/418659 Signed-off-by: Jonathan Basseri Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d6a11af0bab1..9b6e51450fc5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1884,6 +1884,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen if (err >= 0) { xfrm_sk_policy_insert(sk, err, pol); xfrm_pol_put(pol); + __sk_dst_reset(sk); err = 0; } From d896b9f298afcfc4873000a290324ba72b23eef5 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Tue, 27 Jun 2017 14:55:58 -0400 Subject: [PATCH 294/812] scsi: Add STARGET_CREATED_REMOVE state to scsi_target_state [ Upstream commit f9279c968c257ee39b0d7bd2571a4d231a67bcc1 ] The addition of the STARGET_REMOVE state had the side effect of introducing a race condition that can cause a crash. scsi_target_reap_ref_release() checks the starget->state to see if it still in STARGET_CREATED, and if so, skips calling transport_remove_device() and device_del(), because the starget->state is only set to STARGET_RUNNING after scsi_target_add() has called device_add() and transport_add_device(). However, if an rport loss occurs while a target is being scanned, it can happen that scsi_remove_target() will be called while the starget is still in the STARGET_CREATED state. In this case, the starget->state will be set to STARGET_REMOVE, and as a result, scsi_target_reap_ref_release() will take the wrong path. The end result is a panic: [ 1255.356653] Oops: 0000 [#1] SMP [ 1255.360154] Modules linked in: x86_pkg_temp_thermal kvm_intel kvm irqbypass crc32c_intel ghash_clmulni_i [ 1255.393234] CPU: 5 PID: 149 Comm: kworker/u96:4 Tainted: G W 4.11.0+ #8 [ 1255.401879] Hardware name: Dell Inc. PowerEdge R320/08VT7V, BIOS 2.0.22 11/19/2013 [ 1255.410327] Workqueue: scsi_wq_6 fc_scsi_scan_rport [scsi_transport_fc] [ 1255.417720] task: ffff88060ca8c8c0 task.stack: ffffc900048a8000 [ 1255.424331] RIP: 0010:kernfs_find_ns+0x13/0xc0 [ 1255.429287] RSP: 0018:ffffc900048abbf0 EFLAGS: 00010246 [ 1255.435123] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 1255.443083] RDX: 0000000000000000 RSI: ffffffff8188d659 RDI: 0000000000000000 [ 1255.451043] RBP: ffffc900048abc10 R08: 0000000000000000 R09: 0000012433fe0025 [ 1255.459005] R10: 0000000025e5a4b5 R11: 0000000025e5a4b5 R12: ffffffff8188d659 [ 1255.466972] R13: 0000000000000000 R14: ffff8805f55e5088 R15: 0000000000000000 [ 1255.474931] FS: 0000000000000000(0000) GS:ffff880616b40000(0000) knlGS:0000000000000000 [ 1255.483959] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1255.490370] CR2: 0000000000000068 CR3: 0000000001c09000 CR4: 00000000000406e0 [ 1255.498332] Call Trace: [ 1255.501058] kernfs_find_and_get_ns+0x31/0x60 [ 1255.505916] sysfs_unmerge_group+0x1d/0x60 [ 1255.510498] dpm_sysfs_remove+0x22/0x60 [ 1255.514783] device_del+0xf4/0x2e0 [ 1255.518577] ? device_remove_file+0x19/0x20 [ 1255.523241] attribute_container_class_device_del+0x1a/0x20 [ 1255.529457] transport_remove_classdev+0x4e/0x60 [ 1255.534607] ? transport_add_class_device+0x40/0x40 [ 1255.540046] attribute_container_device_trigger+0xb0/0xc0 [ 1255.546069] transport_remove_device+0x15/0x20 [ 1255.551025] scsi_target_reap_ref_release+0x25/0x40 [ 1255.556467] scsi_target_reap+0x2e/0x40 [ 1255.560744] __scsi_scan_target+0xaa/0x5b0 [ 1255.565312] scsi_scan_target+0xec/0x100 [ 1255.569689] fc_scsi_scan_rport+0xb1/0xc0 [scsi_transport_fc] [ 1255.576099] process_one_work+0x14b/0x390 [ 1255.580569] worker_thread+0x4b/0x390 [ 1255.584651] kthread+0x109/0x140 [ 1255.588251] ? rescuer_thread+0x330/0x330 [ 1255.592730] ? kthread_park+0x60/0x60 [ 1255.596815] ret_from_fork+0x29/0x40 [ 1255.600801] Code: 24 08 48 83 42 40 01 5b 41 5c 5d c3 66 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 [ 1255.621876] RIP: kernfs_find_ns+0x13/0xc0 RSP: ffffc900048abbf0 [ 1255.628479] CR2: 0000000000000068 [ 1255.632756] ---[ end trace 34a69ba0477d036f ]--- Fix this by adding another scsi_target state STARGET_CREATED_REMOVE to distinguish this case. Fixes: f05795d3d771 ("scsi: Add intermediate STARGET_REMOVE state to scsi_target_state") Reported-by: David Jeffery Signed-off-by: Ewan D. Milne Cc: Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_scan.c | 5 +++-- drivers/scsi/scsi_sysfs.c | 8 ++++++-- include/scsi/scsi_device.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 692445bcca6f..850ddc5fac04 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -381,11 +381,12 @@ static void scsi_target_reap_ref_release(struct kref *kref) = container_of(kref, struct scsi_target, reap_ref); /* - * if we get here and the target is still in the CREATED state that + * if we get here and the target is still in a CREATED state that * means it was allocated but never made visible (because a scan * turned up no LUNs), so don't call device_del() on it. */ - if (starget->state != STARGET_CREATED) { + if ((starget->state != STARGET_CREATED) && + (starget->state != STARGET_CREATED_REMOVE)) { transport_remove_device(&starget->dev); device_del(&starget->dev); } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 8db0c48943d6..085e470d1c49 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1212,11 +1212,15 @@ void scsi_remove_target(struct device *dev) spin_lock_irqsave(shost->host_lock, flags); list_for_each_entry(starget, &shost->__targets, siblings) { if (starget->state == STARGET_DEL || - starget->state == STARGET_REMOVE) + starget->state == STARGET_REMOVE || + starget->state == STARGET_CREATED_REMOVE) continue; if (starget->dev.parent == dev || &starget->dev == dev) { kref_get(&starget->reap_ref); - starget->state = STARGET_REMOVE; + if (starget->state == STARGET_CREATED) + starget->state = STARGET_CREATED_REMOVE; + else + starget->state = STARGET_REMOVE; spin_unlock_irqrestore(shost->host_lock, flags); __scsi_remove_target(starget); scsi_target_reap(starget); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 293b9a7f53bc..fb53a94a5e8b 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -240,6 +240,7 @@ enum scsi_target_state { STARGET_CREATED = 1, STARGET_RUNNING, STARGET_REMOVE, + STARGET_CREATED_REMOVE, STARGET_DEL, }; From c300313d484db2511f3496bdca96b1e902462839 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 11 Apr 2016 17:57:05 -0700 Subject: [PATCH 295/812] sparc/pci: Refactor dev_archdata initialization into pci_init_dev_archdata [ Upstream commit 9a78d4fc28904785ffe4c2d361e25b251b479704 ] The function pcibios_add_device() added by commit d0c31e020057 ("sparc/PCI: Fix for panic while enabling SR-IOV") initializes the dev_archdata by doing a memcpy from the PF. This has the problem that it erroneously copies the OF device without explicitly refcounting it. As David Miller pointed out: "Generally speaking we don't really support hot-plug for OF probed devices, but if we did all of the device tree pointers have to be refcounted properly." To fix this error, and also avoid code duplication, this patch creates a new helper function, pci_init_dev_archdata(), that initializes the fields in dev_archdata, and can be invoked by callers after they have taken the needed refcounts Signed-off-by: Sowmini Varadhan Tested-by: Babu Moger Reviewed-by: Khalid Aziz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/kernel/pci.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 9f9614df9e1e..c2b202d763a1 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -245,6 +245,18 @@ static void pci_parse_of_addrs(struct platform_device *op, } } +static void pci_init_dev_archdata(struct dev_archdata *sd, void *iommu, + void *stc, void *host_controller, + struct platform_device *op, + int numa_node) +{ + sd->iommu = iommu; + sd->stc = stc; + sd->host_controller = host_controller; + sd->op = op; + sd->numa_node = numa_node; +} + static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, struct device_node *node, struct pci_bus *bus, int devfn) @@ -259,13 +271,10 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, if (!dev) return NULL; + op = of_find_device_by_node(node); sd = &dev->dev.archdata; - sd->iommu = pbm->iommu; - sd->stc = &pbm->stc; - sd->host_controller = pbm; - sd->op = op = of_find_device_by_node(node); - sd->numa_node = pbm->numa_node; - + pci_init_dev_archdata(sd, pbm->iommu, &pbm->stc, pbm, op, + pbm->numa_node); sd = &op->dev.archdata; sd->iommu = pbm->iommu; sd->stc = &pbm->stc; @@ -1003,9 +1012,13 @@ int pcibios_add_device(struct pci_dev *dev) * Copy dev_archdata from PF to VF */ if (dev->is_virtfn) { + struct dev_archdata *psd; + pdev = dev->physfn; - memcpy(&dev->dev.archdata, &pdev->dev.archdata, - sizeof(struct dev_archdata)); + psd = &pdev->dev.archdata; + pci_init_dev_archdata(&dev->dev.archdata, psd->iommu, + psd->stc, psd->host_controller, NULL, + psd->numa_node); } return 0; } From ab0b3b9dbf559a5633d460e748144697bd2d3aa3 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:18 -0700 Subject: [PATCH 296/812] sch_red: update backlog as well [ Upstream commit d7f4f332f082c4d4ba53582f902ed6b44fd6f45e ] Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_red.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 0505b8408c8b..4bf2b599ef98 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -97,6 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -118,6 +119,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) skb = child->dequeue(child); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { if (!red_is_idling(&q->vars)) @@ -143,6 +145,7 @@ static unsigned int red_drop(struct Qdisc *sch) if (child->ops->drop && (len = child->ops->drop(child)) > 0) { q->stats.other++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -158,6 +161,7 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; red_restart(&q->vars); } From 75c8542375a1ed286ad617badf24408ce376de80 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 21 Sep 2017 16:02:05 -0400 Subject: [PATCH 297/812] usb-storage: fix bogus hardware error messages for ATA pass-thru devices [ Upstream commit a4fd4a724d6c30ad671046d83be2e9be2f11d275 ] Ever since commit a621bac3044e ("scsi_lib: correctly retry failed zero length REQ_TYPE_FS commands"), people have been getting bogus error messages for USB disk drives using ATA pass-thru. For example: [ 1344.880193] sd 6:0:0:0: [sdb] Attached SCSI disk [ 1345.069152] sd 6:0:0:0: [sdb] tag#0 FAILED Result: hostbyte=DID_ERROR driverbyte=DRIVER_SENSE [ 1345.069159] sd 6:0:0:0: [sdb] tag#0 Sense Key : Hardware Error [current] [descriptor] [ 1345.069162] sd 6:0:0:0: [sdb] tag#0 Add. Sense: No additional sense information [ 1345.069168] sd 6:0:0:0: [sdb] tag#0 CDB: ATA command pass through(16) 85 06 20 00 00 00 00 00 00 00 00 00 00 00 e5 00 [ 1345.172252] sd 6:0:0:0: [sdb] tag#0 FAILED Result: hostbyte=DID_ERROR driverbyte=DRIVER_SENSE [ 1345.172258] sd 6:0:0:0: [sdb] tag#0 Sense Key : Hardware Error [current] [descriptor] [ 1345.172261] sd 6:0:0:0: [sdb] tag#0 Add. Sense: No additional sense information [ 1345.172266] sd 6:0:0:0: [sdb] tag#0 CDB: ATA command pass through(12)/Blank a1 06 20 da 00 00 4f c2 00 b0 00 00 These messages can be quite annoying, because programs like udisks2 provoke them every 10 minutes or so. Other programs can also have this effect, such as those in smartmontools. I don't fully understand how that commit induced the SCSI core to log these error messages, but the underlying cause for them is code added to usb-storage by commit f1a0743bc0e7 ("USB: storage: When a device returns no sense data, call it a Hardware Error"). At the time it was necessary to do this, in order to prevent an infinite retry loop with some not-so-great mass storage devices. However, the ATA pass-thru protocol uses SCSI sense data to return command status values, and some devices always report Check Condition status for ATA pass-thru commands to ensure that the host retrieves the sense data, even if the command succeeded. This violates the USB mass-storage protocol (Check Condition status is supposed to mean the command failed), but we can't help that. This patch attempts to mitigate the problem of these bogus error reports by changing usb-storage. The HARDWARE ERROR sense key will be inserted only for commands that aren't ATA pass-thru. Thanks to Ewan Milne for pointing out that this mechanism was present in usb-storage. 8 years after writing it, I had completely forgotten its existence. Signed-off-by: Alan Stern Tested-by: Kris Lindgren Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1351305 CC: Ewan D. Milne CC: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/storage/transport.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 02f86dd1a340..90a7bffe3484 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -808,12 +808,24 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) if (result == USB_STOR_TRANSPORT_GOOD) { srb->result = SAM_STAT_GOOD; srb->sense_buffer[0] = 0x0; + } + + /* + * ATA-passthru commands use sense data to report + * the command completion status, and often devices + * return Check Condition status when nothing is + * wrong. + */ + else if (srb->cmnd[0] == ATA_16 || + srb->cmnd[0] == ATA_12) { + /* leave the data alone */ + } /* If there was a problem, report an unspecified * hardware error to prevent the higher layers from * entering an infinite retry loop. */ - } else { + else { srb->result = DID_ERROR << 16; if ((sshdr.response_code & 0x72) == 0x72) srb->sense_buffer[1] = HARDWARE_ERROR; From e25dc63aa366fd0f61d1d9ba67b66f5d75fc4372 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Jun 2016 17:24:43 +0200 Subject: [PATCH 298/812] bpf: generally move prog destruction to RCU deferral [ Upstream commit 1aacde3d22c42281236155c1ef6d7a5aa32a826b ] Jann Horn reported following analysis that could potentially result in a very hard to trigger (if not impossible) UAF race, to quote his event timeline: - Set up a process with threads T1, T2 and T3 - Let T1 set up a socket filter F1 that invokes another filter F2 through a BPF map [tail call] - Let T1 trigger the socket filter via a unix domain socket write, don't wait for completion - Let T2 call PERF_EVENT_IOC_SET_BPF with F2, don't wait for completion - Now T2 should be behind bpf_prog_get(), but before bpf_prog_put() - Let T3 close the file descriptor for F2, dropping the reference count of F2 to 2 - At this point, T1 should have looked up F2 from the map, but not finished executing it - Let T3 remove F2 from the BPF map, dropping the reference count of F2 to 1 - Now T2 should call bpf_prog_put() (wrong BPF program type), dropping the reference count of F2 to 0 and scheduling bpf_prog_free_deferred() via schedule_work() - At this point, the BPF program could be freed - BPF execution is still running in a freed BPF program While at PERF_EVENT_IOC_SET_BPF time it's only guaranteed that the perf event fd we're doing the syscall on doesn't disappear from underneath us for whole syscall time, it may not be the case for the bpf fd used as an argument only after we did the put. It needs to be a valid fd pointing to a BPF program at the time of the call to make the bpf_prog_get() and while T2 gets preempted, F2 must have dropped reference to 1 on the other CPU. The fput() from the close() in T3 should also add additionally delay to the reference drop via exit_task_work() when bpf_prog_release() gets called as well as scheduling bpf_prog_free_deferred(). That said, it makes nevertheless sense to move the BPF prog destruction generally after RCU grace period to guarantee that such scenario above, but also others as recently fixed in ceb56070359b ("bpf, perf: delay release of BPF prog after grace period") with regards to tail calls won't happen. Integrating bpf_prog_free_deferred() directly into the RCU callback is not allowed since the invocation might happen from either softirq or process context, so we're not permitted to block. Reviewing all bpf_prog_put() invocations from eBPF side (note, cBPF -> eBPF progs don't use this for their destruction) with call_rcu() look good to me. Since we don't know whether at the time of attaching the program, we're already part of a tail call map, we need to use RCU variant. However, due to this, there won't be severely more stress on the RCU callback queue: situations with above bpf_prog_get() and bpf_prog_put() combo in practice normally won't lead to releases, but even if they would, enough effort/ cycles have to be put into loading a BPF program into the kernel already. Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/bpf.h | 5 ----- kernel/bpf/arraymap.c | 4 +--- kernel/bpf/syscall.c | 13 +++---------- kernel/events/core.c | 2 +- 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 132585a7fbd8..bae3da5bcda0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -177,7 +177,6 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -208,10 +207,6 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } - -static inline void bpf_prog_put_rcu(struct bpf_prog *prog) -{ -} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 0eb11b4ac4c7..daa4e0782cf7 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -270,9 +270,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) static void prog_fd_array_put_ptr(void *ptr) { - struct bpf_prog *prog = ptr; - - bpf_prog_put_rcu(prog); + bpf_prog_put(ptr); } /* decrement refcnt of all bpf_progs that are stored in this map */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4b9bbfe764e8..04fc1022ad9f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -487,7 +487,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) free_uid(user); } -static void __prog_put_common(struct rcu_head *rcu) +static void __bpf_prog_put_rcu(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); @@ -496,17 +496,10 @@ static void __prog_put_common(struct rcu_head *rcu) bpf_prog_free(aux->prog); } -/* version of bpf_prog_put() that is called after a grace period */ -void bpf_prog_put_rcu(struct bpf_prog *prog) -{ - if (atomic_dec_and_test(&prog->aux->refcnt)) - call_rcu(&prog->aux->rcu, __prog_put_common); -} - void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) - __prog_put_common(&prog->aux->rcu); + call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } EXPORT_SYMBOL_GPL(bpf_prog_put); @@ -514,7 +507,7 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) { struct bpf_prog *prog = filp->private_data; - bpf_prog_put_rcu(prog); + bpf_prog_put(prog); return 0; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 19519ec39d51..e53dfb5b826e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7141,7 +7141,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) prog = event->tp_event->prog; if (prog && event->tp_event->bpf_prog_owner == event) { event->tp_event->prog = NULL; - bpf_prog_put_rcu(prog); + bpf_prog_put(prog); } } From 4a3948cb6da454f21f89a34e694cea322ea559f9 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Sat, 23 Sep 2017 13:10:33 -0700 Subject: [PATCH 299/812] drm/nouveau/fbcon: fix oops without fbdev emulation [ Upstream commit 4813766325374af6ed0b66879ba6a0bbb05c83b6 ] This is similar to an earlier commit 52dfcc5ccfbb ("drm/nouveau: fix for disabled fbdev emulation"), but protects all occurrences of helper.fbdev in the source. I see oops in nouveau_fbcon_accel_save_disable() called from nouveau_fbcon_set_suspend_work() on Linux 3.13 when CONFIG_DRM_FBDEV_EMULATION option is disabled. Signed-off-by: Pavel Roskin Reviewed-by: Daniel Vetter Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index e40a1b07a014..343476d15726 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -235,7 +235,7 @@ void nouveau_fbcon_accel_save_disable(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->saved_flags = drm->fbcon->helper.fbdev->flags; drm->fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; } @@ -245,7 +245,7 @@ void nouveau_fbcon_accel_restore(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->helper.fbdev->flags = drm->fbcon->saved_flags; } } @@ -257,7 +257,8 @@ nouveau_fbcon_accel_fini(struct drm_device *dev) struct nouveau_fbdev *fbcon = drm->fbcon; if (fbcon && drm->channel) { console_lock(); - fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; + if (fbcon->helper.fbdev) + fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; console_unlock(); nouveau_channel_idle(drm->channel); nvif_object_fini(&fbcon->twod); From 1b6a863ff29ccd7b65b0112bbec1cda26b7f9c4e Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Wed, 12 Jul 2017 19:26:58 -0700 Subject: [PATCH 300/812] fuse: Dont call set_page_dirty_lock() for ITER_BVEC pages for async_dio [ Upstream commit 61c12b49e1c9c77d7a1bcc161de540d0fd21cf0c ] Commit 8fba54aebbdf ("fuse: direct-io: don't dirty ITER_BVEC pages") fixes the ITER_BVEC page deadlock for direct io in fuse by checking in fuse_direct_io(), whether the page is a bvec page or not, before locking it. However, this check is missed when the "async_dio" mount option is enabled. In this case, set_page_dirty_lock() is called from the req->end callback in request_end(), when the fuse thread is returning from userspace to respond to the read request. This will cause the same deadlock because the bvec condition is not checked in this path. Here is the stack of the deadlocked thread, while returning from userspace: [13706.656686] INFO: task glusterfs:3006 blocked for more than 120 seconds. [13706.657808] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [13706.658788] glusterfs D ffffffff816c80f0 0 3006 1 0x00000080 [13706.658797] ffff8800d6713a58 0000000000000086 ffff8800d9ad7000 ffff8800d9ad5400 [13706.658799] ffff88011ffd5cc0 ffff8800d6710008 ffff88011fd176c0 7fffffffffffffff [13706.658801] 0000000000000002 ffffffff816c80f0 ffff8800d6713a78 ffffffff816c790e [13706.658803] Call Trace: [13706.658809] [] ? bit_wait_io_timeout+0x80/0x80 [13706.658811] [] schedule+0x3e/0x90 [13706.658813] [] schedule_timeout+0x1b5/0x210 [13706.658816] [] ? gup_pud_range+0x1db/0x1f0 [13706.658817] [] ? kvm_clock_read+0x1e/0x20 [13706.658819] [] ? kvm_clock_get_cycles+0x9/0x10 [13706.658822] [] ? ktime_get+0x52/0xc0 [13706.658824] [] io_schedule_timeout+0xa4/0x110 [13706.658826] [] bit_wait_io+0x36/0x50 [13706.658828] [] __wait_on_bit_lock+0x76/0xb0 [13706.658831] [] ? lock_request+0x46/0x70 [fuse] [13706.658834] [] __lock_page+0xaa/0xb0 [13706.658836] [] ? wake_atomic_t_function+0x40/0x40 [13706.658838] [] set_page_dirty_lock+0x58/0x60 [13706.658841] [] fuse_release_user_pages+0x58/0x70 [fuse] [13706.658844] [] ? fuse_aio_complete+0x190/0x190 [fuse] [13706.658847] [] fuse_aio_complete_req+0x29/0x90 [fuse] [13706.658849] [] request_end+0xd9/0x190 [fuse] [13706.658852] [] fuse_dev_do_write+0x336/0x490 [fuse] [13706.658854] [] fuse_dev_write+0x6e/0xa0 [fuse] [13706.658857] [] ? security_file_permission+0x23/0x90 [13706.658859] [] do_iter_readv_writev+0x60/0x90 [13706.658862] [] ? fuse_dev_splice_write+0x350/0x350 [fuse] [13706.658863] [] do_readv_writev+0x171/0x1f0 [13706.658866] [] ? try_to_wake_up+0x210/0x210 [13706.658868] [] vfs_writev+0x41/0x50 [13706.658870] [] SyS_writev+0x56/0xf0 [13706.658872] [] ? syscall_trace_leave+0xf1/0x160 [13706.658874] [] system_call_fastpath+0x12/0x71 Fix this by making should_dirty a fuse_io_priv parameter that can be checked in fuse_aio_complete_req(). Reported-by: Tiger Yang Signed-off-by: Ashish Samant Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/file.c | 6 +++--- fs/fuse/fuse_i.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8577f3ba6dc6..7014318f6d18 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -625,7 +625,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) struct fuse_io_priv *io = req->io; ssize_t pos = -1; - fuse_release_user_pages(req, !io->write); + fuse_release_user_pages(req, io->should_dirty); if (io->write) { if (req->misc.write.in.size != req->misc.write.out.size) @@ -1333,7 +1333,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, loff_t *ppos, int flags) { int write = flags & FUSE_DIO_WRITE; - bool should_dirty = !write && iter_is_iovec(iter); int cuse = flags & FUSE_DIO_CUSE; struct file *file = io->file; struct inode *inode = file->f_mapping->host; @@ -1362,6 +1361,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, mutex_unlock(&inode->i_mutex); } + io->should_dirty = !write && iter_is_iovec(iter); while (count) { size_t nres; fl_owner_t owner = current->files; @@ -1378,7 +1378,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, nres = fuse_send_read(req, io, pos, nbytes, owner); if (!io->async) - fuse_release_user_pages(req, should_dirty); + fuse_release_user_pages(req, io->should_dirty); if (req->out.h.error) { if (!res) res = req->out.h.error; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7aafe9acc6c0..c6eb35a95fcc 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -252,6 +252,7 @@ struct fuse_io_priv { size_t size; __u64 offset; bool write; + bool should_dirty; int err; struct kiocb *iocb; struct file *file; From c97383a60e49db65a88c864aa0280d491e299e36 Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Fri, 30 Oct 2015 18:16:30 +0100 Subject: [PATCH 301/812] ixgbevf: Fix handling of NAPI budget when multiple queues are enabled per vector [ Upstream commit d0f71afffa1c3d5a36a4a278f1dbbd2643176dc3 ] This is the same patch as for ixgbe but applied differently according to busy polling. See commit 5d6002b7b822c74 ("ixgbe: Fix handling of NAPI budget when multiple queues are enabled per vector") Signed-off-by: William Dauchy Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 50bbad37d640..723bda33472a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1014,6 +1014,8 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) ixgbevf_for_each_ring(ring, q_vector->tx) clean_complete &= ixgbevf_clean_tx_irq(q_vector, ring); + if (budget <= 0) + return budget; #ifdef CONFIG_NET_RX_BUSY_POLL if (!ixgbevf_qv_lock_napi(q_vector)) return budget; From 40e9abaac6531e665cf57e4d456774239264a7cd Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 29 Feb 2016 21:17:10 +0200 Subject: [PATCH 302/812] net/mlx5e: Fix LRO modify [ Upstream commit ab0394fe2c258fdb5086c51a251b28f8ee7ab35c ] Ethtool LRO enable/disable is broken, as of today we only modify TCP TIRs in order to apply the requested configuration. Hardware requires that all TIRs pointing to the same RQ should share the same LRO configuration. For that all other TIRs' LRO fields must be modified as well. Fixes: 5c50368f3831 ('net/mlx5e: Light-weight netdev open/stop') Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 90e876ecc720..765b069d6a90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1304,7 +1304,7 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) lro_timer_supported_periods[2])); } -static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) +static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1312,6 +1312,7 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) void *tirc; int inlen; int err; + int tt; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -1323,7 +1324,11 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) mlx5e_build_tir_ctx_lro(tirc, priv); - err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + if (err) + break; + } kvfree(in); @@ -1870,8 +1875,10 @@ static int mlx5e_set_features(struct net_device *netdev, mlx5e_close_locked(priv->netdev); priv->params.lro_en = !!(features & NETIF_F_LRO); - mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV4_TCP); - mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV6_TCP); + err = mlx5e_modify_tirs_lro(priv); + if (err) + mlx5_core_warn(priv->mdev, "lro modify failed, %d\n", + err); if (was_opened) err = mlx5e_open_locked(priv->netdev); From fcac753bf1563ab131dc602e55890fa6b93f5491 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 29 Feb 2016 21:17:13 +0200 Subject: [PATCH 303/812] net/mlx5e: Correctly handle RSS indirection table when changing number of channels [ Upstream commit 85082dba0a5059c538cfa786d07f5ec5370d22fe ] Upon changing num_channels, reset the RSS indirection table to match the new value. Fixes: 2d75b2bc8a8c ('net/mlx5e: Add ethtool RSS configuration options') Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 22e72bf1ae48..7a716733d9ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -586,6 +586,8 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, + int num_channels); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, struct mlx5e_tx_wqe *wqe, int bf_sz) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7cc9df717323..7ee301310817 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -385,6 +385,8 @@ static int mlx5e_set_channels(struct net_device *dev, mlx5e_close_locked(dev); priv->params.num_channels = count; + mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); if (was_opened) err = mlx5e_open_locked(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 765b069d6a90..26d25ecdca7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1186,7 +1186,6 @@ static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); ix = priv->params.indirection_rqt[ix]; - ix = ix % priv->params.num_channels; MLX5_SET(rqtc, rqtc, rq_num[i], test_bit(MLX5E_STATE_OPENED, &priv->state) ? priv->channel[ix]->rq.rqn : @@ -1983,12 +1982,20 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; } +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, + int num_channels) +{ + int i; + + for (i = 0; i < len; i++) + indirection_rqt[i] = i % num_channels; +} + static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, int num_channels) { struct mlx5e_priv *priv = netdev_priv(netdev); - int i; priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; @@ -2012,8 +2019,8 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, netdev_rss_key_fill(priv->params.toeplitz_hash_key, sizeof(priv->params.toeplitz_hash_key)); - for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) - priv->params.indirection_rqt[i] = i % num_channels; + mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, num_channels); priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; From 83662744dcf2abe632b25c8bac256de69c99c06b Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 20 Nov 2015 13:02:16 -0800 Subject: [PATCH 304/812] ixgbe: fix RSS limit for X550 [ Upstream commit e9ee3238f8a480bbca58e51d02a93628d7c1f265 ] X550 allows for up to 64 RSS queues, but the driver can have max of 63 (-1 MSIX vector for link). On systems with >= 64 CPUs the driver will set the redirection table for all 64 queues which will result in packets being dropped. Signed-off-by: Emil Tantilov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 1d2174526a4c..18e4e4a69262 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -312,7 +312,7 @@ enum ixgbe_ring_f_enum { }; #define IXGBE_MAX_RSS_INDICES 16 -#define IXGBE_MAX_RSS_INDICES_X550 64 +#define IXGBE_MAX_RSS_INDICES_X550 63 #define IXGBE_MAX_VMDQ_INDICES 64 #define IXGBE_MAX_FDIR_INDICES 63 /* based on q_vector limit */ #define IXGBE_MAX_FCOE_INDICES 8 From 83aacb9cf479031bb8de3d0a3bcf132cf6b158f0 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 20 Nov 2015 13:12:17 -0800 Subject: [PATCH 305/812] ixgbe: Correct X550EM_x revision check [ Upstream commit 3ca2b2506ec9a3b1615930a6810d30ec9aba10a1 ] The X550EM_x revision check needs to check a value, not just a bit. Use a mask and check the value. Also remove the redundant check inside the ixgbe_enter_lplu_t_x550em, because it can only be called when both the mac type and revision check pass. Signed-off-by: Mark Rustad Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 995f03107eac..04bc4df82fa7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3508,7 +3508,7 @@ struct ixgbe_info { #define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4)) #define IXGBE_FUSES0_300MHZ BIT(5) -#define IXGBE_FUSES0_REV1 BIT(6) +#define IXGBE_FUSES0_REV_MASK (3 << 6) #define IXGBE_KRM_PORT_CAR_GEN_CTRL(P) ((P) ? 0x8010 : 0x4010) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index a75f2e3ce86f..ffd2e74e5638 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1873,10 +1873,6 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) u32 save_autoneg; bool link_up; - /* SW LPLU not required on later HW revisions. */ - if (IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0))) - return 0; - /* If blocked by MNG FW, then don't restart AN */ if (ixgbe_check_reset_blocked(hw)) return 0; @@ -2030,8 +2026,9 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) } /* setup SW LPLU only for first revision */ - if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, - IXGBE_FUSES0_GROUP(0)))) + if (hw->mac.type == ixgbe_mac_X550EM_x && + !(IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)) & + IXGBE_FUSES0_REV_MASK)) phy->ops.enter_lplu = ixgbe_enter_lplu_t_x550em; phy->ops.handle_lasi = ixgbe_handle_lasi_ext_t_x550em; From c21f80e9661b5363638eef93be4bf8ed4fea7577 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Sep 2016 15:45:31 +0200 Subject: [PATCH 306/812] ALSA: timer: Fix zero-division by continue of uninitialized instance [ Upstream commit 9f8a7658bcafb2a7853f7a2eae8a94e87e6e695b ] When a user timer instance is continued without the explicit start beforehand, the system gets eventually zero-division error like: divide error: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN CPU: 1 PID: 27320 Comm: syz-executor Not tainted 4.8.0-rc3-next-20160825+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88003c9b2280 task.stack: ffff880027280000 RIP: 0010:[] [< inline >] ktime_divns include/linux/ktime.h:195 RIP: 0010:[] [] snd_hrtimer_callback+0x1bc/0x3c0 sound/core/hrtimer.c:62 Call Trace: [< inline >] __run_hrtimer kernel/time/hrtimer.c:1238 [] __hrtimer_run_queues+0x325/0xe70 kernel/time/hrtimer.c:1302 [] hrtimer_interrupt+0x18b/0x420 kernel/time/hrtimer.c:1336 [] local_apic_timer_interrupt+0x6f/0xe0 arch/x86/kernel/apic/apic.c:933 [] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:957 [] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:487 ..... Although a similar issue was spotted and a fix patch was merged in commit [6b760bb2c63a: ALSA: timer: fix division by zero after SNDRV_TIMER_IOCTL_CONTINUE], it seems covering only a part of iceberg. In this patch, we fix the issue a bit more drastically. Basically the continue of an uninitialized timer is supposed to be a fresh start, so we do it for user timers. For the direct snd_timer_continue() call, there is no way to pass the initial tick value, so we kick out for the uninitialized case. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index ef850a99d64a..f989adb98a22 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -35,6 +35,9 @@ #include #include +/* internal flags */ +#define SNDRV_TIMER_IFLG_PAUSED 0x00010000 + #if IS_ENABLED(CONFIG_SND_HRTIMER) #define DEFAULT_TIMER_LIMIT 4 #elif IS_ENABLED(CONFIG_SND_RTCTIMER) @@ -547,6 +550,10 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) } } timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); + if (stop) + timeri->flags &= ~SNDRV_TIMER_IFLG_PAUSED; + else + timeri->flags |= SNDRV_TIMER_IFLG_PAUSED; snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : SNDRV_TIMER_EVENT_PAUSE); unlock: @@ -608,6 +615,10 @@ int snd_timer_stop(struct snd_timer_instance *timeri) */ int snd_timer_continue(struct snd_timer_instance *timeri) { + /* timer can continue only after pause */ + if (!(timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) + return -EINVAL; + if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_start_slave(timeri, false); else @@ -1837,6 +1848,9 @@ static int snd_timer_user_continue(struct file *file) tu = file->private_data; if (!tu->timeri) return -EBADFD; + /* start timer instead of continue if it's not used before */ + if (!(tu->timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) + return snd_timer_user_start(file); tu->timeri->lost = 0; return (err = snd_timer_continue(tu->timeri)) < 0 ? err : 0; } From 152368987f1844ca5728ccbac5e8d7a7de8a7457 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 30 Sep 2016 11:11:07 +0200 Subject: [PATCH 307/812] vti6: flush x-netns xfrm cache when vti interface is removed [ Upstream commit 7f92083eb58f85ea114d97f65fcbe22be5b0468d ] This is the same fix than commit a5d0dc810abf ("vti: flush x-netns xfrm cache when vti interface is removed") This patch fixes a refcnt problem when a x-netns vti6 interface is removed: unregister_netdevice: waiting for vti6_test to become free. Usage count = 1 Here is a script to reproduce the problem: ip link set dev ntfp2 up ip addr add dev ntfp2 2001::1/64 ip link add vti6_test type vti6 local 2001::1 remote 2001::2 key 1 ip netns add secure ip link set vti6_test netns secure ip netns exec secure ip link set vti6_test up ip netns exec secure ip link s lo up ip netns exec secure ip addr add dev vti6_test 2003::1/64 ip -6 xfrm policy add dir out tmpl src 2001::1 dst 2001::2 proto esp \ mode tunnel mark 1 ip -6 xfrm policy add dir in tmpl src 2001::2 dst 2001::1 proto esp \ mode tunnel mark 1 ip xfrm state add src 2001::1 dst 2001::2 proto esp spi 1 mode tunnel \ enc des3_ede 0x112233445566778811223344556677881122334455667788 mark 1 ip xfrm state add src 2001::2 dst 2001::1 proto esp spi 1 mode tunnel \ enc des3_ede 0x112233445566778811223344556677881122334455667788 mark 1 ip netns exec secure ping6 -c 4 2003::2 ip netns del secure CC: Lance Richardson Signed-off-by: Nicolas Dichtel Acked-by: Lance Richardson Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv6/ip6_vti.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 60d4052d97a6..51da5987952c 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1140,6 +1140,33 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; +static bool is_vti6_tunnel(const struct net_device *dev) +{ + return dev->netdev_ops == &vti6_netdev_ops; +} + +static int vti6_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ip6_tnl *t = netdev_priv(dev); + + if (!is_vti6_tunnel(dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_DOWN: + if (!net_eq(t->net, dev_net(dev))) + xfrm_garbage_collect(t->net); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block vti6_notifier_block __read_mostly = { + .notifier_call = vti6_device_event, +}; + /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1150,6 +1177,8 @@ static int __init vti6_tunnel_init(void) const char *msg; int err; + register_netdevice_notifier(&vti6_notifier_block); + msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) @@ -1182,6 +1211,7 @@ static int __init vti6_tunnel_init(void) xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: + unregister_netdevice_notifier(&vti6_notifier_block); pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1196,6 +1226,7 @@ static void __exit vti6_tunnel_cleanup(void) xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); + unregister_netdevice_notifier(&vti6_notifier_block); } module_init(vti6_tunnel_init); From 02bc5312bb66a828f21d85af14425fd45eec08f8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 29 Mar 2016 14:55:22 -0700 Subject: [PATCH 308/812] gro: Allow tunnel stacking in the case of FOU/GUE [ Upstream commit c3483384ee511ee2af40b4076366cd82a6a47b86 ] This patch should fix the issues seen with a recent fix to prevent tunnel-in-tunnel frames from being generated with GRO. The fix itself is correct for now as long as we do not add any devices that support NETIF_F_GSO_GRE_CSUM. When such a device is added it could have the potential to mess things up due to the fact that the outer transport header points to the outer UDP header and not the GRE header as would be expected. Fixes: fac8e0f579695 ("tunnels: Don't apply GRO to multiple layers of encapsulation.") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fou.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 08d8ee124538..d83888bc33d3 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -195,6 +195,14 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, u8 proto = NAPI_GRO_CB(skb)->proto; const struct net_offload **offloads; + /* We can clear the encap_mark for FOU as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -354,6 +362,14 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, } } + /* We can clear the encap_mark for GUE as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); From 44656b939099b3381134dfe7c1cf5ccfc08d3272 Mon Sep 17 00:00:00 2001 From: "Peter S. Housel" Date: Mon, 12 Jun 2017 11:46:22 +0100 Subject: [PATCH 309/812] brcmfmac: Fix glom_skb leak in brcmf_sdiod_recv_chain [ Upstream commit 5ea59db8a375216e6c915c5586f556766673b5a7 ] An earlier change to this function (3bdae810721b) fixed a leak in the case of an unsuccessful call to brcmf_sdiod_buffrw(). However, the glom_skb buffer, used for emulating a scattering read, is never used or referenced after its contents are copied into the destination buffers, and therefore always needs to be freed by the end of the function. Fixes: 3bdae810721b ("brcmfmac: Fix glob_skb leak in brcmf_sdiod_recv_chain") Fixes: a413e39a38573 ("brcmfmac: fix brcmf_sdcard_recv_chain() for host without sg support") Cc: stable@vger.kernel.org # 4.9.x- Signed-off-by: Peter S. Housel Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index 91da67657f81..72e1796c8167 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -705,7 +705,7 @@ int brcmf_sdiod_recv_pkt(struct brcmf_sdio_dev *sdiodev, struct sk_buff *pkt) int brcmf_sdiod_recv_chain(struct brcmf_sdio_dev *sdiodev, struct sk_buff_head *pktq, uint totlen) { - struct sk_buff *glom_skb; + struct sk_buff *glom_skb = NULL; struct sk_buff *skb; u32 addr = sdiodev->sbwad; int err = 0; @@ -726,10 +726,8 @@ int brcmf_sdiod_recv_chain(struct brcmf_sdio_dev *sdiodev, return -ENOMEM; err = brcmf_sdiod_buffrw(sdiodev, SDIO_FUNC_2, false, addr, glom_skb); - if (err) { - brcmu_pkt_buf_free_skb(glom_skb); + if (err) goto done; - } skb_queue_walk(pktq, skb) { memcpy(skb->data, glom_skb->data, skb->len); @@ -740,6 +738,7 @@ int brcmf_sdiod_recv_chain(struct brcmf_sdio_dev *sdiodev, pktq); done: + brcmu_pkt_buf_free_skb(glom_skb); return err; } From 414fb21bd5e0b72ff76045e3b76cea9629b47ea0 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 29 Nov 2016 13:09:45 +0100 Subject: [PATCH 310/812] l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv() [ Upstream commit a3c18422a4b4e108bcf6a2328f48867e1003fd95 ] Socket must be held while under the protection of the l2tp lock; there is no guarantee that sk remains valid after the read_unlock_bh() call. Same issue for l2tp_ip and l2tp_ip6. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_ip.c | 11 ++++++----- net/l2tp/l2tp_ip6.c | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 48ab93842322..c7e6098c924e 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -184,14 +184,15 @@ static int l2tp_ip_recv(struct sk_buff *skb) read_lock_bh(&l2tp_ip_lock); sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id); + if (!sk) { + read_unlock_bh(&l2tp_ip_lock); + goto discard; + } + + sock_hold(sk); read_unlock_bh(&l2tp_ip_lock); } - if (sk == NULL) - goto discard; - - sock_hold(sk); - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index bcdab1cba773..5fe0a6f6af3d 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -196,14 +196,15 @@ static int l2tp_ip6_recv(struct sk_buff *skb) read_lock_bh(&l2tp_ip6_lock); sk = __l2tp_ip6_bind_lookup(&init_net, &iph->daddr, 0, tunnel_id); + if (!sk) { + read_unlock_bh(&l2tp_ip6_lock); + goto discard; + } + + sock_hold(sk); read_unlock_bh(&l2tp_ip6_lock); } - if (sk == NULL) - goto discard; - - sock_hold(sk); - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; From 27e13827973eccb7fcbf3c70832305082aa082c2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Aug 2017 17:42:46 -0500 Subject: [PATCH 311/812] tty: serial: sprd: fix error return code in sprd_probe() [ Upstream commit ec085c5a51b768947ca481f90b66653e36b3c566 ] platform_get_irq() returns an error code, but the sprd_serial driver ignores it and always returns -ENODEV. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Also, notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/sprd_serial.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c index 9dbae01d41ce..1e302caaa450 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -731,8 +731,8 @@ static int sprd_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(&pdev->dev, "not provide irq resource\n"); - return -ENODEV; + dev_err(&pdev->dev, "not provide irq resource: %d\n", irq); + return irq; } up->irq = irq; From 9954f1884eec0c584136a08290a62d996fd010ea Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 21 Aug 2017 16:49:58 +0200 Subject: [PATCH 312/812] video: fbdev: pxa3xx_gcu: fix error return code in pxa3xx_gcu_probe() [ Upstream commit 7588f1ecc5f0c914e669d8afb6525f47cd1c4355 ] platform_get_irq() returns an error code, but the pxa3xx_gcu driver ignores it and always returns -ENODEV. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Also, notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/pxa3xx-gcu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c index 50bce45e7f3d..933619da1a94 100644 --- a/drivers/video/fbdev/pxa3xx-gcu.c +++ b/drivers/video/fbdev/pxa3xx-gcu.c @@ -626,8 +626,8 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev) /* request the IRQ */ irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "no IRQ defined\n"); - return -ENODEV; + dev_err(dev, "no IRQ defined: %d\n", irq); + return irq; } ret = devm_request_irq(dev, irq, pxa3xx_gcu_handle_irq, From cc014084cb850df29b19e4bf4419f129c5cfea95 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 31 Aug 2016 13:48:19 -0700 Subject: [PATCH 313/812] sparc64 mm: Fix more TSB sizing issues [ Upstream commit 1e953d846ac015fbfcf09c857e8f893924cb629c ] Commit af1b1a9b36b8 ("sparc64 mm: Fix base TSB sizing when hugetlb pages are used") addressed the difference between hugetlb and THP pages when computing TSB sizes. The following additional issues were also discovered while working with the code. In order to save memory, THP makes use of a huge zero page. This huge zero page does not count against a task's RSS, but it does consume TSB entries. This is similar to hugetlb pages. Therefore, count huge zero page entries in hugetlb_pte_count. Accounting of THP pages is done in the routine set_pmd_at(). Unfortunately, this does not catch the case where a THP page is split. To handle this case, decrement the count in pmdp_invalidate(). pmdp_invalidate is only called when splitting a THP. However, 'sanity checks' are added in case it is ever called for other purposes. A more general issue exists with HPAGE_SIZE accounting. hugetlb_pte_count tracks the number of HPAGE_SIZE (8M) pages. This value is used to size the TSB for HPAGE_SIZE pages. However, each HPAGE_SIZE page consists of two REAL_HPAGE_SIZE (4M) pages. The TSB contains an entry for each REAL_HPAGE_SIZE page. Therefore, the number of REAL_HPAGE_SIZE pages should be used to size the huge page TSB. A new compile time constant REAL_HPAGE_PER_HPAGE is used to multiply hugetlb_pte_count before sizing the TSB. Changes from V1 - Fixed build issue if hugetlb or THP not configured Signed-off-by: Mike Kravetz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/include/asm/page_64.h | 1 + arch/sparc/mm/fault_64.c | 1 + arch/sparc/mm/tlb.c | 35 ++++++++++++++++++++++++++++---- arch/sparc/mm/tsb.c | 18 ++++++++++------ 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 8c2a8c937540..c1263fc390db 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -25,6 +25,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index e15f33715103..b01ec72522cb 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -487,6 +487,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; + mm_rss *= REAL_HPAGE_PER_HPAGE; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 3659d37b4d81..c56a195c9071 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return; if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { - if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.thp_pte_count++; - else - mm->context.thp_pte_count--; + /* + * Note that this routine only sets pmds for THP pages. + * Hugetlb pages are handled elsewhere. We need to check + * for huge zero page. Huge zero pages are like hugetlb + * pages in that there is no RSS, but there is the need + * for TSB entries. So, huge zero page counts go into + * hugetlb_pte_count. + */ + if (pmd_val(pmd) & _PAGE_PMD_HUGE) { + if (is_huge_zero_page(pmd_page(pmd))) + mm->context.hugetlb_pte_count++; + else + mm->context.thp_pte_count++; + } else { + if (is_huge_zero_page(pmd_page(orig))) + mm->context.hugetlb_pte_count--; + else + mm->context.thp_pte_count--; + } /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held @@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +/* + * This routine is only called when splitting a THP + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + /* + * set_pmd_at() will not be called in a way to decrement + * thp_pte_count when splitting a THP, so do it now. + * Sanity check pmd before doing the actual decrement. + */ + if ((pmd_val(entry) & _PAGE_PMD_HUGE) && + !is_huge_zero_page(pmd_page(entry))) + (vma->vm_mm)->context.thp_pte_count--; } void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 266411291634..84cd593117a6 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -489,8 +489,10 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss) int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + unsigned long mm_rss = get_mm_rss(mm); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long total_huge_pte_count; + unsigned long saved_hugetlb_pte_count; + unsigned long saved_thp_pte_count; #endif unsigned int i; @@ -503,10 +505,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) * will re-increment the counters as the parent PTEs are * copied into the child address space. */ - total_huge_pte_count = mm->context.hugetlb_pte_count + - mm->context.thp_pte_count; + saved_hugetlb_pte_count = mm->context.hugetlb_pte_count; + saved_thp_pte_count = mm->context.thp_pte_count; mm->context.hugetlb_pte_count = 0; mm->context.thp_pte_count = 0; + + mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE); #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -519,11 +523,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) /* If this is fork, inherit the parent's TSB size. We would * grow it to that size on the first page fault anyways. */ - tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(total_huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); + if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, + (saved_hugetlb_pte_count + saved_thp_pte_count) * + REAL_HPAGE_PER_HPAGE); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) From b99152a593de5df769c889177e731565845aefbd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Aug 2017 00:08:06 -0500 Subject: [PATCH 314/812] gpu: host1x: fix error return code in host1x_probe() [ Upstream commit 7b2c63de20080c18d0de35b292ad61fc9bc8328e ] platform_get_irq() returns an error code, but the host1x driver ignores it and always returns -ENXIO. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/gpu/host1x/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 53d3d1d45b48..ce1b10a2ae85 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -116,8 +116,8 @@ static int host1x_probe(struct platform_device *pdev) syncpt_irq = platform_get_irq(pdev, 0); if (syncpt_irq < 0) { - dev_err(&pdev->dev, "failed to get IRQ\n"); - return -ENXIO; + dev_err(&pdev->dev, "failed to get IRQ: %d\n", syncpt_irq); + return syncpt_irq; } host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); From 20d01c513f8d00633a0778df60d07d79878f24fc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Aug 2017 09:47:52 -0700 Subject: [PATCH 315/812] sparc64: Fix exception handling in UltraSPARC-III memcpy. [ Upstream commit 0ede1c401332173ab0693121dc6cde04a4dbf131 ] Mikael Pettersson reported that some test programs in the strace-4.18 testsuite cause an OOPS. After some debugging it turns out that garbage values are returned when an exception occurs, causing the fixup memset() to be run with bogus arguments. The problem is that two of the exception handler stubs write the successfully copied length into the wrong register. Fixes: ee841d0aff64 ("sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.") Reported-by: Mikael Pettersson Tested-by: Mikael Pettersson Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/lib/U3memcpy.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 54f98706b03b..5a8cb37f0a3b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08) ENTRY(U3_retl_o2_and_7_plus_GS) and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS) ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) add GLOBAL_SPARE, 8, GLOBAL_SPARE and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) #endif From 7f71f461af4a3e8e29e543d15fca150a2057faf4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 3 Aug 2017 17:00:01 -0500 Subject: [PATCH 316/812] gpio: msic: fix error return code in platform_msic_gpio_probe() [ Upstream commit ca1f3ae3154ad6d08caa740c99be0d86644a4e44 ] platform_get_irq() returns an error code, but the gpio-msic driver ignores it and always returns -EINVAL. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-msic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-msic.c b/drivers/gpio/gpio-msic.c index 22523aae8abe..3abf066f93d3 100644 --- a/drivers/gpio/gpio-msic.c +++ b/drivers/gpio/gpio-msic.c @@ -266,8 +266,8 @@ static int platform_msic_gpio_probe(struct platform_device *pdev) int i; if (irq < 0) { - dev_err(dev, "no IRQ line\n"); - return -EINVAL; + dev_err(dev, "no IRQ line: %d\n", irq); + return irq; } if (!pdata || !pdata->gpio_base) { From 047719209751af5133cf7614f2d6edce495d11e5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:14:23 -0500 Subject: [PATCH 317/812] usb: imx21-hcd: fix error return code in imx21_probe() [ Upstream commit 46edf52d08342b3dc1f9a61c5200ab8b1c0f5a37 ] platform_get_irq() returns an error code, but the imx21-hcd driver ignores it and always returns -ENXIO. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print error message and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/imx21-hcd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/imx21-hcd.c b/drivers/usb/host/imx21-hcd.c index f542045dc2a6..e25d72e0527f 100644 --- a/drivers/usb/host/imx21-hcd.c +++ b/drivers/usb/host/imx21-hcd.c @@ -1849,8 +1849,10 @@ static int imx21_probe(struct platform_device *pdev) if (!res) return -ENODEV; irq = platform_get_irq(pdev, 0); - if (irq < 0) - return -ENXIO; + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq); + return irq; + } hcd = usb_create_hcd(&imx21_hc_driver, &pdev->dev, dev_name(&pdev->dev)); From fd5939e73ea78ac33e597de3b7677c3d7dc50e63 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Aug 2017 17:26:13 -0500 Subject: [PATCH 318/812] usb: ehci-omap: fix error return code in ehci_hcd_omap_probe() [ Upstream commit 99dbff202e28ad1dadf55b058bcae7908678e963 ] platform_get_irq() returns an error code, but the ehci-omap driver ignores it and always returns -ENODEV. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Also, notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/ehci-omap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index a24720beb39d..cccde8217f28 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -130,8 +130,8 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "EHCI irq failed\n"); - return -ENODEV; + dev_err(dev, "EHCI irq failed: %d\n", irq); + return irq; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From 18747e4caac9c6d7352e2af9c57ccaf0e94776f8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:25:34 -0500 Subject: [PATCH 319/812] usb: dwc3: omap: fix error return code in dwc3_omap_probe() [ Upstream commit 0ae99ecba7928c7bc66cf14d8a88f0b6ec9fc78e ] platform_get_irq() returns an error code, but the dwc3-omap driver ignores it and always returns -EINVAL. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-omap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 22e9606d8e08..9078af0ce06c 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -469,8 +469,8 @@ static int dwc3_omap_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "missing IRQ resource\n"); - return -EINVAL; + dev_err(dev, "missing IRQ resource: %d\n", irq); + return irq; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From 8025a41779ebae3670dc060ff759be2fa4bcc1f0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:59:44 -0500 Subject: [PATCH 320/812] spi/bcm63xx-hspi: fix error return code in bcm63xx_hsspi_probe() [ Upstream commit 378da4a65f3a0390837b38145bb5d8c2d20c2cf7 ] platform_get_irq() returns an error code, but the spi-bcm63xx-hsspi driver ignores it and always returns -ENXIO. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm63xx-hsspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c index 55789f7cda92..645f428ad0a2 100644 --- a/drivers/spi/spi-bcm63xx-hsspi.c +++ b/drivers/spi/spi-bcm63xx-hsspi.c @@ -336,8 +336,8 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "no irq\n"); - return -ENXIO; + dev_err(dev, "no irq: %d\n", irq); + return irq; } res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); From f46b1dc562a56970e0669ecd488ddb71574416f8 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 8 Aug 2017 13:22:30 +0100 Subject: [PATCH 321/812] MIPS: Handle non word sized instructions when examining frame [ Upstream commit 11887ed172a6960673f130dad8f8fb42778f64d7 ] Commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") added fairly broken support for handling 16bit microMIPS instructions in get_frame_info(). It adjusts the instruction pointer by 16bits in the case of a 16bit sp move instruction, but not any other 16bit instruction. Commit b6c7a324df37 ("MIPS: Fix get_frame_info() handling of microMIPS function size") goes some way to fixing get_frame_info() to iterate over microMIPS instuctions, but the instruction pointer is still manipulated using a postincrement, and is of union mips_instruction type. Since the union is sized to the largest member (a word), but microMIPS instructions are a mix of halfword and word sizes, the function does not always iterate correctly, ending up misaligned with the instruction stream and interpreting it incorrectly. Since the instruction modifying the stack pointer is usually the first in the function, that one is usually handled correctly. But the instruction which saves the return address to the sp is some variable number of instructions into the frame and is frequently missed due to not being on a word boundary, leading to incomplete walking of the stack. Fix this by incrementing the instruction pointer based on the size of the previously decoded instruction (& remove the hack introduced by commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") which adjusts the instruction pointer in the case of a 16bit sp move instruction, but not any other). Fixes: 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") Signed-off-by: Matt Redfearn Cc: Marcin Nowakowski Cc: James Hogan Cc: Ingo Molnar Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16953/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/kernel/process.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index ed6cac4a4df0..a9cc74354df8 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -341,6 +341,7 @@ static int get_frame_info(struct mips_frame_info *info) bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); union mips_instruction insn, *ip, *ip_end; const unsigned int max_insns = 128; + unsigned int last_insn_size = 0; unsigned int i; info->pc_offset = -1; @@ -352,15 +353,19 @@ static int get_frame_info(struct mips_frame_info *info) ip_end = (void *)ip + info->func_size; - for (i = 0; i < max_insns && ip < ip_end; i++, ip++) { + for (i = 0; i < max_insns && ip < ip_end; i++) { + ip = (void *)ip + last_insn_size; if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.halfword[0] = 0; insn.halfword[1] = ip->halfword[0]; + last_insn_size = 2; } else if (is_mmips) { insn.halfword[0] = ip->halfword[1]; insn.halfword[1] = ip->halfword[0]; + last_insn_size = 4; } else { insn.word = ip->word; + last_insn_size = 4; } if (is_jump_ins(&insn)) @@ -382,8 +387,6 @@ static int get_frame_info(struct mips_frame_info *info) tmp = (ip->halfword[0] >> 1); info->frame_size = -(signed short)(tmp & 0xf); } - ip = (void *) &ip->halfword[1]; - ip--; } else #endif info->frame_size = - ip->i_format.simmediate; From 61d2a3e63bc2a5a149fc01c367fd59cc5e5cf9ef Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:52:34 -0500 Subject: [PATCH 322/812] spi/bcm63xx: fix error return code in bcm63xx_spi_probe() [ Upstream commit ba8afe94723e9ba665aee9cca649fb2c80f7304c ] platform_get_irq() returns an error code, but the spi-bcm63xx driver ignores it and always returns -ENXIO. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm63xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index bf9a610e5b89..f14500910bc2 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -496,8 +496,8 @@ static int bcm63xx_spi_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "no irq\n"); - return -ENXIO; + dev_err(dev, "no irq: %d\n", irq); + return irq; } clk = devm_clk_get(dev, "spi"); From 87c7239cde199a612892d3347e95342272e8adc5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:45:02 -0500 Subject: [PATCH 323/812] spi: xlp: fix error return code in xlp_spi_probe() [ Upstream commit 9a6b94796ae6feaf275ec6200e9b2964db208182 ] platform_get_irq() returns an error code, but the spi-xlp driver ignores it and always returns -EINVAL. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-xlp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-xlp.c b/drivers/spi/spi-xlp.c index 8f04feca6ee3..0ddb0adaa8aa 100644 --- a/drivers/spi/spi-xlp.c +++ b/drivers/spi/spi-xlp.c @@ -392,8 +392,8 @@ static int xlp_spi_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(&pdev->dev, "no IRQ resource found\n"); - return -EINVAL; + dev_err(&pdev->dev, "no IRQ resource found: %d\n", irq); + return irq; } err = devm_request_irq(&pdev->dev, irq, xlp_spi_interrupt, 0, pdev->name, xspi); From dfb3a6a31fd001e89973c53123752c608f0acd07 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 30 Jun 2017 15:43:50 -0500 Subject: [PATCH 324/812] ASoC: spear: fix error return code in spdif_in_probe() [ Upstream commit 27d30400c448264c1ac9434cb836de0c230af213 ] platform_get_irq() returns an error code, but the spdif_in driver ignores it and always returns -EINVAL. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print error message and propagate the return value of platform_get_irq on failure. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/spear/spdif_in.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/spear/spdif_in.c b/sound/soc/spear/spdif_in.c index 977a078eb92f..7f32527fc3c8 100644 --- a/sound/soc/spear/spdif_in.c +++ b/sound/soc/spear/spdif_in.c @@ -223,8 +223,10 @@ static int spdif_in_probe(struct platform_device *pdev) host->io_base = io_base; host->irq = platform_get_irq(pdev, 0); - if (host->irq < 0) - return -EINVAL; + if (host->irq < 0) { + dev_warn(&pdev->dev, "failed to get IRQ: %d\n", host->irq); + return host->irq; + } host->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(host->clk)) From 1bd2b909fd4579dcb5d231ab276e2d2f20b81a82 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 3 Jul 2017 07:47:38 -0500 Subject: [PATCH 325/812] PM / devfreq: tegra: fix error return code in tegra_devfreq_probe() [ Upstream commit 9e578b37505018622dfafc40eed7cd78ff2af221 ] platform_get_irq() returns an error code, but the tegra-devfreq driver ignores it and always returns -ENODEV. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. Reviewed-by: Chanwoo Choi Signed-off-by: Gustavo A. R. Silva Signed-off-by: MyungJoo Ham Signed-off-by: Sasha Levin --- drivers/devfreq/tegra-devfreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 848b93ee930f..64a2e02b87d7 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -688,9 +688,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq <= 0) { - dev_err(&pdev->dev, "Failed to get IRQ\n"); - return -ENODEV; + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq); + return irq; } platform_set_drvdata(pdev, tegra); From b1f4be0ee470f7244d85152dbde7f50a23d15c0b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 Apr 2017 19:29:34 +0200 Subject: [PATCH 326/812] bonding: avoid defaulting hard_header_len to ETH_HLEN on slave removal [ Upstream commit 19cdead3e2ef8ed765c5d1ce48057ca9d97b5094 ] On slave list updates, the bonding driver computes its hard_header_len as the maximum of all enslaved devices's hard_header_len. If the slave list is empty, e.g. on last enslaved device removal, ETH_HLEN is used. Since the bonding header_ops are set only when the first enslaved device is attached, the above can lead to header_ops->create() being called with the wrong skb headroom in place. If bond0 is configured on top of ipoib devices, with the following commands: ifup bond0 for slave in $BOND_SLAVES_LIST; do ip link set dev $slave nomaster done ping -c 1 we will obtain a skb_under_panic() with a similar call trace: skb_push+0x3d/0x40 push_pseudo_header+0x17/0x30 [ib_ipoib] ipoib_hard_header+0x4e/0x80 [ib_ipoib] arp_create+0x12f/0x220 arp_send_dst.part.19+0x28/0x50 arp_solicit+0x115/0x290 neigh_probe+0x4d/0x70 __neigh_event_send+0xa7/0x230 neigh_resolve_output+0x12e/0x1c0 ip_finish_output2+0x14b/0x390 ip_finish_output+0x136/0x1e0 ip_output+0x76/0xe0 ip_local_out+0x35/0x40 ip_send_skb+0x19/0x40 ip_push_pending_frames+0x33/0x40 raw_sendmsg+0x7d3/0xb50 inet_sendmsg+0x31/0xb0 sock_sendmsg+0x38/0x50 SYSC_sendto+0x102/0x190 SyS_sendto+0xe/0x10 do_syscall_64+0x67/0x180 entry_SYSCALL64_slow_path+0x25/0x25 This change addresses the issue avoiding updating the bonding device hard_header_len when the slaves list become empty, forbidding to shrink it below the value used by header_ops->create(). The bug is there since commit 54ef31371407 ("[PATCH] bonding: Handle large hard_header_len") but the panic can be triggered only since commit fc791b633515 ("IB/ipoib: move back IB LL address into the hard header"). Reported-by: Norbert P Fixes: 54ef31371407 ("[PATCH] bonding: Handle large hard_header_len") Fixes: fc791b633515 ("IB/ipoib: move back IB LL address into the hard header") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Paolo Abeni Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 78da1b7b4d86..a32dcb6718ca 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1107,11 +1107,11 @@ static void bond_compute_features(struct bonding *bond) gso_max_size = min(gso_max_size, slave->dev->gso_max_size); gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs); } + bond_dev->hard_header_len = max_hard_header_len; done: bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; - bond_dev->hard_header_len = max_hard_header_len; bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); From 83aa7d13df9f9cabc87ce1c70f86c5b35c0f5ead Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 2 Mar 2017 09:21:33 -0800 Subject: [PATCH 327/812] scsi: aacraid: Fix typo in blink status [ Upstream commit 934767c56b0d9dbb95a40e9e6e4d9dcdc3a165ad ] The return status of the adapter check on KERNEL_PANIC is supposed to be the upper 16 bits of the OMR status register. Fixes: c421530bf848604e (scsi: aacraid: Reorder Adpater status check) Reported-by: Dan Carpenter Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aacraid/src.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index e415e1c58eb5..cf3ac0654a3a 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -444,7 +444,7 @@ static int aac_src_check_health(struct aac_dev *dev) return -1; err_blink: - return (status > 16) & 0xFF; + return (status >> 16) & 0xFF; } /** From 0f951e1a4186372583e05850240e861d8ca8b6a2 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 8 Aug 2017 13:22:33 +0100 Subject: [PATCH 328/812] MIPS: microMIPS: Fix decoding of swsp16 instruction [ Upstream commit cea8cd498f4f1c30ea27e3664b3c671e495c4fce ] When the immediate encoded in the instruction is accessed, it is sign extended due to being a signed value being assigned to a signed integer. The ISA specifies that this operation is an unsigned operation. The sign extension leads us to incorrectly decode: 801e9c8e: cbf1 sw ra,68(sp) As having an immediate of 1073741809. Since the instruction format does not specify signed/unsigned, and this is currently the only location to use this instuction format, change it to an unsigned immediate. Fixes: bb9bc4689b9c ("MIPS: Calculate microMIPS ra properly when unwinding the stack") Suggested-by: Paul Burton Signed-off-by: Matt Redfearn Reviewed-by: James Hogan Cc: Marcin Nowakowski Cc: Miodrag Dinic Cc: Ingo Molnar Cc: David Daney Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16957/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/include/uapi/asm/inst.h | 2 +- arch/mips/kernel/process.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 9b44d5a816fa..1b6f2f219298 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -846,7 +846,7 @@ struct mm16_r3_format { /* Load from global pointer format */ struct mm16_r5_format { /* Load/store from stack pointer format */ __BITFIELD_FIELD(unsigned int opcode : 6, __BITFIELD_FIELD(unsigned int rt : 5, - __BITFIELD_FIELD(signed int simmediate : 5, + __BITFIELD_FIELD(unsigned int imm : 5, __BITFIELD_FIELD(unsigned int : 16, /* Ignored */ ;)))) }; diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index a9cc74354df8..ebd8a715fe38 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -207,7 +207,7 @@ static inline int is_ra_save_ins(union mips_instruction *ip, int *poff) if (ip->mm16_r5_format.rt != 31) return 0; - *poff = ip->mm16_r5_format.simmediate; + *poff = ip->mm16_r5_format.imm; *poff = (*poff << 2) / sizeof(ulong); return 1; From a5ce9639fc792b1321ef9f88cbbf5e5aeb127adc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Gr=C3=B6nke?= Date: Tue, 26 Jun 2018 10:12:18 +0000 Subject: [PATCH 329/812] igb: Remove superfluous reset to PHY and page 0 selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2a83fba6cae89dd9c0625e68ff8ffff791c67ac0 ] This patch reverts two previous applied patches to fix an issue that appeared when using SGMII based SFP modules. In the current state the driver will try to reset the PHY before obtaining the phy_addr of the SGMII attached PHY. That leads to an error in e1000_write_phy_reg_sgmii_82575. Causing the initialization to fail: igb: Intel(R) Gigabit Ethernet Network Driver - version 5.4.0-k igb: Copyright (c) 2007-2014 Intel Corporation. igb: probe of ????:??:??.? failed with error -3 The patches being reverted are: commit 182785335447957409282ca745aa5bc3968facee Author: Aaron Sierra Date: Tue Nov 29 10:03:56 2016 -0600 igb: reset the PHY before reading the PHY ID commit 440aeca4b9858248d8f16d724d9fa87a4f65fa33 Author: Matwey V Kornilov Date: Thu Nov 24 13:32:48 2016 +0300 igb: Explicitly select page 0 at initialization The first reverted patch directly causes the problem mentioned above. In case of SGMII the phy_addr is not known at this point and will only be obtained by 'igb_get_phy_id_82575' further down in the code. The second removed patch selects forces selection of page 0 in the PHY. Something that the reset tries to address as well. As pointed out by Alexander Duzck, the patch below fixes the same issue but in the proper location: commit 4e684f59d760a2c7c716bb60190783546e2d08a1 Author: Chris J Arges Date: Wed Nov 2 09:13:42 2016 -0500 igb: Workaround for igb i210 firmware issue Reverts: 440aeca4b9858248d8f16d724d9fa87a4f65fa33. Reverts: 182785335447957409282ca745aa5bc3968facee. Signed-off-by: Christian Grönke Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/e1000_82575.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index f3f3b95d5512..97bf0c3d5c69 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -223,17 +223,6 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; - /* Make sure the PHY is in a good state. Several people have reported - * firmware leaving the PHY's page select register set to something - * other than the default of zero, which causes the PHY ID read to - * access something other than the intended register. - */ - ret_val = hw->phy.ops.reset(hw); - if (ret_val) { - hw_dbg("Error resetting the PHY.\n"); - goto out; - } - /* Set phy->phy_addr and phy->id. */ ret_val = igb_get_phy_id_82575(hw); if (ret_val) From fbba23bb4071123431812a653ff98de7b94fe61e Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 30 Jul 2017 21:28:15 +0100 Subject: [PATCH 330/812] MIPS: DEC: Fix an int-handler.S CPU_DADDI_WORKAROUNDS regression [ Upstream commit 68fe55680d0f3342969f49412fceabb90bdfadba ] Fix a commit 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") regression and remove assembly errors: arch/mips/dec/int-handler.S: Assembler messages: arch/mips/dec/int-handler.S:162: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:163: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:229: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:230: Error: Macro used $at after ".set noat" triggering with with the CPU_DADDI_WORKAROUNDS option set and the DADDIU instruction. This is because with that option in place the instruction becomes a macro, which expands to an LI/DADDU (or actually ADDIU/DADDU) sequence that uses $at as a temporary register. With CPU_DADDI_WORKAROUNDS we only support `-msym32' compilation though, and this is already enforced in arch/mips/Makefile, so choose the 32-bit expansion variant for the supported configurations and then replace the 64-bit variant with #error just in case. Fixes: 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.8+ Patchwork: https://patchwork.linux-mips.org/patch/16893/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/dec/int-handler.S | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index 554d1da97743..21f4a9fe82fa 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -147,23 +147,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1, cpu_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, cpu_mask_nr_tbl lui t1, %hi(cpu_mask_nr_tbl) addiu t1, %lo(cpu_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, cpu_mask_nr_tbl - .set push - .set noat - lui t1, %highest(cpu_mask_nr_tbl) - lui AT, %hi(cpu_mask_nr_tbl) - daddiu t1, t1, %higher(cpu_mask_nr_tbl) - daddiu AT, AT, %lo(cpu_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 1: lw t2,(t1) nop @@ -214,23 +203,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1,asic_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, asic_mask_nr_tbl lui t1, %hi(asic_mask_nr_tbl) addiu t1, %lo(asic_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, asic_mask_nr_tbl - .set push - .set noat - lui t1, %highest(asic_mask_nr_tbl) - lui AT, %hi(asic_mask_nr_tbl) - daddiu t1, t1, %higher(asic_mask_nr_tbl) - daddiu AT, AT, %lo(asic_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 2: lw t2,(t1) nop From 55df64d96155659e06c6c4ae2126f23b08cfe8e2 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 12 Sep 2018 08:23:01 +0200 Subject: [PATCH 331/812] ARM: dts: imx53-qsb: disable 1.2GHz OPP [ Upstream commit eea96566c189c77e5272585984eb2729881a2f1d ] The maximum CPU frequency for the i.MX53 QSB is 1GHz, so disable the 1.2GHz OPP. This makes the board work again with configs that have cpufreq enabled like imx_v6_v7_defconfig on which the board stopped working with the addition of cpufreq-dt support. Fixes: 791f416608 ("ARM: dts: imx53: add cpufreq-dt support") Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx53-qsb-common.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 53fd75c8ffcf..47894b41e4e2 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -130,6 +130,17 @@ sound { }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; From fa2075baa95c1957b477c830415d4d6b72282ddc Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Fri, 12 Oct 2018 21:34:40 -0700 Subject: [PATCH 332/812] fs/fat/fatent.c: add cond_resched() to fat_count_free_clusters() [ Upstream commit ac081c3be3fae6d0cc3e1862507fca3862d30b67 ] On non-preempt kernels this loop can take a long time (more than 50 ticks) processing through entries. Link: http://lkml.kernel.org/r/20181010172623.57033-1-khazhy@google.com Signed-off-by: Khazhismel Kumykov Acked-by: OGAWA Hirofumi Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/fat/fatent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index a70e37c47a78..e3fc477728b3 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -681,6 +681,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; From 25e2243ad203b1afa12ce15004a4e67f287d9137 Mon Sep 17 00:00:00 2001 From: Kimmo Rautkoski Date: Sat, 20 Oct 2018 22:44:42 +0300 Subject: [PATCH 333/812] mtd: spi-nor: Add support for is25wp series chips [ Upstream commit d616f81cdd2a21edfa90a595a4e9b143f5ba8414 ] Added support for is25wp032, is25wp064 and is25wp128. Signed-off-by: Kimmo Rautkoski Reviewed-by: Marek Vasut Signed-off-by: Boris Brezillon [ Adrian Bunk: Trivial adaption to changed context. ] Signed-off-by: Adrian Bunk Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/spi-nor.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 64d6f053c2a5..276998ea0267 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -708,6 +708,12 @@ static const struct flash_info spi_nor_ids[] = { /* ISSI */ { "is25cd512", INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, + { "is25wp032", INFO(0x9d7016, 0, 64 * 1024, 64, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25wp064", INFO(0x9d7017, 0, 64 * 1024, 128, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25wp128", INFO(0x9d7018, 0, 64 * 1024, 256, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, /* Macronix */ { "mx25l512e", INFO(0xc22010, 0, 64 * 1024, 1, SECT_4K) }, From 6f52608e74f3712b20d6e1c24a84d79ea77e4e34 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Jul 2018 15:15:27 +0200 Subject: [PATCH 334/812] perf tools: Disable parallelism for 'make clean' [ Upstream commit da15fc2fa9c07b23db8f5e479bd8a9f0d741ca07 ] The Yocto build system does a 'make clean' when rebuilding due to changed dependencies, and that consistently fails for me (causing the whole BSP build to fail) with errors such as | find: '[...]/perf/1.0-r9/perf-1.0/plugin_mac80211.so': No such file or directory | find: '[...]/perf/1.0-r9/perf-1.0/plugin_mac80211.so': No such file or directory | find: find: '[...]/perf/1.0-r9/perf-1.0/libtraceevent.a''[...]/perf/1.0-r9/perf-1.0/libtraceevent.a': No such file or directory: No such file or directory | [...] | find: cannot delete '/mnt/xfs/devel/pil/yocto/tmp-glibc/work/wandboard-oe-linux-gnueabi/perf/1.0-r9/perf-1.0/util/.pstack.o.cmd': No such file or directory Apparently (despite the comment), 'make clean' ends up launching multiple sub-makes that all want to remove the same things - perhaps this only happens in combination with a O=... parameter. In any case, we don't lose much by explicitly disabling the parallelism for the clean target, and it makes automated builds much more reliable. Signed-off-by: Rasmus Villemoes Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180705131527.19749-1-linux@rasmusvillemoes.dk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcd9a70c7193..55933b2eb932 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -69,10 +69,10 @@ all tags TAGS: $(make) # -# The clean target is not really parallel, don't print the jobs info: +# Explicitly disable parallelism for the clean target. # clean: - $(make) + $(make) -j1 # # The build-test target is not really parallel, don't print the jobs info: From a1519237965f31b5780c8f63e6c3649600fb5754 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 26 Oct 2018 10:28:43 +0800 Subject: [PATCH 335/812] bridge: do not add port to router list when receives query with source 0.0.0.0 commit 5a2de63fd1a59c30c02526d427bc014b98adf508 upstream. Based on RFC 4541, 2.1.1. IGMP Forwarding Rules The switch supporting IGMP snooping must maintain a list of multicast routers and the ports on which they are attached. This list can be constructed in any combination of the following ways: a) This list should be built by the snooping switch sending Multicast Router Solicitation messages as described in IGMP Multicast Router Discovery [MRDISC]. It may also snoop Multicast Router Advertisement messages sent by and to other nodes. b) The arrival port for IGMP Queries (sent by multicast routers) where the source address is not 0.0.0.0. We should not add the port to router list when receives query with source 0.0.0.0. Reported-by: Ying Xu Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d80c15d028fe..7a6d053f76e2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1261,7 +1261,15 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - br_multicast_mark_router(br, port); + + /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, + * the arrival port for IGMP Queries where the source address + * is 0.0.0.0 should not be added to router port list. + */ + if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || + (saddr->proto == htons(ETH_P_IPV6) && + !ipv6_addr_any(&saddr->u.ip6))) + br_multicast_mark_router(br, port); } static int br_ip4_multicast_query(struct net_bridge *br, From 4100be3e8177049f5bf40769a18f708fc2c3a9b7 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 27 Oct 2018 12:07:47 +0300 Subject: [PATCH 336/812] net: bridge: remove ipv6 zero address check in mcast queries commit 0fe5119e267f3e3d8ac206895f5922195ec55a8a upstream. Recently a check was added which prevents marking of routers with zero source address, but for IPv6 that cannot happen as the relevant RFCs actually forbid such packets: RFC 2710 (MLDv1): "To be valid, the Query message MUST come from a link-local IPv6 Source Address, be at least 24 octets long, and have a correct MLD checksum." Same goes for RFC 3810. And also it can be seen as a requirement in ipv6_mc_check_mld_query() which is used by the bridge to validate the message before processing it. Thus any queries with :: source address won't be processed anyway. So just remove the check for zero IPv6 source address from the query processing function. Fixes: 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Cc: Hangbin Liu Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 7a6d053f76e2..270d9c9a5331 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1267,8 +1267,7 @@ static void br_multicast_query_received(struct net_bridge *br, * is 0.0.0.0 should not be added to router port list. */ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - (saddr->proto == htons(ETH_P_IPV6) && - !ipv6_addr_any(&saddr->u.ip6))) + saddr->proto == htons(ETH_P_IPV6)) br_multicast_mark_router(br, port); } From ef1cb6b06b39482a043d3c06a53e88c689c9463d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Oct 2018 18:58:53 -0700 Subject: [PATCH 337/812] ipv6: mcast: fix a use-after-free in inet6_mc_check [ Upstream commit dc012f3628eaecfb5ba68404a5c30ef501daf63d ] syzbot found a use-after-free in inet6_mc_check [1] The problem here is that inet6_mc_check() uses rcu and read_lock(&iml->sflock) So the fact that ip6_mc_leave_src() is called under RTNL and the socket lock does not help us, we need to acquire iml->sflock in write mode. In the future, we should convert all this stuff to RCU. [1] BUG: KASAN: use-after-free in ipv6_addr_equal include/net/ipv6.h:521 [inline] BUG: KASAN: use-after-free in inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 Read of size 8 at addr ffff8801ce7f2510 by task syz-executor0/22432 CPU: 1 PID: 22432 Comm: syz-executor0 Not tainted 4.19.0-rc7+ #280 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 ipv6_addr_equal include/net/ipv6.h:521 [inline] inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 __raw_v6_lookup+0x320/0x3f0 net/ipv6/raw.c:98 ipv6_raw_deliver net/ipv6/raw.c:183 [inline] raw6_local_deliver+0x3d3/0xcb0 net/ipv6/raw.c:240 ip6_input_finish+0x467/0x1aa0 net/ipv6/ip6_input.c:345 NF_HOOK include/linux/netfilter.h:289 [inline] ip6_input+0xe9/0x600 net/ipv6/ip6_input.c:426 ip6_mc_input+0x48a/0xd20 net/ipv6/ip6_input.c:503 dst_input include/net/dst.h:450 [inline] ip6_rcv_finish+0x17a/0x330 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:289 [inline] ipv6_rcv+0x120/0x640 net/ipv6/ip6_input.c:271 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 netif_receive_skb_internal+0x12c/0x620 net/core/dev.c:5126 napi_frags_finish net/core/dev.c:5664 [inline] napi_gro_frags+0x75a/0xc90 net/core/dev.c:5737 tun_get_user+0x3189/0x4250 drivers/net/tun.c:1923 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1968 call_write_iter include/linux/fs.h:1808 [inline] do_iter_readv_writev+0x8b0/0xa80 fs/read_write.c:680 do_iter_write+0x185/0x5f0 fs/read_write.c:959 vfs_writev+0x1f1/0x360 fs/read_write.c:1004 do_writev+0x11a/0x310 fs/read_write.c:1039 __do_sys_writev fs/read_write.c:1112 [inline] __se_sys_writev fs/read_write.c:1109 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457421 Code: 75 14 b8 14 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 34 b5 fb ff c3 48 83 ec 08 e8 1a 2d 00 00 48 89 04 24 b8 14 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 63 2d 00 00 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00007f2d30ecaba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 000000000000003e RCX: 0000000000457421 RDX: 0000000000000001 RSI: 00007f2d30ecabf0 RDI: 00000000000000f0 RBP: 0000000020000500 R08: 00000000000000f0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f2d30ecb6d4 R13: 00000000004c4890 R14: 00000000004d7b90 R15: 00000000ffffffff Allocated by task 22437: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc mm/slab.c:3718 [inline] __kmalloc+0x14e/0x760 mm/slab.c:3727 kmalloc include/linux/slab.h:518 [inline] sock_kmalloc+0x15a/0x1f0 net/core/sock.c:1983 ip6_mc_source+0x14dd/0x1960 net/ipv6/mcast.c:427 do_ipv6_setsockopt.isra.9+0x3afb/0x45d0 net/ipv6/ipv6_sockglue.c:743 ipv6_setsockopt+0xbd/0x170 net/ipv6/ipv6_sockglue.c:933 rawv6_setsockopt+0x59/0x140 net/ipv6/raw.c:1069 sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3038 __sys_setsockopt+0x1ba/0x3c0 net/socket.c:1902 __do_sys_setsockopt net/socket.c:1913 [inline] __se_sys_setsockopt net/socket.c:1910 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1910 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 22430: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3813 __sock_kfree_s net/core/sock.c:2004 [inline] sock_kfree_s+0x29/0x60 net/core/sock.c:2010 ip6_mc_leave_src+0x11a/0x1d0 net/ipv6/mcast.c:2448 __ipv6_sock_mc_close+0x20b/0x4e0 net/ipv6/mcast.c:310 ipv6_sock_mc_close+0x158/0x1d0 net/ipv6/mcast.c:328 inet6_release+0x40/0x70 net/ipv6/af_inet6.c:452 __sock_release+0xd7/0x250 net/socket.c:579 sock_close+0x19/0x20 net/socket.c:1141 __fput+0x385/0xa30 fs/file_table.c:278 ____fput+0x15/0x20 fs/file_table.c:309 task_work_run+0x1e8/0x2a0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:193 [inline] exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801ce7f2500 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 16 bytes inside of 192-byte region [ffff8801ce7f2500, ffff8801ce7f25c0) The buggy address belongs to the page: page:ffffea000739fc80 count:1 mapcount:0 mapping:ffff8801da800040 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0006f6e548 ffffea000737b948 ffff8801da800040 raw: 0000000000000000 ffff8801ce7f2000 0000000100000010 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801ce7f2400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801ce7f2480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc >ffff8801ce7f2500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801ce7f2580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8801ce7f2600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 091cee551cd9..a5ec9a0cbb80 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2390,17 +2390,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and rtnl lock - * so no other readers or writers of iml or its sflist - */ + write_lock_bh(&iml->sflock); if (!iml->sflist) { /* any-source empty exclude case */ - return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + } else { + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, + iml->sflist->sl_count, iml->sflist->sl_addr, 0); + sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); + iml->sflist = NULL; } - err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + write_unlock_bh(&iml->sflock); return err; } From 2647feb650d7d48f85a957621d8404d9e34cccdb Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 24 Oct 2018 14:37:21 +0200 Subject: [PATCH 338/812] ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called [ Upstream commit ee1abcf689353f36d9322231b4320926096bdee0 ] Commit a61bbcf28a8c ("[NET]: Store skb->timestamp as offset to a base timestamp") introduces a neighbour control buffer and zeroes it out in ndisc_rcv(), as ndisc_recv_ns() uses it. Commit f2776ff04722 ("[IPV6]: Fix address/interface handling in UDP and DCCP, according to the scoping architecture.") introduces the usage of the IPv6 control buffer in protocol error handlers (e.g. inet6_iif() in present-day __udp6_lib_err()). Now, with commit b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect()."), we call protocol error handlers from ndisc_redirect_rcv(), after the control buffer is already stolen and some parts are already zeroed out. This implies that inet6_iif() on this path will always return zero. This gives unexpected results on UDP socket lookup in __udp6_lib_err(), as we might actually need to match sockets for a given interface. Instead of always claiming the control buffer in ndisc_rcv(), do that only when needed. Fixes: b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect().") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3db8d7d1a986..0bf375177a9a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1649,10 +1649,9 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); ndisc_recv_ns(skb); break; From 97749034bdb4aa8dea0ff7b08f9e08823a535ebb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 19 Oct 2018 10:00:19 -0700 Subject: [PATCH 339/812] net/ipv6: Fix index counter for unicast addresses in in6_dump_addrs [ Upstream commit 4ba4c566ba8448a05e6257e0b98a21f1a0d55315 ] The loop wants to skip previously dumped addresses, so loops until current index >= saved index. If the message fills it wants to save the index for the next address to dump - ie., the one that did not fit in the current message. Currently, it is incrementing the index counter before comparing to the saved index, and then the saved index is off by 1 - it assumes the current address is going to fit in the message. Change the index handling to increment only after a succesful dump. Fixes: 502a2ffd7376a ("ipv6: convert idev_list to list macros") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 582e757e5727..4dde1e0e7d37 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4439,8 +4439,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. temp addr */ list_for_each_entry(ifa, &idev->addr_list, if_list) { - if (++ip_idx < s_ip_idx) - continue; + if (ip_idx < s_ip_idx) + goto next; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -4449,6 +4449,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +next: + ip_idx++; } break; } From 7fc851d0f813c939e72f26d37bd8cd876abc8241 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Oct 2018 15:51:06 -0700 Subject: [PATCH 340/812] net: sched: gred: pass the right attribute to gred_change_table_def() [ Upstream commit 38b4f18d56372e1e21771ab7b0357b853330186c ] gred_change_table_def() takes a pointer to TCA_GRED_DPS attribute, and expects it will be able to interpret its contents as struct tc_gred_sopt. Pass the correct gred attribute, instead of TCA_OPTIONS. This bug meant the table definition could never be changed after Qdisc was initialized (unless whatever TCA_OPTIONS contained both passed netlink validation and was a valid struct tc_gred_sopt...). Old behaviour: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 RTNETLINK answers: Invalid argument Now: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 Fixes: f62d6b936df5 ("[PKT_SCHED]: GRED: Use central VQ change procedure") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index f9e8deeeac96..a5745cb2d014 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -444,7 +444,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); - return gred_change_table_def(sch, opt); + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } if (tb[TCA_GRED_PARMS] == NULL || From 98528072a2a9f7936bb2cbda6bf0f34545a45adf Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 18 Oct 2018 09:36:46 -0500 Subject: [PATCH 341/812] net: socket: fix a missing-check bug [ Upstream commit b6168562c8ce2bd5a30e213021650422e08764dc ] In ethtool_ioctl(), the ioctl command 'ethcmd' is checked through a switch statement to see whether it is necessary to pre-process the ethtool structure, because, as mentioned in the comment, the structure ethtool_rxnfc is defined with padding. If yes, a user-space buffer 'rxnfc' is allocated through compat_alloc_user_space(). One thing to note here is that, if 'ethcmd' is ETHTOOL_GRXCLSRLALL, the size of the buffer 'rxnfc' is partially determined by 'rule_cnt', which is actually acquired from the user-space buffer 'compat_rxnfc', i.e., 'compat_rxnfc->rule_cnt', through get_user(). After 'rxnfc' is allocated, the data in the original user-space buffer 'compat_rxnfc' is then copied to 'rxnfc' through copy_in_user(), including the 'rule_cnt' field. However, after this copy, no check is re-enforced on 'rxnfc->rule_cnt'. So it is possible that a malicious user race to change the value in the 'compat_rxnfc->rule_cnt' between these two copies. Through this way, the attacker can bypass the previous check on 'rule_cnt' and inject malicious data. This can cause undefined behavior of the kernel and introduce potential security risk. This patch avoids the above issue via copying the value acquired by get_user() to 'rxnfc->rule_cn', if 'ethcmd' is ETHTOOL_GRXCLSRLALL. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index 0c544ae48eac..96133777d17c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2760,9 +2760,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) + (void __user *)&rxnfc->fs.ring_cookie)) + return -EFAULT; + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + if (put_user(rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + } else if (copy_in_user(&rxnfc->rule_cnt, + &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) return -EFAULT; } From d5df0bdcceba33e121fe338dd85db9f86cedd1f4 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 31 Oct 2018 16:08:10 +0100 Subject: [PATCH 342/812] net: stmmac: Fix stmmac_mdio_reset() when building stmmac as modules [ Upstream commit 30549aab146ccb1275230c3b4b4bc6b4181fd54e ] When building stmmac, it is only possible to select CONFIG_DWMAC_GENERIC, or any of the glue drivers, when CONFIG_STMMAC_PLATFORM is set. The only exception is CONFIG_STMMAC_PCI. When calling of_mdiobus_register(), it will call our ->reset() callback, which is set to stmmac_mdio_reset(). Most of the code in stmmac_mdio_reset() is protected by a "#if defined(CONFIG_STMMAC_PLATFORM)", which will evaluate to false when CONFIG_STMMAC_PLATFORM=m. Because of this, the phy reset gpio will only be pulled when stmmac is built as built-in, but not when built as modules. Fix this by using "#if IS_ENABLED()" instead of "#if defined()". Signed-off-by: Niklas Cassel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index bba670c42e37..90d95b3654f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -130,7 +130,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, */ int stmmac_mdio_reset(struct mii_bus *bus) { -#if defined(CONFIG_STMMAC_PLATFORM) +#if IS_ENABLED(CONFIG_STMMAC_PLATFORM) struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; From 104ed9e3d12651ba6a95aad7f229c966386f2f83 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 18 Oct 2018 19:56:01 +0200 Subject: [PATCH 343/812] r8169: fix NAPI handling under high load [ Upstream commit 6b839b6cf9eada30b086effb51e5d6076bafc761 ] rtl_rx() and rtl_tx() are called only if the respective bits are set in the interrupt status register. Under high load NAPI may not be able to process all data (work_done == budget) and it will schedule subsequent calls to the poll callback. rtl_ack_events() however resets the bits in the interrupt status register, therefore subsequent calls to rtl8169_poll() won't call rtl_rx() and rtl_tx() - chip interrupts are still disabled. Fix this by calling rtl_rx() and rtl_tx() independent of the bits set in the interrupt status register. Both functions will detect if there's nothing to do for them. Fixes: da78dbff2e05 ("r8169: remove work from irq handler.") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index c6782ebd35e1..93543e176829 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7540,17 +7540,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); struct net_device *dev = tp->dev; u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow; - int work_done= 0; + int work_done; u16 status; status = rtl_get_events(tp); rtl_ack_events(tp, status & ~tp->event_slow); - if (status & RTL_EVENT_NAPI_RX) - work_done = rtl_rx(dev, tp, (u32) budget); + work_done = rtl_rx(dev, tp, (u32) budget); - if (status & RTL_EVENT_NAPI_TX) - rtl_tx(dev, tp); + rtl_tx(dev, tp); if (status & tp->event_slow) { enable_mask &= ~tp->event_slow; From fee37f15abcc273766186e13c2ff0338f14035cd Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 16 Oct 2018 15:18:17 -0300 Subject: [PATCH 344/812] sctp: fix race on sctp_id2asoc [ Upstream commit b336decab22158937975293aea79396525f92bb3 ] syzbot reported an use-after-free involving sctp_id2asoc. Dmitry Vyukov helped to root cause it and it is because of reading the asoc after it was freed: CPU 1 CPU 2 (working on socket 1) (working on socket 2) sctp_association_destroy sctp_id2asoc spin lock grab the asoc from idr spin unlock spin lock remove asoc from idr spin unlock free(asoc) if asoc->base.sk != sk ... [*] This can only be hit if trying to fetch asocs from different sockets. As we have a single IDR for all asocs, in all SCTP sockets, their id is unique on the system. An application can try to send stuff on an id that matches on another socket, and the if in [*] will protect from such usage. But it didn't consider that as that asoc may belong to another socket, it may be freed in parallel (read: under another socket lock). We fix it by moving the checks in [*] into the protected region. This fixes it because the asoc cannot be freed while the lock is held. Reported-by: syzbot+c7dd55d7aec49d48e49a@syzkaller.appspotmail.com Acked-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 13c7f42b7040..53f1b33bca4e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -248,11 +248,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) spin_lock_bh(&sctp_assocs_id_lock); asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); + if (asoc && (asoc->base.sk != sk || asoc->base.dead)) + asoc = NULL; spin_unlock_bh(&sctp_assocs_id_lock); - if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) - return NULL; - return asoc; } From 8de8589c09dc78c5245b73d6b31a615a5aed52b6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Nov 2018 12:02:37 -0700 Subject: [PATCH 345/812] net: drop skb on failure in ip_check_defrag() [ Upstream commit 7de414a9dd91426318df7b63da024b2b07e53df5 ] Most callers of pskb_trim_rcsum() simply drop the skb when it fails, however, ip_check_defrag() still continues to pass the skb up to stack. This is suspicious. In ip_check_defrag(), after we learn the skb is an IP fragment, passing the skb to callers makes no sense, because callers expect fragments are defrag'ed on success. So, dropping the skb when we can't defrag it is reasonable. Note, prior to commit 88078d98d1bb, this is not a big problem as checksum will be fixed up anyway. After it, the checksum is not correct on failure. Found this during code review. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7057a1b09b5e..72915658a6b1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -716,10 +716,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) - return skb; - if (pskb_trim_rcsum(skb, netoff + len)) - return skb; + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) { + kfree_skb(skb); + return NULL; + } + if (pskb_trim_rcsum(skb, netoff + len)) { + kfree_skb(skb); + return NULL; + } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(net, skb, user)) return NULL; From 628a149b00d1eb16687e61f8d93676db3e49071c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 30 Oct 2018 14:10:49 +0800 Subject: [PATCH 346/812] vhost: Fix Spectre V1 vulnerability [ Upstream commit ff002269a4ee9c769dbf9365acef633ebcbd6cbe ] The idx in vhost_vring_ioctl() was controlled by userspace, hence a potential exploitation of the Spectre variant 1 vulnerability. Fixing this by sanitizing idx before using it to index d->vqs. Cc: Michael S. Tsirkin Cc: Josh Poimboeuf Cc: Andrea Arcangeli Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 675819a1af37..c54d388310f0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "vhost.h" @@ -748,6 +749,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) if (idx >= d->nvqs) return -ENOBUFS; + idx = array_index_nospec(idx, d->nvqs); vq = d->vqs[idx]; mutex_lock(&vq->mutex); From abd46fca025ccb8781303a4956e2bf031a7e8c01 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 Oct 2018 20:36:43 +0000 Subject: [PATCH 347/812] rtnetlink: Disallow FDB configuration for non-Ethernet device [ Upstream commit da71577545a52be3e0e9225a946e5fd79cfab015 ] When an FDB entry is configured, the address is validated to have the length of an Ethernet address, but the device for which the address is configured can be of any type. The above can result in the use of uninitialized memory when the address is later compared against existing addresses since 'dev->addr_len' is used and it may be greater than ETH_ALEN, as with ip6tnl devices. Fix this by making sure that FDB entries are only configured for Ethernet devices. BUG: KMSAN: uninit-value in memcmp+0x11d/0x180 lib/string.c:863 CPU: 1 PID: 4318 Comm: syz-executor998 Not tainted 4.19.0-rc3+ #49 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x14b/0x190 lib/dump_stack.c:113 kmsan_report+0x183/0x2b0 mm/kmsan/kmsan.c:956 __msan_warning+0x70/0xc0 mm/kmsan/kmsan_instr.c:645 memcmp+0x11d/0x180 lib/string.c:863 dev_uc_add_excl+0x165/0x7b0 net/core/dev_addr_lists.c:464 ndo_dflt_fdb_add net/core/rtnetlink.c:3463 [inline] rtnl_fdb_add+0x1081/0x1270 net/core/rtnetlink.c:3558 rtnetlink_rcv_msg+0xa0b/0x1530 net/core/rtnetlink.c:4715 netlink_rcv_skb+0x36e/0x5f0 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4733 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x1638/0x1720 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x1205/0x1290 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 __sys_sendmsg net/socket.c:2152 [inline] __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440ee9 Code: e8 cc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 bb 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff6a93b518 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440ee9 RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000003 RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 000000000000b4b0 R13: 0000000000401ec0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:256 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:181 kmsan_kmalloc+0x98/0x100 mm/kmsan/kmsan_hooks.c:91 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:100 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2718 [inline] __kmalloc_node_track_caller+0x9e7/0x1160 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2f5/0x9e0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:996 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] netlink_sendmsg+0xb49/0x1290 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 __sys_sendmsg net/socket.c:2152 [inline] __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 v2: * Make error message more specific (David) Fixes: 090096bf3db1 ("net: generic fdb support for drivers without ndo_fdb_") Signed-off-by: Ido Schimmel Reported-and-tested-by: syzbot+3a288d5f5530b901310e@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+d53ab4e92a1db04110ff@syzkaller.appspotmail.com Cc: Vlad Yasevich Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f1df04c7d395..d2a46ffe6382 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2734,6 +2734,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + pr_info("PF_BRIDGE: FDB add only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid); @@ -2836,6 +2841,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + pr_info("PF_BRIDGE: FDB delete only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid); From 2e3ae534fb98c7a6a5cf3e80a190181154328f80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Nov 2018 13:22:42 +0000 Subject: [PATCH 348/812] mremap: properly flush TLB before releasing the page Commit eb66ae030829605d61fbef1909ce310e29f78821 upstream. This is a backport to stable 4.4.y. Jann Horn points out that our TLB flushing was subtly wrong for the mremap() case. What makes mremap() special is that we don't follow the usual "add page to list of pages to be freed, then flush tlb, and then free pages". No, mremap() obviously just _moves_ the page from one page table location to another. That matters, because mremap() thus doesn't directly control the lifetime of the moved page with a freelist: instead, the lifetime of the page is controlled by the page table locking, that serializes access to the entry. As a result, we need to flush the TLB not just before releasing the lock for the source location (to avoid any concurrent accesses to the entry), but also before we release the destination page table lock (to avoid the TLB being flushed after somebody else has already done something to that page). This also makes the whole "need_flush" logic unnecessary, since we now always end up flushing the TLB for every valid entry. Reported-and-tested-by: Jann Horn Acked-by: Will Deacon Tested-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman [will: backport to 4.4 stable] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 6 +++++- mm/mremap.c | 21 ++++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c4ea57ee2fd1..465786cd6490 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1511,7 +1511,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, spinlock_t *old_ptl, *new_ptl; int ret = 0; pmd_t pmd; - + bool force_flush = false; struct mm_struct *mm = vma->vm_mm; if ((old_addr & ~HPAGE_PMD_MASK) || @@ -1539,6 +1539,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); + if (pmd_present(pmd)) + force_flush = true; VM_BUG_ON(!pmd_none(*new_pmd)); if (pmd_move_must_withdraw(new_ptl, old_ptl)) { @@ -1547,6 +1549,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, pgtable_trans_huge_deposit(mm, new_pmd, pgtable); } set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); + if (force_flush) + flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl); diff --git a/mm/mremap.c b/mm/mremap.c index fe7b7f65f4f4..450b306d473e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -96,6 +96,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + bool force_flush = false; + unsigned long len = old_end - old_addr; /* * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma @@ -143,12 +145,26 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (pte_none(*old_pte)) continue; pte = ptep_get_and_clear(mm, old_addr, old_pte); + /* + * If we are remapping a valid PTE, make sure + * to flush TLB before we drop the PTL for the PTE. + * + * NOTE! Both old and new PTL matter: the old one + * for racing with page_mkclean(), the new one to + * make sure the physical page stays valid until + * the TLB entry for the old mapping has been + * flushed. + */ + if (pte_present(pte)) + force_flush = true; pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); set_pte_at(mm, new_addr, new_pte, pte); } arch_leave_lazy_mmu_mode(); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); @@ -168,7 +184,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; - bool need_flush = false; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -207,7 +222,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, anon_vma_unlock_write(vma->anon_vma); } if (err > 0) { - need_flush = true; continue; } else if (!err) { split_huge_page_pmd(vma, old_addr, old_pmd); @@ -224,10 +238,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, extent = LATENCY_LIMIT; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, new_pmd, new_addr, need_rmap_locks); - need_flush = true; } - if (likely(need_flush)) - flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); From 333de2f4131efde180eba58abe231f4ad35d8451 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 3 Oct 2017 10:25:22 +0800 Subject: [PATCH 349/812] crypto: shash - Fix a sleep-in-atomic bug in shash_setkey_unaligned [ Upstream commit 9039f3ef446e9ffa200200c934f049add9e58426 ] The SCTP program may sleep under a spinlock, and the function call path is: sctp_generate_t3_rtx_event (acquire the spinlock) sctp_do_sm sctp_side_effects sctp_cmd_interpreter sctp_make_init_ack sctp_pack_cookie crypto_shash_setkey shash_setkey_unaligned kmalloc(GFP_KERNEL) For the same reason, the orinoco driver may sleep in interrupt handler, and the function call path is: orinoco_rx_isr_tasklet orinoco_rx orinoco_mic crypto_shash_setkey shash_setkey_unaligned kmalloc(GFP_KERNEL) To fix it, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool and my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/shash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/shash.c b/crypto/shash.c index 5444b429e35d..4f89f78031e2 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -41,7 +41,7 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, int err; absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); - buffer = kmalloc(absize, GFP_KERNEL); + buffer = kmalloc(absize, GFP_ATOMIC); if (!buffer) return -ENOMEM; From f2adb1f6f9d691f8d518b6b44ea20618e636ffee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 2 Oct 2017 19:31:24 +0100 Subject: [PATCH 350/812] ahci: don't ignore result code of ahci_reset_controller() [ Upstream commit d312fefea8387503375f728855c9a62de20c9665 ] ahci_pci_reset_controller() calls ahci_reset_controller(), which may fail, but ignores the result code and always returns success. This may result in failures like below ahci 0000:02:00.0: version 3.0 ahci 0000:02:00.0: enabling device (0000 -> 0003) ahci 0000:02:00.0: SSS flag set, parallel bus scan disabled ahci 0000:02:00.0: controller reset failed (0xffffffff) ahci 0000:02:00.0: failed to stop engine (-5) ... repeated many times ... ahci 0000:02:00.0: failed to stop engine (-5) Unable to handle kernel paging request at virtual address ffff0000093f9018 ... PC is at ahci_stop_engine+0x5c/0xd8 [libahci] LR is at ahci_deinit_port.constprop.12+0x1c/0xc0 [libahci] ... [] ahci_stop_engine+0x5c/0xd8 [libahci] [] ahci_deinit_port.constprop.12+0x1c/0xc0 [libahci] [] ahci_init_controller+0x80/0x168 [libahci] [] ahci_pci_init_controller+0x60/0x68 [ahci] [] ahci_init_one+0x75c/0xd88 [ahci] [] local_pci_probe+0x3c/0xb8 [] pci_device_probe+0x138/0x170 [] driver_probe_device+0x2dc/0x458 [] __driver_attach+0x114/0x118 [] bus_for_each_dev+0x60/0xa0 [] driver_attach+0x20/0x28 [] bus_add_driver+0x1f0/0x2a8 [] driver_register+0x60/0xf8 [] __pci_register_driver+0x3c/0x48 [] ahci_pci_driver_init+0x1c/0x1000 [ahci] [] do_one_initcall+0x38/0x120 where an obvious hardware level failure results in an unnecessary 15 second delay and a subsequent crash. So record the result code of ahci_reset_controller() and relay it, rather than ignoring it. Signed-off-by: Ard Biesheuvel Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 34fdaa6e99ba..5f1f049063dd 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -619,8 +619,11 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, static int ahci_pci_reset_controller(struct ata_host *host) { struct pci_dev *pdev = to_pci_dev(host->dev); + int rc; - ahci_reset_controller(host); + rc = ahci_reset_controller(host); + if (rc) + return rc; if (pdev->vendor == PCI_VENDOR_ID_INTEL) { struct ahci_host_priv *hpriv = host->private_data; From d1ce094c3c90434844c6580be7290517762422d6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Oct 2018 15:23:26 +0100 Subject: [PATCH 351/812] cachefiles: fix the race between cachefiles_bury_object() and rmdir(2) commit 169b803397499be85bdd1e3d07d6f5e3d4bd669e upstream. the victim might've been rmdir'ed just before the lock_rename(); unlike the normal callers, we do not look the source up after the parents are locked - we know it beforehand and just recheck that it's still the child of what used to be its parent. Unfortunately, the check is too weak - we don't spot a dead directory since its ->d_parent is unchanged, dentry is positive, etc. So we sail all the way to ->rename(), with hosting filesystems _not_ expecting to be asked renaming an rmdir'ed subdirectory. The fix is easy, fortunately - the lock on parent is sufficient for making IS_DEADDIR() on child safe. Cc: stable@vger.kernel.org Fixes: 9ae326a69004 (CacheFiles: A cache that backs onto a mounted filesystem) Signed-off-by: Al Viro Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index c43b4b08546b..a5f59eed8287 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -317,7 +317,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); From 3700bfc36c3cef29533951d31b37b041628bb5b2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 15:06:41 +0200 Subject: [PATCH 352/812] ptp: fix Spectre v1 vulnerability commit efa61c8cf2950ab5c0e66cff3cabe2a2b24e81ba upstream. pin_index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/ptp/ptp_chardev.c:253 ptp_ioctl() warn: potential spectre issue 'ops->pin_config' [r] (local cap) Fix this by sanitizing pin_index before using it to index ops->pin_config, and before passing it as an argument to function ptp_set_pinfunc(), in which it is used to index info->pin_config. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_chardev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index d877ff124365..4eb254a273f8 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -23,6 +23,8 @@ #include #include +#include + #include "ptp_private.h" static int ptp_disable_pinfunc(struct ptp_clock_info *ops, @@ -224,6 +226,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; pd = ops->pin_config[pin_index]; @@ -242,6 +245,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan); From 6ede39a8e1f64babac7e50e989f8b6535e7e7dde Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 16:59:01 +0200 Subject: [PATCH 353/812] RDMA/ucma: Fix Spectre v1 vulnerability commit a3671a4f973ee9d9621d60166cc3b037c397d604 upstream. hdr.cmd can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/infiniband/core/ucma.c:1686 ucma_write() warn: potential spectre issue 'ucma_cmd_table' [r] (local cap) Fix this by sanitizing hdr.cmd before using it to index ucm_cmd_table. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 7525e9f6949e..3e4d3d5560bf 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -44,6 +44,8 @@ #include #include +#include + #include #include #include @@ -1627,6 +1629,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; From 17eb02cc26926b879015590a86380e431048057e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 16:32:40 +0200 Subject: [PATCH 354/812] IB/ucm: Fix Spectre v1 vulnerability commit 0295e39595e1146522f2722715dba7f7fba42217 upstream. hdr.cmd can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/infiniband/core/ucm.c:1127 ib_ucm_write() warn: potential spectre issue 'ucm_cmd_table' [r] (local cap) Fix this by sanitizing hdr.cmd before using it to index ucm_cmd_table. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 564adf3116e8..4b3a00855f52 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -46,6 +46,8 @@ #include #include +#include + #include #include @@ -1115,6 +1117,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; From 17275e092a0126211eec3114d64c639feee7cacd Mon Sep 17 00:00:00 2001 From: Tobias Herzog Date: Sat, 22 Sep 2018 22:11:11 +0200 Subject: [PATCH 355/812] cdc-acm: correct counting of UART states in serial state notification commit f976d0e5747ca65ccd0fb2a4118b193d70aa1836 upstream. The usb standard ("Universal Serial Bus Class Definitions for Communication Devices") distiguishes between "consistent signals" (DSR, DCD), and "irregular signals" (break, ring, parity error, framing error, overrun). The bits of "irregular signals" are set, if this error/event occurred on the device side and are immeadeatly unset, if the serial state notification was sent. Like other drivers of real serial ports do, just the occurence of those events should be counted in serial_icounter_struct (but no 1->0 transitions). Signed-off-by: Tobias Herzog Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index a501f3ba6a3f..3cbf6aa10f2c 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -332,17 +332,17 @@ static void acm_ctrl_irq(struct urb *urb) if (difference & ACM_CTRL_DSR) acm->iocount.dsr++; - if (difference & ACM_CTRL_BRK) - acm->iocount.brk++; - if (difference & ACM_CTRL_RI) - acm->iocount.rng++; if (difference & ACM_CTRL_DCD) acm->iocount.dcd++; - if (difference & ACM_CTRL_FRAMING) + if (newctrl & ACM_CTRL_BRK) + acm->iocount.brk++; + if (newctrl & ACM_CTRL_RI) + acm->iocount.rng++; + if (newctrl & ACM_CTRL_FRAMING) acm->iocount.frame++; - if (difference & ACM_CTRL_PARITY) + if (newctrl & ACM_CTRL_PARITY) acm->iocount.parity++; - if (difference & ACM_CTRL_OVERRUN) + if (newctrl & ACM_CTRL_OVERRUN) acm->iocount.overrun++; spin_unlock(&acm->read_lock); From 87f8db65811b5bdb7686d0e53fab486d9fc2fff8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 12:16:45 +0200 Subject: [PATCH 356/812] usb: gadget: storage: Fix Spectre v1 vulnerability commit 9ae24af3669111d418242caec8dd4ebd9ba26860 upstream. num can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/usb/gadget/function/f_mass_storage.c:3177 fsg_lun_make() warn: potential spectre issue 'fsg_opts->common->luns' [r] (local cap) Fix this by sanitizing num before using it to index fsg_opts->common->luns Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 4dd3c7672247..25488c89308a 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -220,6 +220,8 @@ #include #include +#include + #include "configfs.h" @@ -3260,6 +3262,7 @@ static struct config_group *fsg_lun_make(struct config_group *group, fsg_opts = to_fsg_opts(&group->cg_item); if (num >= FSG_MAX_LUNS) return ERR_PTR(-ERANGE); + num = array_index_nospec(num, FSG_MAX_LUNS); mutex_lock(&fsg_opts->lock); if (fsg_opts->refcnt || fsg_opts->common->luns[num]) { From 506617d92fc31de7e1a769da660924d42097210f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Oct 2018 16:55:04 -0400 Subject: [PATCH 357/812] USB: fix the usbfs flag sanitization for control transfers commit 665c365a77fbfeabe52694aedf3446d5f2f1ce42 upstream. Commit 7a68d9fb8510 ("USB: usbdevfs: sanitize flags more") checks the transfer flags for URBs submitted from userspace via usbfs. However, the check for whether the USBDEVFS_URB_SHORT_NOT_OK flag should be allowed for a control transfer was added in the wrong place, before the code has properly determined the direction of the control transfer. (Control transfers are special because for them, the direction is set by the bRequestType byte of the Setup packet rather than direction bit of the endpoint address.) This patch moves code which sets up the allow_short flag for control transfers down after is_in has been set to the correct value. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+24a30223a4b609bb802e@syzkaller.appspotmail.com Fixes: 7a68d9fb8510 ("USB: usbdevfs: sanitize flags more") CC: Oliver Neukum CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 5e0af15aebc4..7559d96695da 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1329,8 +1329,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: - if (is_in) - allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1360,6 +1358,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb is_in = 0; uurb->endpoint &= ~USB_DIR_IN; } + if (is_in) + allow_short = true; snoop(&ps->dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", From c057f7581524a8d845e6a102e1b75faa0dac2bd6 Mon Sep 17 00:00:00 2001 From: Mikhail Nikiforov Date: Mon, 15 Oct 2018 11:17:56 -0700 Subject: [PATCH 358/812] Input: elan_i2c - add ACPI ID for Lenovo IdeaPad 330-15IGM commit 13c1c5e4d7f887cba36c5e3df3faa22071c1469f upstream. Add ELAN061C to the ACPI table to support Elan touchpad found in Lenovo IdeaPad 330-15IGM. Signed-off-by: Mikhail Nikiforov Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index a716482774db..b3119589a444 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1251,6 +1251,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 }, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, From 4a2b54a750bf18be02997944464396485b7ef284 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Mon, 8 Oct 2018 10:36:40 -0400 Subject: [PATCH 359/812] sched/fair: Fix throttle_list starvation with low CFS quota commit baa9be4ffb55876923dc9716abc0a448e510ba30 upstream. With a very low cpu.cfs_quota_us setting, such as the minimum of 1000, distribute_cfs_runtime may not empty the throttled_list before it runs out of runtime to distribute. In that case, due to the change from c06f04c7048 to put throttled entries at the head of the list, later entries on the list will starve. Essentially, the same X processes will get pulled off the list, given CPU time and then, when expired, get put back on the head of the list where distribute_cfs_runtime will give runtime to the same set of processes leaving the rest. Fix the issue by setting a bit in struct cfs_bandwidth when distribute_cfs_runtime is running, so that the code in throttle_cfs_rq can decide to put the throttled entry on the tail or the head of the list. The bit is set/cleared by the callers of distribute_cfs_runtime while they hold cfs_bandwidth->lock. This is easy to reproduce with a handful of CPU consumers. I use 'crash' on the live system. In some cases you can simply look at the throttled list and see the later entries are not changing: crash> list cfs_rq.throttled_list -H 0xffff90b54f6ade40 -s cfs_rq.runtime_remaining | paste - - | awk '{print $1" "$4}' | pr -t -n3 1 ffff90b56cb2d200 -976050 2 ffff90b56cb2cc00 -484925 3 ffff90b56cb2bc00 -658814 4 ffff90b56cb2ba00 -275365 5 ffff90b166a45600 -135138 6 ffff90b56cb2da00 -282505 7 ffff90b56cb2e000 -148065 8 ffff90b56cb2fa00 -872591 9 ffff90b56cb2c000 -84687 10 ffff90b56cb2f000 -87237 11 ffff90b166a40a00 -164582 crash> list cfs_rq.throttled_list -H 0xffff90b54f6ade40 -s cfs_rq.runtime_remaining | paste - - | awk '{print $1" "$4}' | pr -t -n3 1 ffff90b56cb2d200 -994147 2 ffff90b56cb2cc00 -306051 3 ffff90b56cb2bc00 -961321 4 ffff90b56cb2ba00 -24490 5 ffff90b166a45600 -135138 6 ffff90b56cb2da00 -282505 7 ffff90b56cb2e000 -148065 8 ffff90b56cb2fa00 -872591 9 ffff90b56cb2c000 -84687 10 ffff90b56cb2f000 -87237 11 ffff90b166a40a00 -164582 Sometimes it is easier to see by finding a process getting starved and looking at the sched_info: crash> task ffff8eb765994500 sched_info PID: 7800 TASK: ffff8eb765994500 CPU: 16 COMMAND: "cputest" sched_info = { pcount = 8, run_delay = 697094208, last_arrival = 240260125039, last_queued = 240260327513 }, crash> task ffff8eb765994500 sched_info PID: 7800 TASK: ffff8eb765994500 CPU: 16 COMMAND: "cputest" sched_info = { pcount = 8, run_delay = 697094208, last_arrival = 240260125039, last_queued = 240260327513 }, Signed-off-by: Phil Auld Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: c06f04c70489 ("sched: Fix potential near-infinite distribute_cfs_runtime() loop") Link: http://lkml.kernel.org/r/20181008143639.GA4019@pauld.bos.csb Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 22 +++++++++++++++++++--- kernel/sched/sched.h | 2 ++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a0c5bb93a3ab..c2af250547bb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3624,9 +3624,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) /* * Add to the _head_ of the list, so that an already-started - * distribute_cfs_runtime will not see us + * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is + * not running add to the tail so that later runqueues don't get starved. */ - list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (cfs_b->distribute_running) + list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + else + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); /* * If we're the first throttled task, make sure the bandwidth @@ -3769,14 +3773,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) * in us over-using our runtime if it is all used during this loop, but * only by limited amounts in that extreme case. */ - while (throttled && cfs_b->runtime > 0) { + while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { runtime = cfs_b->runtime; + cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ runtime = distribute_cfs_runtime(cfs_b, runtime, runtime_expires); raw_spin_lock(&cfs_b->lock); + cfs_b->distribute_running = 0; throttled = !list_empty(&cfs_b->throttled_cfs_rq); cfs_b->runtime -= min(runtime, cfs_b->runtime); @@ -3887,6 +3893,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); + if (cfs_b->distribute_running) { + raw_spin_unlock(&cfs_b->lock); + return; + } + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { raw_spin_unlock(&cfs_b->lock); return; @@ -3896,6 +3907,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) runtime = cfs_b->runtime; expires = cfs_b->runtime_expires; + if (runtime) + cfs_b->distribute_running = 1; + raw_spin_unlock(&cfs_b->lock); if (!runtime) @@ -3906,6 +3920,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) raw_spin_lock(&cfs_b->lock); if (expires == cfs_b->runtime_expires) cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } @@ -4017,6 +4032,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period_timer.function = sched_cfs_period_timer; hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->slack_timer.function = sched_cfs_slack_timer; + cfs_b->distribute_running = 0; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index af8d8c3eb8ab..6893ee31df4d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -233,6 +233,8 @@ struct cfs_bandwidth { /* statistics */ int nr_periods, nr_throttled; u64 throttled_time; + + bool distribute_running; #endif }; From 74ede0af32f565ac5b7222c8722cc7da6951bef4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Oct 2018 12:38:27 +0200 Subject: [PATCH 360/812] x86/percpu: Fix this_cpu_read() commit b59167ac7bafd804c91e49ad53c6d33a7394d4c8 upstream. Eric reported that a sequence count loop using this_cpu_read() got optimized out. This is wrong, this_cpu_read() must imply READ_ONCE() because the interface is IRQ-safe, therefore an interrupt can have changed the per-cpu value. Fixes: 7c3576d261ce ("[PATCH] i386: Convert PDA into the percpu section") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Eric Dumazet Cc: hpa@zytor.com Cc: eric.dumazet@gmail.com Cc: bp@alien8.de Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181011104019.748208519@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/percpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e0ba66ca68c6..f5e780bfa2b3 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -184,22 +184,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ From 8a1d3de19b0afbf069baf704afecfbeba9059fce Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 Jun 2016 18:52:16 +0100 Subject: [PATCH 361/812] cpuidle: Do not access cpuidle_devices when !CONFIG_CPU_IDLE commit 9bd616e3dbedfc103f158197c8ad93678849b1ed upstream. The cpuidle_devices per-CPU variable is only defined when CPU_IDLE is enabled. Commit c8cc7d4de7a4 ("sched/idle: Reorganize the idle loop") removed the #ifdef CONFIG_CPU_IDLE around cpuidle_idle_call() with the compiler optimising away __this_cpu_read(cpuidle_devices). However, with CONFIG_UBSAN && !CONFIG_CPU_IDLE, this optimisation no longer happens and the kernel fails to link since cpuidle_devices is not defined. This patch introduces an accessor function for the current CPU cpuidle device (returning NULL when !CONFIG_CPU_IDLE) and uses it in cpuidle_idle_call(). Signed-off-by: Catalin Marinas Cc: 4.5+ # 4.5+ Signed-off-by: Rafael J. Wysocki Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/cpuidle.h | 3 +++ kernel/sched/idle.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 786ad32631a6..07b83d32f66c 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -152,6 +152,8 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); +static inline struct cpuidle_device *cpuidle_get_device(void) +{return __this_cpu_read(cpuidle_devices); } #else static inline void disable_cpuidle(void) { } static inline bool cpuidle_not_available(struct cpuidle_driver *drv, @@ -187,6 +189,7 @@ static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } +static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #endif #if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 4a2ef5a02fd3..bfd573122e0d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -132,7 +132,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ static void cpuidle_idle_call(void) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_device *dev = cpuidle_get_device(); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; From 80ab1e24e2c03585fe4883aaf591cfe07da96edd Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 29 Mar 2017 08:44:59 +0200 Subject: [PATCH 362/812] l2tp: hold tunnel socket when handling control frames in l2tp_ip and l2tp_ip6 commit 94d7ee0baa8b764cf64ad91ed69464c1a6a0066b upstream. The code following l2tp_tunnel_find() expects that a new reference is held on sk. Either sk_receive_skb() or the discard_put error path will drop a reference from the tunnel's socket. This issue exists in both l2tp_ip and l2tp_ip6. Fixes: a3c18422a4b4 ("l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv()") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 5 +++-- net/l2tp/l2tp_ip6.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index c7e6098c924e..af74e3ba0f92 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -177,9 +177,10 @@ static int l2tp_ip_recv(struct sk_buff *skb) tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 5fe0a6f6af3d..591d308bf63a 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -188,9 +188,10 @@ static int l2tp_ip6_recv(struct sk_buff *skb) tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(&init_net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); From 8474c9b8cc8b7f29c719d26f4fbca5cb957634fc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 12 Oct 2018 17:53:12 -0700 Subject: [PATCH 363/812] x86/time: Correct the attribute on jiffies' definition commit 53c13ba8ed39e89f21a0b98f4c8a241bb44e483d upstream. Clang warns that the declaration of jiffies in include/linux/jiffies.h doesn't match the definition in arch/x86/time/kernel.c: arch/x86/kernel/time.c:29:42: warning: section does not match previous declaration [-Wsection] __visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; ^ ./include/linux/cache.h:49:4: note: expanded from macro '__cacheline_aligned' __section__(".data..cacheline_aligned"))) ^ ./include/linux/jiffies.h:81:31: note: previous attribute is here extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; ^ ./arch/x86/include/asm/cache.h:20:2: note: expanded from macro '__cacheline_aligned_in_smp' __page_aligned_data ^ ./include/linux/linkage.h:39:29: note: expanded from macro '__page_aligned_data' #define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) ^ ./include/linux/compiler_attributes.h:233:56: note: expanded from macro '__section' #define __section(S) __attribute__((__section__(#S))) ^ 1 warning generated. The declaration was changed in commit 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") but wasn't updated here. Make them match so Clang no longer warns. Fixes: 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Nick Desaulniers Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181013005311.28617-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index d39c09119db6..590c8fd2ed9b 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -23,7 +23,7 @@ #include #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) From 7a4269707deb6ab22d488eb1a9eedae3ef88abc5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 10 Nov 2018 07:41:44 -0800 Subject: [PATCH 364/812] Linux 4.4.163 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 00ff2dd68ff1..4e3179768eea 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 162 +SUBLEVEL = 163 EXTRAVERSION = NAME = Blurry Fish Butt From 9b359dd99ce4129e0048b4f8d4e1285877144c3f Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:14 +0800 Subject: [PATCH 365/812] bcache: fix miss key refill->end in writeback commit 2d6cb6edd2c7fb4f40998895bda45006281b1ac5 upstream. refill->end record the last key of writeback, for example, at the first time, keys (1,128K) to (1,1024K) are flush to the backend device, but the end key (1,1024K) is not included, since the bellow code: if (bkey_cmp(k, refill->end) >= 0) { ret = MAP_DONE; goto out; } And in the next time when we refill writeback keybuf again, we searched key start from (1,1024K), and got a key bigger than it, so the key (1,1024K) missed. This patch modify the above code, and let the end key to be included to the writeback key buffer. Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 4ed621ad27e4..05aa3ac1381b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2372,7 +2372,7 @@ static int refill_keybuf_fn(struct btree_op *op, struct btree *b, struct keybuf *buf = refill->buf; int ret = MAP_CONTINUE; - if (bkey_cmp(k, refill->end) >= 0) { + if (bkey_cmp(k, refill->end) > 0) { ret = MAP_DONE; goto out; } From dff53cf71a7300d2229f8ccb82834156f00a4ea2 Mon Sep 17 00:00:00 2001 From: Dmitry Bazhenov Date: Mon, 15 Oct 2018 14:21:22 +0500 Subject: [PATCH 366/812] hwmon: (pmbus) Fix page count auto-detection. commit e7c6a55606b5c46b449d76588968b4d8caae903f upstream. Devices with compatible="pmbus" field have zero initial page count, and pmbus_clear_faults() being called before the page count auto- detection does not actually clear faults because it depends on the page count. Non-cleared faults in its turn may fail the subsequent page count auto-detection. This patch fixes this problem by calling pmbus_clear_fault_page() for currently set page and calling pmbus_clear_faults() after the page count was detected. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Bazhenov Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/pmbus.c | 2 ++ drivers/hwmon/pmbus/pmbus_core.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c index 0a74991a60f0..1b2b79f6ea3a 100644 --- a/drivers/hwmon/pmbus/pmbus.c +++ b/drivers/hwmon/pmbus/pmbus.c @@ -117,6 +117,8 @@ static int pmbus_identify(struct i2c_client *client, } else { info->pages = 1; } + + pmbus_clear_faults(client); } if (pmbus_check_byte_register(client, 0, PMBUS_VOUT_MODE)) { diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index d013acf3f83a..c00bad02761a 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1759,7 +1759,10 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK)) client->flags |= I2C_CLIENT_PEC; - pmbus_clear_faults(client); + if (data->info->pages) + pmbus_clear_faults(client); + else + pmbus_clear_fault_page(client, -1); if (info->identify) { ret = (*info->identify)(client, info); From 85b89ccf86cececb3e562b071d078a3cf50a54cc Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 6 Oct 2018 17:09:35 +0800 Subject: [PATCH 367/812] jffs2: free jffs2_sb_info through jffs2_kill_sb() commit 92e2921f7eee63450a5f953f4b15dc6210219430 upstream. When an invalid mount option is passed to jffs2, jffs2_parse_options() will fail and jffs2_sb_info will be freed, but then jffs2_sb_info will be used (use-after-free) and freeed (double-free) in jffs2_kill_sb(). Fix it by removing the buggy invocation of kfree() when getting invalid mount options. Fixes: 92abc475d8de ("jffs2: implement mount option parsing and compression overriding") Cc: stable@kernel.org Signed-off-by: Hou Tao Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 600da1a4df29..1544f530ccd0 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -285,10 +285,8 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = c; ret = jffs2_parse_options(c, data); - if (ret) { - kfree(c); + if (ret) return -EINVAL; - } /* Initialize JFFS2 superblock locks, the further initialization will * be done later */ From 0497878b6b3f4330a8b644c1625f8b586ce40cd0 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 9 Sep 2018 01:21:06 +0200 Subject: [PATCH 368/812] pcmcia: Implement CLKRUN protocol disabling for Ricoh bridges commit 95691e3eddc41da2d1cd3cca51fecdfb46bd85bc upstream. Currently, "disable_clkrun" yenta_socket module parameter is only implemented for TI CardBus bridges. Add also an implementation for Ricoh bridges that have the necessary setting documented in publicly available datasheets. Tested on a RL5C476II with a Sunrich C-160 CardBus NIC that doesn't work correctly unless the CLKRUN protocol is disabled. Let's also make it clear in its description that the "disable_clkrun" module parameter only works on these two previously mentioned brands of CardBus bridges. Signed-off-by: Maciej S. Szmigiero Cc: stable@vger.kernel.org Signed-off-by: Dominik Brodowski Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/ricoh.h | 35 +++++++++++++++++++++++++++++++++++ drivers/pcmcia/yenta_socket.c | 3 ++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/pcmcia/ricoh.h b/drivers/pcmcia/ricoh.h index 01098c841f87..8ac7b138c094 100644 --- a/drivers/pcmcia/ricoh.h +++ b/drivers/pcmcia/ricoh.h @@ -119,6 +119,10 @@ #define RL5C4XX_MISC_CONTROL 0x2F /* 8 bit */ #define RL5C4XX_ZV_ENABLE 0x08 +/* Misc Control 3 Register */ +#define RL5C4XX_MISC3 0x00A2 /* 16 bit */ +#define RL5C47X_MISC3_CB_CLKRUN_DIS BIT(1) + #ifdef __YENTA_H #define rl_misc(socket) ((socket)->private[0]) @@ -156,6 +160,35 @@ static void ricoh_set_zv(struct yenta_socket *socket) } } +static void ricoh_set_clkrun(struct yenta_socket *socket, bool quiet) +{ + u16 misc3; + + /* + * RL5C475II likely has this setting, too, however no datasheet + * is publicly available for this chip + */ + if (socket->dev->device != PCI_DEVICE_ID_RICOH_RL5C476 && + socket->dev->device != PCI_DEVICE_ID_RICOH_RL5C478) + return; + + if (socket->dev->revision < 0x80) + return; + + misc3 = config_readw(socket, RL5C4XX_MISC3); + if (misc3 & RL5C47X_MISC3_CB_CLKRUN_DIS) { + if (!quiet) + dev_dbg(&socket->dev->dev, + "CLKRUN feature already disabled\n"); + } else if (disable_clkrun) { + if (!quiet) + dev_info(&socket->dev->dev, + "Disabling CLKRUN feature\n"); + misc3 |= RL5C47X_MISC3_CB_CLKRUN_DIS; + config_writew(socket, RL5C4XX_MISC3, misc3); + } +} + static void ricoh_save_state(struct yenta_socket *socket) { rl_misc(socket) = config_readw(socket, RL5C4XX_MISC); @@ -172,6 +205,7 @@ static void ricoh_restore_state(struct yenta_socket *socket) config_writew(socket, RL5C4XX_16BIT_IO_0, rl_io(socket)); config_writew(socket, RL5C4XX_16BIT_MEM_0, rl_mem(socket)); config_writew(socket, RL5C4XX_CONFIG, rl_config(socket)); + ricoh_set_clkrun(socket, true); } @@ -197,6 +231,7 @@ static int ricoh_override(struct yenta_socket *socket) config_writew(socket, RL5C4XX_CONFIG, config); ricoh_set_zv(socket); + ricoh_set_clkrun(socket, false); return 0; } diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 5d6d9b1549bc..5034422a1d96 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -26,7 +26,8 @@ static bool disable_clkrun; module_param(disable_clkrun, bool, 0444); -MODULE_PARM_DESC(disable_clkrun, "If PC card doesn't function properly, please try this option"); +MODULE_PARM_DESC(disable_clkrun, + "If PC card doesn't function properly, please try this option (TI and Ricoh bridges only)"); static bool isa_probe = 1; module_param(isa_probe, bool, 0444); From eda6ef4ae1504b12ae24d8ee55ddef27e7c58f12 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 11 Oct 2018 12:13:01 +0200 Subject: [PATCH 369/812] ipmi: Fix timer race with module unload commit 0711e8c1b4572d076264e71b0002d223f2666ed7 upstream. Please note that below oops is from an older kernel, but the same race seems to be present in the upstream kernel too. ---8<--- The following panic was encountered during removing the ipmi_ssif module: [ 526.352555] Unable to handle kernel paging request at virtual address ffff000006923090 [ 526.360464] Mem abort info: [ 526.363257] ESR = 0x86000007 [ 526.366304] Exception class = IABT (current EL), IL = 32 bits [ 526.372221] SET = 0, FnV = 0 [ 526.375269] EA = 0, S1PTW = 0 [ 526.378405] swapper pgtable: 4k pages, 48-bit VAs, pgd = 000000008ae60416 [ 526.385185] [ffff000006923090] *pgd=000000bffcffe803, *pud=000000bffcffd803, *pmd=0000009f4731a003, *pte=0000000000000000 [ 526.396141] Internal error: Oops: 86000007 [#1] SMP [ 526.401008] Modules linked in: nls_iso8859_1 ipmi_devintf joydev input_leds ipmi_msghandler shpchp sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear i2c_smbus hid_generic usbhid uas hid usb_storage ast aes_ce_blk i2c_algo_bit aes_ce_cipher qede ttm crc32_ce ptp crct10dif_ce drm_kms_helper ghash_ce syscopyarea sha2_ce sysfillrect sysimgblt pps_core fb_sys_fops sha256_arm64 sha1_ce mpt3sas qed drm raid_class ahci scsi_transport_sas libahci gpio_xlp i2c_xlp9xx aes_neon_bs aes_neon_blk crypto_simd cryptd aes_arm64 [last unloaded: ipmi_ssif] [ 526.468085] CPU: 125 PID: 0 Comm: swapper/125 Not tainted 4.15.0-35-generic #38~lp1775396+build.1 [ 526.476942] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL022 08/14/2018 [ 526.484932] pstate: 00400009 (nzcv daif +PAN -UAO) [ 526.489713] pc : 0xffff000006923090 [ 526.493198] lr : call_timer_fn+0x34/0x178 [ 526.497194] sp : ffff000009b0bdd0 [ 526.500496] x29: ffff000009b0bdd0 x28: 0000000000000082 [ 526.505796] x27: 0000000000000002 x26: ffff000009515188 [ 526.511096] x25: ffff000009515180 x24: ffff0000090f1018 [ 526.516396] x23: ffff000009519660 x22: dead000000000200 [ 526.521696] x21: ffff000006923090 x20: 0000000000000100 [ 526.526995] x19: ffff809eeb466a40 x18: 0000000000000000 [ 526.532295] x17: 000000000000000e x16: 0000000000000007 [ 526.537594] x15: 0000000000000000 x14: 071c71c71c71c71c [ 526.542894] x13: 0000000000000000 x12: 0000000000000000 [ 526.548193] x11: 0000000000000001 x10: ffff000009b0be88 [ 526.553493] x9 : 0000000000000000 x8 : 0000000000000005 [ 526.558793] x7 : ffff80befc1f8528 x6 : 0000000000000020 [ 526.564092] x5 : 0000000000000040 x4 : 0000000020001b20 [ 526.569392] x3 : 0000000000000000 x2 : ffff809eeb466a40 [ 526.574692] x1 : ffff000006923090 x0 : ffff809eeb466a40 [ 526.579992] Process swapper/125 (pid: 0, stack limit = 0x000000002eb50acc) [ 526.586854] Call trace: [ 526.589289] 0xffff000006923090 [ 526.592419] expire_timers+0xc8/0x130 [ 526.596070] run_timer_softirq+0xec/0x1b0 [ 526.600070] __do_softirq+0x134/0x328 [ 526.603726] irq_exit+0xc8/0xe0 [ 526.606857] __handle_domain_irq+0x6c/0xc0 [ 526.610941] gic_handle_irq+0x84/0x188 [ 526.614679] el1_irq+0xe8/0x180 [ 526.617822] cpuidle_enter_state+0xa0/0x328 [ 526.621993] cpuidle_enter+0x34/0x48 [ 526.625564] call_cpuidle+0x44/0x70 [ 526.629040] do_idle+0x1b8/0x1f0 [ 526.632256] cpu_startup_entry+0x2c/0x30 [ 526.636174] secondary_start_kernel+0x11c/0x130 [ 526.640694] Code: bad PC value [ 526.643800] ---[ end trace d020b0b8417c2498 ]--- [ 526.648404] Kernel panic - not syncing: Fatal exception in interrupt [ 526.654778] SMP: stopping secondary CPUs [ 526.658734] Kernel Offset: disabled [ 526.662211] CPU features: 0x5800c38 [ 526.665688] Memory Limit: none [ 526.668768] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Prevent mod_timer from arming a timer that was already removed by del_timer during module unload. Signed-off-by: Jan Glauber Cc: # 3.19 Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index d6d166fe49a3..7a2e23d6bfdd 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -613,8 +613,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); ssif_info->waiting_alert = true; ssif_info->rtc_us_timer = SSIF_MSG_USEC; - mod_timer(&ssif_info->retry_timer, - jiffies + SSIF_MSG_JIFFIES); + if (!ssif_info->stopping) + mod_timer(&ssif_info->retry_timer, + jiffies + SSIF_MSG_JIFFIES); ipmi_ssif_unlock_cond(ssif_info, flags); return; } @@ -951,8 +952,9 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, ssif_info->waiting_alert = true; ssif_info->retries_left = SSIF_RECV_RETRIES; ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC; - mod_timer(&ssif_info->retry_timer, - jiffies + SSIF_MSG_PART_JIFFIES); + if (!ssif_info->stopping) + mod_timer(&ssif_info->retry_timer, + jiffies + SSIF_MSG_PART_JIFFIES); ipmi_ssif_unlock_cond(ssif_info, flags); } } From 7d39307dd6dad9b4a345bb121aae061f91a0dce1 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 6 Oct 2018 13:11:30 -0400 Subject: [PATCH 370/812] parisc: Fix address in HPMC IVA commit 1138b6718ff74d2a934459643e3754423d23b5e2 upstream. Helge noticed that the address of the os_hpmc handler was not being correctly calculated in the hpmc macro. As a result, PDCE_CHECK would fail to call os_hpmc: e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY f600105e02e00000 fffffff0f0c00000 CC_MC_HPMC_MONARCH_SELECTED 140003b202e00000 000000000000000b CC_ERR_HPMC_STATE_ENTRY 5600100b02e00000 00000000000001a0 CC_MC_OS_HPMC_LEN_ERR 5600106402e00000 fffffff0f0438e70 CC_MC_BR_TO_OS_HPMC_FAILED e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY 4000109f02e00000 0000000000000000 CC_MC_HPMC_INITIATED 4000101902e00000 0000000000000000 CC_MC_MULTIPLE_HPMCS 030010d502e00000 0000000000000000 CC_CPU_STOP The address problem can be seen by dumping the fault vector: 0000000040159000 : 40159000: 63 6f 77 73 stb r15,-2447(dp) 40159004: 20 63 61 6e ldil L%b747000,r3 40159008: 20 66 6c 79 ldil L%-1c3b3000,r3 ... 40159020: 08 00 02 40 nop 40159024: 20 6e 60 02 ldil L%15d000,r3 40159028: 34 63 00 00 ldo 0(r3),r3 4015902c: e8 60 c0 02 bv,n r0(r3) 40159030: 08 00 02 40 nop 40159034: 00 00 00 00 break 0,0 40159038: c0 00 70 00 bb,*< r0,sar,40159840 4015903c: 00 00 00 00 break 0,0 Location 40159038 should contain the physical address of os_hpmc: 000000004015d000 : 4015d000: 08 1a 02 43 copy r26,r3 4015d004: 01 c0 08 a4 mfctl iva,r4 4015d008: 48 85 00 68 ldw 34(r4),r5 This patch moves the address setup into initialize_ivt to resolve the above problem. I tested the change by dumping the HPMC entry after setup: 0000000040209020: 8000240 0000000040209024: 206a2004 0000000040209028: 34630ac0 000000004020902c: e860c002 0000000040209030: 8000240 0000000040209034: 1bdddce6 0000000040209038: 15d000 000000004020903c: 1a0 Signed-off-by: John David Anglin Cc: Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/entry.S | 2 +- arch/parisc/kernel/traps.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 13cb2461fef5..3b7b022384a0 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -185,7 +185,7 @@ bv,n 0(%r3) nop .word 0 /* checksum (will be patched) */ - .word PA(os_hpmc) /* address of handler */ + .word 0 /* address of handler */ .word 0 /* length of handler */ .endm diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 77e2262c97f6..6f61a17e2485 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -829,7 +829,8 @@ void __init initialize_ivt(const void *iva) for (i = 0; i < 8; i++) *ivap++ = 0; - /* Compute Checksum for HPMC handler */ + /* Setup IVA and compute checksum for HPMC handler */ + ivap[6] = (u32)__pa(os_hpmc); length = os_hpmc_size; ivap[7] = length; From ae53e64ea305d278857d31ea2d20dcad7a6ac929 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 12 Oct 2018 22:37:46 +0200 Subject: [PATCH 371/812] parisc: Fix map_pages() to not overwrite existing pte entries commit 3c229b3f2dd8133f61bb81d3cb018be92f4bba39 upstream. Fix a long-existing small nasty bug in the map_pages() implementation which leads to overwriting already written pte entries with zero, *if* map_pages() is called a second time with an end address which isn't aligned on a pmd boundry. This happens for example if we want to remap only the text segment read/write in order to run alternative patching on the code. Exiting the loop when we reach the end address fixes this. Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/mm/init.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 1b366c477687..63741f2e8d01 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -491,12 +491,8 @@ static void __init map_pages(unsigned long start_vaddr, pte = pte_mkhuge(pte); } - if (address >= end_paddr) { - if (force) - break; - else - pte_val(pte) = 0; - } + if (address >= end_paddr) + break; set_pte(pg_table, pte); From f0a658e5d67b3f9ac61713025b2694ccf6f19e6b Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Thu, 11 Oct 2018 15:49:17 -0400 Subject: [PATCH 372/812] ALSA: hda - Add mic quirk for the Lenovo G50-30 (17aa:3905) commit e7bb6ad5685f05685dd8a6a5eda7bfcd14d5f95b upstream. The Lenovo G50-30, like other G50 models, has a Conexant codec that requires a quirk for its inverted stereo dmic. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1249364 Reported-by: Alexander Ploumistos Tested-by: Alexander Ploumistos Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index a1a3ce8c3f56..aea3cc2abe3a 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -867,6 +867,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), + SND_PCI_QUIRK(0x17aa, 0x3905, "Lenovo G50-30", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), From 818f57e747f266d6642f84d622f292e6677958c7 Mon Sep 17 00:00:00 2001 From: Alex Stanoev Date: Sun, 28 Oct 2018 16:55:12 +0000 Subject: [PATCH 373/812] ALSA: ca0106: Disable IZD on SB0570 DAC to fix audio pops commit ac237c28d5ac1b241d58b1b7b4b9fa10efb22fb5 upstream. The Creative Audigy SE (SB0570) card currently exhibits an audible pop whenever playback is stopped or resumed, or during silent periods of an audio stream. Initialise the IZD bit to the 0 to eliminate these pops. The Infinite Zero Detection (IZD) feature on the DAC causes the output to be shunted to Vcap after 2048 samples of silence. This discharges the AC coupling capacitor through the output and causes the aforementioned pop/click noise. The behaviour of the IZD bit is described on page 15 of the WM8768GEDS datasheet: "With IZD=1, applying MUTE for 1024 consecutive input samples will cause all outputs to be connected directly to VCAP. This also happens if 2048 consecutive zero input samples are applied to all 6 channels, and IZD=0. It will be removed as soon as any channel receives a non-zero input". I believe the second sentence might be referring to IZD=1 instead of IZD=0 given the observed behaviour of the card. This change should make the DAC initialisation consistent with Creative's Windows driver, as this popping persists when initialising the card in Linux and soft rebooting into Windows, but is not present on a cold boot to Windows. Signed-off-by: Alex Stanoev Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ca0106/ca0106.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ca0106/ca0106.h b/sound/pci/ca0106/ca0106.h index 04402c14cb23..9847b669cf3c 100644 --- a/sound/pci/ca0106/ca0106.h +++ b/sound/pci/ca0106/ca0106.h @@ -582,7 +582,7 @@ #define SPI_PL_BIT_R_R (2<<7) /* right channel = right */ #define SPI_PL_BIT_R_C (3<<7) /* right channel = (L+R)/2 */ #define SPI_IZD_REG 2 -#define SPI_IZD_BIT (1<<4) /* infinite zero detect */ +#define SPI_IZD_BIT (0<<4) /* infinite zero detect */ #define SPI_FMT_REG 3 #define SPI_FMT_BIT_RJ (0<<0) /* right justified mode */ From 52d8cdd9d58b3f16c6940f115a54ff344801e1aa Mon Sep 17 00:00:00 2001 From: He Zhe Date: Tue, 14 Aug 2018 23:33:42 +0800 Subject: [PATCH 374/812] x86/corruption-check: Fix panic in memory_corruption_check() when boot option without value is provided commit ccde460b9ae5c2bd5e4742af0a7f623c2daad566 upstream. memory_corruption_check[{_period|_size}]()'s handlers do not check input argument before passing it to kstrtoul() or simple_strtoull(). The argument would be a NULL pointer if each of the kernel parameters, without its value, is set in command line and thus cause the following panic. PANIC: early exception 0xe3 IP 10:ffffffff73587c22 error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.18-rc8+ #2 [ 0.000000] RIP: 0010:kstrtoull+0x2/0x10 ... [ 0.000000] Call Trace [ 0.000000] ? set_corruption_check+0x21/0x49 [ 0.000000] ? do_early_param+0x4d/0x82 [ 0.000000] ? parse_args+0x212/0x330 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_options+0x20/0x23 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_param+0x2d/0x39 [ 0.000000] ? setup_arch+0x2f7/0xbf4 [ 0.000000] ? start_kernel+0x5e/0x4c2 [ 0.000000] ? load_ucode_bsp+0x113/0x12f [ 0.000000] ? secondary_startup_64+0xa5/0xb0 This patch adds checks to prevent the panic. Signed-off-by: He Zhe Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: gregkh@linuxfoundation.org Cc: kstewart@linuxfoundation.org Cc: pombredanne@nexb.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1534260823-87917-1-git-send-email-zhe.he@windriver.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/check.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 145863d4d343..a8b215865636 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -30,6 +30,11 @@ static __init int set_corruption_check(char *arg) ssize_t ret; unsigned long val; + if (!arg) { + pr_err("memory_corruption_check config string not provided\n"); + return -EINVAL; + } + ret = kstrtoul(arg, 10, &val); if (ret) return ret; @@ -44,6 +49,11 @@ static __init int set_corruption_check_period(char *arg) ssize_t ret; unsigned long val; + if (!arg) { + pr_err("memory_corruption_check_period config string not provided\n"); + return -EINVAL; + } + ret = kstrtoul(arg, 10, &val); if (ret) return ret; @@ -58,6 +68,11 @@ static __init int set_corruption_check_size(char *arg) char *end; unsigned size; + if (!arg) { + pr_err("memory_corruption_check_size config string not provided\n"); + return -EINVAL; + } + size = memparse(arg, &end); if (*end == '\0') From 97b8ca659ab410c6955da052592959244d041fa8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Oct 2018 09:14:58 +0100 Subject: [PATCH 375/812] x86/kconfig: Fall back to ticket spinlocks Sebastian writes: """ We reproducibly observe cache line starvation on a Core2Duo E6850 (2 cores), a i5-6400 SKL (4 cores) and on a NXP LS2044A ARM Cortex-A72 (4 cores). The problem can be triggered with a v4.9-RT kernel by starting cyclictest -S -p98 -m -i2000 -b 200 and as "load" stress-ng --ptrace 4 The reported maximal latency is usually less than 60us. If the problem triggers then values around 400us, 800us or even more are reported. The upperlimit is the -i parameter. Reproduction with 4.9-RT is almost immediate on Core2Duo, ARM64 and SKL, but it took 7.5 hours to trigger on v4.14-RT on the Core2Duo. Instrumentation show always the picture: CPU0 CPU1 => do_syscall_64 => do_syscall_64 => SyS_ptrace => syscall_slow_exit_work => ptrace_check_attach => ptrace_do_notify / rt_read_unlock => wait_task_inactive rt_spin_lock_slowunlock() -> while task_running() __rt_mutex_unlock_common() / check_task_state() mark_wakeup_next_waiter() | raw_spin_lock_irq(&p->pi_lock); raw_spin_lock(¤t->pi_lock); | . . | raw_spin_unlock_irq(&p->pi_lock); . \ cpu_relax() . - . *IRQ* In the error case we observe that the while() loop is repeated more than 5000 times which indicates that the pi_lock can be acquired. CPU1 on the other side does not make progress waiting for the same lock with interrupts disabled. This continues until an IRQ hits CPU0. Once CPU0 starts processing the IRQ the other CPU is able to acquire pi_lock and the situation relaxes. """ This matches with the observeration for v4.4-rt on a Core2Duo E6850: CPU 0: - no progress for a very long time in rt_mutex_dequeue_pi): stress-n-1931 0d..11 5060.891219: function: __try_to_take_rt_mutex stress-n-1931 0d..11 5060.891219: function: rt_mutex_dequeue stress-n-1931 0d..21 5060.891220: function: rt_mutex_enqueue_pi stress-n-1931 0....2 5060.891220: signal_generate: sig=17 errno=0 code=262148 comm=stress-ng-ptrac pid=1928 grp=1 res=1 stress-n-1931 0d..21 5060.894114: function: rt_mutex_dequeue_pi stress-n-1931 0d.h11 5060.894115: local_timer_entry: vector=239 CPU 1: - IRQ at 5060.894114 on CPU 1 followed by the IRQ on CPU 0 stress-n-1928 1....0 5060.891215: sys_enter: NR 101 (18, 78b, 0, 0, 17, 788) stress-n-1928 1d..11 5060.891216: function: __try_to_take_rt_mutex stress-n-1928 1d..21 5060.891216: function: rt_mutex_enqueue_pi stress-n-1928 1d..21 5060.891217: function: rt_mutex_dequeue_pi stress-n-1928 1....1 5060.891217: function: rt_mutex_adjust_prio stress-n-1928 1d..11 5060.891218: function: __rt_mutex_adjust_prio stress-n-1928 1d.h10 5060.894114: local_timer_entry: vector=239 Thomas writes: """ This has nothing to do with RT. RT is merily exposing the problem in an observable way. The same issue happens with upstream, it's harder to trigger and it's harder to observe for obvious reasons. If you read through the discussions [see the links below] then you really see that there is an upstream issue with the x86 qrlock implementation and Peter has posted fixes which resolve it, both at the practical and the theoretical level. """ Backporting all qspinlock related patches is very likely to introduce regressions on v4.4. Therefore, the recommended solution by Peter and Thomas is to drop back to ticket spinlocks for v4.4. Link :https://lkml.kernel.org/r/20180921120226.6xjgr4oiho22ex75@linutronix.de Link: https://lkml.kernel.org/r/20180926110117.405325143@infradead.org Cc: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Signed-off-by: Daniel Wagner Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d9afe6d40550..9beee7f364ad 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -41,7 +41,6 @@ config X86 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if X86_64 select ARCH_USE_QUEUED_RWLOCKS - select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS From 9190b06c4dbbf7fe1f0c337e35fa0969ba1ddf3d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Oct 2018 10:31:58 -0700 Subject: [PATCH 376/812] sparc: Fix single-pcr perf event counter management. [ Upstream commit cfdc3170d214046b9509183fe9b9544dc644d40b ] It is important to clear the hw->state value for non-stopped events when they are added into the PMU. Otherwise when the event is scheduled out, we won't read the counter because HES_UPTODATE is still set. This breaks 'perf stat' and similar use cases, causing all the events to show zero. This worked for multi-pcr because we make explicit sparc_pmu_start() calls in calculate_multiple_pcrs(). calculate_single_pcr() doesn't do this because the idea there is to accumulate all of the counter settings into the single pcr value. So we have to add explicit hw->state handling there. Like x86, we use the PERF_HES_ARCH bit to track truly stopped events so that we don't accidently start them on a reload. Related to all of this, sparc_pmu_start() is missing a userpage update so add it. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/perf_event.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6596f66ce112..a5d0c2f08110 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -926,6 +926,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -958,10 +960,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -987,6 +991,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1078,6 +1085,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1370,9 +1379,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, From 055dbfe1d1e4b16e7408c087eb7be1d76f74e118 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 16 Oct 2018 22:25:24 +0200 Subject: [PATCH 377/812] x86/fpu: Remove second definition of fpu in __fpu__restore_sig() [ Upstream commit 6aa676761d4c1acfa31320e55fa1f83f3fcbbc7a ] Commit: c5bedc6847c3b ("x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active") introduced the 'fpu' variable at top of __restore_xstate_sig(), which now shadows the other definition: arch/x86/kernel/fpu/signal.c:318:28: warning: symbol 'fpu' shadows an earlier one arch/x86/kernel/fpu/signal.c:271:20: originally declared here Remove the shadowed definition of 'fpu', as the two definitions are the same. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c5bedc6847c3b ("x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active") Link: http://lkml.kernel.org/r/20181016202525.29437-3-bigeasy@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 9be3e79eb629..31fad2cbd734 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -294,7 +294,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Sanitize the copied state etc. */ - struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; From ac435f05a321d4c8db12a937501e8ca8609e74a5 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 12 Oct 2018 19:14:58 -0700 Subject: [PATCH 378/812] net: qla3xxx: Remove overflowing shift statement [ Upstream commit 8c3bf9b62b667456a57aefcf1689e826df146159 ] Clang currently warns: drivers/net/ethernet/qlogic/qla3xxx.c:384:24: warning: signed shift result (0xF00000000) requires 37 bits to represent, but 'int' only has 32 bits [-Wshift-overflow] ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); ~~~~~~~~~~~~~~ ^ ~~ 1 warning generated. The warning is certainly accurate since ISP_NVRAM_MASK is defined as (0x000F << 16) which is then shifted by 16, resulting in 64424509440, well above UINT_MAX. Given that this is the only location in this driver where ISP_NVRAM_MASK is shifted again, it seems likely that ISP_NVRAM_MASK was originally defined without a shift and during the move of the shift to the definition, this statement wasn't properly removed (since ISP_NVRAM_MASK is used in the statenent right above this). Only the maintainers can confirm this since this statment has been here since the driver was first added to the kernel. Link: https://github.com/ClangBuiltLinux/linux/issues/127 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qla3xxx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b09a6b80d107..355c5fb802cd 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -380,8 +380,6 @@ static void fm93c56a_select(struct ql3_adapter *qdev) qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1; ql_write_nvram_reg(qdev, spir, ISP_NVRAM_MASK | qdev->eeprom_cmd_data); - ql_write_nvram_reg(qdev, spir, - ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); } /* From 049b96e8c5d0e50ce9ebefe5206d1f3032a958ef Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Oct 2018 22:13:02 +0900 Subject: [PATCH 379/812] selftests: ftrace: Add synthetic event syntax testcase [ Upstream commit ba0e41ca81b935b958006c7120466e2217357827 ] Add a testcase to check the syntax and field types for synthetic_events interface. Link: http://lkml.kernel.org/r/153986838264.18251.16627517536956299922.stgit@devbox Acked-by: Shuah Khan Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../trigger-synthetic-event-syntax.tc | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc new file mode 100644 index 000000000000..88e6c3f43006 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test synthetic_events syntax parser" + +echo > synthetic_events + +# synthetic event must have a field +! echo "myevent" >> synthetic_events +echo "myevent u64 var1" >> synthetic_events + +# synthetic event must be found in synthetic_events +grep "myevent[[:space:]]u64 var1" synthetic_events + +# it is not possible to add same name event +! echo "myevent u64 var2" >> synthetic_events + +# Non-append open will cleanup all events and add new one +echo "myevent u64 var2" > synthetic_events + +# multiple fields with different spaces +echo "myevent u64 var1; u64 var2;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ;u64 var2" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events + +# test field types +echo "myevent u32 var" > synthetic_events +echo "myevent u16 var" > synthetic_events +echo "myevent u8 var" > synthetic_events +echo "myevent s64 var" > synthetic_events +echo "myevent s32 var" > synthetic_events +echo "myevent s16 var" > synthetic_events +echo "myevent s8 var" > synthetic_events + +echo "myevent char var" > synthetic_events +echo "myevent int var" > synthetic_events +echo "myevent long var" > synthetic_events +echo "myevent pid_t var" > synthetic_events + +echo "myevent unsigned char var" > synthetic_events +echo "myevent unsigned int var" > synthetic_events +echo "myevent unsigned long var" > synthetic_events +grep "myevent[[:space:]]unsigned long var" synthetic_events + +# test string type +echo "myevent char var[10]" > synthetic_events +grep "myevent[[:space:]]char\[10\] var" synthetic_events + +do_reset + +exit 0 From ef42ef8451faae09fe3a0b00c603ceeda8f5f5c5 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 18 Oct 2018 21:45:17 -0400 Subject: [PATCH 380/812] locking/lockdep: Fix debug_locks off performance problem [ Upstream commit 9506a7425b094d2f1d9c877ed5a78f416669269b ] It was found that when debug_locks was turned off because of a problem found by the lockdep code, the system performance could drop quite significantly when the lock_stat code was also configured into the kernel. For instance, parallel kernel build time on a 4-socket x86-64 server nearly doubled. Further analysis into the cause of the slowdown traced back to the frequent call to debug_locks_off() from the __lock_acquired() function probably due to some inconsistent lockdep states with debug_locks off. The debug_locks_off() function did an unconditional atomic xchg to write a 0 value into debug_locks which had already been set to 0. This led to severe cacheline contention in the cacheline that held debug_locks. As debug_locks is being referenced in quite a few different places in the kernel, this greatly slow down the system performance. To prevent that trashing of debug_locks cacheline, lock_acquired() and lock_contended() now checks the state of debug_locks before proceeding. The debug_locks_off() function is also modified to check debug_locks before calling __debug_locks_off(). Signed-off-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1539913518-15598-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/locking/lockdep.c | 4 ++-- lib/debug_locks.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 6e171b547a80..774ab79d3ec7 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3826,7 +3826,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - if (unlikely(!lock_stat)) + if (unlikely(!lock_stat || !debug_locks)) return; if (unlikely(current->lockdep_recursion)) @@ -3846,7 +3846,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - if (unlikely(!lock_stat)) + if (unlikely(!lock_stat || !debug_locks)) return; if (unlikely(current->lockdep_recursion)) diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 96c4c633d95e..124fdf238b3d 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent); */ int debug_locks_off(void) { - if (__debug_locks_off()) { + if (debug_locks && __debug_locks_off()) { if (!debug_locks_silent) { console_verbose(); return 1; From 7d81f8fe5427ebe7d5ea8cd526c77fee9c01a7c3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:49 -0700 Subject: [PATCH 381/812] ataflop: fix error handling during setup [ Upstream commit 71327f547ee3a46ec5c39fdbbd268401b2578d0e ] Move queue allocation next to disk allocation to fix a couple of issues: - If add_disk() hasn't been called, we should clear disk->queue before calling put_disk(). - If we fail to allocate a request queue, we still need to put all of the disks, not just the ones that we allocated queues for. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/ataflop.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 2104b1b4ccda..9ab759bcebd5 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1933,6 +1933,11 @@ static int __init atari_floppy_init (void) unit[i].disk = alloc_disk(1); if (!unit[i].disk) goto Enomem; + + unit[i].disk->queue = blk_init_queue(do_fd_request, + &ataflop_lock); + if (!unit[i].disk->queue) + goto Enomem; } if (UseTrackbuffer < 0) @@ -1964,10 +1969,6 @@ static int __init atari_floppy_init (void) sprintf(unit[i].disk->disk_name, "fd%d", i); unit[i].disk->fops = &floppy_fops; unit[i].disk->private_data = &unit[i]; - unit[i].disk->queue = blk_init_queue(do_fd_request, - &ataflop_lock); - if (!unit[i].disk->queue) - goto Enomem; set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); add_disk(unit[i].disk); } @@ -1982,13 +1983,17 @@ static int __init atari_floppy_init (void) return 0; Enomem: - while (i--) { - struct request_queue *q = unit[i].disk->queue; + do { + struct gendisk *disk = unit[i].disk; - put_disk(unit[i].disk); - if (q) - blk_cleanup_queue(q); - } + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + put_disk(unit[i].disk); + } + } while (i--); unregister_blkdev(FLOPPY_MAJOR, "fd"); return -ENOMEM; From 82e05b994790211bbcc4b18c99f5eb5112e44f56 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:41 -0700 Subject: [PATCH 382/812] swim: fix cleanup on setup error [ Upstream commit 1448a2a5360ae06f25e2edc61ae070dff5c0beb4 ] If we fail to allocate the request queue for a disk, we still need to free that disk, not just the previous ones. Additionally, we need to cleanup the previous request queues. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/swim.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index b5afd495d482..eec6e393c124 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -868,8 +868,17 @@ static int swim_floppy_init(struct swim_priv *swd) exit_put_disks: unregister_blkdev(FLOPPY_MAJOR, "fd"); - while (drive--) - put_disk(swd->unit[drive].disk); + do { + struct gendisk *disk = swd->unit[drive].disk; + + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + put_disk(disk); + } + } while (drive--); return err; } From 088753235d065c6881dfb7ccb30b8d4fae27696a Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Tue, 9 Oct 2018 21:21:01 +0300 Subject: [PATCH 383/812] tun: Consistently configure generic netdev params via rtnetlink [ Upstream commit df52eab23d703142c766ac00bdb8db19d71238d0 ] Configuring generic network device parameters on tun will fail in presence of IFLA_INFO_KIND attribute in IFLA_LINKINFO nested attribute since tun_validate() always return failure. This can be visualized with following ip-link(8) command sequences: # ip link set dev tun0 group 100 # ip link set dev tun0 group 100 type tun RTNETLINK answers: Invalid argument with contrast to dummy and veth drivers: # ip link set dev dummy0 group 100 # ip link set dev dummy0 type dummy # ip link set dev veth0 group 100 # ip link set dev veth0 group 100 type veth Fix by returning zero in tun_validate() when @data is NULL that is always in case since rtnl_link_ops->maxtype is zero in tun driver. Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX") Signed-off-by: Serhey Popovych Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 50bfded6d7ef..5ac0b850d6b1 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1475,6 +1475,8 @@ static void tun_setup(struct net_device *dev) */ static int tun_validate(struct nlattr *tb[], struct nlattr *data[]) { + if (!data) + return 0; return -EINVAL; } From d05d8ef0c02eab3e63f11b4bcf0edb7b334bf52f Mon Sep 17 00:00:00 2001 From: Sanskriti Sharma Date: Tue, 2 Oct 2018 10:29:14 -0400 Subject: [PATCH 384/812] perf tools: Free temporary 'sys' string in read_event_files() [ Upstream commit 1e44224fb0528b4c0cc176bde2bb31e9127eb14b ] For each system in a given pevent, read_event_files() reads in a temporary 'sys' string. Be sure to free this string before moving onto to the next system and/or leaving read_event_files(). Fixes the following coverity complaints: Error: RESOURCE_LEAK (CWE-772): tools/perf/util/trace-event-read.c:343: overwrite_var: Overwriting "sys" in "sys = read_string()" leaks the storage that "sys" points to. tools/perf/util/trace-event-read.c:353: leaked_storage: Variable "sys" going out of scope leaks the storage it points to. Signed-off-by: Sanskriti Sharma Reviewed-by: Jiri Olsa Cc: Joe Lawrence Link: http://lkml.kernel.org/r/1538490554-8161-6-git-send-email-sansharm@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/trace-event-read.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index b67a0ccf5ab9..23baee7b786a 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -334,9 +334,12 @@ static int read_event_files(struct pevent *pevent) for (x=0; x < count; x++) { size = read8(pevent); ret = read_event_file(pevent, sys, size); - if (ret) + if (ret) { + free(sys); return ret; + } } + free(sys); } return 0; } From 7ce210a6beca436a4ecdc71138bf4a244e893e2c Mon Sep 17 00:00:00 2001 From: Sanskriti Sharma Date: Tue, 2 Oct 2018 10:29:11 -0400 Subject: [PATCH 385/812] perf tools: Cleanup trace-event-info 'tdata' leak [ Upstream commit faedbf3fd19f2511a39397f76359e4cc6ee93072 ] Free tracing_data structure in tracing_data_get() error paths. Fixes the following coverity complaint: Error: RESOURCE_LEAK (CWE-772): leaked_storage: Variable "tdata" going out of scope leaks the storage Signed-off-by: Sanskriti Sharma Reviewed-by: Jiri Olsa Cc: Joe Lawrence Link: http://lkml.kernel.org/r/1538490554-8161-3-git-send-email-sansharm@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/trace-event-info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index d995743cb673..58ce62088a39 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -507,12 +507,14 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, "/tmp/perf-XXXXXX"); if (!mkstemp(tdata->temp_file)) { pr_debug("Can't make temp file"); + free(tdata); return NULL; } temp_fd = open(tdata->temp_file, O_RDWR); if (temp_fd < 0) { pr_debug("Can't read '%s'", tdata->temp_file); + free(tdata); return NULL; } From 8c2c6a9aa99c2f438444990603635155d20c7ffd Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sun, 23 Sep 2018 14:39:24 -0600 Subject: [PATCH 386/812] mmc: sdhci-pci-o2micro: Add quirk for O2 Micro dev 0x8620 rev 0x01 [ Upstream commit 5169894982bb67486d93cc1e10151712bb86bcb6 ] This device reports SDHCI_CLOCK_INT_STABLE even though it's not ready to take SDHCI_CLOCK_CARD_EN. The symptom is that reading SDHCI_CLOCK_CONTROL after enabling the clock shows absence of the bit from the register (e.g. expecting 0x0000fa07 = 0x0000fa03 | SDHCI_CLOCK_CARD_EN but only observed the first operand). mmc1: Timeout waiting for hardware cmd interrupt. mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00000603 mmc1: sdhci: Blk size: 0x00000000 | Blk cnt: 0x00000000 mmc1: sdhci: Argument: 0x00000000 | Trn mode: 0x00000000 mmc1: sdhci: Present: 0x01ff0001 | Host ctl: 0x00000001 mmc1: sdhci: Power: 0x0000000f | Blk gap: 0x00000000 mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x0000fa03 mmc1: sdhci: Timeout: 0x00000000 | Int stat: 0x00000000 mmc1: sdhci: Int enab: 0x00ff0083 | Sig enab: 0x00ff0083 mmc1: sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 mmc1: sdhci: Caps: 0x25fcc8bf | Caps_1: 0x00002077 mmc1: sdhci: Cmd: 0x00000000 | Max curr: 0x005800c8 mmc1: sdhci: Resp[0]: 0x00000000 | Resp[1]: 0x00000000 mmc1: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000000 mmc1: sdhci: Host ctl2: 0x00000008 mmc1: sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x00000000 mmc1: sdhci: ============================================ The problem happens during wakeup from S3. Adding a delay quirk after power up reliably fixes the problem. Signed-off-by: Yu Zhao Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-o2micro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index d48f03104b5b..e417e4274d66 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -334,6 +334,9 @@ int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; case PCI_DEVICE_ID_O2_SEABIRD0: + if (chip->pdev->revision == 0x01) + chip->quirks |= SDHCI_QUIRK_DELAY_AFTER_POWER; + /* fall through */ case PCI_DEVICE_ID_O2_SEABIRD1: /* UnLock WP */ ret = pci_read_config_byte(chip->pdev, From f88f4125af7ba3f9e498e2921d74691787a31dec Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Tue, 4 Sep 2018 21:50:57 +0400 Subject: [PATCH 387/812] Bluetooth: btbcm: Add entry for BCM4335C0 UART bluetooth [ Upstream commit a357ea098c9605f60d92a66a9073f56ce25726da ] This patch adds the device ID for the AMPAK AP6335 combo module used in the 1st generation WeTek Hub Android/LibreELEC HTPC box. The WiFI chip identifies itself as BCM4339, while Bluetooth identifies itself as BCM4335 (rev C0): ``` [ 4.864248] Bluetooth: hci0: BCM: chip id 86 [ 4.866388] Bluetooth: hci0: BCM: features 0x2f [ 4.889317] Bluetooth: hci0: BCM4335C0 [ 4.889332] Bluetooth: hci0: BCM4335C0 (003.001.009) build 0000 [ 9.778383] Bluetooth: hci0: BCM4335C0 (003.001.009) build 0268 ``` Output from hciconfig: ``` hci0: Type: Primary Bus: UART BD Address: 43:39:00:00:1F:AC ACL MTU: 1021:8 SCO MTU: 64:1 UP RUNNING RX bytes:7567 acl:234 sco:0 events:386 errors:0 TX bytes:53844 acl:77 sco:0 commands:304 errors:0 Features: 0xbf 0xfe 0xcf 0xfe 0xdb 0xff 0x7b 0x87 Packet type: DM1 DM3 DM5 DH1 DH3 DH5 HV1 HV2 HV3 Link policy: RSWITCH SNIFF Link mode: SLAVE ACCEPT Name: 'HUB' Class: 0x0c0000 Service Classes: Rendering, Capturing Device Class: Miscellaneous, HCI Version: 4.0 (0x6) Revision: 0x10c LMP Version: 4.0 (0x6) Subversion: 0x6109 Manufacturer: Broadcom Corporation (15) ``` Signed-off-by: Christian Hewitt Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btbcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 0b697946e9bc..a08c6529271e 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -270,6 +270,7 @@ static const struct { { 0x4103, "BCM4330B1" }, /* 002.001.003 */ { 0x410e, "BCM43341B0" }, /* 002.001.014 */ { 0x4406, "BCM4324B3" }, /* 002.004.006 */ + { 0x6109, "BCM4335C0" }, /* 003.001.009 */ { 0x610c, "BCM4354" }, /* 003.001.012 */ { } }; From cd6ace69ba21fd54d70cfa1403c979dfccdf1eca Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 16 Sep 2018 16:22:47 +0100 Subject: [PATCH 388/812] x86: boot: Fix EFI stub alignment [ Upstream commit 9c1442a9d039a1a3302fa93e9a11001c5f23b624 ] We currently align the end of the compressed image to a multiple of 16. However, the PE-COFF header included in the EFI stub says that the file alignment is 32 bytes, and when adding an EFI signature to the file it must first be padded to this alignment. sbsigntool commands warn about this: warning: file-aligned section .text extends beyond end of file warning: checksum areas are greater than image size. Invalid section table? Worse, pesign -at least when creating a detached signature- uses the hash of the unpadded file, resulting in an invalid signature if padding is required. Avoid both these problems by increasing alignment to 32 bytes when CONFIG_EFI_STUB is enabled. Signed-off-by: Ben Hutchings Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/tools/build.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a7661c430cd9..523db6ce88dd 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -391,6 +391,13 @@ int main(int argc, char ** argv) die("Unable to mmap '%s': %m", argv[2]); /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ sys_size = (sz + 15 + 4) / 16; +#ifdef CONFIG_EFI_STUB + /* + * COFF requires minimum 32-byte alignment of sections, and + * adding a signature is problematic without that alignment. + */ + sys_size = (sys_size + 1) & ~1; +#endif /* Patch the setup code with the appropriate size parameters */ buf[0x1f1] = setup_sectors-1; From 45947209d3565d0f31b3fca4a7f8c46701cf0bea Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 Sep 2018 01:58:18 +0000 Subject: [PATCH 389/812] pinctrl: qcom: spmi-mpp: Fix err handling of pmic_mpp_set_mux [ Upstream commit 69f8455f6cc78fa6cdf80d0105d7a748106271dc ] 'ret' should be returned while pmic_mpp_write_mode_ctl fails. Fixes: 0e948042c420 ("pinctrl: qcom: spmi-mpp: Implement support for sink mode") Signed-off-by: YueHaibing Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 9ce0e30e33e8..0643abfd8a50 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -321,6 +321,8 @@ static int pmic_mpp_set_mux(struct pinctrl_dev *pctldev, unsigned function, pad->function = function; ret = pmic_mpp_write_mode_ctl(state, pad); + if (ret < 0) + return ret; val = pad->is_enabled << PMIC_MPP_REG_MASTER_EN_SHIFT; From 0524e92e0f8240866e9f3d79d10d335175c6bb59 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 11 Sep 2018 19:20:40 +0900 Subject: [PATCH 390/812] kprobes: Return error if we fail to reuse kprobe instead of BUG_ON() [ Upstream commit 819319fc93461c07b9cdb3064f154bd8cfd48172 ] Make reuse_unused_kprobe() to return error code if it fails to reuse unused kprobe for optprobe instead of calling BUG_ON(). Signed-off-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Naveen N . Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/153666124040.21306.14150398706331307654.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 388bcace62f8..d8daf6c55d2b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -665,9 +665,10 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) } /* Cancel unoptimizing for reusing */ -static void reuse_unused_kprobe(struct kprobe *ap) +static int reuse_unused_kprobe(struct kprobe *ap) { struct optimized_kprobe *op; + int ret; BUG_ON(!kprobe_unused(ap)); /* @@ -681,8 +682,12 @@ static void reuse_unused_kprobe(struct kprobe *ap) /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; /* Optimize it again (remove from op->list) */ - BUG_ON(!kprobe_optready(ap)); + ret = kprobe_optready(ap); + if (ret) + return ret; + optimize_kprobe(ap); + return 0; } /* Remove optimized instructions */ @@ -894,11 +899,16 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt) #define kprobe_disarmed(p) kprobe_disabled(p) #define wait_for_kprobe_optimizer() do {} while (0) -/* There should be no unused kprobes can be reused without optimization */ -static void reuse_unused_kprobe(struct kprobe *ap) +static int reuse_unused_kprobe(struct kprobe *ap) { + /* + * If the optimized kprobe is NOT supported, the aggr kprobe is + * released at the same time that the last aggregated kprobe is + * unregistered. + * Thus there should be no chance to reuse unused kprobe. + */ printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); - BUG_ON(kprobe_unused(ap)); + return -EINVAL; } static void free_aggr_kprobe(struct kprobe *p) @@ -1276,9 +1286,12 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) goto out; } init_aggr_kprobe(ap, orig_p); - } else if (kprobe_unused(ap)) + } else if (kprobe_unused(ap)) { /* This probe is going to die. Rescue it */ - reuse_unused_kprobe(ap); + ret = reuse_unused_kprobe(ap); + if (ret) + goto out; + } if (kprobe_gone(ap)) { /* From c55954e7e0b8fb84e3250a44d7431334555636a4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 27 Aug 2018 09:45:44 +0200 Subject: [PATCH 391/812] ACPI / LPSS: Add alternative ACPI HIDs for Cherry Trail DMA controllers [ Upstream commit 240714061c58e6b1abfb3322398a7634151c06cb ] Bay and Cherry Trail DSTDs represent a different set of devices depending on which OS the device think it is booting. One set of decices for Windows and another set of devices for Android which targets the Android-x86 Linux kernel fork (which e.g. used to have its own display driver instead of using the i915 driver). Which set of devices we are actually going to get is out of our control, this is controlled by the ACPI OSID variable, which gets either set through an EFI setup option, or sometimes is autodetected. So we need to support both. This commit adds support for the 80862286 and 808622C0 ACPI HIDs which we get for the first resp. second DMA controller on Cherry Trail devices when OSID is set to Android. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_lpss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 8a0f77fb5181..572755e557d6 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -235,9 +235,11 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = { { "INT33FC", }, /* Braswell LPSS devices */ + { "80862286", LPSS_ADDR(lpss_dma_desc) }, { "80862288", LPSS_ADDR(bsw_pwm_dev_desc) }, { "8086228A", LPSS_ADDR(bsw_uart_dev_desc) }, { "8086228E", LPSS_ADDR(bsw_spi_dev_desc) }, + { "808622C0", LPSS_ADDR(lpss_dma_desc) }, { "808622C1", LPSS_ADDR(bsw_i2c_dev_desc) }, /* Broadwell LPSS devices */ From 37f2457cf4cfe826a7a58c4d8a48c34ef575948c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 30 Aug 2018 17:58:52 -0700 Subject: [PATCH 392/812] pinctrl: qcom: spmi-mpp: Fix drive strength setting [ Upstream commit 89c68b102f13f123aaef22b292526d6b92501334 ] It looks like we parse the drive strength setting here, but never actually write it into the hardware to update it. Parse the setting and then write it at the end of the pinconf setting function so that it actually sticks in the hardware. Fixes: 0e948042c420 ("pinctrl: qcom: spmi-mpp: Implement support for sink mode") Cc: Doug Anderson Signed-off-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 0643abfd8a50..8301e600e7a8 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -459,7 +459,7 @@ static int pmic_mpp_config_set(struct pinctrl_dev *pctldev, unsigned int pin, pad->dtest = arg; break; case PIN_CONFIG_DRIVE_STRENGTH: - arg = pad->drive_strength; + pad->drive_strength = arg; break; case PMIC_MPP_CONF_AMUX_ROUTE: if (arg >= PMIC_MPP_AMUX_ROUTE_ABUS4) @@ -503,6 +503,10 @@ static int pmic_mpp_config_set(struct pinctrl_dev *pctldev, unsigned int pin, if (ret < 0) return ret; + ret = pmic_mpp_write(state, pad, PMIC_MPP_REG_SINK_CTL, pad->drive_strength); + if (ret < 0) + return ret; + val = pad->is_enabled << PMIC_MPP_REG_MASTER_EN_SHIFT; return pmic_mpp_write(state, pad, PMIC_MPP_REG_EN_CTL, val); From ace4787f6cc6512696141aace9996df90673b51c Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 30 Aug 2018 08:23:39 -0700 Subject: [PATCH 393/812] pinctrl: spmi-mpp: Fix pmic_mpp_config_get() to be compliant [ Upstream commit 0d5b476f8f57fcb06c45fe27681ac47254f63fd2 ] If you look at "pinconf-groups" in debugfs for ssbi-mpp you'll notice it looks like nonsense. The problem is fairly well described in commit 1cf86bc21257 ("pinctrl: qcom: spmi-gpio: Fix pmic_gpio_config_get() to be compliant") and commit 05e0c828955c ("pinctrl: msm: Fix msm_config_group_get() to be compliant"), but it was pointed out that ssbi-mpp has the same problem. Let's fix it there too. NOTE: in case it's helpful to someone reading this, the way to tell whether to do the -EINVAL or not is to look at the PCONFDUMP for a given attribute. If the last element (has_arg) is false then you need to do the -EINVAL trick. ALSO NOTE: it seems unlikely that the values returned when we try to get PIN_CONFIG_BIAS_PULL_UP will actually be printed since "has_arg" is false for that one, but I guess it's still fine to return different values so I kept doing that. It seems like another driver (ssbi-gpio) uses a custom attribute (PM8XXX_QCOM_PULL_UP_STRENGTH) for something similar so maybe a future change should do that here too. Fixes: cfb24f6ebd38 ("pinctrl: Qualcomm SPMI PMIC MPP pin controller driver") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 8301e600e7a8..5cd9a81a6060 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -347,13 +347,12 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, switch (param) { case PIN_CONFIG_BIAS_DISABLE: - arg = pad->pullup == PMIC_MPP_PULL_UP_OPEN; + if (pad->pullup != PMIC_MPP_PULL_UP_OPEN) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_UP: switch (pad->pullup) { - case PMIC_MPP_PULL_UP_OPEN: - arg = 0; - break; case PMIC_MPP_PULL_UP_0P6KOHM: arg = 600; break; @@ -368,13 +367,17 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, } break; case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: - arg = !pad->is_enabled; + if (pad->is_enabled) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_POWER_SOURCE: arg = pad->power_source; break; case PIN_CONFIG_INPUT_ENABLE: - arg = pad->input_enabled; + if (!pad->input_enabled) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_OUTPUT: arg = pad->out_value; @@ -386,7 +389,9 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, arg = pad->amux_input; break; case PMIC_MPP_CONF_PAIRED: - arg = pad->paired; + if (!pad->paired) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_DRIVE_STRENGTH: arg = pad->drive_strength; From 1305e3d89377e1e5d0df42e4bcedd58d9af5e983 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 30 Aug 2018 08:23:38 -0700 Subject: [PATCH 394/812] pinctrl: ssbi-gpio: Fix pm8xxx_pin_config_get() to be compliant [ Upstream commit b432414b996d32a1bd9afe2bd595bd5729c1477f ] If you look at "pinconf-groups" in debugfs for ssbi-gpio you'll notice it looks like nonsense. The problem is fairly well described in commit 1cf86bc21257 ("pinctrl: qcom: spmi-gpio: Fix pmic_gpio_config_get() to be compliant") and commit 05e0c828955c ("pinctrl: msm: Fix msm_config_group_get() to be compliant"), but it was pointed out that ssbi-gpio has the same problem. Let's fix it there too. Fixes: b4c45fe974bc ("pinctrl: qcom: ssbi: Family A gpio & mpp drivers") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 28 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index 19a3c3bc2f1f..b1e8a2d905ff 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -259,22 +259,32 @@ static int pm8xxx_pin_config_get(struct pinctrl_dev *pctldev, switch (param) { case PIN_CONFIG_BIAS_DISABLE: - arg = pin->bias == PM8XXX_GPIO_BIAS_NP; + if (pin->bias != PM8XXX_GPIO_BIAS_NP) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_DOWN: - arg = pin->bias == PM8XXX_GPIO_BIAS_PD; + if (pin->bias != PM8XXX_GPIO_BIAS_PD) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_UP: - arg = pin->bias <= PM8XXX_GPIO_BIAS_PU_1P5_30; + if (pin->bias > PM8XXX_GPIO_BIAS_PU_1P5_30) + return -EINVAL; + arg = 1; break; case PM8XXX_QCOM_PULL_UP_STRENGTH: arg = pin->pull_up_strength; break; case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: - arg = pin->disable; + if (!pin->disable) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_INPUT_ENABLE: - arg = pin->mode == PM8XXX_GPIO_MODE_INPUT; + if (pin->mode != PM8XXX_GPIO_MODE_INPUT) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_OUTPUT: if (pin->mode & PM8XXX_GPIO_MODE_OUTPUT) @@ -289,10 +299,14 @@ static int pm8xxx_pin_config_get(struct pinctrl_dev *pctldev, arg = pin->output_strength; break; case PIN_CONFIG_DRIVE_PUSH_PULL: - arg = !pin->open_drain; + if (pin->open_drain) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_DRIVE_OPEN_DRAIN: - arg = pin->open_drain; + if (!pin->open_drain) + return -EINVAL; + arg = 1; break; default: return -EINVAL; From 93ceb8dcdef904189b10cbddcf9be3f971a1ab89 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 22 Aug 2018 09:39:52 +0200 Subject: [PATCH 395/812] ath10k: schedule hardware restart if WMI command times out [ Upstream commit a9911937e7d332761e8c4fcbc7ba0426bdc3956f ] When running in AP mode, ath10k sometimes suffers from TX credit starvation. The issue is hard to reproduce and shows up once in a few days, but has been repeatedly seen with QCA9882 and a large range of firmwares, including 10.2.4.70.67. Once the module is in this state, TX credits are never replenished, which results in "SWBA overrun" errors, as no beacons can be sent. Even worse, WMI commands run in a timeout while holding the conf mutex for three seconds each, making any further operations slow and the whole system unresponsive. The firmware/driver never recovers from that state automatically, and triggering TX flush or warm restarts won't work over WMI. So issue a hardware restart if a WMI command times out due to missing TX credits. This implies a connectivity outage of about 1.4s in AP mode, but brings back the interface and the whole system to a usable state. WMI command timeouts have not been seen in absent of this specific issue, so taking such drastic actions seems legitimate. Signed-off-by: Martin Willi Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/wmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 5bb1be478954..f201e50447d8 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1749,6 +1749,12 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) if (ret) dev_kfree_skb_any(skb); + if (ret == -EAGAIN) { + ath10k_warn(ar, "wmi command %d timeout, restarting hardware\n", + cmd_id); + queue_work(ar->workqueue, &ar->restart_work); + } + return ret; } From 8c00d5d73231196555032000d25be65a149938a7 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 16 Oct 2018 16:31:25 +1100 Subject: [PATCH 396/812] scsi: esp_scsi: Track residual for PIO transfers [ Upstream commit fd47d919d0c336e7c22862b51ee94927ffea227a ] If a target disconnects during a PIO data transfer the command may fail when the target reconnects: scsi host1: DMA length is zero! scsi host1: cur adr[04380000] len[00000000] The scsi bus is then reset. This happens because the residual reached zero before the transfer was completed. The usual residual calculation relies on the Transfer Count registers. That works for DMA transfers but not for PIO transfers. Fix the problem by storing the PIO transfer residual and using that to correctly calculate bytes_sent. Fixes: 6fe07aaffbf0 ("[SCSI] m68k: new mac_esp scsi driver") Tested-by: Stan Johnson Signed-off-by: Finn Thain Tested-by: Michael Schmitz Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/esp_scsi.c | 1 + drivers/scsi/esp_scsi.h | 2 ++ drivers/scsi/mac_esp.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index 71cb05b1c3eb..60be0742e2c8 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -1349,6 +1349,7 @@ static int esp_data_bytes_sent(struct esp *esp, struct esp_cmd_entry *ent, bytes_sent = esp->data_dma_len; bytes_sent -= ecount; + bytes_sent -= esp->send_cmd_residual; /* * The am53c974 has a DMA 'pecularity'. The doc states: diff --git a/drivers/scsi/esp_scsi.h b/drivers/scsi/esp_scsi.h index 84dcbe4a6268..55be43fe7667 100644 --- a/drivers/scsi/esp_scsi.h +++ b/drivers/scsi/esp_scsi.h @@ -540,6 +540,8 @@ struct esp { void *dma; int dmarev; + + u32 send_cmd_residual; }; /* A front-end driver for the ESP chip should do the following in diff --git a/drivers/scsi/mac_esp.c b/drivers/scsi/mac_esp.c index 26c67c42985c..1002124bd8bf 100644 --- a/drivers/scsi/mac_esp.c +++ b/drivers/scsi/mac_esp.c @@ -426,6 +426,8 @@ static void mac_esp_send_pio_cmd(struct esp *esp, u32 addr, u32 esp_count, scsi_esp_cmd(esp, ESP_CMD_TI); } } + + esp->send_cmd_residual = esp_count; } static int mac_esp_irq_pending(struct esp *esp) From be1851b9a22c10ae676afcc047835307b2d381ab Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 6 Oct 2018 13:34:21 -0500 Subject: [PATCH 397/812] scsi: megaraid_sas: fix a missing-check bug [ Upstream commit 47db7873136a9c57c45390a53b57019cf73c8259 ] In megasas_mgmt_compat_ioctl_fw(), to handle the structure compat_megasas_iocpacket 'cioc', a user-space structure megasas_iocpacket 'ioc' is allocated before megasas_mgmt_ioctl_fw() is invoked to handle the packet. Since the two data structures have different fields, the data is copied from 'cioc' to 'ioc' field by field. In the copy process, 'sense_ptr' is prepared if the field 'sense_len' is not null, because it will be used in megasas_mgmt_ioctl_fw(). To prepare 'sense_ptr', the user-space data 'ioc->sense_off' and 'cioc->sense_off' are copied and saved to kernel-space variables 'local_sense_off' and 'user_sense_off' respectively. Given that 'ioc->sense_off' is also copied from 'cioc->sense_off', 'local_sense_off' and 'user_sense_off' should have the same value. However, 'cioc' is in the user space and a malicious user can race to change the value of 'cioc->sense_off' after it is copied to 'ioc->sense_off' but before it is copied to 'user_sense_off'. By doing so, the attacker can inject different values into 'local_sense_off' and 'user_sense_off'. This can cause undefined behavior in the following execution, because the two variables are supposed to be same. This patch enforces a check on the two kernel variables 'local_sense_off' and 'user_sense_off' to make sure they are the same after the copy. In case they are not, an error code EINVAL will be returned. Signed-off-by: Wenwen Wang Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 6835bae33ec4..ac7acd257c99 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6510,6 +6510,9 @@ static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg) get_user(user_sense_off, &cioc->sense_off)) return -EFAULT; + if (local_sense_off != user_sense_off) + return -EINVAL; + if (local_sense_len) { void __user **sense_ioc_ptr = (void __user **)((u8 *)((unsigned long)&ioc->frame.raw) + local_sense_off); From 3cf68396d52384ad808695ed5be23629ead6e6e3 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 30 Aug 2018 16:40:05 +0200 Subject: [PATCH 398/812] tpm: suppress transmit cmd error logs when TPM 1.2 is disabled/deactivated [ Upstream commit 0d6d0d62d9505a9816716aa484ebd0b04c795063 ] For TPM 1.2 chips the system setup utility allows to set the TPM device in one of the following states: * Active: Security chip is functional * Inactive: Security chip is visible, but is not functional * Disabled: Security chip is hidden and is not functional When choosing the "Inactive" state, the TPM 1.2 device is enumerated and registered, but sending TPM commands fail with either TPM_DEACTIVATED or TPM_DISABLED depending if the firmware deactivated or disabled the TPM. Since these TPM 1.2 error codes don't have special treatment, inactivating the TPM leads to a very noisy kernel log buffer that shows messages like the following: tpm_tis 00:05: 1.2 TPM (device-id 0x0, rev-id 78) tpm tpm0: A TPM error (6) occurred attempting to read a pcr value tpm tpm0: TPM is disabled/deactivated (0x6) tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting to read a pcr value ima: No TPM chip found, activating TPM-bypass! (rc=6) tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random Let's just suppress error log messages for the TPM_{DEACTIVATED,DISABLED} return codes, since this is expected when the TPM 1.2 is set to Inactive. In that case the kernel log is cleaner and less confusing for users, i.e: tpm_tis 00:05: 1.2 TPM (device-id 0x0, rev-id 78) tpm tpm0: TPM is disabled/deactivated (0x6) ima: No TPM chip found, activating TPM-bypass! (rc=6) Reported-by: Hans de Goede Signed-off-by: Javier Martinez Canillas Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-interface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 95a40ec854ad..e3f2915ca4be 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -415,7 +415,8 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, const void *cmd, header = cmd; err = be32_to_cpu(header->return_code); - if (err != 0 && desc) + if (err != 0 && err != TPM_ERR_DISABLED && err != TPM_ERR_DEACTIVATED + && desc) dev_err(&chip->dev, "A TPM error (%d) occurred %s\n", err, desc); From c87fb36e0dfd35c3c2c454a3f8c6d7018fcde400 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 2 Oct 2018 01:34:44 -0400 Subject: [PATCH 399/812] ext4: fix argument checking in EXT4_IOC_MOVE_EXT [ Upstream commit f18b2b83a727a3db208308057d2c7945f368e625 ] If the starting block number of either the source or destination file exceeds the EOF, EXT4_IOC_MOVE_EXT should return EINVAL. Also fixed the helper function mext_check_coverage() so that if the logical block is beyond EOF, make it return immediately, instead of looping until the block number wraps all the away around. This takes long enough that if there are multiple threads trying to do pound on an the same inode doing non-sensical things, it can end up triggering the kernel's soft lockup detector. Reported-by: syzbot+c61979f6f2cba5cb3c06@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/ext4/move_extent.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 05048fcfd602..6b5e2eddd8d7 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -526,9 +526,13 @@ mext_check_arguments(struct inode *orig_inode, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - if (orig_eof < orig_start + *len - 1) + if (orig_eof <= orig_start) + *len = 0; + else if (orig_eof < orig_start + *len - 1) *len = orig_eof - orig_start; - if (donor_eof < donor_start + *len - 1) + if (donor_eof <= donor_start) + *len = 0; + else if (donor_eof < donor_start + *len - 1) *len = donor_eof - donor_start; if (!*len) { ext4_debug("ext4 move extent: len should not be 0 " From 8f177fb603dd3ffb1691e6cadc9834bc16b07884 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 1 Oct 2018 18:36:36 -0700 Subject: [PATCH 400/812] MD: fix invalid stored role for a disk [ Upstream commit d595567dc4f0c1d90685ec1e2e296e2cad2643ac ] If we change the number of array's device after device is removed from array, then add the device back to array, we can see that device is added as active role instead of spare which we expected. Please see the below link for details: https://marc.info/?l=linux-raid&m=153736982015076&w=2 This is caused by that we prefer to use device's previous role which is recorded by saved_raid_disk, but we should respect the new number of conf->raid_disks since it could be changed after device is removed. Reported-by: Gioh Kim Tested-by: Gioh Kim Acked-by: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 07f307402351..4caf240693cd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1670,6 +1670,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } else set_bit(In_sync, &rdev->flags); rdev->raid_disk = role; + if (role >= mddev->raid_disks) { + rdev->saved_raid_disk = -1; + rdev->raid_disk = -1; + } break; } if (sb->devflags & WriteMostly1) From d986aedcc39ab86a9179a7588c7327e524bf7baf Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 4 Sep 2018 17:18:57 +0200 Subject: [PATCH 401/812] usb: chipidea: Prevent unbalanced IRQ disable [ Upstream commit 8b97d73c4d72a2abf58f8e49062a7ee1e5f1334e ] The ChipIdea IRQ is disabled before scheduling the otg work and re-enabled on otg work completion. However if the job is already scheduled we have to undo the effect of disable_irq int order to balance the IRQ disable-depth value. Fixes: be6b0c1bd0be ("usb: chipidea: using one inline function to cover queue work operations") Signed-off-by: Loic Poulain Signed-off-by: Peter Chen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/otg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/otg.h b/drivers/usb/chipidea/otg.h index 9ecb598e48f0..a5557c70034a 100644 --- a/drivers/usb/chipidea/otg.h +++ b/drivers/usb/chipidea/otg.h @@ -20,7 +20,8 @@ void ci_handle_vbus_change(struct ci_hdrc *ci); static inline void ci_otg_queue_work(struct ci_hdrc *ci) { disable_irq_nosync(ci->irq); - queue_work(ci->wq, &ci->work); + if (queue_work(ci->wq, &ci->work) == false) + enable_irq(ci->irq); } #endif /* __DRIVERS_USB_CHIPIDEA_OTG_H */ From d7a7df4c972c5de069ecd0c23486147248db40a8 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 14 Sep 2018 14:53:32 -0400 Subject: [PATCH 402/812] driver/dma/ioat: Call del_timer_sync() without holding prep_lock [ Upstream commit cfb03be6c7e8a1591285849c361d67b09f5149f7 ] The following lockdep splat was observed: [ 1222.241750] ====================================================== [ 1222.271301] WARNING: possible circular locking dependency detected [ 1222.301060] 4.16.0-10.el8+5.x86_64+debug #1 Not tainted [ 1222.326659] ------------------------------------------------------ [ 1222.356565] systemd-shutdow/1 is trying to acquire lock: [ 1222.382660] ((&ioat_chan->timer)){+.-.}, at: [<00000000f71e1a28>] del_timer_sync+0x5/0xf0 [ 1222.422928] [ 1222.422928] but task is already holding lock: [ 1222.451743] (&(&ioat_chan->prep_lock)->rlock){+.-.}, at: [<000000008ea98b12>] ioat_shutdown+0x86/0x100 [ioatdma] : [ 1223.524987] Chain exists of: [ 1223.524987] (&ioat_chan->timer) --> &(&ioat_chan->cleanup_lock)->rlock --> &(&ioat_chan->prep_lock)->rlock [ 1223.524987] [ 1223.594082] Possible unsafe locking scenario: [ 1223.594082] [ 1223.622630] CPU0 CPU1 [ 1223.645080] ---- ---- [ 1223.667404] lock(&(&ioat_chan->prep_lock)->rlock); [ 1223.691535] lock(&(&ioat_chan->cleanup_lock)->rlock); [ 1223.728657] lock(&(&ioat_chan->prep_lock)->rlock); [ 1223.765122] lock((&ioat_chan->timer)); [ 1223.784095] [ 1223.784095] *** DEADLOCK *** [ 1223.784095] [ 1223.813492] 4 locks held by systemd-shutdow/1: [ 1223.834677] #0: (reboot_mutex){+.+.}, at: [<0000000056d33456>] SYSC_reboot+0x10f/0x300 [ 1223.873310] #1: (&dev->mutex){....}, at: [<00000000258dfdd7>] device_shutdown+0x1c8/0x660 [ 1223.913604] #2: (&dev->mutex){....}, at: [<0000000068331147>] device_shutdown+0x1d6/0x660 [ 1223.954000] #3: (&(&ioat_chan->prep_lock)->rlock){+.-.}, at: [<000000008ea98b12>] ioat_shutdown+0x86/0x100 [ioatdma] In the ioat_shutdown() function: spin_lock_bh(&ioat_chan->prep_lock); set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); del_timer_sync(&ioat_chan->timer); spin_unlock_bh(&ioat_chan->prep_lock); According to the synchronization rule for the del_timer_sync() function, the caller must not hold locks which would prevent completion of the timer's handler. The timer structure has its own lock that manages its synchronization. Setting the IOAT_CHAN_DOWN bit should prevent other CPUs from trying to use that device anyway, there is probably no need to call del_timer_sync() while holding the prep_lock. So the del_timer_sync() call is now moved outside of the prep_lock critical section to prevent the circular lock dependency. Signed-off-by: Waiman Long Reviewed-by: Dave Jiang Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ioat/init.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index ac8c28968422..106fa9b327d9 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1210,8 +1210,15 @@ static void ioat_shutdown(struct pci_dev *pdev) spin_lock_bh(&ioat_chan->prep_lock); set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); - del_timer_sync(&ioat_chan->timer); spin_unlock_bh(&ioat_chan->prep_lock); + /* + * Synchronization rule for del_timer_sync(): + * - The caller must not hold locks which would prevent + * completion of the timer's handler. + * So prep_lock cannot be held before calling it. + */ + del_timer_sync(&ioat_chan->timer); + /* this should quiesce then reset */ ioat_reset_hw(ioat_chan); } From 2d62826e4bd58417f1e1b6e64f2fcbc950a12b07 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 16 Aug 2018 09:39:41 +0200 Subject: [PATCH 403/812] uio: ensure class is registered before devices [ Upstream commit ae61cf5b9913027c6953a79ed3894da4f47061bd ] When both uio and the uio drivers are built in the kernel, it is possible for a driver to register devices before the uio class is registered. This may result in a NULL pointer dereference later on in get_device_parent() when accessing the class glue_dirs spinlock. The trace looks like that: Unable to handle kernel NULL pointer dereference at virtual address 00000140 [...] [] _raw_spin_lock+0x14/0x48 [] device_add+0x154/0x6a0 [] device_create_groups_vargs+0x120/0x128 [] device_create+0x54/0x60 [] __uio_register_device+0x120/0x4a8 [] jaguar2_pci_probe+0x2d4/0x558 [] local_pci_probe+0x3c/0xb8 [] pci_device_probe+0x11c/0x180 [] driver_probe_device+0x22c/0x2d8 [] __driver_attach+0xbc/0xc0 [] bus_for_each_dev+0x4c/0x98 [] driver_attach+0x20/0x28 [] bus_add_driver+0x1b8/0x228 [] driver_register+0x60/0xf8 [] __pci_register_driver+0x40/0x48 Return EPROBE_DEFER in that case so the driver can register the device later. Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index b9823eb9c195..0ab15d833d1b 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -249,6 +249,8 @@ static struct class uio_class = { .dev_groups = uio_groups, }; +bool uio_class_registered; + /* * device functions */ @@ -772,6 +774,9 @@ static int init_uio_class(void) printk(KERN_ERR "class_register failed for uio\n"); goto err_class_register; } + + uio_class_registered = true; + return 0; err_class_register: @@ -782,6 +787,7 @@ static int init_uio_class(void) static void release_uio_class(void) { + uio_class_registered = false; class_unregister(&uio_class); uio_major_cleanup(); } @@ -801,6 +807,9 @@ int __uio_register_device(struct module *owner, struct uio_device *idev; int ret = 0; + if (!uio_class_registered) + return -EPROBE_DEFER; + if (!parent || !info || !info->name || !info->version) return -EINVAL; From c9debbd29918b5772be8788b9e47fbd31956fa96 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 Sep 2018 10:30:45 -0700 Subject: [PATCH 404/812] scsi: lpfc: Correct soft lockup when running mds diagnostics [ Upstream commit 0ef01a2d95fd62bb4f536e7ce4d5e8e74b97a244 ] When running an mds diagnostic that passes frames with the switch, soft lockups are detected. The driver is in a CQE processing loop and has sufficient amount of traffic that it never exits the ring processing routine, thus the "lockup". Cap the number of elements in the work processing routine to 64 elements. This ensures that the cpu will be given up and the handler reschedule to process additional items. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_sli.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 3406586b9201..ad4f16ab7f7a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -3485,6 +3485,7 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf; struct lpfc_cq_event *cq_event; unsigned long iflag; + int count = 0; spin_lock_irqsave(&phba->hbalock, iflag); phba->hba_flag &= ~HBA_SP_QUEUE_EVT; @@ -3506,16 +3507,22 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, if (irspiocbq) lpfc_sli_sp_handle_rspiocb(phba, pring, irspiocbq); + count++; break; case CQE_CODE_RECEIVE: case CQE_CODE_RECEIVE_V1: dmabuf = container_of(cq_event, struct hbq_dmabuf, cq_event); lpfc_sli4_handle_received_buffer(phba, dmabuf); + count++; break; default: break; } + + /* Limit the number of events to 64 to avoid soft lockups */ + if (count == 64) + break; } } From 9ab9fc44d3de6053e4193e737b06a333609c80da Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 Sep 2018 20:02:46 +0200 Subject: [PATCH 405/812] signal: Always deliver the kernel's SIGKILL and SIGSTOP to a pid namespace init [ Upstream commit 3597dfe01d12f570bc739da67f857fd222a3ea66 ] Instead of playing whack-a-mole and changing SEND_SIG_PRIV to SEND_SIG_FORCED throughout the kernel to ensure a pid namespace init gets signals sent by the kernel, stop allowing a pid namespace init to ignore SIGKILL or SIGSTOP sent by the kernel. A pid namespace init is only supposed to be able to ignore signals sent from itself and children with SIG_DFL. Fixes: 921cf9f63089 ("signals: protect cinit from unblocked SIG_DFL signals") Reviewed-by: Thomas Gleixner Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 8bfbc47f0a23..5b1313309356 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -991,7 +991,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, - from_ancestor_ns || (info == SEND_SIG_FORCED))) + from_ancestor_ns || (info == SEND_SIG_PRIV) || (info == SEND_SIG_FORCED))) goto ret; pending = group ? &t->signal->shared_pending : &t->pending; From 7e41dd0f799dbe922cbcda56f2fdb0fbce6292ba Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 29 Aug 2018 23:32:44 +0200 Subject: [PATCH 406/812] dmaengine: dma-jz4780: Return error if not probed from DT [ Upstream commit 54f919a04cf221bc1601d1193682d4379dacacbd ] The driver calls clk_get() with the clock name set to NULL, which means that the driver could only work when probed from devicetree. From now on, we explicitly require the driver to be probed from devicetree. Signed-off-by: Paul Cercueil Tested-by: Mathieu Malaterre Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dma-jz4780.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index dade7c47ff18..8344b7c91fe3 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -750,6 +750,11 @@ static int jz4780_dma_probe(struct platform_device *pdev) struct resource *res; int i, ret; + if (!dev->of_node) { + dev_err(dev, "This driver must be probed from devicetree\n"); + return -EINVAL; + } + jzdma = devm_kzalloc(dev, sizeof(*jzdma), GFP_KERNEL); if (!jzdma) return -ENOMEM; From a9d21a73909f3bce076ecdaed589640d001bc5b2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 11 Aug 2018 23:33:34 +0200 Subject: [PATCH 407/812] ALSA: hda: Check the non-cached stream buffers more explicitly [ Upstream commit 78c9be61c3a5cd9e2439fd27a5ffad73a81958c7 ] Introduce a new flag, uc_buffer, to indicate that the controller requires the non-cached pages for stream buffers, either as a chip-specific requirement or specified via snoop=0 option. This improves the code-readability. Also, this patch fixes the incorrect behavior for C-Media chip where the stream buffers were never handled as non-cached due to the check of driver_type even if you pass snoop=0 option. Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_controller.h | 1 + sound/pci/hda/hda_intel.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index b17539537b2e..55ec4470f6b6 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -151,6 +151,7 @@ struct azx { unsigned int msi:1; unsigned int probing:1; /* codec probing phase */ unsigned int snoop:1; + unsigned int uc_buffer:1; /* non-cached pages for stream buffers */ unsigned int align_buffer_size:1; unsigned int region_requested:1; unsigned int disabled:1; /* disabled by vga_switcheroo */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 95a82e428f37..ecb07fb036af 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -401,7 +401,7 @@ static void __mark_pages_wc(struct azx *chip, struct snd_dma_buffer *dmab, bool #ifdef CONFIG_SND_DMA_SGBUF if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG) { struct snd_sg_buf *sgbuf = dmab->private_data; - if (chip->driver_type == AZX_DRIVER_CMEDIA) + if (!chip->uc_buffer) return; /* deal with only CORB/RIRB buffers */ if (on) set_pages_array_wc(sgbuf->page_table, sgbuf->pages); @@ -1538,6 +1538,7 @@ static void azx_check_snoop_available(struct azx *chip) dev_info(chip->card->dev, "Force to %s mode by module option\n", snoop ? "snoop" : "non-snoop"); chip->snoop = snoop; + chip->uc_buffer = !snoop; return; } @@ -1558,8 +1559,12 @@ static void azx_check_snoop_available(struct azx *chip) snoop = false; chip->snoop = snoop; - if (!snoop) + if (!snoop) { dev_info(chip->card->dev, "Force to non-snoop mode\n"); + /* C-Media requires non-cached pages only for CORB/RIRB */ + if (chip->driver_type != AZX_DRIVER_CMEDIA) + chip->uc_buffer = true; + } } static void azx_probe_work(struct work_struct *work) @@ -1958,7 +1963,7 @@ static void pcm_mmap_prepare(struct snd_pcm_substream *substream, #ifdef CONFIG_X86 struct azx_pcm *apcm = snd_pcm_substream_chip(substream); struct azx *chip = apcm->chip; - if (!azx_snoop(chip) && chip->driver_type != AZX_DRIVER_CMEDIA) + if (chip->uc_buffer) area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); #endif } From cf9e0f77f54a18775516de808195cb57439be98a Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Tue, 16 Oct 2018 15:21:16 -0700 Subject: [PATCH 408/812] xen-swiotlb: use actually allocated size on check physical continuous commit 7250f422da0480d8512b756640f131b9b893ccda upstream. xen_swiotlb_{alloc,free}_coherent() allocate/free memory based on the order of the pages and not size argument (bytes). This is inconsistent with range_straddles_page_boundary and memset which use the 'size' value, which may lead to not exchanging memory with Xen (range_straddles_page_boundary() returned true). And then the call to xen_swiotlb_free_coherent() would actually try to exchange the memory with Xen, leading to the kernel hitting an BUG (as the hypercall returned an error). This patch fixes it by making the 'size' variable be of the same size as the amount of memory allocated. CC: stable@vger.kernel.org Signed-off-by: Joe Jin Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: Christoph Helwig Cc: Dongli Zhang Cc: John Sobecki Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/xen/swiotlb-xen.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 1889e928a0da..a8a388382347 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -310,6 +310,9 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ flags &= ~(__GFP_DMA | __GFP_HIGHMEM); + /* Convert the size to actually allocated. */ + size = 1UL << (order + XEN_PAGE_SHIFT); + /* On ARM this function returns an ioremap'ped virtual address for * which virt_to_phys doesn't return the corresponding physical * address. In fact on ARM virt_to_phys only works for kernel direct @@ -359,6 +362,9 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, * physical address */ phys = xen_bus_to_phys(dev_addr); + /* Convert the size to actually allocated. */ + size = 1UL << (order + XEN_PAGE_SHIFT); + if (((dev_addr + size - 1 <= dma_mask)) || range_straddles_page_boundary(phys, size)) xen_destroy_contiguous_region(phys, order); From 8cacd37f58a5aa170877bc61c41327086d57ce81 Mon Sep 17 00:00:00 2001 From: "Dr. Greg Wettstein" Date: Mon, 17 Sep 2018 18:53:33 -0400 Subject: [PATCH 409/812] tpm: Restore functionality to xen vtpm driver. commit e487a0f52301293152a6f8c4e217f2a11dd808e3 upstream. Functionality of the xen-tpmfront driver was lost secondary to the introduction of xenbus multi-page support in commit ccc9d90a9a8b ("xenbus_client: Extend interface to support multi-page ring"). In this commit pointer to location of where the shared page address is stored was being passed to the xenbus_grant_ring() function rather then the address of the shared page itself. This resulted in a situation where the driver would attach to the vtpm-stubdom but any attempt to send a command to the stub domain would timeout. A diagnostic finding for this regression is the following error message being generated when the xen-tpmfront driver probes for a device: <3>vtpm vtpm-0: tpm_transmit: tpm_send: error -62 <3>vtpm vtpm-0: A TPM error (-62) occurred attempting to determine the timeouts This fix is relevant to all kernels from 4.1 forward which is the release in which multi-page xenbus support was introduced. Daniel De Graaf formulated the fix by code inspection after the regression point was located. Fixes: ccc9d90a9a8b ("xenbus_client: Extend interface to support multi-page ring") Signed-off-by: Dr. Greg Wettstein Signed-off-by: Greg Kroah-Hartman [boris: Updated commit message, added Fixes tag] Signed-off-by: Boris Ostrovsky Cc: stable@vger.kernel.org # v4.1+ Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/xen-tpmfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 849f2e29c243..0fb18765f982 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -201,7 +201,7 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) return -ENOMEM; } - rv = xenbus_grant_ring(dev, &priv->shr, 1, &gref); + rv = xenbus_grant_ring(dev, priv->shr, 1, &gref); if (rv < 0) return rv; From a1a52e429decbc3e9bd31634ba1ecd4870ffa686 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 1 Oct 2018 07:57:42 +0200 Subject: [PATCH 410/812] xen: fix race in xen_qlock_wait() commit 2ac2a7d4d9ff4e01e36f9c3d116582f6f655ab47 upstream. In the following situation a vcpu waiting for a lock might not be woken up from xen_poll_irq(): CPU 1: CPU 2: CPU 3: takes a spinlock tries to get lock -> xen_qlock_wait() frees the lock -> xen_qlock_kick(cpu2) -> xen_clear_irq_pending() takes lock again tries to get lock -> *lock = _Q_SLOW_VAL -> *lock == _Q_SLOW_VAL ? -> xen_poll_irq() frees the lock -> xen_qlock_kick(cpu3) And cpu 2 will sleep forever. This can be avoided easily by modifying xen_qlock_wait() to call xen_poll_irq() only if the related irq was not pending and to call xen_clear_irq_pending() only if it was pending. Cc: stable@vger.kernel.org Cc: Waiman.Long@hp.com Cc: peterz@infradead.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index f42e78de1e10..55ba2513c749 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -47,17 +47,12 @@ static void xen_qlock_wait(u8 *byte, u8 val) if (irq == -1) return; - /* clear pending */ - xen_clear_irq_pending(irq); - barrier(); + /* If irq pending already clear it and return. */ + if (xen_test_irq_pending(irq)) { + xen_clear_irq_pending(irq); + return; + } - /* - * We check the byte value after clearing pending IRQ to make sure - * that we won't miss a wakeup event because of the clearing. - * - * The sync_clear_bit() call in xen_clear_irq_pending() is atomic. - * So it is effectively a memory barrier for x86. - */ if (READ_ONCE(*byte) != val) return; From 4f2f01aa89e9332ebc5b07919ab17ee7d367fb35 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 1 Oct 2018 07:57:42 +0200 Subject: [PATCH 411/812] xen: make xen_qlock_wait() nestable commit a856531951dc8094359dfdac21d59cee5969c18e upstream. xen_qlock_wait() isn't safe for nested calls due to interrupts. A call of xen_qlock_kick() might be ignored in case a deeper nesting level was active right before the call of xen_poll_irq(): CPU 1: CPU 2: spin_lock(lock1) spin_lock(lock1) -> xen_qlock_wait() -> xen_clear_irq_pending() Interrupt happens spin_unlock(lock1) -> xen_qlock_kick(CPU 2) spin_lock_irqsave(lock2) spin_lock_irqsave(lock2) -> xen_qlock_wait() -> xen_clear_irq_pending() clears kick for lock1 -> xen_poll_irq() spin_unlock_irq_restore(lock2) -> xen_qlock_kick(CPU 2) wakes up spin_unlock_irq_restore(lock2) IRET resumes in xen_qlock_wait() -> xen_poll_irq() never wakes up The solution is to disable interrupts in xen_qlock_wait() and not to poll for the irq in case xen_qlock_wait() is called in nmi context. Cc: stable@vger.kernel.org Cc: Waiman.Long@hp.com Cc: peterz@infradead.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 55ba2513c749..9d81231b86a8 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -41,29 +41,25 @@ static void xen_qlock_kick(int cpu) */ static void xen_qlock_wait(u8 *byte, u8 val) { + unsigned long flags; int irq = __this_cpu_read(lock_kicker_irq); /* If kicker interrupts not initialized yet, just spin */ - if (irq == -1) + if (irq == -1 || in_nmi()) return; - /* If irq pending already clear it and return. */ + /* Guard against reentry. */ + local_irq_save(flags); + + /* If irq pending already clear it. */ if (xen_test_irq_pending(irq)) { xen_clear_irq_pending(irq); - return; + } else if (READ_ONCE(*byte) == val) { + /* Block until irq becomes pending (or a spurious wakeup) */ + xen_poll_irq(irq); } - if (READ_ONCE(*byte) != val) - return; - - /* - * If an interrupt happens here, it will leave the wakeup irq - * pending, which will cause xen_poll_irq() to return - * immediately. - */ - - /* Block until irq becomes pending (or perhaps a spurious wakeup) */ - xen_poll_irq(irq); + local_irq_restore(flags); } #else /* CONFIG_QUEUED_SPINLOCKS */ From 7546540bfd7e9be6e29602624fafae747de22558 Mon Sep 17 00:00:00 2001 From: Stefan Nuernberger Date: Mon, 17 Sep 2018 19:46:53 +0200 Subject: [PATCH 412/812] net/ipv4: defensive cipso option parsing commit 076ed3da0c9b2f88d9157dbe7044a45641ae369e upstream. commit 40413955ee26 ("Cipso: cipso_v4_optptr enter infinite loop") fixed a possible infinite loop in the IP option parsing of CIPSO. The fix assumes that ip_options_compile filtered out all zero length options and that no other one-byte options beside IPOPT_END and IPOPT_NOOP exist. While this assumption currently holds true, add explicit checks for zero length and invalid length options to be safe for the future. Even though ip_options_compile should have validated the options, the introduction of new one-byte options can still confuse this code without the additional checks. Signed-off-by: Stefan Nuernberger Cc: David Woodhouse Cc: Simon Veith Cc: stable@vger.kernel.org Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 5169b9b36b6a..cfaacaa023e6 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1582,7 +1582,7 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, * * Description: * Parse the packet's IP header looking for a CIPSO option. Returns a pointer - * to the start of the CIPSO option on success, NULL if one if not found. + * to the start of the CIPSO option on success, NULL if one is not found. * */ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) @@ -1592,10 +1592,8 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) int optlen; int taglen; - for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { + for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 1; ) { switch (optptr[0]) { - case IPOPT_CIPSO: - return optptr; case IPOPT_END: return NULL; case IPOPT_NOOP: @@ -1604,6 +1602,11 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) default: taglen = optptr[1]; } + if (!taglen || taglen > optlen) + return NULL; + if (optptr[0] == IPOPT_CIPSO) + return optptr; + optlen -= taglen; optptr += taglen; } From 8954771abdea5c34280870e35592c7226a816d95 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 25 Sep 2018 13:53:02 -0700 Subject: [PATCH 413/812] libnvdimm: Hold reference on parent while scheduling async init commit b6eae0f61db27748606cc00dafcfd1e2c032f0a5 upstream. Unlike asynchronous initialization in the core we have not yet associated the device with the parent, and as such the device doesn't hold a reference to the parent. In order to resolve that we should be holding a reference on the parent until the asynchronous initialization has completed. Cc: Fixes: 4d88a97aa9e8 ("libnvdimm: ...base ... infrastructure") Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index a71187c783b7..273c7ecf4879 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -158,6 +158,8 @@ static void nd_async_device_register(void *d, async_cookie_t cookie) put_device(dev); } put_device(dev); + if (dev->parent) + put_device(dev->parent); } static void nd_async_device_unregister(void *d, async_cookie_t cookie) @@ -175,6 +177,8 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) void __nd_device_register(struct device *dev) { dev->bus = &nvdimm_bus_type; + if (dev->parent) + get_device(dev->parent); get_device(dev); async_schedule_domain(nd_async_device_register, dev, &nd_async_domain); From f4af4c7329886024c122cdd88559db4d5b93dc81 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 5 Oct 2018 18:44:40 -0400 Subject: [PATCH 414/812] jbd2: fix use after free in jbd2_log_do_checkpoint() commit ccd3c4373eacb044eb3832966299d13d2631f66f upstream. The code cleaning transaction's lists of checkpoint buffers has a bug where it increases bh refcount only after releasing journal->j_list_lock. Thus the following race is possible: CPU0 CPU1 jbd2_log_do_checkpoint() jbd2_journal_try_to_free_buffers() __journal_try_to_free_buffer(bh) ... while (transaction->t_checkpoint_io_list) ... if (buffer_locked(bh)) { <-- IO completes now, buffer gets unlocked --> spin_unlock(&journal->j_list_lock); spin_lock(&journal->j_list_lock); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); try_to_free_buffers(page); get_bh(bh) <-- accesses freed bh Fix the problem by grabbing bh reference before unlocking journal->j_list_lock. Fixes: dc6e8d669cf5 ("jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()") Fixes: be1158cc615f ("jbd2: fold __process_buffer() into jbd2_log_do_checkpoint()") Reported-by: syzbot+7f4a27091759e2fe7453@syzkaller.appspotmail.com CC: stable@vger.kernel.org Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/checkpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 684996c8a3a4..4d5a5a4cc017 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -254,8 +254,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) bh = jh2bh(jh); if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); get_bh(bh); + spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); @@ -336,8 +336,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); get_bh(bh); + spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); From 82a76725f960854d275991bb0c1f8adb44b3c1eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Oct 2018 00:19:13 -0400 Subject: [PATCH 415/812] gfs2_meta: ->mount() can get NULL dev_name commit 3df629d873f8683af6f0d34dfc743f637966d483 upstream. get in sync with mount_bdev() handling of the same Reported-by: syzbot+c54f8e94e6bba03b04e9@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index baab99b69d8a..d9178388cf48 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1353,6 +1353,9 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, struct path path; int error; + if (!dev_name || !*dev_name) + return ERR_PTR(-EINVAL); + error = kern_path(dev_name, LOOKUP_FOLLOW, &path); if (error) { pr_warn("path_lookup on %s returned error %d\n", From d396e5395271f59bbe9069ebcc14a0fa5d9c85e7 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 2 Oct 2018 21:18:45 -0400 Subject: [PATCH 416/812] ext4: initialize retries variable in ext4_da_write_inline_data_begin() commit 625ef8a3acd111d5f496d190baf99d1a815bd03e upstream. Variable retries is not initialized in ext4_da_write_inline_data_begin() which can lead to nondeterministic number of retries in case we hit ENOSPC. Initialize retries to zero as we do everywhere else. Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Fixes: bc0ca9df3b2a ("ext4: retry allocation when inline->extent conversion failed") Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1e7a9774119c..34c365f0e69e 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -859,7 +859,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, handle_t *handle; struct page *page; struct ext4_iloc iloc; - int retries; + int retries = 0; ret = ext4_get_inode_loc(inode, &iloc); if (ret) From 66f3e856de5d51fcbcd7cf1ae58c08b7b564ea7b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 19 Oct 2018 17:01:33 -0300 Subject: [PATCH 417/812] HID: hiddev: fix potential Spectre v1 commit f11274396a538b31bc010f782e05c2ce3f804c13 upstream. uref->usage_index can be indirectly controlled by userspace, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This field is used as an array index by the hiddev_ioctl_usage() function, when 'cmd' is either HIDIOCGCOLLECTIONINDEX, HIDIOCGUSAGES or HIDIOCSUSAGES. For cmd == HIDIOCGCOLLECTIONINDEX case, uref->usage_index is compared to field->maxusage and then used as an index to dereference field->usage array. The same thing happens to the cmd == HIDIOC{G,S}USAGES cases, where uref->usage_index is checked against an array maximum value and then it is used as an index in an array. This is a summary of the HIDIOCGCOLLECTIONINDEX case, which matches the traditional Spectre V1 first load: copy_from_user(uref, user_arg, sizeof(*uref)) if (uref->usage_index >= field->maxusage) goto inval; i = field->usage[uref->usage_index].collection_index; return i; This patch fixes this by sanitizing field uref->usage_index before using it to index field->usage (HIDIOCGCOLLECTIONINDEX) or field->value in HIDIOC{G,S}USAGES arrays, thus, avoiding speculation in the first load. Cc: Signed-off-by: Breno Leitao v2: Contemplate cmd == HIDIOC{G,S}USAGES case Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index b59b15d4caa9..308d8432fea3 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -521,14 +521,24 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, if (cmd == HIDIOCGCOLLECTIONINDEX) { if (uref->usage_index >= field->maxusage) goto inval; + uref->usage_index = + array_index_nospec(uref->usage_index, + field->maxusage); } else if (uref->usage_index >= field->report_count) goto inval; } - if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && - (uref_multi->num_values > HID_MAX_MULTI_USAGES || - uref->usage_index + uref_multi->num_values > field->report_count)) - goto inval; + if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) { + if (uref_multi->num_values > HID_MAX_MULTI_USAGES || + uref->usage_index + uref_multi->num_values > + field->report_count) + goto inval; + + uref->usage_index = + array_index_nospec(uref->usage_index, + field->report_count - + uref_multi->num_values); + } switch (cmd) { case HIDIOCGUSAGE: From d681d83d8399e468f22a72ef9c0c54b1f25f6d64 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 26 Sep 2018 08:14:01 -0700 Subject: [PATCH 418/812] PCI: Add Device IDs for Intel GPU "spurious interrupt" quirk commit d0c9606b31a21028fb5b753c8ad79626292accfd upstream. Add Device IDs to the Intel GPU "spurious interrupt" quirk table. For these devices, unplugging the VGA cable and plugging it in again causes spurious interrupts from the IGD. Linux eventually disables the interrupt, but of course that disables any other devices sharing the interrupt. The theory is that this is a VGA BIOS defect: it should have disabled the IGD interrupt but failed to do so. See f67fd55fa96f ("PCI: Add quirk for still enabled interrupts on Intel Sandy Bridge GPUs") and 7c82126a94e6 ("PCI: Add new ID for Intel GPU "spurious interrupt" quirk") for some history. [bhelgaas: See link below for discussion about how to fix this more generically instead of adding device IDs for every new Intel GPU. I hope this is the last patch to add device IDs.] Link: https://lore.kernel.org/linux-pci/1537974841-29928-1-git-send-email-bmeng.cn@gmail.com Signed-off-by: Bin Meng [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 5697b32819cb..84d501f5ff4e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3061,7 +3061,11 @@ static void disable_igfx_irq(struct pci_dev *dev) pci_iounmap(dev, regs); } +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0042, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0046, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x004a, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0106, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0152, disable_igfx_irq); From 5f894b3ee37a567fc2012fcc1caa8df399801b21 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Sep 2018 11:28:01 +0200 Subject: [PATCH 419/812] signal/GenWQE: Fix sending of SIGKILL commit 0ab93e9c99f8208c0a1a7b7170c827936268c996 upstream. The genweq_add_file and genwqe_del_file by caching current without using reference counting embed the assumption that a file descriptor will never be passed from one process to another. It even embeds the assumption that the the thread that opened the file will be in existence when the process terminates. Neither of which are guaranteed to be true. Therefore replace caching the task_struct of the opener with pid of the openers thread group id. All the knowledge of the opener is used for is as the target of SIGKILL and a SIGKILL will kill the entire process group. Rename genwqe_force_sig to genwqe_terminate, remove it's unncessary signal argument, update it's ownly caller, and use kill_pid instead of force_sig. The work force_sig does in changing signal handling state is not relevant to SIGKILL sent as SEND_SIG_PRIV. The exact same processess will be killed just with less work, and less confusion. The work done by force_sig is really only needed for handling syncrhonous exceptions. It will still be possible to cause genwqe_device_remove to wait 8 seconds by passing a file descriptor to another process but the possible user after free is fixed. Fixes: eaf4722d4645 ("GenWQE Character device and DDCB queue") Cc: stable@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Frank Haverkamp Cc: Joerg-Stephan Vogt Cc: Michael Jung Cc: Michael Ruettger Cc: Kleber Sacilotto de Souza Cc: Sebastian Ott Cc: Eberhard S. Amann Cc: Gabriel Krisman Bertazi Cc: Guilherme G. Piccoli Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- drivers/misc/genwqe/card_base.h | 2 +- drivers/misc/genwqe/card_dev.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h index cb851c14ca4b..159f35b2bd11 100644 --- a/drivers/misc/genwqe/card_base.h +++ b/drivers/misc/genwqe/card_base.h @@ -404,7 +404,7 @@ struct genwqe_file { struct file *filp; struct fasync_struct *async_queue; - struct task_struct *owner; + struct pid *opener; struct list_head list; /* entry in list of open files */ spinlock_t map_lock; /* lock for dma_mappings */ diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 7f1b282d7d96..c0012ca4229e 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -52,7 +52,7 @@ static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile) { unsigned long flags; - cfile->owner = current; + cfile->opener = get_pid(task_tgid(current)); spin_lock_irqsave(&cd->file_lock, flags); list_add(&cfile->list, &cd->file_list); spin_unlock_irqrestore(&cd->file_lock, flags); @@ -65,6 +65,7 @@ static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile) spin_lock_irqsave(&cd->file_lock, flags); list_del(&cfile->list); spin_unlock_irqrestore(&cd->file_lock, flags); + put_pid(cfile->opener); return 0; } @@ -275,7 +276,7 @@ static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig) return files; } -static int genwqe_force_sig(struct genwqe_dev *cd, int sig) +static int genwqe_terminate(struct genwqe_dev *cd) { unsigned int files = 0; unsigned long flags; @@ -283,7 +284,7 @@ static int genwqe_force_sig(struct genwqe_dev *cd, int sig) spin_lock_irqsave(&cd->file_lock, flags); list_for_each_entry(cfile, &cd->file_list, list) { - force_sig(sig, cfile->owner); + kill_pid(cfile->opener, SIGKILL, 1); files++; } spin_unlock_irqrestore(&cd->file_lock, flags); @@ -1356,7 +1357,7 @@ static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) dev_warn(&pci_dev->dev, "[%s] send SIGKILL and wait ...\n", __func__); - rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */ + rc = genwqe_terminate(cd); if (rc) { /* Give kill_timout more seconds to end processes */ for (i = 0; (i < genwqe_kill_timeout) && From 00d3634cab94845cd8a9c91819276ea28748a37d Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Thu, 13 Sep 2018 10:51:31 +0200 Subject: [PATCH 420/812] crypto: lrw - Fix out-of bounds access on counter overflow commit fbe1a850b3b1522e9fc22319ccbbcd2ab05328d2 upstream. When the LRW block counter overflows, the current implementation returns 128 as the index to the precomputed multiplication table, which has 128 entries. This patch fixes it to return the correct value (127). Fixes: 64470f1b8510 ("[CRYPTO] lrw: Liskov Rivest Wagner, a tweakable narrow block cipher mode") Cc: # 2.6.20+ Reported-by: Eric Biggers Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/lrw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index 6f9908a7ebcb..d38a382b09eb 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -132,7 +132,12 @@ static inline int get_index128(be128 *block) return x + ffz(val); } - return x; + /* + * If we get here, then x == 128 and we are incrementing the counter + * from all ones to all zeros. This means we must return index 127, i.e. + * the one corresponding to key2*{ 1,...,1 }. + */ + return 127; } static int crypt(struct blkcipher_desc *d, From 34aa96127aaff4a53df9c82b0a0fd7059a798370 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 7 Sep 2018 14:33:24 -0700 Subject: [PATCH 421/812] ima: fix showing large 'violations' or 'runtime_measurements_count' commit 1e4c8dafbb6bf72fb5eca035b861e39c5896c2b7 upstream. The 12 character temporary buffer is not necessarily long enough to hold a 'long' value. Increase it. Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_fs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 816d175da79a..30aced99bc55 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -26,14 +26,14 @@ #include "ima.h" static int valid_policy = 1; -#define TMPBUFLEN 12 + static ssize_t ima_show_htable_value(char __user *buf, size_t count, loff_t *ppos, atomic_long_t *val) { - char tmpbuf[TMPBUFLEN]; + char tmpbuf[32]; /* greater than largest 'long' string value */ ssize_t len; - len = scnprintf(tmpbuf, TMPBUFLEN, "%li\n", atomic_long_read(val)); + len = scnprintf(tmpbuf, sizeof(tmpbuf), "%li\n", atomic_long_read(val)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, len); } From 4fdd46c946f98b736929a7c68984a57807725141 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 26 Oct 2018 15:10:58 -0700 Subject: [PATCH 422/812] hugetlbfs: dirty pages as they are added to pagecache commit 22146c3ce98962436e401f7b7016a6f664c9ffb5 upstream. Some test systems were experiencing negative huge page reserve counts and incorrect file block counts. This was traced to /proc/sys/vm/drop_caches removing clean pages from hugetlbfs file pagecaches. When non-hugetlbfs explicit code removes the pages, the appropriate accounting is not performed. This can be recreated as follows: fallocate -l 2M /dev/hugepages/foo echo 1 > /proc/sys/vm/drop_caches fallocate -l 2M /dev/hugepages/foo grep -i huge /proc/meminfo AnonHugePages: 0 kB ShmemHugePages: 0 kB HugePages_Total: 2048 HugePages_Free: 2047 HugePages_Rsvd: 18446744073709551615 HugePages_Surp: 0 Hugepagesize: 2048 kB Hugetlb: 4194304 kB ls -lsh /dev/hugepages/foo 4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo To address this issue, dirty pages as they are added to pagecache. This can easily be reproduced with fallocate as shown above. Read faulted pages will eventually end up being marked dirty. But there is a window where they are clean and could be impacted by code such as drop_caches. So, just dirty them all as they are added to the pagecache. Link: http://lkml.kernel.org/r/b5be45b8-5afe-56cd-9482-28384699a049@oracle.com Fixes: 6bda666a03f0 ("hugepages: fold find_or_alloc_pages into huge_no_page()") Signed-off-by: Mike Kravetz Acked-by: Mihcla Hocko Reviewed-by: Khalid Aziz Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: "Aneesh Kumar K . V" Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Davidlohr Bueso Cc: Alexander Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a813b03021b7..1033ff44ac62 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3537,6 +3537,12 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, return err; ClearPagePrivate(page); + /* + * set page dirty so that it will not be removed from cache/file + * by non-hugetlbfs specific code paths. + */ + set_page_dirty(page); + spin_lock(&inode->i_lock); inode->i_blocks += blocks_per_huge_page(h); spin_unlock(&inode->i_lock); From 52458d09b5d0e5bacc52d6e7e7fbcf4907903891 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Oct 2018 15:07:32 -0700 Subject: [PATCH 423/812] kbuild: fix kernel/bounds.c 'W=1' warning commit 6a32c2469c3fbfee8f25bcd20af647326650a6cf upstream. Building any configuration with 'make W=1' produces a warning: kernel/bounds.c:16:6: warning: no previous prototype for 'foo' [-Wmissing-prototypes] When also passing -Werror, this prevents us from building any other files. Nobody ever calls the function, but we can't make it 'static' either since we want the compiler output. Calling it 'main' instead however avoids the warning, because gcc does not insist on having a declaration for main. Link: http://lkml.kernel.org/r/20181005083313.2088252-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Reported-by: Kieran Bingham Reviewed-by: Kieran Bingham Cc: David Laight Cc: Masahiro Yamada Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/bounds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bounds.c b/kernel/bounds.c index e1d1d1952bfa..c37f68d758db 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -12,7 +12,7 @@ #include #include -void foo(void) +int main(void) { /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); @@ -22,4 +22,6 @@ void foo(void) #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); /* End of constants */ + + return 0; } From 6b2aa089c950b244b67ed94709b93af3d4d34944 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 24 Sep 2018 10:51:43 +0300 Subject: [PATCH 424/812] iio: adc: at91: fix acking DRDY irq on simple conversions commit bc1b45326223e7e890053cf6266357adfa61942d upstream. When doing simple conversions, the driver did not acknowledge the DRDY irq. If this irq status is not acked, it will be left pending, and as soon as a trigger is enabled, the irq handler will be called, it doesn't know why this status has occurred because no channel is pending, and then it will go int a irq loop and board will hang. To avoid this situation, read the LCDR after a raw conversion is done. Fixes: 0e589d5fb ("ARM: AT91: IIO: Add AT91 ADC driver.") Cc: Maxime Ripard Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 93986f0590ef..3f65c7661907 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -276,6 +276,8 @@ static void handle_adc_eoc_trigger(int irq, struct iio_dev *idev) iio_trigger_poll(idev->trig); } else { st->last_value = at91_adc_readl(st, AT91_ADC_CHAN(st, st->chnb)); + /* Needed to ACK the DRDY interruption */ + at91_adc_readl(st, AT91_ADC_LCDR); st->done = true; wake_up_interruptible(&st->wq_data_avail); } From cc8e1ff726ab86a7da2256ca963c5547972fad10 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 24 Sep 2018 10:51:44 +0300 Subject: [PATCH 425/812] iio: adc: at91: fix wrong channel number in triggered buffer mode commit aea835f2dc8a682942b859179c49ad1841a6c8b9 upstream. When channels are registered, the hardware channel number is not the actual iio channel number. This is because the driver is probed with a certain number of accessible channels. Some pins are routed and some not, depending on the description of the board in the DT. Because of that, channels 0,1,2,3 can correspond to hardware channels 2,3,4,5 for example. In the buffered triggered case, we need to do the translation accordingly. Fixed the channel number to stop reading the wrong channel. Fixes: 0e589d5fb ("ARM: AT91: IIO: Add AT91 ADC driver.") Cc: Maxime Ripard Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 3f65c7661907..d83e5b75a37b 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -245,12 +245,14 @@ static irqreturn_t at91_adc_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *idev = pf->indio_dev; struct at91_adc_state *st = iio_priv(idev); + struct iio_chan_spec const *chan; int i, j = 0; for (i = 0; i < idev->masklength; i++) { if (!test_bit(i, idev->active_scan_mask)) continue; - st->buffer[j] = at91_adc_readl(st, AT91_ADC_CHAN(st, i)); + chan = idev->channels + i; + st->buffer[j] = at91_adc_readl(st, AT91_ADC_CHAN(st, chan->channel)); j++; } From b0ef4712fea978a446f82ea49c7fbedc0ad9871a Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 22 Sep 2018 21:20:54 +0200 Subject: [PATCH 426/812] w1: omap-hdq: fix missing bus unregister at removal commit a007734618fee1bf35556c04fa498d41d42c7301 upstream. The bus master was not removed after unloading the module or unbinding the driver. That lead to oopses like this [ 127.842987] Unable to handle kernel paging request at virtual address bf01d04c [ 127.850646] pgd = 70e3cd9a [ 127.853698] [bf01d04c] *pgd=8f908811, *pte=00000000, *ppte=00000000 [ 127.860412] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM [ 127.866668] Modules linked in: bq27xxx_battery overlay [last unloaded: omap_hdq] [ 127.874542] CPU: 0 PID: 1022 Comm: w1_bus_master1 Not tainted 4.19.0-rc4-00001-g2d51da718324 #12 [ 127.883819] Hardware name: Generic OMAP36xx (Flattened Device Tree) [ 127.890441] PC is at 0xbf01d04c [ 127.893798] LR is at w1_search_process_cb+0x4c/0xfc [ 127.898956] pc : [] lr : [] psr: a0070013 [ 127.905609] sp : cf885f48 ip : bf01d04c fp : ddf1e11c [ 127.911132] r10: cf8fe040 r9 : c05f8d00 r8 : cf8fe040 [ 127.916656] r7 : 000000f0 r6 : cf8fe02c r5 : cf8fe000 r4 : cf8fe01c [ 127.923553] r3 : c05f8d00 r2 : 000000f0 r1 : cf8fe000 r0 : dde1ef10 [ 127.930450] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 127.938018] Control: 10c5387d Table: 8f8f0019 DAC: 00000051 [ 127.944091] Process w1_bus_master1 (pid: 1022, stack limit = 0x9135699f) [ 127.951171] Stack: (0xcf885f48 to 0xcf886000) [ 127.955810] 5f40: cf8fe000 00000000 cf884000 cf8fe090 000003e8 c05f8d00 [ 127.964477] 5f60: dde5fc34 c05f9700 ddf1e100 ddf1e540 cf884000 cf8fe000 c05f9694 00000000 [ 127.973114] 5f80: dde5fc34 c01499a4 00000000 ddf1e540 c0149874 00000000 00000000 00000000 [ 127.981781] 5fa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 [ 127.990447] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 127.999114] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 128.007781] [] (w1_search_process_cb) from [] (w1_process+0x6c/0x118) [ 128.016479] [] (w1_process) from [] (kthread+0x130/0x148) [ 128.024047] [] (kthread) from [] (ret_from_fork+0x14/0x2c) [ 128.031677] Exception stack(0xcf885fb0 to 0xcf885ff8) [ 128.037017] 5fa0: 00000000 00000000 00000000 00000000 [ 128.045684] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 128.054351] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 128.061340] Code: bad PC value [ 128.064697] ---[ end trace af066e33c0e14119 ]--- Cc: Signed-off-by: Andreas Kemnade Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/omap_hdq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c index 0c427d6a12d1..4c5c6550809d 100644 --- a/drivers/w1/masters/omap_hdq.c +++ b/drivers/w1/masters/omap_hdq.c @@ -785,6 +785,8 @@ static int omap_hdq_remove(struct platform_device *pdev) /* remove module dependency */ pm_runtime_disable(&pdev->dev); + w1_remove_master_device(&omap_w1_master); + return 0; } From 39d6c4cdcf67e3b7f91822d138b7d7f870c295be Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 15 Sep 2018 23:04:41 -0500 Subject: [PATCH 427/812] smb3: allow stats which track session and share reconnects to be reset commit 2c887635cd6ab3af619dc2be94e5bf8f2e172b78 upstream. Currently, "echo 0 > /proc/fs/cifs/Stats" resets all of the stats except the session and share reconnect counts. Fix it to reset those as well. CC: Stable Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 0e72a14228f8..7bc6d27d47a4 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -285,6 +285,9 @@ static ssize_t cifs_stats_proc_write(struct file *file, atomic_set(&totBufAllocCount, 0); atomic_set(&totSmBufAllocCount, 0); #endif /* CONFIG_CIFS_STATS2 */ + atomic_set(&tcpSesReconnectCount, 0); + atomic_set(&tconInfoReconnectCount, 0); + spin_lock(&GlobalMid_Lock); GlobalMaxActiveXid = 0; GlobalCurrentXid = 0; From ae83508da422b1c6ec5a1409ca15c0f5f33cff31 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 19 Oct 2018 00:45:21 -0500 Subject: [PATCH 428/812] smb3: do not attempt cifs operation in smb3 query info error path commit 1e77a8c204c9d1b655c61751b8ad0fde22421dbb upstream. If backupuid mount option is sent, we can incorrectly retry (on access denied on query info) with a cifs (FindFirst) operation on an smb3 mount which causes the server to force the session close. We set backup intent on open so no need for this fallback. See kernel bugzilla 201435 Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 36c8594bb147..5c3187df9ab9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -756,7 +756,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; - } else if (rc == -EACCES && backup_cred(cifs_sb)) { + } else if ((rc == -EACCES) && backup_cred(cifs_sb) && + (strcmp(server->vals->version_string, SMB1_VERSION_STRING) + == 0)) { + /* + * For SMB2 and later the backup intent flag is already + * sent if needed on open and there is no path based + * FindFirst operation to use to retry with + */ + srchinf = kzalloc(sizeof(struct cifs_search_info), GFP_KERNEL); if (srchinf == NULL) { From aa21d67d7fe74897df53cbd22c8be21f8928e07d Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 28 Oct 2018 13:13:23 -0500 Subject: [PATCH 429/812] smb3: on kerberos mount if server doesn't specify auth type use krb5 commit 926674de6705f0f1dbf29a62fd758d0977f535d6 upstream. Some servers (e.g. Azure) do not include a spnego blob in the SMB3 negotiate protocol response, so on kerberos mounts ("sec=krb5") we can fail, as we expected the server to list its supported auth types (OIDs in the spnego blob in the negprot response). Change this so that on krb5 mounts we default to trying krb5 if the server doesn't list its supported protocol mechanisms. Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_spnego.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 6908080e9b6d..e3f2b7370bd8 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -143,8 +143,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) sprintf(dp, ";sec=krb5"); else if (server->sec_mskerberos) sprintf(dp, ";sec=mskrb5"); - else - goto out; + else { + cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n"); + sprintf(dp, ";sec=krb5"); + } dp = description + strlen(description); sprintf(dp, ";uid=0x%x", From d24a12ef98b7e9cbff3842df164534a21bb0d5a1 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Sun, 30 Sep 2018 00:45:50 +0800 Subject: [PATCH 430/812] printk: Fix panic caused by passing log_buf_len to command line commit 277fcdb2cfee38ccdbe07e705dbd4896ba0c9930 upstream. log_buf_len_setup does not check input argument before passing it to simple_strtoull. The argument would be a NULL pointer if "log_buf_len", without its value, is set in command line and thus causes the following panic. PANIC: early exception 0xe3 IP 10:ffffffffaaeacd0d error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.0-rc4-yocto-standard+ #1 [ 0.000000] RIP: 0010:_parse_integer_fixup_radix+0xd/0x70 ... [ 0.000000] Call Trace: [ 0.000000] simple_strtoull+0x29/0x70 [ 0.000000] memparse+0x26/0x90 [ 0.000000] log_buf_len_setup+0x17/0x22 [ 0.000000] do_early_param+0x57/0x8e [ 0.000000] parse_args+0x208/0x320 [ 0.000000] ? rdinit_setup+0x30/0x30 [ 0.000000] parse_early_options+0x29/0x2d [ 0.000000] ? rdinit_setup+0x30/0x30 [ 0.000000] parse_early_param+0x36/0x4d [ 0.000000] setup_arch+0x336/0x99e [ 0.000000] start_kernel+0x6f/0x4ee [ 0.000000] x86_64_start_reservations+0x24/0x26 [ 0.000000] x86_64_start_kernel+0x6f/0x72 [ 0.000000] secondary_startup_64+0xa4/0xb0 This patch adds a check to prevent the panic. Link: http://lkml.kernel.org/r/1538239553-81805-1-git-send-email-zhe.he@windriver.com Cc: stable@vger.kernel.org Cc: rostedt@goodmis.org Cc: linux-kernel@vger.kernel.org Signed-off-by: He Zhe Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 0b5613554769..dd689ab22806 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -881,7 +881,12 @@ static void __init log_buf_len_update(unsigned size) /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { - unsigned size = memparse(str, &str); + unsigned int size; + + if (!str) + return -EINVAL; + + size = memparse(str, &str); log_buf_len_update(size); From 59ba12331e8fb92cd18c062db937d9d7e601c940 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 18 Oct 2018 15:15:05 +0200 Subject: [PATCH 431/812] genirq: Fix race on spurious interrupt detection commit 746a923b863a1065ef77324e1e43f19b1a3eab5c upstream. Commit 1e77d0a1ed74 ("genirq: Sanitize spurious interrupt detection of threaded irqs") made detection of spurious interrupts work for threaded handlers by: a) incrementing a counter every time the thread returns IRQ_HANDLED, and b) checking whether that counter has increased every time the thread is woken. However for oneshot interrupts, the commit unmasks the interrupt before incrementing the counter. If another interrupt occurs right after unmasking but before the counter is incremented, that interrupt is incorrectly considered spurious: time | irq_thread() | irq_thread_fn() | action->thread_fn() | irq_finalize_oneshot() | unmask_threaded_irq() /* interrupt is unmasked */ | | /* interrupt fires, incorrectly deemed spurious */ | | atomic_inc(&desc->threads_handled); /* counter is incremented */ v This is observed with a hi3110 CAN controller receiving data at high volume (from a separate machine sending with "cangen -g 0 -i -x"): The controller signals a huge number of interrupts (hundreds of millions per day) and every second there are about a dozen which are deemed spurious. In theory with high CPU load and the presence of higher priority tasks, the number of incorrectly detected spurious interrupts might increase beyond the 99,900 threshold and cause disablement of the interrupt. In practice it just increments the spurious interrupt count. But that can cause people to waste time investigating it over and over. Fix it by moving the accounting before the invocation of irq_finalize_oneshot(). [ tglx: Folded change log update ] Fixes: 1e77d0a1ed74 ("genirq: Sanitize spurious interrupt detection of threaded irqs") Signed-off-by: Lukas Wunner Signed-off-by: Thomas Gleixner Cc: Mathias Duckeck Cc: Akshay Bhat Cc: Casey Fitzpatrick Cc: stable@vger.kernel.org # v3.16+ Link: https://lkml.kernel.org/r/1dfd8bbd16163940648045495e3e9698e63b50ad.1539867047.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0df2b44dac7c..83cea913983c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -864,6 +864,9 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) local_bh_disable(); ret = action->thread_fn(action->irq, action->dev_id); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + irq_finalize_oneshot(desc, action); local_bh_enable(); return ret; @@ -880,6 +883,9 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, irqreturn_t ret; ret = action->thread_fn(action->irq, action->dev_id); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + irq_finalize_oneshot(desc, action); return ret; } @@ -957,8 +963,6 @@ static int irq_thread(void *data) irq_thread_check_affinity(desc, action); action_ret = handler_fn(desc, action); - if (action_ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); From a88fd5847b939a87f4814b41814c74ec1d2a5309 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2018 10:07:44 -0400 Subject: [PATCH 432/812] NFSv4.1: Fix the r/wsize checking commit 943cff67b842839f4f35364ba2db5c2d3f025d94 upstream. The intention of nfs4_session_set_rwsize() was to cap the r/wsize to the buffer sizes negotiated by the CREATE_SESSION. The initial code had a bug whereby we would not check the values negotiated by nfs_probe_fsinfo() (the assumption being that CREATE_SESSION will always negotiate buffer values that are sane w.r.t. the server's preferred r/wsizes) but would only check values set by the user in the 'mount' command. The code was changed in 4.11 to _always_ set the r/wsize, meaning that we now never use the server preferred r/wsizes. This is the regression that this patch fixes. Also rename the function to nfs4_session_limit_rwsize() in order to avoid future confusion. Fixes: 033853325fe3 (NFSv4.1 respect server's max size in CREATE_SESSION") Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4client.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 63498e1a542a..ae91d1e450be 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -879,10 +879,10 @@ EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. - * Set the mount rsize and wsize with negotiated fore channel - * attributes which will be bound checked in nfs_server_set_fsinfo. + * Limit the mount rsize, wsize and dtsize using negotiated fore + * channel attributes. */ -static void nfs4_session_set_rwsize(struct nfs_server *server) +static void nfs4_session_limit_rwsize(struct nfs_server *server) { #ifdef CONFIG_NFS_V4_1 struct nfs4_session *sess; @@ -895,9 +895,11 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (!server->rsize || server->rsize > server_resp_sz) + if (server->dtsize > server_resp_sz) + server->dtsize = server_resp_sz; + if (server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (!server->wsize || server->wsize > server_rqst_sz) + if (server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } @@ -944,12 +946,12 @@ static int nfs4_server_common_setup(struct nfs_server *server, (unsigned long long) server->fsid.minor); nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); - nfs4_session_set_rwsize(server); - error = nfs_probe_fsinfo(server, mntfh, fattr); if (error < 0) goto out; + nfs4_session_limit_rwsize(server); + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; From e6914e1af4819455ad654ce83d1b51509b1915d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Oct 2018 15:54:15 -0400 Subject: [PATCH 433/812] nfsd: Fix an Oops in free_session() commit bb6ad5572c0022e17e846b382d7413cdcf8055be upstream. In call_xpt_users(), we delete the entry from the list, but we do not reinitialise it. This triggers the list poisoning when we later call unregister_xpt_user() in nfsd4_del_conns(). Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc_xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a6cbb2104667..71f15da72f02 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -945,7 +945,7 @@ static void call_xpt_users(struct svc_xprt *xprt) spin_lock(&xprt->xpt_lock); while (!list_empty(&xprt->xpt_users)) { u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list); - list_del(&u->list); + list_del_init(&u->list); u->callback(u); } spin_unlock(&xprt->xpt_lock); From 637276555f3a9c35dcbe136d1bc108f5f708d500 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 28 Sep 2018 20:41:48 +0300 Subject: [PATCH 434/812] lockd: fix access beyond unterminated strings in prints commit 93f38b6fae0ea8987e22d9e6c38f8dfdccd867ee upstream. printk format used %*s instead of %.*s, so hostname_len does not limit the number of bytes accessed from hostname. Signed-off-by: Amir Goldstein Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/lockd/host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index d716c9993a26..c7eb47f2fb6c 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -340,7 +340,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, }; struct lockd_net *ln = net_generic(net, lockd_net_id); - dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, + dprintk("lockd: %s(host='%.*s', vers=%u, proto=%s)\n", __func__, (int)hostname_len, hostname, rqstp->rq_vers, (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); From 4cc537f4a947e7f11ca84f3f94ad986761558980 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 3 Oct 2018 11:43:59 -0500 Subject: [PATCH 435/812] dm ioctl: harden copy_params()'s copy_from_user() from malicious users commit 800a7340ab7dd667edf95e74d8e4f23a17e87076 upstream. In copy_params(), the struct 'dm_ioctl' is first copied from the user space buffer 'user' to 'param_kernel' and the field 'data_size' is checked against 'minimum_data_size' (size of 'struct dm_ioctl' payload up to its 'data' member). If the check fails, an error code EINVAL will be returned. Otherwise, param_kernel->data_size is used to do a second copy, which copies from the same user-space buffer to 'dmi'. After the second copy, only 'dmi->data_size' is checked against 'param_kernel->data_size'. Given that the buffer 'user' resides in the user space, a malicious user-space process can race to change the content in the buffer between the two copies. This way, the attacker can inject inconsistent data into 'dmi' (versus previously validated 'param_kernel'). Fix redundant copying of 'minimum_data_size' from user-space buffer by using the first copy stored in 'param_kernel'. Also remove the 'data_size' check after the second copy because it is now unnecessary. Cc: stable@vger.kernel.org Signed-off-by: Wenwen Wang Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 6865b186f749..9371194677dc 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1685,8 +1685,7 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla } static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, - int ioctl_flags, - struct dm_ioctl **param, int *param_flags) + int ioctl_flags, struct dm_ioctl **param, int *param_flags) { struct dm_ioctl *dmi; int secure_data; @@ -1734,18 +1733,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern return -ENOMEM; } - if (copy_from_user(dmi, user, param_kernel->data_size)) - goto bad; + /* Copy from param_kernel (which was already copied from user) */ + memcpy(dmi, param_kernel, minimum_data_size); + if (copy_from_user(&dmi->data, (char __user *)user + minimum_data_size, + param_kernel->data_size - minimum_data_size)) + goto bad; data_copied: - /* - * Abort if something changed the ioctl data while it was being copied. - */ - if (dmi->data_size != param_kernel->data_size) { - DMERR("rejecting ioctl: data size modified while processing parameters"); - goto bad; - } - /* Wipe the user buffer so we do not return it to userspace */ if (secure_data && clear_user(user, param_kernel->data_size)) goto bad; From 901c46620f2fb8f93e0888b658e424f38e706cba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Oct 2018 06:12:50 +0000 Subject: [PATCH 436/812] powerpc/msi: Fix compile error on mpc83xx commit 0f99153def98134403c9149128e59d3e1786cf04 upstream. mpic_get_primary_version() is not defined when not using MPIC. The compile error log like: arch/powerpc/sysdev/built-in.o: In function `fsl_of_msi_probe': fsl_msi.c:(.text+0x150c): undefined reference to `fsl_mpic_primary_get_version' Signed-off-by: Jia Hongtao Signed-off-by: Scott Wood Reported-by: Radu Rendec Fixes: 807d38b73b6 ("powerpc/mpic: Add get_version API both for internal and external use") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mpic.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 98697611e7b3..705f4dc5073b 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -392,7 +392,14 @@ extern struct bus_type mpic_subsys; #define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ /* Get the version of primary MPIC */ +#ifdef CONFIG_MPIC extern u32 fsl_mpic_primary_get_version(void); +#else +static inline u32 fsl_mpic_primary_get_version(void) +{ + return 0; +} +#endif /* Allocate the controller structure and setup the linux irq descs * for the range if interrupts passed in. No HW initialization is From d3105e63dc72969cf5cc643154146c1a181043c7 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 27 Oct 2018 01:46:34 +0300 Subject: [PATCH 437/812] MIPS: OCTEON: fix out of bounds array access on CN68XX commit c0fae7e2452b90c31edd2d25eb3baf0c76b400ca upstream. The maximum number of interfaces is returned by cvmx_helper_get_number_of_interfaces(), and the value is used to access interface_port_count[]. When CN68XX support was added, we forgot to increase the array size. Fix that. Fixes: 2c8c3f0201333 ("MIPS: Octeon: Support additional interfaces on CN68XX") Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20949/ Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/executive/cvmx-helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c index 376701f41cc2..692bbc1c5b79 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c @@ -67,7 +67,7 @@ void (*cvmx_override_pko_queue_priority) (int pko_port, void (*cvmx_override_ipd_port_setup) (int ipd_port); /* Port count per interface */ -static int interface_port_count[5]; +static int interface_port_count[9]; /* Port last configured link info index by IPD/PKO port */ static cvmx_helper_link_info_t From 8e5e88948e5f02e7441c2dd77b33c6fc26a401d9 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 3 Oct 2018 13:21:07 +0100 Subject: [PATCH 438/812] TC: Set DMA masks for devices commit 3f2aa244ee1a0d17ed5b6c86564d2c1b24d1c96b upstream. Fix a TURBOchannel support regression with commit 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask") that caused coherent DMA allocations to produce a warning such as: defxx: v1.11 2014/07/01 Lawrence V. Stefani and others tc1: DEFTA at MMIO addr = 0x1e900000, IRQ = 20, Hardware addr = 08-00-2b-a3-a3-29 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at ./include/linux/dma-mapping.h:516 dfx_dev_register+0x670/0x678 Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.19.0-rc6 #2 Stack : ffffffff8009ffc0 fffffffffffffec0 0000000000000000 ffffffff80647650 0000000000000000 0000000000000000 ffffffff806f5f80 ffffffffffffffff 0000000000000000 0000000000000000 0000000000000001 ffffffff8065d4e8 98000000031b6300 ffffffff80563478 ffffffff805685b0 ffffffffffffffff 0000000000000000 ffffffff805d6720 0000000000000204 ffffffff80388df8 0000000000000000 0000000000000009 ffffffff8053efd0 ffffffff806657d0 0000000000000000 ffffffff803177f8 0000000000000000 ffffffff806d0000 9800000003078000 980000000307b9e0 000000001e900000 ffffffff80067940 0000000000000000 ffffffff805d6720 0000000000000204 ffffffff80388df8 ffffffff805176c0 ffffffff8004dc78 0000000000000000 ffffffff80067940 ... Call Trace: [] show_stack+0xa0/0x130 [] __warn+0x128/0x170 ---[ end trace b1d1e094f67f3bb2 ]--- This is because the TURBOchannel bus driver fails to set the coherent DMA mask for devices enumerated. Set the regular and coherent DMA masks for TURBOchannel devices then, observing that the bus protocol supports a 34-bit (16GiB) DMA address space, by interpreting the value presented in the address cycle across the 32 `ad' lines as a 32-bit word rather than byte address[1]. The architectural size of the TURBOchannel DMA address space exceeds the maximum amount of RAM any actual TURBOchannel system in existence may have, hence both masks are the same. This removes the warning shown above. References: [1] "TURBOchannel Hardware Specification", EK-369AA-OD-007B, Digital Equipment Corporation, January 1993, Section "DMA", pp. 1-15 -- 1-17 Signed-off-by: Maciej W. Rozycki Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20835/ Fixes: 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask") Cc: stable@vger.kernel.org # 4.16+ Cc: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- drivers/tc/tc.c | 8 +++++++- include/linux/tc.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tc/tc.c b/drivers/tc/tc.c index 3be9519654e5..cf3fad2cb871 100644 --- a/drivers/tc/tc.c +++ b/drivers/tc/tc.c @@ -2,7 +2,7 @@ * TURBOchannel bus services. * * Copyright (c) Harald Koerfgen, 1998 - * Copyright (c) 2001, 2003, 2005, 2006 Maciej W. Rozycki + * Copyright (c) 2001, 2003, 2005, 2006, 2018 Maciej W. Rozycki * Copyright (c) 2005 James Simmons * * This file is subject to the terms and conditions of the GNU @@ -10,6 +10,7 @@ * directory of this archive for more details. */ #include +#include #include #include #include @@ -92,6 +93,11 @@ static void __init tc_bus_add_devices(struct tc_bus *tbus) tdev->dev.bus = &tc_bus_type; tdev->slot = slot; + /* TURBOchannel has 34-bit DMA addressing (16GiB space). */ + tdev->dma_mask = DMA_BIT_MASK(34); + tdev->dev.dma_mask = &tdev->dma_mask; + tdev->dev.coherent_dma_mask = DMA_BIT_MASK(34); + for (i = 0; i < 8; i++) { tdev->firmware[i] = readb(module + offset + TC_FIRM_VER + 4 * i); diff --git a/include/linux/tc.h b/include/linux/tc.h index f92511e57cdb..a60639f37963 100644 --- a/include/linux/tc.h +++ b/include/linux/tc.h @@ -84,6 +84,7 @@ struct tc_dev { device. */ struct device dev; /* Generic device interface. */ struct resource resource; /* Address space of this device. */ + u64 dma_mask; /* DMA addressable range. */ char vendor[9]; char name[9]; char firmware[9]; From 54e9c8cf659f3f8d07a610b2836025cde6556f46 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Fri, 17 Aug 2018 22:42:28 +0800 Subject: [PATCH 439/812] kgdboc: Passing ekgdboc to command line causes panic commit 1bd54d851f50dea6af30c3e6ff4f3e9aab5558f9 upstream. kgdboc_option_setup does not check input argument before passing it to strlen. The argument would be a NULL pointer if "ekgdboc", without its value, is set in command line and thus cause the following panic. PANIC: early exception 0xe3 IP 10:ffffffff8fbbb620 error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.18-rc8+ #1 [ 0.000000] RIP: 0010:strlen+0x0/0x20 ... [ 0.000000] Call Trace [ 0.000000] ? kgdboc_option_setup+0x9/0xa0 [ 0.000000] ? kgdboc_early_init+0x6/0x1b [ 0.000000] ? do_early_param+0x4d/0x82 [ 0.000000] ? parse_args+0x212/0x330 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_options+0x20/0x23 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_param+0x2d/0x39 [ 0.000000] ? setup_arch+0x2f7/0xbf4 [ 0.000000] ? start_kernel+0x5e/0x4c2 [ 0.000000] ? load_ucode_bsp+0x113/0x12f [ 0.000000] ? secondary_startup_64+0xa5/0xb0 This patch adds a check to prevent the panic. Cc: stable@vger.kernel.org Cc: jason.wessel@windriver.com Cc: gregkh@linuxfoundation.org Cc: jslaby@suse.com Signed-off-by: He Zhe Reviewed-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index a260cde743e2..2db68dfe497d 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -133,6 +133,11 @@ static void kgdboc_unregister_kbd(void) static int kgdboc_option_setup(char *opt) { + if (!opt) { + pr_err("kgdboc: config string not provided\n"); + return -EINVAL; + } + if (strlen(opt) >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdboc: config string too long\n"); return -ENOSPC; From afe7fb75a67b8f80042f019e7bf60e60d0d5c1fb Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 8 Nov 2018 08:35:06 +0100 Subject: [PATCH 440/812] xen: fix xen_qlock_wait() commit d3132b3860f6cf35ff7609a76bbcdbb814bd027c upstream. Commit a856531951dc80 ("xen: make xen_qlock_wait() nestable") introduced a regression for Xen guests running fully virtualized (HVM or PVH mode). The Xen hypervisor wouldn't return from the poll hypercall with interrupts disabled in case of an interrupt (for PV guests it does). So instead of disabling interrupts in xen_qlock_wait() use a nesting counter to avoid calling xen_clear_irq_pending() in case xen_qlock_wait() is nested. Fixes: a856531951dc80 ("xen: make xen_qlock_wait() nestable") Cc: stable@vger.kernel.org Reported-by: Sander Eikelenboom Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Tested-by: Sander Eikelenboom Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 9d81231b86a8..85872a08994a 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); +static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest); static bool xen_pvspin = true; #ifdef CONFIG_QUEUED_SPINLOCKS @@ -41,25 +43,25 @@ static void xen_qlock_kick(int cpu) */ static void xen_qlock_wait(u8 *byte, u8 val) { - unsigned long flags; int irq = __this_cpu_read(lock_kicker_irq); + atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest); /* If kicker interrupts not initialized yet, just spin */ if (irq == -1 || in_nmi()) return; - /* Guard against reentry. */ - local_irq_save(flags); + /* Detect reentry. */ + atomic_inc(nest_cnt); - /* If irq pending already clear it. */ - if (xen_test_irq_pending(irq)) { + /* If irq pending already and no nested call clear it. */ + if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) { xen_clear_irq_pending(irq); } else if (READ_ONCE(*byte) == val) { /* Block until irq becomes pending (or a spurious wakeup) */ xen_poll_irq(irq); } - local_irq_restore(flags); + atomic_dec(nest_cnt); } #else /* CONFIG_QUEUED_SPINLOCKS */ From 476b64582620924dac5c205a365e2b91633063d3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Sep 2018 23:22:40 -0400 Subject: [PATCH 441/812] media: em28xx: use a default format if TRY_FMT fails commit f823ce2a1202d47110a7ef86b65839f0be8adc38 upstream. Follow the V4L2 spec, as warned by v4l2-compliance: warn: v4l2-test-formats.cpp(732): TRY_FMT cannot handle an invalid pixelformat. warn: v4l2-test-formats.cpp(733): This may or may not be a problem. For more information see: warn: v4l2-test-formats.cpp(734): http://www.mail-archive.com/linux-media@vger.kernel.org/msg56550.html Cc: stable@vger.kernel.org Fixes: bddcf63313c6 ("V4L/DVB (9927): em28xx: use a more standard way to specify video formats") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-video.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index 6a3cf342e087..fe6605efc40f 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -1288,9 +1288,9 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, fmt = format_by_fourcc(f->fmt.pix.pixelformat); if (!fmt) { - em28xx_videodbg("Fourcc format (%08x) invalid.\n", - f->fmt.pix.pixelformat); - return -EINVAL; + fmt = &format[0]; + em28xx_videodbg("Fourcc format (%08x) invalid. Using default (%08x).\n", + f->fmt.pix.pixelformat, fmt->fourcc); } if (dev->board.is_em2800) { From 77a61c5a570551be9698f7b8deb212fa9c9fd135 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 14 Sep 2018 00:20:21 -0400 Subject: [PATCH 442/812] media: em28xx: fix input name for Terratec AV 350 commit 15644bfa195bd166d0a5ed76ae2d587f719c3dac upstream. Instead of using a register value, use an AMUX name, as otherwise VIDIOC_G_AUDIO would fail. Cc: stable@vger.kernel.org Fixes: 766ed64de554 ("V4L/DVB (11827): Add support for Terratec Grabster AV350") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-cards.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 394004607059..7c7dfaed9d15 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -2021,13 +2021,13 @@ struct em28xx_board em28xx_boards[] = { .input = { { .type = EM28XX_VMUX_COMPOSITE1, .vmux = TVP5150_COMPOSITE1, - .amux = EM28XX_AUDIO_SRC_LINE, + .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_av350_unmute_gpio, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, - .amux = EM28XX_AUDIO_SRC_LINE, + .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_av350_unmute_gpio, } }, }, From 1fe3db85c38007c48200d1b100ee16652de0c12e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Sep 2018 22:46:29 -0400 Subject: [PATCH 443/812] media: em28xx: make v4l2-compliance happier by starting sequence on zero commit afeaade90db4c5dab93f326d9582be1d5954a198 upstream. The v4l2-compliance tool complains if a video doesn't start with a zero sequence number. While this shouldn't cause any real problem for apps, let's make it happier, in order to better check the v4l2-compliance differences before and after patchsets. This is actually an old issue. It is there since at least its videobuf2 conversion, e. g. changeset 3829fadc461 ("[media] em28xx: convert to videobuf2"), if VB1 wouldn't suffer from the same issue. Cc: stable@vger.kernel.org Fixes: d3829fadc461 ("[media] em28xx: convert to videobuf2") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-video.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index fe6605efc40f..6cfcdcea27e0 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -1149,6 +1149,8 @@ static void em28xx_ctrl_notify(struct v4l2_ctrl *ctrl, void *priv) { struct em28xx *dev = priv; + dev->v4l2->field_count = 0; + /* * In the case of non-AC97 volume controls, we still need * to do some setups at em28xx, in order to mute/unmute From 4492b0b5f0a46859b12d67126fd2f68ae2674936 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Jun 2018 23:41:59 -0400 Subject: [PATCH 444/812] ext4: avoid running out of journal credits when appending to an inline file commit 8bc1379b82b8e809eef77a9fedbb75c6c297be19 upstream. Use a separate journal transaction if it turns out that we need to convert an inline file to use an data block. Otherwise we could end up failing due to not having journal credits. This addresses CVE-2018-10883. https://bugzilla.kernel.org/show_bug.cgi?id=200071 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org [fengc@google.com: 4.4 backport: adjust context] Signed-off-by: Chenbo Feng Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 --- fs/ext4/inline.c | 38 +------------------------------------- fs/ext4/xattr.c | 18 ++---------------- 3 files changed, 3 insertions(+), 56 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f5d9f82b173a..b6e25d771eea 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3039,9 +3039,6 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, extern int ext4_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int *has_inline, __u64 start, __u64 len); -extern int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed); extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline); extern int ext4_convert_inline_data(struct inode *inode); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 34c365f0e69e..1aec46733ef8 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -888,11 +888,11 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, flags |= AOP_FLAG_NOFS; if (ret == -ENOSPC) { + ext4_journal_stop(handle); ret = ext4_da_convert_inline_data_to_extent(mapping, inode, flags, fsdata); - ext4_journal_stop(handle); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; @@ -1867,42 +1867,6 @@ int ext4_inline_data_fiemap(struct inode *inode, return (error < 0 ? error : 0); } -/* - * Called during xattr set, and if we can sparse space 'needed', - * just create the extent tree evict the data to the outer block. - * - * We use jbd2 instead of page cache to move data to the 1st block - * so that the whole transaction can be committed as a whole and - * the data isn't lost because of the delayed page cache write. - */ -int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed) -{ - int error; - struct ext4_xattr_entry *entry; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - - raw_inode = ext4_raw_inode(&iloc); - entry = (struct ext4_xattr_entry *)((void *)raw_inode + - EXT4_I(inode)->i_inline_off); - if (EXT4_XATTR_LEN(entry->e_name_len) + - EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) { - error = -ENOSPC; - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); -out: - brelse(iloc.bh); - return error; -} - void ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index d6bae37489af..16cf24dc9c34 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1044,22 +1044,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; error = ext4_xattr_set_entry(i, s, inode); - if (error) { - if (error == -ENOSPC && - ext4_has_inline_data(inode)) { - error = ext4_try_to_evict_inline_data(handle, inode, - EXT4_XATTR_LEN(strlen(i->name) + - EXT4_XATTR_SIZE(i->value_len))); - if (error) - return error; - error = ext4_xattr_ibody_find(inode, i, is); - if (error) - return error; - error = ext4_xattr_set_entry(i, s, inode); - } - if (error) - return error; - } + if (error) + return error; header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); From ae6e27402c216840e2d361675a684803b319aa27 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 30 Oct 2018 13:26:15 -0400 Subject: [PATCH 445/812] Cramfs: fix abad comparison when wrap-arounds occur commit 672ca9dd13f1aca0c17516f76fc5b0e8344b3e46 upstream. It is possible for corrupted filesystem images to produce very large block offsets that may wrap when a length is added, and wrongly pass the buffer size test. Reported-by: Anatoly Trosinenko Signed-off-by: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/cramfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 355c522f3585..a6c9c2d66af1 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -185,7 +185,8 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i continue; blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_CACHE_SHIFT; blk_offset += offset; - if (blk_offset + len > BUFFER_SIZE) + if (blk_offset > BUFFER_SIZE || + blk_offset + len > BUFFER_SIZE) continue; return read_buffers[i] + blk_offset; } From fd81a7a647c6705ff0e81fd1cf3278d2bb932ba1 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 25 Sep 2018 10:31:52 -0500 Subject: [PATCH 446/812] arm64: dts: stratix10: Correct System Manager register size commit 74121b9aa3cd571ddfff014a9f47db36cae3cda9 upstream. Correct the register size of the System Manager node. Cc: stable@vger.kernel.org Fixes: 78cd6a9d8e154 ("arm64: dts: Add base stratix 10 dtsi") Signed-off-by: Thor Thayer Signed-off-by: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 445aa678f914..6a37101344aa 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -249,7 +249,7 @@ spi1: spi@ffda5000 { sysmgr: sysmgr@ffd12000 { compatible = "altr,sys-mgr", "syscon"; - reg = <0xffd12000 0x1000>; + reg = <0xffd12000 0x228>; }; /* Local timer */ From cb526c9ab85a76731c8d7fbf485d0c1196c35471 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 15 Nov 2017 10:44:58 +0100 Subject: [PATCH 447/812] soc/tegra: pmc: Fix child-node lookup commit 1dc6bd5e39a29453bdcc17348dd2a89f1aa4004e upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent pmc node could end up being prematurely freed as of_find_node_by_name() drops a reference to its first argument. Fixes: 3568df3d31d6 ("soc: tegra: Add thermal reset (thermtrip) support to PMC") Cc: stable # 4.0 Cc: Mikko Perttunen Signed-off-by: Johan Hovold Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/soc/tegra/pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index bc34cf7482fb..a4753644f4cf 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -738,7 +738,7 @@ void tegra_pmc_init_tsense_reset(struct tegra_pmc *pmc) if (!pmc->soc->has_tsense_reset) return; - np = of_find_node_by_name(pmc->dev->of_node, "i2c-thermtrip"); + np = of_get_child_by_name(pmc->dev->of_node, "i2c-thermtrip"); if (!np) { dev_warn(dev, "i2c-thermtrip node not found, %s.\n", disabled); return; From dc0a989b23a65501827d02bb94dd6a8d6d3fc8c8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 21 Aug 2018 09:42:03 +0800 Subject: [PATCH 448/812] btrfs: Handle owner mismatch gracefully when walking up tree commit 65c6e82becec33731f48786e5a30f98662c86b16 upstream. [BUG] When mounting certain crafted image, btrfs will trigger kernel BUG_ON() when trying to recover balance: kernel BUG at fs/btrfs/extent-tree.c:8956! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 662 Comm: mount Not tainted 4.18.0-rc1-custom+ #10 RIP: 0010:walk_up_proc+0x336/0x480 [btrfs] RSP: 0018:ffffb53540c9b890 EFLAGS: 00010202 Call Trace: walk_up_tree+0x172/0x1f0 [btrfs] btrfs_drop_snapshot+0x3a4/0x830 [btrfs] merge_reloc_roots+0xe1/0x1d0 [btrfs] btrfs_recover_relocation+0x3ea/0x420 [btrfs] open_ctree+0x1af3/0x1dd0 [btrfs] btrfs_mount_root+0x66b/0x740 [btrfs] mount_fs+0x3b/0x16a vfs_kern_mount.part.9+0x54/0x140 btrfs_mount+0x16d/0x890 [btrfs] mount_fs+0x3b/0x16a vfs_kern_mount.part.9+0x54/0x140 do_mount+0x1fd/0xda0 ksys_mount+0xba/0xd0 __x64_sys_mount+0x21/0x30 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe [CAUSE] Extent tree corruption. In this particular case, reloc tree root's owner is DATA_RELOC_TREE (should be TREE_RELOC), thus its backref is corrupted and we failed the owner check in walk_up_tree(). [FIX] It's pretty hard to take care of every extent tree corruption, but at least we can remove such BUG_ON() and exit more gracefully. And since in this particular image, DATA_RELOC_TREE and TREE_RELOC share the same root (which is obviously invalid), we needs to make __del_reloc_root() more robust to detect such invalid sharing to avoid possible NULL dereference as root->node can be NULL in this case. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200411 Reported-by: Xu Wen CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 18 ++++++++++++------ fs/btrfs/relocation.c | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a72f941ca750..722e11b318a7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8704,15 +8704,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (eb == root->node) { if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = eb->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(eb)); + else if (root->root_key.objectid != btrfs_header_owner(eb)) + goto owner_mismatch; } else { if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = path->nodes[level + 1]->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(path->nodes[level + 1])); + else if (root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])) + goto owner_mismatch; } btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); @@ -8720,6 +8719,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, wc->refs[level] = 0; wc->flags[level] = 0; return 0; + +owner_mismatch: + btrfs_err_rl(root->fs_info, "unexpected tree owner, have %llu expect %llu", + btrfs_header_owner(eb), root->root_key.objectid); + return -EUCLEAN; } static noinline int walk_down_tree(struct btrfs_trans_handle *trans, @@ -8773,6 +8777,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, ret = walk_up_proc(trans, root, path, wc); if (ret > 0) return 0; + if (ret < 0) + return ret; if (path->locks[level]) { btrfs_tree_unlock_rw(path->nodes[level], diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cfe913d2d3df..d6ccfb31aef0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1318,7 +1318,7 @@ static void __del_reloc_root(struct btrfs_root *root) struct mapping_node *node = NULL; struct reloc_control *rc = root->fs_info->reloc_ctl; - if (rc) { + if (rc && root->node) { spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, root->node->start); From ff58ad5f344d9cd6b706271781b2d11baa32e18d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 21 Aug 2018 09:53:47 +0800 Subject: [PATCH 449/812] btrfs: locking: Add extra check in btrfs_init_new_buffer() to avoid deadlock commit b72c3aba09a53fc7c1824250d71180ca154517a7 upstream. [BUG] For certain crafted image, whose csum root leaf has missing backref, if we try to trigger write with data csum, it could cause deadlock with the following kernel WARN_ON(): WARNING: CPU: 1 PID: 41 at fs/btrfs/locking.c:230 btrfs_tree_lock+0x3e2/0x400 CPU: 1 PID: 41 Comm: kworker/u4:1 Not tainted 4.18.0-rc1+ #8 Workqueue: btrfs-endio-write btrfs_endio_write_helper RIP: 0010:btrfs_tree_lock+0x3e2/0x400 Call Trace: btrfs_alloc_tree_block+0x39f/0x770 __btrfs_cow_block+0x285/0x9e0 btrfs_cow_block+0x191/0x2e0 btrfs_search_slot+0x492/0x1160 btrfs_lookup_csum+0xec/0x280 btrfs_csum_file_blocks+0x2be/0xa60 add_pending_csums+0xaf/0xf0 btrfs_finish_ordered_io+0x74b/0xc90 finish_ordered_fn+0x15/0x20 normal_work_helper+0xf6/0x500 btrfs_endio_write_helper+0x12/0x20 process_one_work+0x302/0x770 worker_thread+0x81/0x6d0 kthread+0x180/0x1d0 ret_from_fork+0x35/0x40 [CAUSE] That crafted image has missing backref for csum tree root leaf. And when we try to allocate new tree block, since there is no EXTENT/METADATA_ITEM for csum tree root, btrfs consider it's free slot and use it. The extent tree of the image looks like: Normal image | This fuzzed image ----------------------------------+-------------------------------- BG 29360128 | BG 29360128 One empty slot | One empty slot 29364224: backref to UUID tree | 29364224: backref to UUID tree Two empty slots | Two empty slots 29376512: backref to CSUM tree | One empty slot (bad type) <<< 29380608: backref to D_RELOC tree | 29380608: backref to D_RELOC tree ... | ... Since bytenr 29376512 has no METADATA/EXTENT_ITEM, when btrfs try to alloc tree block, it's an valid slot for btrfs. And for finish_ordered_write, when we need to insert csum, we try to CoW csum tree root. By accident, empty slots at bytenr BG_OFFSET, BG_OFFSET + 8K, BG_OFFSET + 12K is already used by tree block COW for other trees, the next empty slot is BG_OFFSET + 16K, which should be the backref for CSUM tree. But due to the bad type, btrfs can recognize it and still consider it as an empty slot, and will try to use it for csum tree CoW. Then in the following call trace, we will try to lock the new tree block, which turns out to be the old csum tree root which is already locked: btrfs_search_slot() called on csum tree root, which is at 29376512 |- btrfs_cow_block() |- btrfs_set_lock_block() | |- Now locks tree block 29376512 (old csum tree root) |- __btrfs_cow_block() |- btrfs_alloc_tree_block() |- btrfs_reserve_extent() | Now it returns tree block 29376512, which extent tree | shows its empty slot, but it's already hold by csum tree |- btrfs_init_new_buffer() |- btrfs_tree_lock() | Triggers WARN_ON(eb->lock_owner == current->pid) |- wait_event() Wait lock owner to release the lock, but it's locked by ourself, so it will deadlock [FIX] This patch will do the lock_owner and current->pid check at btrfs_init_new_buffer(). So above deadlock can be avoided. Since such problem can only happen in crafted image, we will still trigger kernel warning for later aborted transaction, but with a little more meaningful warning message. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200405 Reported-by: Xu Wen CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 722e11b318a7..6f0ecaf09962 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7835,6 +7835,20 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, buf = btrfs_find_create_tree_block(root, bytenr); if (!buf) return ERR_PTR(-ENOMEM); + + /* + * Extra safety check in case the extent tree is corrupted and extent + * allocator chooses to use a tree block which is already used and + * locked. + */ + if (buf->lock_owner == current->pid) { + btrfs_err_rl(root->fs_info, +"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected", + buf->start, btrfs_header_owner(buf), current->pid); + free_extent_buffer(buf); + return ERR_PTR(-EUCLEAN); + } + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); From 98edddde5a2cccf6daf6428d9d03f82027ab62d8 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 17:18:14 -0400 Subject: [PATCH 450/812] btrfs: iterate all devices during trim, instead of fs_devices::alloc_list commit d4e329de5e5e21594df2e0dd59da9acee71f133b upstream. btrfs_trim_fs iterates over the fs_devices->alloc_list while holding the device_list_mutex. The problem is that ->alloc_list is protected by the chunk mutex. We don't want to hold the chunk mutex over the trim of the entire file system. Fortunately, the ->dev_list list is protected by the dev_list mutex and while it will give us all devices, including read-only devices, we already just skip the read-only devices. Then we can continue to take and release the chunk mutex while scanning each device. Fixes: 499f377f49f ("btrfs: iterate over unused chunk space in FITRIM") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6f0ecaf09962..4ebfe130bee0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10751,8 +10751,8 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) } mutex_lock(&root->fs_info->fs_devices->device_list_mutex); - devices = &root->fs_info->fs_devices->alloc_list; - list_for_each_entry(device, devices, dev_alloc_list) { + devices = &root->fs_info->fs_devices->devices; + list_for_each_entry(device, devices, dev_list) { ret = btrfs_trim_free_extents(device, range->minlen, &group_trimmed); if (ret) From 639a61c6bab798193e9e0a60dc2b988ae66847fd Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 17:18:15 -0400 Subject: [PATCH 451/812] btrfs: don't attempt to trim devices that don't support it commit 0be88e367fd8fbdb45257615d691f4675dda062f upstream. We check whether any device the file system is using supports discard in the ioctl call, but then we attempt to trim free extents on every device regardless of whether discard is supported. Due to the way we mask off EOPNOTSUPP, we can end up issuing the trim operations on each free range on devices that don't support it, just wasting time. Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4ebfe130bee0..59f0664de70f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10629,6 +10629,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, *trimmed = 0; + /* Discard not supported = nothing to do. */ + if (!blk_queue_discard(bdev_get_queue(device->bdev))) + return 0; + /* Not writeable = nothing to do. */ if (!device->writeable) return 0; From 6968f018a95ad377b644b2f4b449aa8a4f6620d8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 12 Sep 2018 10:45:45 -0400 Subject: [PATCH 452/812] btrfs: wait on caching when putting the bg cache commit 3aa7c7a31c26321696b92841d5103461c6f3f517 upstream. While testing my backport I noticed there was a panic if I ran generic/416 generic/417 generic/418 all in a row. This just happened to uncover a race where we had outstanding IO after we destroy all of our workqueues, and then we'd go to queue the endio work on those free'd workqueues. This is because we aren't waiting for the caching threads to be done before freeing everything up, so to fix this make sure we wait on any outstanding caching that's being done before we free up the block group, so we're sure to be done with all IO by the time we get to btrfs_stop_all_workers(). This fixes the panic I was seeing consistently in testing. ------------[ cut here ]------------ kernel BUG at fs/btrfs/volumes.c:6112! SMP PTI Modules linked in: CPU: 1 PID: 27165 Comm: kworker/u4:7 Not tainted 4.16.0-02155-g3553e54a578d-dirty #875 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Workqueue: btrfs-cache btrfs_cache_helper RIP: 0010:btrfs_map_bio+0x346/0x370 RSP: 0000:ffffc900061e79d0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff880071542e00 RCX: 0000000000533000 RDX: ffff88006bb74380 RSI: 0000000000000008 RDI: ffff880078160000 RBP: 0000000000000001 R08: ffff8800781cd200 R09: 0000000000503000 R10: ffff88006cd21200 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8800781cd200 R15: ffff880071542e00 FS: 0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000817ffc4 CR3: 0000000078314000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btree_submit_bio_hook+0x8a/0xd0 submit_one_bio+0x5d/0x80 read_extent_buffer_pages+0x18a/0x320 btree_read_extent_buffer_pages+0xbc/0x200 ? alloc_extent_buffer+0x359/0x3e0 read_tree_block+0x3d/0x60 read_block_for_search.isra.30+0x1a5/0x360 btrfs_search_slot+0x41b/0xa10 btrfs_next_old_leaf+0x212/0x470 caching_thread+0x323/0x490 normal_work_helper+0xc5/0x310 process_one_work+0x141/0x340 worker_thread+0x44/0x3c0 kthread+0xf8/0x130 ? process_one_work+0x340/0x340 ? kthread_bind+0x10/0x10 ret_from_fork+0x35/0x40 RIP: btrfs_map_bio+0x346/0x370 RSP: ffffc900061e79d0 ---[ end trace 827eb13e50846033 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 59f0664de70f..528b68cdc350 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9521,6 +9521,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group = btrfs_lookup_first_block_group(info, last); while (block_group) { + wait_block_group_cache_done(block_group); spin_lock(&block_group->lock); if (block_group->iref) break; From 5e7a422384626abbd41ff229bbf18b7119053b9e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 07:18:00 -0400 Subject: [PATCH 453/812] btrfs: reset max_extent_size on clear in a bitmap commit 553cceb49681d60975d00892877d4c871bf220f9 upstream. We need to clear the max_extent_size when we clear bits from a bitmap since it could have been from the range that contains the max_extent_size. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 45934deacfd7..2b4c5f298325 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1699,6 +1699,8 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; + if (info->max_extent_size > ctl->unit) + info->max_extent_size = 0; } static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, From da36a0a52348e6301dba61c5d764ac8275d173a1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 07:18:02 -0400 Subject: [PATCH 454/812] btrfs: make sure we create all new block groups commit 545e3366db823dc3342ca9d7fea803f829c9062f upstream. Allocating new chunks modifies both the extent and chunk tree, which can trigger new chunk allocations. So instead of doing list_for_each_safe, just do while (!list_empty()) so we make sure we don't exit with other pending bg's still on our list. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 528b68cdc350..80cd28456f08 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9912,7 +9912,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_block_group_cache *block_group, *tmp; + struct btrfs_block_group_cache *block_group; struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_block_group_item item; struct btrfs_key key; @@ -9920,7 +9920,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, bool can_flush_pending_bgs = trans->can_flush_pending_bgs; trans->can_flush_pending_bgs = false; - list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + while (!list_empty(&trans->new_bgs)) { + block_group = list_first_entry(&trans->new_bgs, + struct btrfs_block_group_cache, + bg_list); if (ret) goto next; From b1223028e6882ba36430cfb328f66605c7034f1e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 9 Oct 2018 15:05:29 +0100 Subject: [PATCH 455/812] Btrfs: fix wrong dentries after fsync of file that got its parent replaced commit 0f375eed92b5a407657532637ed9652611a682f5 upstream. In a scenario like the following: mkdir /mnt/A # inode 258 mkdir /mnt/B # inode 259 touch /mnt/B/bar # inode 260 sync mv /mnt/B/bar /mnt/A/bar mv -T /mnt/A /mnt/B fsync /mnt/B/bar After replaying the log we end up with file bar having 2 hard links, both with the name 'bar' and one in the directory with inode number 258 and the other in the directory with inode number 259. Also, we end up with the directory inode 259 still existing and with the directory inode 258 still named as 'A', instead of 'B'. In this scenario, file 'bar' should only have one hard link, located at directory inode 258, the directory inode 259 should not exist anymore and the name for directory inode 258 should be 'B'. This incorrect behaviour happens because when attempting to log the old parents of an inode, we skip any parents that no longer exist. Fix this by forcing a full commit if an old parent no longer exists. A test case for fstests follows soon. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2c7f9a5f8717..63f59f17c97e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5240,9 +5240,33 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, dir_inode = btrfs_iget(root->fs_info->sb, &inode_key, root, NULL); - /* If parent inode was deleted, skip it. */ - if (IS_ERR(dir_inode)) - continue; + /* + * If the parent inode was deleted, return an error to + * fallback to a transaction commit. This is to prevent + * getting an inode that was moved from one parent A to + * a parent B, got its former parent A deleted and then + * it got fsync'ed, from existing at both parents after + * a log replay (and the old parent still existing). + * Example: + * + * mkdir /mnt/A + * mkdir /mnt/B + * touch /mnt/B/bar + * sync + * mv /mnt/B/bar /mnt/A/bar + * mv -T /mnt/A /mnt/B + * fsync /mnt/B/bar + * + * + * If we ignore the old parent B which got deleted, + * after a log replay we would have file bar linked + * at both parents and the old parent B would still + * exist. + */ + if (IS_ERR(dir_inode)) { + ret = PTR_ERR(dir_inode); + goto out; + } ret = btrfs_log_inode(trans, root, dir_inode, LOG_INODE_ALL, 0, LLONG_MAX, ctx); From b5bb62e56134180d802649a2954163454e551b7a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 10 Aug 2018 10:20:26 +0800 Subject: [PATCH 456/812] btrfs: qgroup: Dirty all qgroups before rescan commit 9c7b0c2e8dbfbcd80a71e2cbfe02704f26c185c6 upstream. [BUG] In the following case, rescan won't zero out the number of qgroup 1/0: $ mkfs.btrfs -fq $DEV $ mount $DEV /mnt $ btrfs quota enable /mnt $ btrfs qgroup create 1/0 /mnt $ btrfs sub create /mnt/sub $ btrfs qgroup assign 0/257 1/0 /mnt $ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000 $ btrfs sub snap /mnt/sub /mnt/snap $ btrfs quota rescan -w /mnt $ btrfs qgroup show -pcre /mnt qgroupid rfer excl max_rfer max_excl parent child -------- ---- ---- -------- -------- ------ ----- 0/5 16.00KiB 16.00KiB none none --- --- 0/257 1016.00KiB 16.00KiB none none 1/0 --- 0/258 1016.00KiB 16.00KiB none none --- --- 1/0 1016.00KiB 16.00KiB none none --- 0/257 So far so good, but: $ btrfs qgroup remove 0/257 1/0 /mnt WARNING: quotas may be inconsistent, rescan needed $ btrfs quota rescan -w /mnt $ btrfs qgroup show -pcre /mnt qgoupid rfer excl max_rfer max_excl parent child -------- ---- ---- -------- -------- ------ ----- 0/5 16.00KiB 16.00KiB none none --- --- 0/257 1016.00KiB 16.00KiB none none --- --- 0/258 1016.00KiB 16.00KiB none none --- --- 1/0 1016.00KiB 16.00KiB none none --- --- ^^^^^^^^^^ ^^^^^^^^ not cleared [CAUSE] Before rescan we call qgroup_rescan_zero_tracking() to zero out all qgroups' accounting numbers. However we don't mark all qgroups dirty, but rely on rescan to do so. If we have any high level qgroup without children, it won't be marked dirty during rescan, since we cannot reach that qgroup. This will cause QGROUP_INFO items of childless qgroups never get updated in the quota tree, thus their numbers will stay the same in "btrfs qgroup show" output. [FIX] Just mark all qgroups dirty in qgroup_rescan_zero_tracking(), so even if we have childless qgroups, their QGROUP_INFO items will still get updated during rescan. Reported-by: Misono Tomohiro CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: Misono Tomohiro Tested-by: Misono Tomohiro Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a751937dded5..90e29d40aa82 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2446,6 +2446,7 @@ qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) qgroup->rfer_cmpr = 0; qgroup->excl = 0; qgroup->excl_cmpr = 0; + qgroup_dirty(fs_info, qgroup); } spin_unlock(&fs_info->qgroup_lock); } From b38ace86f35a95e7e91f73dd2524da1fd7c3bc35 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Oct 2018 00:37:25 +0100 Subject: [PATCH 457/812] Btrfs: fix null pointer dereference on compressed write path error commit 3527a018c00e5dbada2f9d7ed5576437b6dd5cfb upstream. At inode.c:compress_file_range(), under the "free_pages_out" label, we can end up dereferencing the "pages" pointer when it has a NULL value. This case happens when "start" has a value of 0 and we fail to allocate memory for the "pages" pointer. When that happens we jump to the "cont" label and then enter the "if (start == 0)" branch where we immediately call the cow_file_range_inline() function. If that function returns 0 (success creating an inline extent) or an error (like -ENOMEM for example) we jump to the "free_pages_out" label and then access "pages[i]" leading to a NULL pointer dereference, since "nr_pages" has a value greater than zero at that point. Fix this by setting "nr_pages" to 0 when we fail to allocate memory for the "pages" pointer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201119 Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Liu Bo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b895be3d4311..383717ccecc7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -481,6 +481,7 @@ static noinline void compress_file_range(struct inode *inode, pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { /* just bail out to the uncompressed code */ + nr_pages = 0; goto cont; } From bddfd0a2d61e3f627b50d88b713769583f7d7b03 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2018 15:32:33 -0400 Subject: [PATCH 458/812] btrfs: set max_extent_size properly commit ad22cf6ea47fa20fbe11ac324a0a15c0a9a4a2a9 upstream. We can't use entry->bytes if our entry is a bitmap entry, we need to use entry->max_extent_size in that case. Fix up all the logic to make this consistent. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2b4c5f298325..1aa897dd9ce3 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1784,6 +1784,13 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl, return -1; } +static inline u64 get_max_extent_size(struct btrfs_free_space *entry) +{ + if (entry->bitmap) + return entry->max_extent_size; + return entry->bytes; +} + /* Cache the size of the max extent in bytes */ static struct btrfs_free_space * find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, @@ -1805,8 +1812,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, for (node = &entry->offset_index; node; node = rb_next(node)) { entry = rb_entry(node, struct btrfs_free_space, offset_index); if (entry->bytes < *bytes) { - if (entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); continue; } @@ -1824,8 +1831,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, } if (entry->bytes < *bytes + align_off) { - if (entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); continue; } @@ -1837,8 +1844,10 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, *offset = tmp; *bytes = size; return entry; - } else if (size > *max_extent_size) { - *max_extent_size = size; + } else { + *max_extent_size = + max(get_max_extent_size(entry), + *max_extent_size); } continue; } @@ -2696,8 +2705,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); if (err) { - if (search_bytes > *max_extent_size) - *max_extent_size = search_bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); return 0; } @@ -2734,8 +2743,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, entry = rb_entry(node, struct btrfs_free_space, offset_index); while (1) { - if (entry->bytes < bytes && entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + if (entry->bytes < bytes) + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); if (entry->bytes < bytes || (!entry->bitmap && entry->offset < min_start)) { From b234c2a058f08982af156a19ec49910361e1b98e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sun, 14 Oct 2018 17:05:07 -0700 Subject: [PATCH 459/812] MD: fix invalid stored role for a disk - try2 commit 9e753ba9b9b405e3902d9f08aec5f2ea58a0c317 upstream. Commit d595567dc4f0 (MD: fix invalid stored role for a disk) broke linear hotadd. Let's only fix the role for disks in raid1/10. Based on Guoqing's original patch. Reported-by: kernel test robot Cc: Gioh Kim Cc: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 4 ---- drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 4caf240693cd..07f307402351 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1670,10 +1670,6 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } else set_bit(In_sync, &rdev->flags); rdev->raid_disk = role; - if (role >= mddev->raid_disks) { - rdev->saved_raid_disk = -1; - rdev->raid_disk = -1; - } break; } if (sb->devflags & WriteMostly1) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 89dcbf2fa846..82e284d2b202 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1605,6 +1605,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) */ if (rdev->saved_raid_disk >= 0 && rdev->saved_raid_disk >= first && + rdev->saved_raid_disk < conf->raid_disks && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) first = last = rdev->saved_raid_disk; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 89111d455b71..8d613652d0e2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1737,6 +1737,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) first = last = rdev->raid_disk; if (rdev->saved_raid_disk >= first && + rdev->saved_raid_disk < conf->geo.raid_disks && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) mirror = rdev->saved_raid_disk; else From 7c7378d4c57af308257faf6a5d8af49970a8e218 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Mon, 8 Oct 2018 10:39:17 +0800 Subject: [PATCH 460/812] tty: check name length in tty_find_polling_driver() [ Upstream commit 33a1a7be198657c8ca26ad406c4d2a89b7162bcc ] The issue is found by a fuzzing test. If tty_find_polling_driver() recevies an incorrect input such as ',,' or '0b', the len becomes 0 and strncmp() always return 0. In this case, a null p->ops->poll_init() is called and it causes a kernel panic. Fix this by checking name length against zero in tty_find_polling_driver(). $echo ,, > /sys/module/kgdboc/parameters/kgdboc [ 20.804451] WARNING: CPU: 1 PID: 104 at drivers/tty/serial/serial_core.c:457 uart_get_baud_rate+0xe8/0x190 [ 20.804917] Modules linked in: [ 20.805317] CPU: 1 PID: 104 Comm: sh Not tainted 4.19.0-rc7ajb #8 [ 20.805469] Hardware name: linux,dummy-virt (DT) [ 20.805732] pstate: 20000005 (nzCv daif -PAN -UAO) [ 20.805895] pc : uart_get_baud_rate+0xe8/0x190 [ 20.806042] lr : uart_get_baud_rate+0xc0/0x190 [ 20.806476] sp : ffffffc06acff940 [ 20.806676] x29: ffffffc06acff940 x28: 0000000000002580 [ 20.806977] x27: 0000000000009600 x26: 0000000000009600 [ 20.807231] x25: ffffffc06acffad0 x24: 00000000ffffeff0 [ 20.807576] x23: 0000000000000001 x22: 0000000000000000 [ 20.807807] x21: 0000000000000001 x20: 0000000000000000 [ 20.808049] x19: ffffffc06acffac8 x18: 0000000000000000 [ 20.808277] x17: 0000000000000000 x16: 0000000000000000 [ 20.808520] x15: ffffffffffffffff x14: ffffffff00000000 [ 20.808757] x13: ffffffffffffffff x12: 0000000000000001 [ 20.809011] x11: 0101010101010101 x10: ffffff880d59ff5f [ 20.809292] x9 : ffffff880d59ff5e x8 : ffffffc06acffaf3 [ 20.809549] x7 : 0000000000000000 x6 : ffffff880d59ff5f [ 20.809803] x5 : 0000000080008001 x4 : 0000000000000003 [ 20.810056] x3 : ffffff900853e6b4 x2 : dfffff9000000000 [ 20.810693] x1 : ffffffc06acffad0 x0 : 0000000000000cb0 [ 20.811005] Call trace: [ 20.811214] uart_get_baud_rate+0xe8/0x190 [ 20.811479] serial8250_do_set_termios+0xe0/0x6f4 [ 20.811719] serial8250_set_termios+0x48/0x54 [ 20.811928] uart_set_options+0x138/0x1bc [ 20.812129] uart_poll_init+0x114/0x16c [ 20.812330] tty_find_polling_driver+0x158/0x200 [ 20.812545] configure_kgdboc+0xbc/0x1bc [ 20.812745] param_set_kgdboc_var+0xb8/0x150 [ 20.812960] param_attr_store+0xbc/0x150 [ 20.813160] module_attr_store+0x40/0x58 [ 20.813364] sysfs_kf_write+0x8c/0xa8 [ 20.813563] kernfs_fop_write+0x154/0x290 [ 20.813764] vfs_write+0xf0/0x278 [ 20.813951] __arm64_sys_write+0x84/0xf4 [ 20.814400] el0_svc_common+0xf4/0x1dc [ 20.814616] el0_svc_handler+0x98/0xbc [ 20.814804] el0_svc+0x8/0xc [ 20.822005] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 20.826913] Mem abort info: [ 20.827103] ESR = 0x84000006 [ 20.827352] Exception class = IABT (current EL), IL = 16 bits [ 20.827655] SET = 0, FnV = 0 [ 20.827855] EA = 0, S1PTW = 0 [ 20.828135] user pgtable: 4k pages, 39-bit VAs, pgdp = (____ptrval____) [ 20.828484] [0000000000000000] pgd=00000000aadee003, pud=00000000aadee003, pmd=0000000000000000 [ 20.829195] Internal error: Oops: 84000006 [#1] SMP [ 20.829564] Modules linked in: [ 20.829890] CPU: 1 PID: 104 Comm: sh Tainted: G W 4.19.0-rc7ajb #8 [ 20.830545] Hardware name: linux,dummy-virt (DT) [ 20.830829] pstate: 60000085 (nZCv daIf -PAN -UAO) [ 20.831174] pc : (null) [ 20.831457] lr : serial8250_do_set_termios+0x358/0x6f4 [ 20.831727] sp : ffffffc06acff9b0 [ 20.831936] x29: ffffffc06acff9b0 x28: ffffff9008d7c000 [ 20.832267] x27: ffffff900969e16f x26: 0000000000000000 [ 20.832589] x25: ffffff900969dfb0 x24: 0000000000000000 [ 20.832906] x23: ffffffc06acffad0 x22: ffffff900969e160 [ 20.833232] x21: 0000000000000000 x20: ffffffc06acffac8 [ 20.833559] x19: ffffff900969df90 x18: 0000000000000000 [ 20.833878] x17: 0000000000000000 x16: 0000000000000000 [ 20.834491] x15: ffffffffffffffff x14: ffffffff00000000 [ 20.834821] x13: ffffffffffffffff x12: 0000000000000001 [ 20.835143] x11: 0101010101010101 x10: ffffff880d59ff5f [ 20.835467] x9 : ffffff880d59ff5e x8 : ffffffc06acffaf3 [ 20.835790] x7 : 0000000000000000 x6 : ffffff880d59ff5f [ 20.836111] x5 : c06419717c314100 x4 : 0000000000000007 [ 20.836419] x3 : 0000000000000000 x2 : 0000000000000000 [ 20.836732] x1 : 0000000000000001 x0 : ffffff900969df90 [ 20.837100] Process sh (pid: 104, stack limit = 0x(____ptrval____)) [ 20.837396] Call trace: [ 20.837566] (null) [ 20.837816] serial8250_set_termios+0x48/0x54 [ 20.838089] uart_set_options+0x138/0x1bc [ 20.838570] uart_poll_init+0x114/0x16c [ 20.838834] tty_find_polling_driver+0x158/0x200 [ 20.839119] configure_kgdboc+0xbc/0x1bc [ 20.839380] param_set_kgdboc_var+0xb8/0x150 [ 20.839658] param_attr_store+0xbc/0x150 [ 20.839920] module_attr_store+0x40/0x58 [ 20.840183] sysfs_kf_write+0x8c/0xa8 [ 20.840183] sysfs_kf_write+0x8c/0xa8 [ 20.840440] kernfs_fop_write+0x154/0x290 [ 20.840702] vfs_write+0xf0/0x278 [ 20.840942] __arm64_sys_write+0x84/0xf4 [ 20.841209] el0_svc_common+0xf4/0x1dc [ 20.841471] el0_svc_handler+0x98/0xbc [ 20.841713] el0_svc+0x8/0xc [ 20.842057] Code: bad PC value [ 20.842764] ---[ end trace a8835d7de79aaadf ]--- [ 20.843134] Kernel panic - not syncing: Fatal exception [ 20.843515] SMP: stopping secondary CPUs [ 20.844289] Kernel Offset: disabled [ 20.844634] CPU features: 0x0,21806002 [ 20.844857] Memory Limit: none [ 20.845172] ---[ end Kernel panic - not syncing: Fatal exception ]--- Signed-off-by: Miles Chen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 198451fa9e5d..c1cff2b455ae 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -357,7 +357,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) mutex_lock(&tty_mutex); /* Search through the tty devices to look for a match */ list_for_each_entry(p, &tty_drivers, tty_drivers) { - if (strncmp(name, p->name, len) != 0) + if (!len || strncmp(name, p->name, len) != 0) continue; stp = str; if (*stp == ',') From 1e8628eb97a1a218b9180446f154a85451fb69e3 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 1 Oct 2018 16:21:51 +1000 Subject: [PATCH 461/812] powerpc/nohash: fix undefined behaviour when testing page size support [ Upstream commit f5e284803a7206d43e26f9ffcae5de9626d95e37 ] When enumerating page size definitions to check hardware support, we construct a constant which is (1U << (def->shift - 10)). However, the array of page size definitions is only initalised for various MMU_PAGE_* constants, so it contains a number of 0-initialised elements with def->shift == 0. This means we end up shifting by a very large number, which gives the following UBSan splat: ================================================================================ UBSAN: Undefined behaviour in /home/dja/dev/linux/linux/arch/powerpc/mm/tlb_nohash.c:506:21 shift exponent 4294967286 is too large for 32-bit type 'unsigned int' CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.0-rc3-00045-ga604f927b012-dirty #6 Call Trace: [c00000000101bc20] [c000000000a13d54] .dump_stack+0xa8/0xec (unreliable) [c00000000101bcb0] [c0000000004f20a8] .ubsan_epilogue+0x18/0x64 [c00000000101bd30] [c0000000004f2b10] .__ubsan_handle_shift_out_of_bounds+0x110/0x1a4 [c00000000101be20] [c000000000d21760] .early_init_mmu+0x1b4/0x5a0 [c00000000101bf10] [c000000000d1ba28] .early_setup+0x100/0x130 [c00000000101bf90] [c000000000000528] start_here_multiplatform+0x68/0x80 ================================================================================ Fix this by first checking if the element exists (shift != 0) before constructing the constant. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/tlb_nohash.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index bb04e4df3100..1b784b8fd8b4 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -487,6 +487,9 @@ static void setup_page_sizes(void) for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { struct mmu_psize_def *def = &mmu_psize_defs[psize]; + if (!def->shift) + continue; + if (tlb1ps & (1U << (def->shift - 10))) { def->flags |= MMU_PAGE_SIZE_DIRECT; From f79cc6a2b86d46bd7f0a689e9c891d25c8588112 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 26 Sep 2018 12:11:27 +0300 Subject: [PATCH 462/812] drm/omap: fix memory barrier bug in DMM driver [ Upstream commit 538f66ba204944470a653a4cccc5f8befdf97c22 ] A DMM timeout "timed out waiting for done" has been observed on DRA7 devices. The timeout happens rarely, and only when the system is under heavy load. Debugging showed that the timeout can be made to happen much more frequently by optimizing the DMM driver, so that there's almost no code between writing the last DMM descriptors to RAM, and writing to DMM register which starts the DMM transaction. The current theory is that a wmb() does not properly ensure that the data written to RAM is observable by all the components in the system. This DMM timeout has caused interesting (and rare) bugs as the error handling was not functioning properly (the error handling has been fixed in previous commits): * If a DMM timeout happened when a GEM buffer was being pinned for display on the screen, a timeout error would be shown, but the driver would continue programming DSS HW with broken buffer, leading to SYNCLOST floods and possible crashes. * If a DMM timeout happened when other user (say, video decoder) was pinning a GEM buffer, a timeout would be shown but if the user handled the error properly, no other issues followed. * If a DMM timeout happened when a GEM buffer was being released, the driver does not even notice the error, leading to crashes or hang later. This patch adds wmb() and readl() calls after the last bit is written to RAM, which should ensure that the execution proceeds only after the data is actually in RAM, and thus observable by DMM. The read-back should not be needed. Further study is required to understand if DMM is somehow special case and read-back is ok, or if DRA7's memory barriers do not work correctly. Signed-off-by: Tomi Valkeinen Signed-off-by: Peter Ujfalusi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 083db3f5181f..8282ae0c4fc3 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -262,6 +262,17 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait) } txn->last_pat->next_pa = 0; + /* ensure that the written descriptors are visible to DMM */ + wmb(); + + /* + * NOTE: the wmb() above should be enough, but there seems to be a bug + * in OMAP's memory barrier implementation, which in some rare cases may + * cause the writes not to be observable after wmb(). + */ + + /* read back to ensure the data is in RAM */ + readl(&txn->last_pat->next_pa); /* write to PAT_DESCR to clear out any pending transaction */ writel(0x0, dmm->base + reg[PAT_DESCR][engine->id]); From 1664dd1fbd0f8c0d7aa5c834311b7bcd83b625bc Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sun, 9 Sep 2018 12:02:32 -0400 Subject: [PATCH 463/812] media: pci: cx23885: handle adding to list failure [ Upstream commit c5d59528e24ad22500347b199d52b9368e686a42 ] altera_hw_filt_init() which calls append_internal() assumes that the node was successfully linked in while in fact it can silently fail. So the call-site needs to set return to -ENOMEM on append_internal() returning NULL and exit through the err path. Fixes: 349bcf02e361 ("[media] Altera FPGA based CI driver module") Signed-off-by: Nicholas Mc Guire Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/cx23885/altera-ci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/media/pci/cx23885/altera-ci.c b/drivers/media/pci/cx23885/altera-ci.c index aaf4e46ff3e9..a0c1ff97f905 100644 --- a/drivers/media/pci/cx23885/altera-ci.c +++ b/drivers/media/pci/cx23885/altera-ci.c @@ -660,6 +660,10 @@ static int altera_hw_filt_init(struct altera_ci_config *config, int hw_filt_nr) } temp_int = append_internal(inter); + if (!temp_int) { + ret = -ENOMEM; + goto err; + } inter->filts_used = 1; inter->dev = config->dev; inter->fpga_rw = config->fpga_rw; @@ -694,6 +698,7 @@ static int altera_hw_filt_init(struct altera_ci_config *config, int hw_filt_nr) __func__, ret); kfree(pid_filt); + kfree(inter); return ret; } @@ -728,6 +733,10 @@ int altera_ci_init(struct altera_ci_config *config, int ci_nr) } temp_int = append_internal(inter); + if (!temp_int) { + ret = -ENOMEM; + goto err; + } inter->cis_used = 1; inter->dev = config->dev; inter->fpga_rw = config->fpga_rw; @@ -796,6 +805,7 @@ int altera_ci_init(struct altera_ci_config *config, int ci_nr) ci_dbg_print("%s: Cannot initialize CI: Error %d.\n", __func__, ret); kfree(state); + kfree(inter); return ret; } From c5e2c6f92422466ffb9fc647e9cd92ce46ba5b29 Mon Sep 17 00:00:00 2001 From: Dengcheng Zhu Date: Tue, 11 Sep 2018 14:49:20 -0700 Subject: [PATCH 464/812] MIPS: kexec: Mark CPU offline before disabling local IRQ [ Upstream commit dc57aaf95a516f70e2d527d8287a0332c481a226 ] After changing CPU online status, it will not be sent any IPIs such as in __flush_cache_all() on software coherency systems. Do this before disabling local IRQ. Signed-off-by: Dengcheng Zhu Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20571/ Cc: pburton@wavecomp.com Cc: ralf@linux-mips.org Cc: linux-mips@linux-mips.org Cc: rachel.mozes@intel.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/crash.c | 3 +++ arch/mips/kernel/machine_kexec.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 610f0f3bdb34..93c46c9cebb7 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -34,6 +34,9 @@ static void crash_shutdown_secondary(void *passed_regs) if (!cpu_online(cpu)) return; + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + local_irq_disable(); if (!cpumask_test_cpu(cpu, &cpus_in_crash)) crash_save_cpu(regs, cpu); diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c index 50980bf3983e..92bc066e47a3 100644 --- a/arch/mips/kernel/machine_kexec.c +++ b/arch/mips/kernel/machine_kexec.c @@ -95,6 +95,9 @@ machine_kexec(struct kimage *image) *ptr = (unsigned long) phys_to_virt(*ptr); } + /* Mark offline BEFORE disabling local irq. */ + set_cpu_online(smp_processor_id(), false); + /* * we do not want to be bothered. */ From c269f0b61c3d87d8c2e173322dbefd7712966f99 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 14 Sep 2018 13:36:47 +0930 Subject: [PATCH 465/812] powerpc/boot: Ensure _zimage_start is a weak symbol [ Upstream commit ee9d21b3b3583712029a0db65a4b7c081d08d3b3 ] When building with clang crt0's _zimage_start is not marked weak, which breaks the build when linking the kernel image: $ objdump -t arch/powerpc/boot/crt0.o |grep _zimage_start$ 0000000000000058 g .text 0000000000000000 _zimage_start ld: arch/powerpc/boot/wrapper.a(crt0.o): in function '_zimage_start': (.text+0x58): multiple definition of '_zimage_start'; arch/powerpc/boot/pseries-head.o:(.text+0x0): first defined here Clang requires the .weak directive to appear after the symbol is declared. The binutils manual says: This directive sets the weak attribute on the comma separated list of symbol names. If the symbols do not already exist, they will be created. So it appears this is different with clang. The only reference I could see for this was an OpenBSD mailing list post[1]. Changing it to be after the declaration fixes building with Clang, and still works with GCC. $ objdump -t arch/powerpc/boot/crt0.o |grep _zimage_start$ 0000000000000058 w .text 0000000000000000 _zimage_start Reported to clang as https://bugs.llvm.org/show_bug.cgi?id=38921 [1] https://groups.google.com/forum/#!topic/fa.openbsd.tech/PAgKKen2YCY Signed-off-by: Joel Stanley Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/crt0.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 12866ccb5694..5c2199857aa8 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -47,8 +47,10 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .weak _zimage_start .globl _zimage_start + /* Clang appears to require the .weak directive to be after the symbol + * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ + .weak _zimage_start _zimage_start: .globl _zimage_start_lib _zimage_start_lib: From 06bbe23870049ca0bc1541d005efb0b2bdfd9486 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Wed, 12 Sep 2018 15:31:55 +0100 Subject: [PATCH 466/812] sc16is7xx: Fix for multi-channel stall [ Upstream commit 8344498721059754e09d30fe255a12dab8fb03ef ] The SC16IS752 is a dual-channel device. The two channels are largely independent, but the IRQ signals are wired together as an open-drain, active low signal which will be driven low while either of the channels requires attention, which can be for significant periods of time until operations complete and the interrupt can be acknowledged. In that respect it is should be treated as a true level-sensitive IRQ. The kernel, however, needs to be able to exit interrupt context in order to use I2C or SPI to access the device registers (which may involve sleeping). Therefore the interrupt needs to be masked out or paused in some way. The usual way to manage sleeping from within an interrupt handler is to use a threaded interrupt handler - a regular interrupt routine does the minimum amount of work needed to triage the interrupt before waking the interrupt service thread. If the threaded IRQ is marked as IRQF_ONESHOT the kernel will automatically mask out the interrupt until the thread runs to completion. The sc16is7xx driver used to use a threaded IRQ, but a patch switched to using a kthread_worker in order to set realtime priorities on the handler thread and for other optimisations. The end result is non-threaded IRQ that schedules some work then returns IRQ_HANDLED, making the kernel think that all IRQ processing has completed. The work-around to prevent a constant stream of interrupts is to mark the interrupt as edge-sensitive rather than level-sensitive, but interpreting an active-low source as a falling-edge source requires care to prevent a total cessation of interrupts. Whereas an edge-triggering source will generate a new edge for every interrupt condition a level-triggering source will keep the signal at the interrupting level until it no longer requires attention; in other words, the host won't see another edge until all interrupt conditions are cleared. It is therefore vital that the interrupt handler does not exit with an outstanding interrupt condition, otherwise the kernel will not receive another interrupt unless some other operation causes the interrupt state on the device to be cleared. The existing sc16is7xx driver has a very simple interrupt "thread" (kthread_work job) that processes interrupts on each channel in turn until there are no more. If both channels are active and the first channel starts interrupting while the handler for the second channel is running then it will not be detected and an IRQ stall ensues. This could be handled easily if there was a shared IRQ status register, or a convenient way to determine if the IRQ had been deasserted for any length of time, but both appear to be lacking. Avoid this problem (or at least make it much less likely to happen) by reducing the granularity of per-channel interrupt processing to one condition per iteration, only exiting the overall loop when both channels are no longer interrupting. Signed-off-by: Phil Elwell Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 7d5ee8a13ac6..17a22073d226 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -648,7 +648,7 @@ static void sc16is7xx_handle_tx(struct uart_port *port) uart_write_wakeup(port); } -static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) +static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) { struct uart_port *port = &s->p[portno].port; @@ -657,7 +657,7 @@ static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); if (iir & SC16IS7XX_IIR_NO_INT_BIT) - break; + return false; iir &= SC16IS7XX_IIR_ID_MASK; @@ -685,16 +685,23 @@ static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) port->line, iir); break; } - } while (1); + } while (0); + return true; } static void sc16is7xx_ist(struct kthread_work *ws) { struct sc16is7xx_port *s = to_sc16is7xx_port(ws, irq_work); - int i; - for (i = 0; i < s->devtype->nr_uart; ++i) - sc16is7xx_port_irq(s, i); + while (1) { + bool keep_polling = false; + int i; + + for (i = 0; i < s->devtype->nr_uart; ++i) + keep_polling |= sc16is7xx_port_irq(s, i); + if (!keep_polling) + break; + } } static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) From 155bd1c4bfc949a2b3250d7136a361e99d38e635 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Thu, 28 Jun 2018 12:20:33 -0400 Subject: [PATCH 467/812] media: tvp5150: fix width alignment during set_selection() [ Upstream commit bd24db04101f45a9c1d874fe21b0c7eab7bcadec ] The driver ignored the width alignment which exists due to the UYVY colorspace format. Fix the width alignment and make use of the the provided v4l2 helper function to set the width, height and all alignments in one. Fixes: 963ddc63e20d ("[media] media: tvp5150: Add cropping support") Signed-off-by: Marco Felsch Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 3c5fb2509c47..118277d57c30 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -870,9 +870,6 @@ static int tvp5150_s_crop(struct v4l2_subdev *sd, const struct v4l2_crop *a) /* tvp5150 has some special limits */ rect.left = clamp(rect.left, 0, TVP5150_MAX_CROP_LEFT); - rect.width = clamp_t(unsigned int, rect.width, - TVP5150_H_MAX - TVP5150_MAX_CROP_LEFT - rect.left, - TVP5150_H_MAX - rect.left); rect.top = clamp(rect.top, 0, TVP5150_MAX_CROP_TOP); /* Calculate height based on current standard */ @@ -886,9 +883,16 @@ static int tvp5150_s_crop(struct v4l2_subdev *sd, const struct v4l2_crop *a) else hmax = TVP5150_V_MAX_OTHERS; - rect.height = clamp_t(unsigned int, rect.height, + /* + * alignments: + * - width = 2 due to UYVY colorspace + * - height, image = no special alignment + */ + v4l_bound_align_image(&rect.width, + TVP5150_H_MAX - TVP5150_MAX_CROP_LEFT - rect.left, + TVP5150_H_MAX - rect.left, 1, &rect.height, hmax - TVP5150_MAX_CROP_TOP - rect.top, - hmax - rect.top); + hmax - rect.top, 0, 0); tvp5150_write(sd, TVP5150_VERT_BLANKING_START, rect.top); tvp5150_write(sd, TVP5150_VERT_BLANKING_STOP, From 55e464966baf3e57374540f058ab68d6dce15f32 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Sat, 8 Sep 2018 01:18:43 +0900 Subject: [PATCH 468/812] 9p locks: fix glock.client_id leak in do_lock [ Upstream commit b4dc44b3cac9e8327e0655f530ed0c46f2e6214c ] the 9p client code overwrites our glock.client_id pointing to a static buffer by an allocated string holding the network provided value which we do not care about; free and reset the value as appropriate. This is almost identical to the leak in v9fs_file_getlock() fixed by Al Viro in commit ce85dd58ad5a6 ("9p: we are leaking glock.client_id in v9fs_file_getlock()"), which was returned as an error by a coverity false positive -- while we are here attempt to make the code slightly more robust to future change of the net/9p/client code and hopefully more clear to coverity that there is no problem. Link: http://lkml.kernel.org/r/1536339057-21974-5-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 12ceaf52dae6..e7b3d2c4472d 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -204,6 +204,14 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) break; if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0) break; + /* + * p9_client_lock_dotl overwrites flock.client_id with the + * server message, free and reuse the client name + */ + if (flock.client_id != fid->clnt->name) { + kfree(flock.client_id); + flock.client_id = fid->clnt->name; + } } /* map 9p status to VFS status */ @@ -235,6 +243,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) locks_lock_file_wait(filp, fl); fl->fl_type = fl_type; } + if (flock.client_id != fid->clnt->name) + kfree(flock.client_id); out: return res; } @@ -269,7 +279,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) res = p9_client_getlock_dotl(fid, &glock); if (res < 0) - return res; + goto out; /* map 9p lock type to os lock type */ switch (glock.type) { case P9_LOCK_TYPE_RDLCK: @@ -290,7 +300,9 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) fl->fl_end = glock.start + glock.length - 1; fl->fl_pid = glock.proc_id; } - kfree(glock.client_id); +out: + if (glock.client_id != fid->clnt->name) + kfree(glock.client_id); return res; } From 1c6c5d0c1a432d7164662d653da56c57483df063 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 28 Aug 2018 07:32:35 +0900 Subject: [PATCH 469/812] 9p: clear dangling pointers in p9stat_free [ Upstream commit 62e3941776fea8678bb8120607039410b1b61a65 ] p9stat_free is more of a cleanup function than a 'free' function as it only frees the content of the struct; there are chances of use-after-free if it is improperly used (e.g. p9stat_free called twice as it used to be possible to) Clearing dangling pointers makes the function idempotent and safer to use. Link: http://lkml.kernel.org/r/1535410108-20650-2-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Reported-by: syzbot+d4252148d198410b864f@syzkaller.appspotmail.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/9p/protocol.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 16d287565987..145f80518064 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -46,10 +46,15 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); + stbuf->name = NULL; kfree(stbuf->uid); + stbuf->uid = NULL; kfree(stbuf->gid); + stbuf->gid = NULL; kfree(stbuf->muid); + stbuf->muid = NULL; kfree(stbuf->extension); + stbuf->extension = NULL; } EXPORT_SYMBOL(p9stat_free); From 661aa0b46dfb23700b569ac319b95e0b0154832f Mon Sep 17 00:00:00 2001 From: Young_X Date: Wed, 3 Oct 2018 12:54:29 +0000 Subject: [PATCH 470/812] cdrom: fix improper type cast, which can leat to information leak. commit e4f3aa2e1e67bb48dfbaaf1cad59013d5a5bc276 upstream. There is another cast from unsigned long to int which causes a bounds check to fail with specially crafted input. The value is then used as an index in the slot array in cdrom_slot_status(). This issue is similar to CVE-2018-16658 and CVE-2018-10940. Signed-off-by: Young_X Signed-off-by: Jens Axboe Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 1012b2cb6a16..d203940203b6 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2425,7 +2425,7 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi, return -ENOSYS; if (arg != CDSL_CURRENT && arg != CDSL_NONE) { - if ((int)arg >= cdi->capacity) + if (arg >= cdi->capacity) return -EINVAL; } From e304bf93ed2b00560158fb2cf19ebc3b976f27de Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Fri, 31 Aug 2018 11:24:27 -0700 Subject: [PATCH 471/812] scsi: qla2xxx: Fix incorrect port speed being set for FC adapters commit 4c1458df9635c7e3ced155f594d2e7dfd7254e21 upstream. Fixes: 6246b8a1d26c7c ("[SCSI] qla2xxx: Enhancements to support ISP83xx.") Fixes: 1bb395485160d2 ("qla2xxx: Correct iiDMA-update calling conventions.") Cc: Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_mbx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index cb11e04be568..87059a6786f4 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3315,10 +3315,7 @@ qla2x00_set_idma_speed(scsi_qla_host_t *vha, uint16_t loop_id, mcp->mb[0] = MBC_PORT_PARAMS; mcp->mb[1] = loop_id; mcp->mb[2] = BIT_0; - if (IS_CNA_CAPABLE(vha->hw)) - mcp->mb[3] = port_speed & (BIT_5|BIT_4|BIT_3|BIT_2|BIT_1|BIT_0); - else - mcp->mb[3] = port_speed & (BIT_2|BIT_1|BIT_0); + mcp->mb[3] = port_speed & (BIT_5|BIT_4|BIT_3|BIT_2|BIT_1|BIT_0); mcp->mb[9] = vha->vp_idx; mcp->out_mb = MBX_9|MBX_3|MBX_2|MBX_1|MBX_0; mcp->in_mb = MBX_3|MBX_1|MBX_0; From 7574afe0cfc0e103f309a721880d195f38b292e0 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 25 Sep 2018 12:28:55 +0300 Subject: [PATCH 472/812] fuse: Fix use-after-free in fuse_dev_do_read() commit bc78abbd55dd28e2287ec6d6502b842321a17c87 upstream. We may pick freed req in this way: [cpu0] [cpu1] fuse_dev_do_read() fuse_dev_do_write() list_move_tail(&req->list, ...); ... spin_unlock(&fpq->lock); ... ... request_end(fc, req); ... fuse_put_request(fc, req); if (test_bit(FR_INTERRUPTED, ...)) queue_interrupt(fiq, req); Fix that by keeping req alive until we finish all manipulations. Reported-by: syzbot+4e975615ca01f2277bdd@syzkaller.appspotmail.com Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2671e922c720..f47253a0502b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1328,12 +1328,14 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, goto out_end; } list_move_tail(&req->list, &fpq->processing); + __fuse_get_request(req); spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); if (test_bit(FR_INTERRUPTED, &req->flags)) queue_interrupt(fiq, req); + fuse_put_request(fc, req); return reqsize; From 8bb4354af373a8af395450acd298f25ddb79d93b Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 25 Sep 2018 12:52:42 +0300 Subject: [PATCH 473/812] fuse: Fix use-after-free in fuse_dev_do_write() commit d2d2d4fb1f54eff0f3faa9762d84f6446a4bc5d0 upstream. After we found req in request_find() and released the lock, everything may happen with the req in parallel: cpu0 cpu1 fuse_dev_do_write() fuse_dev_do_write() req = request_find(fpq, ...) ... spin_unlock(&fpq->lock) ... ... req = request_find(fpq, oh.unique) ... spin_unlock(&fpq->lock) queue_interrupt(&fc->iq, req); ... ... ... ... ... request_end(fc, req); fuse_put_request(fc, req); ... queue_interrupt(&fc->iq, req); Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f47253a0502b..78225f1f1902 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1924,16 +1924,20 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, /* Is it an interrupt reply? */ if (req->intr_unique == oh.unique) { + __fuse_get_request(req); spin_unlock(&fpq->lock); err = -EINVAL; - if (nbytes != sizeof(struct fuse_out_header)) + if (nbytes != sizeof(struct fuse_out_header)) { + fuse_put_request(fc, req); goto err_finish; + } if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) queue_interrupt(&fc->iq, req); + fuse_put_request(fc, req); fuse_copy_finish(cs); return nbytes; From 2fe23468dae467043183e301829827c17f65f45f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Sep 2018 16:43:22 +0200 Subject: [PATCH 474/812] fuse: fix blocked_waitq wakeup commit 908a572b80f6e9577b45e81b3dfe2e22111286b8 upstream. Using waitqueue_active() is racy. Make sure we issue a wake_up() unconditionally after storing into fc->blocked. After that it's okay to optimize with waitqueue_active() since the first wake up provides the necessary barrier for all waiters, not the just the woken one. Signed-off-by: Miklos Szeredi Fixes: 3c18ef8117f0 ("fuse: optimize wake_up") Cc: # v3.10 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 78225f1f1902..da31954e1a99 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -402,12 +402,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) if (test_bit(FR_BACKGROUND, &req->flags)) { spin_lock(&fc->lock); clear_bit(FR_BACKGROUND, &req->flags); - if (fc->num_background == fc->max_background) + if (fc->num_background == fc->max_background) { fc->blocked = 0; - - /* Wake up next waiter, if any */ - if (!fc->blocked && waitqueue_active(&fc->blocked_waitq)) wake_up(&fc->blocked_waitq); + } else if (!fc->blocked) { + /* + * Wake up next waiter, if any. It's okay to use + * waitqueue_active(), as we've already synced up + * fc->blocked with waiters with the wake_up() call + * above. + */ + if (waitqueue_active(&fc->blocked_waitq)) + wake_up(&fc->blocked_waitq); + } if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { From f04651b97aed95cc82fc49997de1e5b1a6990e97 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Sep 2018 16:43:22 +0200 Subject: [PATCH 475/812] fuse: set FR_SENT while locked commit 4c316f2f3ff315cb48efb7435621e5bfb81df96d upstream. Otherwise fuse_dev_do_write() could come in and finish off the request, and the set_bit(FR_SENT, ...) could trigger the WARN_ON(test_bit(FR_SENT, ...)) in request_end(). Signed-off-by: Miklos Szeredi Reported-by: syzbot+ef054c4d3f64cd7f7cec@syzkaller.appspotmai Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index da31954e1a99..18e2f54737d1 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1336,8 +1336,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, } list_move_tail(&req->list, &fpq->processing); __fuse_get_request(req); - spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); + spin_unlock(&fpq->lock); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); if (test_bit(FR_INTERRUPTED, &req->flags)) From cac9339052667de0805dc079b7fd74432acff515 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 23 May 2016 16:25:39 -0700 Subject: [PATCH 476/812] mm, elf: handle vm_brk error commit ecc2bc8ac03884266cf73f8a2a42b911465b2fbc upstream. load_elf_library doesn't handle vm_brk failure although nothing really indicates it cannot do that because the function is allowed to fail due to vm_mmap failures already. This might be not a problem now but later patch will make vm_brk killable (resp. mmap_sem for write waiting will become killable) and so the failure will be more probable. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/binfmt_elf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 62bc72001fce..70ea4b9c6dd9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1215,8 +1215,11 @@ static int load_elf_library(struct file *file) len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1); bss = eppnt->p_memsz + eppnt->p_vaddr; - if (bss > len) - vm_brk(len, bss - len); + if (bss > len) { + error = vm_brk(len, bss - len); + if (BAD_ADDR(error)) + goto out_free_ph; + } error = 0; out_free_ph: From 4a0c08d709536d12f0dc2a9e6360826a56c9bceb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 2 Aug 2016 14:04:51 -0700 Subject: [PATCH 477/812] binfmt_elf: fix calculations for bss padding commit 0036d1f7eb95bcc52977f15507f00dd07018e7e2 upstream. A double-bug exists in the bss calculation code, where an overflow can happen in the "last_bss - elf_bss" calculation, but vm_brk internally aligns the argument, underflowing it, wrapping back around safe. We shouldn't depend on these bugs staying in sync, so this cleans up the bss padding handling to avoid the overflow. This moves the bss padzero() before the last_bss > elf_bss case, since the zero-filling of the ELF_PAGE should have nothing to do with the relationship of last_bss and elf_bss: any trailing portion should be zeroed, and a zero size is already handled by padzero(). Then it handles the math on elf_bss vs last_bss correctly. These need to both be ELF_PAGE aligned to get the comparison correct, since that's the expected granularity of the mappings. Since elf_bss already had alignment-based padding happen in padzero(), the "start" of the new vm_brk() should be moved forward as done in the original code. However, since the "end" of the vm_brk() area will already become PAGE_ALIGNed in vm_brk() then last_bss should get aligned here to avoid hiding it as a side-effect. Additionally makes a cosmetic change to the initial last_bss calculation so it's easier to read in comparison to the load_addr calculation above it (i.e. the only difference is p_filesz vs p_memsz). Link: http://lkml.kernel.org/r/1468014494-25291-2-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Reported-by: Hector Marco-Gisbert Cc: Ismael Ripoll Ripoll Cc: Alexander Viro Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Chen Gang Cc: Michal Hocko Cc: Konstantin Khlebnikov Cc: Andrea Arcangeli Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/binfmt_elf.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 70ea4b9c6dd9..2963a23f7a80 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -604,28 +604,30 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, * Do the same thing for the memory mapping - between * elf_bss and last_bss is the bss section. */ - k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; + k = load_addr + eppnt->p_vaddr + eppnt->p_memsz; if (k > last_bss) last_bss = k; } } + /* + * Now fill out the bss section: first pad the last page from + * the file up to the page boundary, and zero it from elf_bss + * up to the end of the page. + */ + if (padzero(elf_bss)) { + error = -EFAULT; + goto out; + } + /* + * Next, align both the file and mem bss up to the page size, + * since this is where elf_bss was just zeroed up to, and where + * last_bss will end after the vm_brk() below. + */ + elf_bss = ELF_PAGEALIGN(elf_bss); + last_bss = ELF_PAGEALIGN(last_bss); + /* Finally, if there is still more bss to allocate, do it. */ if (last_bss > elf_bss) { - /* - * Now fill out the bss section. First pad the last page up - * to the page boundary, and then perform a mmap to make sure - * that there are zero-mapped pages up to and including the - * last bss page. - */ - if (padzero(elf_bss)) { - error = -EFAULT; - goto out; - } - - /* What we have mapped so far */ - elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); - - /* Map the last of the bss segment */ error = vm_brk(elf_bss, last_bss - elf_bss); if (BAD_ADDR(error)) goto out; From 2c69d1f0db81179a3a4ef664c88d051d1531e948 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 2 Aug 2016 14:04:54 -0700 Subject: [PATCH 478/812] mm: refuse wrapped vm_brk requests commit ba093a6d9397da8eafcfbaa7d95bd34255da39a0 upstream. The vm_brk() alignment calculations should refuse to overflow. The ELF loader depending on this, but it has been fixed now. No other unsafe callers have been found. Link: http://lkml.kernel.org/r/1468014494-25291-3-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Reported-by: Hector Marco-Gisbert Cc: Ismael Ripoll Ripoll Cc: Alexander Viro Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Chen Gang Cc: Michal Hocko Cc: Konstantin Khlebnikov Cc: Andrea Arcangeli Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [bwh: Backported to 4.4: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- mm/mmap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 39f5fbd07486..dd9205542a86 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2808,16 +2808,18 @@ static inline void verify_mm_writelocked(struct mm_struct *mm) * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ -static unsigned long do_brk(unsigned long addr, unsigned long len) +static unsigned long do_brk(unsigned long addr, unsigned long request) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - unsigned long flags; + unsigned long flags, len; struct rb_node **rb_link, *rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; - len = PAGE_ALIGN(len); + len = PAGE_ALIGN(request); + if (len < request) + return -ENOMEM; if (!len) return addr; From a0b71d1b894556ae8e346596046e4f8bb0689f43 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Fri, 13 Jul 2018 16:59:13 -0700 Subject: [PATCH 479/812] fs, elf: make sure to page align bss in load_elf_library commit 24962af7e1041b7e50c1bc71d8d10dc678c556b5 upstream. The current code does not make sure to page align bss before calling vm_brk(), and this can lead to a VM_BUG_ON() in __mm_populate() due to the requested lenght not being correctly aligned. Let us make sure to align it properly. Kees: only applicable to CONFIG_USELIB kernels: 32-bit and configured for libc5. Link: http://lkml.kernel.org/r/20180705145539.9627-1-osalvador@techadventures.net Signed-off-by: Oscar Salvador Reported-by: syzbot+5dcb560fe12aa5091c06@syzkaller.appspotmail.com Tested-by: Tetsuo Handa Acked-by: Kees Cook Cc: Michal Hocko Cc: Nicolas Pitre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/binfmt_elf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 2963a23f7a80..f010d6c8dd14 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1214,9 +1214,8 @@ static int load_elf_library(struct file *file) goto out_free_ph; } - len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + - ELF_MIN_ALIGN - 1); - bss = eppnt->p_memsz + eppnt->p_vaddr; + len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr); + bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr); if (bss > len) { error = vm_brk(len, bss - len); if (BAD_ADDR(error)) From 6f0cb0e3ec883d19e89800200fe7a24240b0b0b8 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 13 Jul 2018 16:59:20 -0700 Subject: [PATCH 480/812] mm: do not bug_on on incorrect length in __mm_populate() commit bb177a732c4369bb58a1fe1df8f552b6f0f7db5f upstream. syzbot has noticed that a specially crafted library can easily hit VM_BUG_ON in __mm_populate kernel BUG at mm/gup.c:1242! invalid opcode: 0000 [#1] SMP CPU: 2 PID: 9667 Comm: a.out Not tainted 4.18.0-rc3 #644 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 RIP: 0010:__mm_populate+0x1e2/0x1f0 Code: 55 d0 65 48 33 14 25 28 00 00 00 89 d8 75 21 48 83 c4 20 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 75 18 f1 ff 0f 0b e8 6e 18 f1 ff <0f> 0b 31 db eb c9 e8 93 06 e0 ff 0f 1f 00 55 48 89 e5 53 48 89 fb Call Trace: vm_brk_flags+0xc3/0x100 vm_brk+0x1f/0x30 load_elf_library+0x281/0x2e0 __ia32_sys_uselib+0x170/0x1e0 do_fast_syscall_32+0xca/0x420 entry_SYSENTER_compat+0x70/0x7f The reason is that the length of the new brk is not page aligned when we try to populate the it. There is no reason to bug on that though. do_brk_flags already aligns the length properly so the mapping is expanded as it should. All we need is to tell mm_populate about it. Besides that there is absolutely no reason to to bug_on in the first place. The worst thing that could happen is that the last page wouldn't get populated and that is far from putting system into an inconsistent state. Fix the issue by moving the length sanitization code from do_brk_flags up to vm_brk_flags. The only other caller of do_brk_flags is brk syscall entry and it makes sure to provide the proper length so t here is no need for sanitation and so we can use do_brk_flags without it. Also remove the bogus BUG_ONs. [osalvador@techadventures.net: fix up vm_brk_flags s@request@len@] Link: http://lkml.kernel.org/r/20180706090217.GI32658@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: syzbot Tested-by: Tetsuo Handa Reviewed-by: Oscar Salvador Cc: Zi Yan Cc: "Aneesh Kumar K.V" Cc: Dan Williams Cc: "Kirill A. Shutemov" Cc: Michael S. Tsirkin Cc: Al Viro Cc: "Huang, Ying" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [bwh: Backported to 4.4: - There is no do_brk_flags() function; update do_brk() - do_brk(), vm_brk() return the address on success - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- mm/gup.c | 2 -- mm/mmap.c | 19 ++++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index b599526db9f7..018144c4b9ec 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -940,8 +940,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) int locked = 0; long ret = 0; - VM_BUG_ON(start & ~PAGE_MASK); - VM_BUG_ON(len != PAGE_ALIGN(len)); end = start + len; for (nstart = start; nstart < end; nstart = nend) { diff --git a/mm/mmap.c b/mm/mmap.c index dd9205542a86..3074dbcd9621 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2808,21 +2808,15 @@ static inline void verify_mm_writelocked(struct mm_struct *mm) * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ -static unsigned long do_brk(unsigned long addr, unsigned long request) +static unsigned long do_brk(unsigned long addr, unsigned long len) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - unsigned long flags, len; + unsigned long flags; struct rb_node **rb_link, *rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; - len = PAGE_ALIGN(request); - if (len < request) - return -ENOMEM; - if (!len) - return addr; - flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); @@ -2890,12 +2884,19 @@ static unsigned long do_brk(unsigned long addr, unsigned long request) return addr; } -unsigned long vm_brk(unsigned long addr, unsigned long len) +unsigned long vm_brk(unsigned long addr, unsigned long request) { struct mm_struct *mm = current->mm; + unsigned long len; unsigned long ret; bool populate; + len = PAGE_ALIGN(request); + if (len < request) + return -ENOMEM; + if (!len) + return addr; + down_write(&mm->mmap_sem); ret = do_brk(addr, len); populate = ((mm->def_flags & VM_LOCKED) != 0); From 5637fad6b1462945ccc8bd9734146c93422590fd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 18:13:48 +0100 Subject: [PATCH 481/812] e1000: avoid null pointer dereference on invalid stat type [ Upstream commit 5983587c8c5ef00d6886477544ad67d495bc5479 ] Currently if the stat type is invalid then data[i] is being set either by dereferencing a null pointer p, or it is reading from an incorrect previous location if we had a valid stat type previously. Fix this by skipping over the read of p on an invalid stat type. Detected by CoverityScan, CID#113385 ("Explicit null dereferenced") Signed-off-by: Colin Ian King Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 5ae8874bbf72..d70b2e5d5222 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1826,11 +1826,12 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, { struct e1000_adapter *adapter = netdev_priv(netdev); int i; - char *p = NULL; const struct e1000_stats *stat = e1000_gstrings_stats; e1000_update_stats(adapter); - for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { + for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++, stat++) { + char *p; + switch (stat->type) { case NETDEV_STATS: p = (char *)netdev + stat->stat_offset; @@ -1841,15 +1842,13 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, default: WARN_ONCE(1, "Invalid E1000 stat type: %u index %d\n", stat->type, i); - break; + continue; } if (stat->sizeof_stat == sizeof(u64)) data[i] = *(u64 *)p; else data[i] = *(u32 *)p; - - stat++; } /* BUG_ON(i != E1000_STATS_LEN); */ } From 9b574c3d21460550175a8036c0d15a964ae79774 Mon Sep 17 00:00:00 2001 From: Vincenzo Maffione Date: Sat, 16 Sep 2017 18:00:00 +0200 Subject: [PATCH 482/812] e1000: fix race condition between e1000_down() and e1000_watchdog [ Upstream commit 44c445c3d1b4eacff23141fa7977c3b2ec3a45c9 ] This patch fixes a race condition that can result into the interface being up and carrier on, but with transmits disabled in the hardware. The bug may show up by repeatedly IFF_DOWN+IFF_UP the interface, which allows e1000_watchdog() interleave with e1000_down(). CPU x CPU y -------------------------------------------------------------------- e1000_down(): netif_carrier_off() e1000_watchdog(): if (carrier == off) { netif_carrier_on(); enable_hw_transmit(); } disable_hw_transmit(); e1000_watchdog(): /* carrier on, do nothing */ Signed-off-by: Vincenzo Maffione Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000/e1000_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 2a1d4a9d3c19..1f84f2fa459f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -521,8 +521,6 @@ void e1000_down(struct e1000_adapter *adapter) struct net_device *netdev = adapter->netdev; u32 rctl, tctl; - netif_carrier_off(netdev); - /* disable receives in the hardware */ rctl = er32(RCTL); ew32(RCTL, rctl & ~E1000_RCTL_EN); @@ -538,6 +536,15 @@ void e1000_down(struct e1000_adapter *adapter) E1000_WRITE_FLUSH(); msleep(10); + /* Set the carrier off after transmits have been disabled in the + * hardware, to avoid race conditions with e1000_watchdog() (which + * may be running concurrently to us, checking for the carrier + * bit to decide whether it should enable transmits again). Such + * a race condition would result into transmission being disabled + * in the hardware until the next IFF_DOWN+IFF_UP cycle. + */ + netif_carrier_off(netdev); + napi_disable(&adapter->napi); e1000_irq_disable(adapter); From e1993df1c756bc491127e3e89ffc5c9c045b31de Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 5 May 2017 15:30:23 -0700 Subject: [PATCH 483/812] bna: ethtool: Avoid reading past end of buffer [ Upstream commit 4dc69c1c1fff2f587f8e737e70b4a4e7565a5c94 ] Using memcpy() from a string that is shorter than the length copied means the destination buffer is being filled with arbitrary data from the kernel rodata segment. Instead, use strncpy() which will fill the trailing bytes with zeros. This was found with the future CONFIG_FORTIFY_SOURCE feature. Cc: Daniel Micay Signed-off-by: Kees Cook Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 0e4fdc3dd729..18672ad773fb 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -556,8 +556,8 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) { BUG_ON(!(strlen(bnad_net_stats_strings[i]) < ETH_GSTRING_LEN)); - memcpy(string, bnad_net_stats_strings[i], - ETH_GSTRING_LEN); + strncpy(string, bnad_net_stats_strings[i], + ETH_GSTRING_LEN); string += ETH_GSTRING_LEN; } bmap = bna_tx_rid_mask(&bnad->bna); From 09b61caa8332bb36dedd9efd69a97c51caecc53e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 5 Sep 2018 17:33:08 +0800 Subject: [PATCH 484/812] MIPS: Loongson-3: Fix CPU UART irq delivery problem [ Upstream commit d06f8a2f1befb5a3d0aa660ab1c05e9b744456ea ] Masking/unmasking the CPU UART irq in CP0_Status (and redirecting it to other CPUs) may cause interrupts be lost, especially in multi-package machines (Package-0's UART irq cannot be delivered to others). So make mask_loongson_irq() and unmask_loongson_irq() be no-ops. The original problem (UART IRQ may deliver to any core) is also because of masking/unmasking the CPU UART irq in CP0_Status. So it is safe to remove all of the stuff. Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20433/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Signed-off-by: Sasha Levin --- arch/mips/loongson64/loongson-3/irq.c | 43 ++------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index 0f75b6b3d218..53424f2a53f3 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c @@ -48,45 +48,8 @@ static struct irqaction cascade_irqaction = { .name = "cascade", }; -static inline void mask_loongson_irq(struct irq_data *d) -{ - clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE)); - irq_disable_hazard(); - - /* Workaround: UART IRQ may deliver to any core */ - if (d->irq == LOONGSON_UART_IRQ) { - int cpu = smp_processor_id(); - int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node; - int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node; - u64 intenclr_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_INTENCLR); - u64 introuter_lpc_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_LPC); - - *(volatile u32 *)intenclr_addr = 1 << 10; - *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<irq == LOONGSON_UART_IRQ) { - int cpu = smp_processor_id(); - int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node; - int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node; - u64 intenset_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_INTENSET); - u64 introuter_lpc_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_LPC); - - *(volatile u32 *)intenset_addr = 1 << 10; - *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<irq - MIPS_CPU_IRQ_BASE)); - irq_enable_hazard(); -} +static inline void mask_loongson_irq(struct irq_data *d) { } +static inline void unmask_loongson_irq(struct irq_data *d) { } /* For MIPS IRQs which shared by all cores */ static struct irq_chip loongson_irq_chip = { @@ -124,7 +87,7 @@ void __init mach_init_irq(void) mips_cpu_irq_init(); init_i8259_irqs(); irq_set_chip_and_handler(LOONGSON_UART_IRQ, - &loongson_irq_chip, handle_level_irq); + &loongson_irq_chip, handle_percpu_irq); /* setup HT1 irq */ setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); From 4353f89bc3f808760354478bf91a205977ef23c7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 5 Sep 2018 17:33:09 +0800 Subject: [PATCH 485/812] MIPS: Loongson-3: Fix BRIDGE irq delivery problem [ Upstream commit 360fe725f8849aaddc53475fef5d4a0c439b05ae ] After commit e509bd7da149dc349160 ("genirq: Allow migration of chained interrupts by installing default action") Loongson-3 fails at here: setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); This is because both chained_action and cascade_irqaction don't have IRQF_SHARED flag. This will cause Loongson-3 resume fails because HPET timer interrupt can't be delivered during S3. So we set the irqchip of the chained irq to loongson_irq_chip which doesn't disable the chained irq in CP0.Status. Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20434/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Signed-off-by: Sasha Levin --- arch/mips/include/asm/mach-loongson64/irq.h | 2 +- arch/mips/loongson64/loongson-3/irq.c | 13 +++---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index d18c45c7c394..19ff9ce46c02 100644 --- a/arch/mips/include/asm/mach-loongson64/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -9,7 +9,7 @@ #define MIPS_CPU_IRQ_BASE 56 #define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 2) /* UART */ -#define LOONGSON_HT1_IRQ (MIPS_CPU_IRQ_BASE + 3) /* HT1 */ +#define LOONGSON_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 3) /* CASCADE */ #define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* CPU Timer */ #define LOONGSON_HT1_CFG_BASE loongson_sysconf.ht_control_base diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index 53424f2a53f3..241cb88f9c03 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c @@ -42,12 +42,6 @@ void mach_irq_dispatch(unsigned int pending) } } -static struct irqaction cascade_irqaction = { - .handler = no_action, - .flags = IRQF_NO_SUSPEND, - .name = "cascade", -}; - static inline void mask_loongson_irq(struct irq_data *d) { } static inline void unmask_loongson_irq(struct irq_data *d) { } @@ -88,11 +82,10 @@ void __init mach_init_irq(void) init_i8259_irqs(); irq_set_chip_and_handler(LOONGSON_UART_IRQ, &loongson_irq_chip, handle_percpu_irq); + irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ, + &loongson_irq_chip, handle_percpu_irq); - /* setup HT1 irq */ - setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); - - set_c0_status(STATUSF_IP2 | STATUSF_IP6); + set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6); } #ifdef CONFIG_HOTPLUG_CPU From f9d2364450c7c66a60f02bedb99eedf2d372683c Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 29 Oct 2018 18:30:13 -0700 Subject: [PATCH 486/812] xtensa: add NOTES section to the linker script commit 4119ba211bc4f1bf638f41e50b7a0f329f58aa16 upstream. This section collects all source .note.* sections together in the vmlinux image. Without it .note.Linux section may be placed at address 0, while the rest of the kernel is at its normal address, resulting in a huge vmlinux.bin image that may not be linked into the xtensa Image.elf. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/boot/Makefile | 2 +- arch/xtensa/kernel/vmlinux.lds.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index ca20a892021b..6c6877d628ef 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -31,7 +31,7 @@ $(bootdir-y): $(addprefix $(obj)/,$(subdir-y)) \ $(addprefix $(obj)/,$(host-progs)) $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS) -OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary +OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index c417cbe4ec87..bdfeda5a913c 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -110,6 +110,7 @@ SECTIONS .fixup : { *(.fixup) } EXCEPTION_TABLE(16) + NOTES /* Data section */ _sdata = .; From fdc26b9d183c1c3ea4769e139a54bdf2aad6ecf4 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 4 Nov 2018 01:46:00 -0700 Subject: [PATCH 487/812] xtensa: make sure bFLT stack is 16 byte aligned commit 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 upstream. Xtensa ABI requires stack alignment to be at least 16. In noMMU configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at least 16. This fixes the following runtime error in noMMU configuration, caused by interaction between insufficiently aligned stack and alloca function, that results in corruption of on-stack variable in the libc function glob: Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65) - should not happen EXCCAUSE is 15 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/processor.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 83e2e4bc01ba..d3ac00fcb15c 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -24,7 +24,11 @@ # error Linux requires the Xtensa Windowed Registers Option. #endif -#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH +/* Xtensa ABI requires stack alignment to be at least 16 */ + +#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) + +#define ARCH_SLAB_MINALIGN STACK_ALIGN /* * User space process size: 1 GB. From bf6ae42ce4aea5089f74d4eea84fbe271e67e55d Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 13 Nov 2018 23:46:42 -0800 Subject: [PATCH 488/812] xtensa: fix boot parameters address translation commit 40dc948f234b73497c3278875eb08a01d5854d3f upstream. The bootloader may pass physical address of the boot parameters structure to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in the arch/xtensa/kernel/head.S is supposed to map that physical address to the virtual address in the configured virtual memory layout. This code haven't been updated when additional 256+256 and 512+512 memory layouts were introduced and it may produce wrong addresses when used with these layouts. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/head.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 05e1df943856..c7b3bedbfffe 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -88,9 +88,12 @@ _SetupMMU: initialize_mmu #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY rsr a2, excsave1 - movi a3, 0x08000000 + movi a3, XCHAL_KSEG_PADDR + bltu a2, a3, 1f + sub a2, a2, a3 + movi a3, XCHAL_KSEG_SIZE bgeu a2, a3, 1f - movi a3, 0xd0000000 + movi a3, XCHAL_KSEG_CACHED_VADDR add a2, a2, a3 wsr a2, excsave1 1: From c2fc59dc0a01fdb2120ae488a15884be64802796 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 29 Aug 2018 21:20:10 +0200 Subject: [PATCH 489/812] clk: s2mps11: Fix matching when built as module and DT node contains compatible commit 8985167ecf57f97061599a155bb9652c84ea4913 upstream. When driver is built as module and DT node contains clocks compatible (e.g. "samsung,s2mps11-clk"), the module will not be autoloaded because module aliases won't match. The modalias from uevent: of:NclocksTCsamsung,s2mps11-clk The modalias from driver: platform:s2mps11-clk The devices are instantiated by parent's MFD. However both Device Tree bindings and parent define the compatible for clocks devices. In case of module matching this DT compatible will be used. The issue will not happen if this is a built-in (no need for module matching) or when clocks DT node does not contain compatible (not correct from bindings perspective but working for driver). Note when backporting to stable kernels: adjust the list of device ID entries. Cc: Fixes: 53c31b3437a6 ("mfd: sec-core: Add of_compatible strings for clock MFD cells") Signed-off-by: Krzysztof Kozlowski Acked-by: Stephen Boyd Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-s2mps11.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index d266299dfdb1..785864893f9a 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -297,6 +297,36 @@ static const struct platform_device_id s2mps11_clk_id[] = { }; MODULE_DEVICE_TABLE(platform, s2mps11_clk_id); +#ifdef CONFIG_OF +/* + * Device is instantiated through parent MFD device and device matching is done + * through platform_device_id. + * + * However if device's DT node contains proper clock compatible and driver is + * built as a module, then the *module* matching will be done trough DT aliases. + * This requires of_device_id table. In the same time this will not change the + * actual *device* matching so do not add .of_match_table. + */ +static const struct of_device_id s2mps11_dt_match[] = { + { + .compatible = "samsung,s2mps11-clk", + .data = (void *)S2MPS11X, + }, { + .compatible = "samsung,s2mps13-clk", + .data = (void *)S2MPS13X, + }, { + .compatible = "samsung,s2mps14-clk", + .data = (void *)S2MPS14X, + }, { + .compatible = "samsung,s5m8767-clk", + .data = (void *)S5M8767X, + }, { + /* Sentinel */ + }, +}; +MODULE_DEVICE_TABLE(of, s2mps11_dt_match); +#endif + static struct platform_driver s2mps11_clk_driver = { .driver = { .name = "s2mps11-clk", From f29b66550eed5bae17eed21490d99061356bf447 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 26 Sep 2018 18:03:16 +0200 Subject: [PATCH 490/812] libceph: bump CEPH_MSG_MAX_DATA_LEN commit 94e6992bb560be8bffb47f287194adf070b57695 upstream. If the read is large enough, we end up spinning in the messenger: libceph: osd0 192.168.122.1:6801 io error libceph: osd0 192.168.122.1:6801 io error libceph: osd0 192.168.122.1:6801 io error This is a receive side limit, so only reads were affected. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/libceph.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 3e3799cdc6e6..9b9fe0588008 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -72,7 +72,13 @@ struct ceph_options { #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) -#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) + +/* + * Handle the largest possible rbd object in one message. + * There is no limit on the size of cephfs objects, but it has to obey + * rsize and wsize mount options anyway. + */ +#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024) #define CEPH_AUTH_NAME_DEFAULT "guest" From 5d4ce43c13b799653ad1e14820f182e28c7f6f49 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 8 Oct 2018 12:57:34 +0200 Subject: [PATCH 491/812] mach64: fix display corruption on big endian machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3c6c6a7878d00a3ac997a779c5b9861ff25dfcc8 upstream. The code for manual bit triple is not endian-clean. It builds the variable "hostdword" using byte accesses, therefore we must read the variable with "le32_to_cpu". The patch also enables (hardware or software) bit triple only if the image is monochrome (image->depth). If we want to blit full-color image, we shouldn't use the triple code. Signed-off-by: Mikulas Patocka Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/aty/mach64_accel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/aty/mach64_accel.c b/drivers/video/fbdev/aty/mach64_accel.c index 182bd680141f..e3143cf1184c 100644 --- a/drivers/video/fbdev/aty/mach64_accel.c +++ b/drivers/video/fbdev/aty/mach64_accel.c @@ -344,7 +344,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit * this hwaccelerated triple has an issue with not aligned data */ - if (M64_HAS(HW_TRIPLE) && image->width % 8 == 0) + if (image->depth == 1 && M64_HAS(HW_TRIPLE) && image->width % 8 == 0) pix_width |= DP_HOST_TRIPLE_EN; } @@ -381,7 +381,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) src_bytes = (((image->width * image->depth) + 7) / 8) * image->height; /* manual triple each pixel */ - if (info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { + if (image->depth == 1 && info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { int inbit, outbit, mult24, byte_id_in_dword, width; u8 *pbitmapin = (u8*)image->data, *pbitmapout; u32 hostdword; @@ -414,7 +414,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) } } wait_for_fifo(1, par); - aty_st_le32(HOST_DATA0, hostdword, par); + aty_st_le32(HOST_DATA0, le32_to_cpu(hostdword), par); } } else { u32 *pbitmap, dwords = (src_bytes + 3) / 4; From 1ffd631735f5e6bf565aef94b098e4821eb105c1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 8 Oct 2018 12:57:35 +0200 Subject: [PATCH 492/812] mach64: fix image corruption due to reading accelerator registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c09bcc91bb94ed91f1391bffcbe294963d605732 upstream. Reading the registers without waiting for engine idle returns unpredictable values. These unpredictable values result in display corruption - if atyfb_imageblit reads the content of DP_PIX_WIDTH with the bit DP_HOST_TRIPLE_EN set (from previous invocation), the driver would never ever clear the bit, resulting in display corruption. We don't want to wait for idle because it would degrade performance, so this patch modifies the driver so that it never reads accelerator registers. HOST_CNTL doesn't have to be read, we can just write it with HOST_BYTE_ALIGN because no other part of the driver cares if HOST_BYTE_ALIGN is set. DP_PIX_WIDTH is written in the functions atyfb_copyarea and atyfb_fillrect with the default value and in atyfb_imageblit with the value set according to the source image data. Signed-off-by: Mikulas Patocka Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/aty/mach64_accel.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/video/fbdev/aty/mach64_accel.c b/drivers/video/fbdev/aty/mach64_accel.c index e3143cf1184c..e9dfe0e40b8b 100644 --- a/drivers/video/fbdev/aty/mach64_accel.c +++ b/drivers/video/fbdev/aty/mach64_accel.c @@ -126,7 +126,7 @@ void aty_init_engine(struct atyfb_par *par, struct fb_info *info) /* set host attributes */ wait_for_fifo(13, par); - aty_st_le32(HOST_CNTL, 0, par); + aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); /* set pattern attributes */ aty_st_le32(PAT_REG0, 0, par); @@ -232,7 +232,8 @@ void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) rotation = rotation24bpp(dx, direction); } - wait_for_fifo(4, par); + wait_for_fifo(5, par); + aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); aty_st_le32(DP_SRC, FRGD_SRC_BLIT, par); aty_st_le32(SRC_Y_X, (sx << 16) | sy, par); aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | area->height, par); @@ -268,7 +269,8 @@ void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT); } - wait_for_fifo(3, par); + wait_for_fifo(4, par); + aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); aty_st_le32(DP_FRGD_CLR, color, par); aty_st_le32(DP_SRC, BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE, @@ -283,7 +285,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) { struct atyfb_par *par = (struct atyfb_par *) info->par; u32 src_bytes, dx = image->dx, dy = image->dy, width = image->width; - u32 pix_width_save, pix_width, host_cntl, rotation = 0, src, mix; + u32 pix_width, rotation = 0, src, mix; if (par->asleep) return; @@ -295,8 +297,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) return; } - pix_width = pix_width_save = aty_ld_le32(DP_PIX_WIDTH, par); - host_cntl = aty_ld_le32(HOST_CNTL, par) | HOST_BYTE_ALIGN; + pix_width = par->crtc.dp_pix_width; switch (image->depth) { case 1: @@ -369,12 +370,11 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) mix = FRGD_MIX_D_XOR_S | BKGD_MIX_D; } - wait_for_fifo(6, par); - aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, par); + wait_for_fifo(5, par); aty_st_le32(DP_PIX_WIDTH, pix_width, par); aty_st_le32(DP_MIX, mix, par); aty_st_le32(DP_SRC, src, par); - aty_st_le32(HOST_CNTL, host_cntl, par); + aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); aty_st_le32(DST_CNTL, DST_Y_TOP_TO_BOTTOM | DST_X_LEFT_TO_RIGHT | rotation, par); draw_rect(dx, dy, width, image->height, par); @@ -423,8 +423,4 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par); } } - - /* restore pix_width */ - wait_for_fifo(1, par); - aty_st_le32(DP_PIX_WIDTH, pix_width_save, par); } From d2926beb35cfd8ebda3aa1e4ee396888b300bb87 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 22 Aug 2018 13:21:53 -0600 Subject: [PATCH 493/812] vhost/scsi: truncate T10 PI iov_iter to prot_bytes commit 4542d623c7134bc1738f8a68ccb6dd546f1c264f upstream. Commands with protection information included were not truncating the protection iov_iter to the number of protection bytes in the command. This resulted in vhost_scsi mis-calculating the size of the protection SGL in vhost_scsi_calc_sgls(), and including both the protection and data SG entries in the protection SGL. Fixes: 09b13fa8c1a1 ("vhost/scsi: Add ANY_LAYOUT support in vhost_scsi_handle_vq") Signed-off-by: Greg Edwards Signed-off-by: Michael S. Tsirkin Fixes: 09b13fa8c1a1093e9458549ac8bb203a7c65c62a Cc: stable@vger.kernel.org Reviewed-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index da6cc25baaef..8fc62a03637a 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1009,7 +1009,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); } /* - * Set prot_iter to data_iter, and advance past any + * Set prot_iter to data_iter and truncate it to + * prot_bytes, and advance data_iter past any * preceeding prot_bytes that may be present. * * Also fix up the exp_data_len to reflect only the @@ -1018,6 +1019,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (prot_bytes) { exp_data_len -= prot_bytes; prot_iter = data_iter; + iov_iter_truncate(&prot_iter, prot_bytes); iov_iter_advance(&data_iter, prot_bytes); } tag = vhost64_to_cpu(vq, v_req_pi.tag); From 298ed64f1e889b87172e2f9ba807318dadc893d7 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Fri, 2 Nov 2018 15:48:15 -0700 Subject: [PATCH 494/812] ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry commit 29aa30167a0a2e6045a0d6d2e89d8168132333d5 upstream. Somehow, file system metadata was corrupted, which causes ocfs2_check_dir_entry() to fail in function ocfs2_dir_foreach_blk_el(). According to the original design intention, if above happens we should skip the problematic block and continue to retrieve dir entry. But there is obviouse misuse of brelse around related code. After failure of ocfs2_check_dir_entry(), current code just moves to next position and uses the problematic buffer head again and again during which the problematic buffer head is released for multiple times. I suppose, this a serious issue which is long-lived in ocfs2. This may cause other file systems which is also used in a the same host insane. So we should also consider about bakcporting this patch into linux -stable. Link: http://lkml.kernel.org/r/HK2PR06MB045211675B43EED794E597B6D56E0@HK2PR06MB0452.apcprd06.prod.outlook.com Signed-off-by: Changwei Ge Suggested-by: Changkuo Shi Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index ffecf89c8c1c..49af618e410d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1896,8 +1896,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, /* On error, skip the f_pos to the next block. */ ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; - brelse(bh); - continue; + break; } if (le64_to_cpu(de->inode)) { unsigned char d_type = DT_UNKNOWN; From 877813e0101429af6df3479e92e93e01910af5c7 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 2 Nov 2018 15:47:59 -0700 Subject: [PATCH 495/812] mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings commit ac5b2c18911ffe95c08d69273917f90212cf5659 upstream. THP allocation might be really disruptive when allocated on NUMA system with the local node full or hard to reclaim. Stefan has posted an allocation stall report on 4.12 based SLES kernel which suggests the same issue: kvm: page allocation stalls for 194572ms, order:9, mode:0x4740ca(__GFP_HIGHMEM|__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|__GFP_MOVABLE|__GFP_DIRECT_RECLAIM), nodemask=(null) kvm cpuset=/ mems_allowed=0-1 CPU: 10 PID: 84752 Comm: kvm Tainted: G W 4.12.0+98-ph 0000001 SLE15 (unreleased) Hardware name: Supermicro SYS-1029P-WTRT/X11DDW-NT, BIOS 2.0 12/05/2017 Call Trace: dump_stack+0x5c/0x84 warn_alloc+0xe0/0x180 __alloc_pages_slowpath+0x820/0xc90 __alloc_pages_nodemask+0x1cc/0x210 alloc_pages_vma+0x1e5/0x280 do_huge_pmd_wp_page+0x83f/0xf00 __handle_mm_fault+0x93d/0x1060 handle_mm_fault+0xc6/0x1b0 __do_page_fault+0x230/0x430 do_page_fault+0x2a/0x70 page_fault+0x7b/0x80 [...] Mem-Info: active_anon:126315487 inactive_anon:1612476 isolated_anon:5 active_file:60183 inactive_file:245285 isolated_file:0 unevictable:15657 dirty:286 writeback:1 unstable:0 slab_reclaimable:75543 slab_unreclaimable:2509111 mapped:81814 shmem:31764 pagetables:370616 bounce:0 free:32294031 free_pcp:6233 free_cma:0 Node 0 active_anon:254680388kB inactive_anon:1112760kB active_file:240648kB inactive_file:981168kB unevictable:13368kB isolated(anon):0kB isolated(file):0kB mapped:280240kB dirty:1144kB writeback:0kB shmem:95832kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 81225728kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no Node 1 active_anon:250583072kB inactive_anon:5337144kB active_file:84kB inactive_file:0kB unevictable:49260kB isolated(anon):20kB isolated(file):0kB mapped:47016kB dirty:0kB writeback:4kB shmem:31224kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 31897600kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no The defrag mode is "madvise" and from the above report it is clear that the THP has been allocated for MADV_HUGEPAGA vma. Andrea has identified that the main source of the problem is __GFP_THISNODE usage: : The problem is that direct compaction combined with the NUMA : __GFP_THISNODE logic in mempolicy.c is telling reclaim to swap very : hard the local node, instead of failing the allocation if there's no : THP available in the local node. : : Such logic was ok until __GFP_THISNODE was added to the THP allocation : path even with MPOL_DEFAULT. : : The idea behind the __GFP_THISNODE addition, is that it is better to : provide local memory in PAGE_SIZE units than to use remote NUMA THP : backed memory. That largely depends on the remote latency though, on : threadrippers for example the overhead is relatively low in my : experience. : : The combination of __GFP_THISNODE and __GFP_DIRECT_RECLAIM results in : extremely slow qemu startup with vfio, if the VM is larger than the : size of one host NUMA node. This is because it will try very hard to : unsuccessfully swapout get_user_pages pinned pages as result of the : __GFP_THISNODE being set, instead of falling back to PAGE_SIZE : allocations and instead of trying to allocate THP on other nodes (it : would be even worse without vfio type1 GUP pins of course, except it'd : be swapping heavily instead). Fix this by removing __GFP_THISNODE for THP requests which are requesting the direct reclaim. This effectivelly reverts 5265047ac301 on the grounds that the zone/node reclaim was known to be disruptive due to premature reclaim when there was memory free. While it made sense at the time for HPC workloads without NUMA awareness on rare machines, it was ultimately harmful in the majority of cases. The existing behaviour is similar, if not as widespare as it applies to a corner case but crucially, it cannot be tuned around like zone_reclaim_mode can. The default behaviour should always be to cause the least harm for the common case. If there are specialised use cases out there that want zone_reclaim_mode in specific cases, then it can be built on top. Longterm we should consider a memory policy which allows for the node reclaim like behavior for the specific memory ranges which would allow a [1] http://lkml.kernel.org/r/20180820032204.9591-1-aarcange@redhat.com Mel said: : Both patches look correct to me but I'm responding to this one because : it's the fix. The change makes sense and moves further away from the : severe stalling behaviour we used to see with both THP and zone reclaim : mode. : : I put together a basic experiment with usemem configured to reference a : buffer multiple times that is 80% the size of main memory on a 2-socket : box with symmetric node sizes and defrag set to "always". The defrag : setting is not the default but it would be functionally similar to : accessing a buffer with madvise(MADV_HUGEPAGE). Usemem is configured to : reference the buffer multiple times and while it's not an interesting : workload, it would be expected to complete reasonably quickly as it fits : within memory. The results were; : : usemem : vanilla noreclaim-v1 : Amean Elapsd-1 42.78 ( 0.00%) 26.87 ( 37.18%) : Amean Elapsd-3 27.55 ( 0.00%) 7.44 ( 73.00%) : Amean Elapsd-4 5.72 ( 0.00%) 5.69 ( 0.45%) : : This shows the elapsed time in seconds for 1 thread, 3 threads and 4 : threads referencing buffers 80% the size of memory. With the patches : applied, it's 37.18% faster for the single thread and 73% faster with two : threads. Note that 4 threads showing little difference does not indicate : the problem is related to thread counts. It's simply the case that 4 : threads gets spread so their workload mostly fits in one node. : : The overall view from /proc/vmstats is more startling : : 4.19.0-rc1 4.19.0-rc1 : vanillanoreclaim-v1r1 : Minor Faults 35593425 708164 : Major Faults 484088 36 : Swap Ins 3772837 0 : Swap Outs 3932295 0 : : Massive amounts of swap in/out without the patch : : Direct pages scanned 6013214 0 : Kswapd pages scanned 0 0 : Kswapd pages reclaimed 0 0 : Direct pages reclaimed 4033009 0 : : Lots of reclaim activity without the patch : : Kswapd efficiency 100% 100% : Kswapd velocity 0.000 0.000 : Direct efficiency 67% 100% : Direct velocity 11191.956 0.000 : : Mostly from direct reclaim context as you'd expect without the patch. : : Page writes by reclaim 3932314.000 0.000 : Page writes file 19 0 : Page writes anon 3932295 0 : Page reclaim immediate 42336 0 : : Writes from reclaim context is never good but the patch eliminates it. : : We should never have default behaviour to thrash the system for such a : basic workload. If zone reclaim mode behaviour is ever desired but on a : single task instead of a global basis then the sensible option is to build : a mempolicy that enforces that behaviour. This was a severe regression compared to previous kernels that made important workloads unusable and it starts when __GFP_THISNODE was added to THP allocations under MADV_HUGEPAGE. It is not a significant risk to go to the previous behavior before __GFP_THISNODE was added, it worked like that for years. This was simply an optimization to some lucky workloads that can fit in a single node, but it ended up breaking the VM for others that can't possibly fit in a single node, so going back is safe. [mhocko@suse.com: rewrote the changelog based on the one from Andrea] Link: http://lkml.kernel.org/r/20180925120326.24392-2-mhocko@kernel.org Fixes: 5265047ac301 ("mm, thp: really limit transparent hugepage allocation to local node") Signed-off-by: Andrea Arcangeli Signed-off-by: Michal Hocko Reported-by: Stefan Priebe Debugged-by: Andrea Arcangeli Reported-by: Alex Williamson Reviewed-by: Mel Gorman Tested-by: Mel Gorman Cc: Zi Yan Cc: Vlastimil Babka Cc: David Rientjes Cc: "Kirill A. Shutemov" Cc: [4.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b777590c3e13..be9840bf11d1 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2010,8 +2010,36 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, nmask = policy_nodemask(gfp, pol); if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); - page = __alloc_pages_node(hpage_node, - gfp | __GFP_THISNODE, order); + /* + * We cannot invoke reclaim if __GFP_THISNODE + * is set. Invoking reclaim with + * __GFP_THISNODE set, would cause THP + * allocations to trigger heavy swapping + * despite there may be tons of free memory + * (including potentially plenty of THP + * already available in the buddy) on all the + * other NUMA nodes. + * + * At most we could invoke compaction when + * __GFP_THISNODE is set (but we would need to + * refrain from invoking reclaim even if + * compaction returned COMPACT_SKIPPED because + * there wasn't not enough memory to succeed + * compaction). For now just avoid + * __GFP_THISNODE instead of limiting the + * allocation path to a strict and single + * compaction invocation. + * + * Supposedly if direct reclaim was enabled by + * the caller, the app prefers THP regardless + * of the node it comes from so this would be + * more desiderable behavior than only + * providing THP originated from the local + * node in such case. + */ + if (!(gfp & __GFP_DIRECT_RECLAIM)) + gfp |= __GFP_THISNODE; + page = __alloc_pages_node(hpage_node, gfp, order); goto out; } } From c7d2166d802e7b0a5d55c0fac117447d004b11a1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Oct 2018 13:06:16 +0200 Subject: [PATCH 496/812] mtd: docg3: don't set conflicting BCH_CONST_PARAMS option commit be2e1c9dcf76886a83fb1c433a316e26d4ca2550 upstream. I noticed during the creation of another bugfix that the BCH_CONST_PARAMS option that is set by DOCG3 breaks setting variable parameters for any other users of the BCH library code. The only other user we have today is the MTD_NAND software BCH implementation (most flash controllers use hardware BCH these days and are not affected). I considered removing BCH_CONST_PARAMS entirely because of the inherent conflict, but according to the description in lib/bch.c there is a significant performance benefit in keeping it. To avoid the immediate problem of the conflict between MTD_NAND_BCH and DOCG3, this only sets the constant parameters if MTD_NAND_BCH is disabled, which should fix the problem for all cases that are affected. This should also work for all stable kernels. Note that there is only one machine that actually seems to use the DOCG3 driver (arch/arm/mach-pxa/mioa701.c), so most users should have the driver disabled, but it almost certainly shows up if we wanted to test random kernels on machines that use software BCH in MTD. Fixes: d13d19ece39f ("mtd: docg3: add ECC correction code") Cc: stable@vger.kernel.org Cc: Robert Jarzmik Signed-off-by: Arnd Bergmann Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/devices/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index f73c41697a00..5ab9a46daf06 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -208,7 +208,7 @@ comment "Disk-On-Chip Device Drivers" config MTD_DOCG3 tristate "M-Systems Disk-On-Chip G3" select BCH - select BCH_CONST_PARAMS + select BCH_CONST_PARAMS if !MTD_NAND_BCH select BITREVERSE ---help--- This provides an MTD device driver for the M-Systems DiskOnChip From 505bc0f3c125f985f4482cacf009437bc26932f9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 22 Oct 2018 09:19:04 -0700 Subject: [PATCH 497/812] termios, tty/tty_baudrate.c: fix buffer overrun commit 991a25194097006ec1e0d2e0814ff920e59e3465 upstream. On architectures with CBAUDEX == 0 (Alpha and PowerPC), the code in tty_baudrate.c does not do any limit checking on the tty_baudrate[] array, and in fact a buffer overrun is possible on both architectures. Add a limit check to prevent that situation. This will be followed by a much bigger cleanup/simplification patch. Signed-off-by: H. Peter Anvin (Intel) Requested-by: Cc: Johan Hovold Cc: Jiri Slaby Cc: Al Viro Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Eugene Syromiatnikov Cc: Alan Cox Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 1445dd39aa62..bece7e39f512 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -330,7 +330,7 @@ speed_t tty_termios_baud_rate(struct ktermios *termios) else cbaud += 15; } - return baud_table[cbaud]; + return cbaud >= n_baud_table ? 0 : baud_table[cbaud]; } EXPORT_SYMBOL(tty_termios_baud_rate); @@ -366,7 +366,7 @@ speed_t tty_termios_input_baud_rate(struct ktermios *termios) else cbaud += 15; } - return baud_table[cbaud]; + return cbaud >= n_baud_table ? 0 : baud_table[cbaud]; #else return tty_termios_baud_rate(termios); #endif From d2db9e2ee15f44173838b52bd0cdbfdea9f21583 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 22 Oct 2018 09:19:05 -0700 Subject: [PATCH 498/812] arch/alpha, termios: implement BOTHER, IBSHIFT and termios2 commit d0ffb805b729322626639336986bc83fc2e60871 upstream. Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags using arbitrary flags. Because BOTHER is not defined, the general Linux code doesn't allow setting arbitrary baud rates, and because CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037. Resolve both problems by #defining BOTHER to 037 on Alpha. However, userspace still needs to know if setting BOTHER is actually safe given legacy kernels (does anyone actually care about that on Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even though they use the same structure. Define struct termios2 just for compatibility; it is the exact same structure as struct termios. In a future patchset, this will be cleaned up so the uapi headers are usable from libc. Signed-off-by: H. Peter Anvin (Intel) Cc: Jiri Slaby Cc: Al Viro Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Eugene Syromiatnikov Cc: Cc: Cc: Johan Hovold Cc: Alan Cox Cc: Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/termios.h | 8 +++++++- arch/alpha/include/uapi/asm/ioctls.h | 5 +++++ arch/alpha/include/uapi/asm/termbits.h | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h index 7fde0f88da88..51ed90be770a 100644 --- a/arch/alpha/include/asm/termios.h +++ b/arch/alpha/include/asm/termios.h @@ -72,9 +72,15 @@ }) #define user_termios_to_kernel_termios(k, u) \ - copy_from_user(k, u, sizeof(struct termios)) + copy_from_user(k, u, sizeof(struct termios2)) #define kernel_termios_to_user_termios(u, k) \ + copy_to_user(u, k, sizeof(struct termios2)) + +#define user_termios_to_kernel_termios_1(k, u) \ + copy_from_user(k, u, sizeof(struct termios)) + +#define kernel_termios_to_user_termios_1(u, k) \ copy_to_user(u, k, sizeof(struct termios)) #endif /* _ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h index f30c94ae1bdb..7ee8ab577e11 100644 --- a/arch/alpha/include/uapi/asm/ioctls.h +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -31,6 +31,11 @@ #define TCXONC _IO('t', 30) #define TCFLSH _IO('t', 31) +#define TCGETS2 _IOR('T', 42, struct termios2) +#define TCSETS2 _IOW('T', 43, struct termios2) +#define TCSETSW2 _IOW('T', 44, struct termios2) +#define TCSETSF2 _IOW('T', 45, struct termios2) + #define TIOCSWINSZ _IOW('t', 103, struct winsize) #define TIOCGWINSZ _IOR('t', 104, struct winsize) #define TIOCSTART _IO('t', 110) /* start output, like ^Q */ diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h index 879dd3589921..483c7ec2a879 100644 --- a/arch/alpha/include/uapi/asm/termbits.h +++ b/arch/alpha/include/uapi/asm/termbits.h @@ -25,6 +25,19 @@ struct termios { speed_t c_ospeed; /* output speed */ }; +/* Alpha has identical termios and termios2 */ + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + /* Alpha has matching termios and ktermios */ struct ktermios { @@ -147,6 +160,7 @@ struct ktermios { #define B3000000 00034 #define B3500000 00035 #define B4000000 00036 +#define BOTHER 00037 #define CSIZE 00001400 #define CS5 00000000 @@ -164,6 +178,9 @@ struct ktermios { #define CMSPAR 010000000000 /* mark or space (stick) parity */ #define CRTSCTS 020000000000 /* flow control */ +#define CIBAUD 07600000 +#define IBSHIFT 16 + /* c_lflag bits */ #define ISIG 0x00000080 #define ICANON 0x00000100 From f02c3c34bda23e96df909d23625073539db19e3d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 5 Nov 2018 11:14:17 +0000 Subject: [PATCH 499/812] Btrfs: fix data corruption due to cloning of eof block commit ac765f83f1397646c11092a032d4f62c3d478b81 upstream. We currently allow cloning a range from a file which includes the last block of the file even if the file's size is not aligned to the block size. This is fine and useful when the destination file has the same size, but when it does not and the range ends somewhere in the middle of the destination file, it leads to corruption because the bytes between the EOF and the end of the block have undefined data (when there is support for discard/trimming they have a value of 0x00). Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ export foo_size=$((256 * 1024 + 100)) $ xfs_io -f -c "pwrite -S 0x3c 0 $foo_size" /mnt/foo $ xfs_io -f -c "pwrite -S 0xb5 0 1M" /mnt/bar $ xfs_io -c "reflink /mnt/foo 0 512K $foo_size" /mnt/bar $ od -A d -t x1 /mnt/bar 0000000 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 * 0524288 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c * 0786528 3c 3c 3c 3c 00 00 00 00 00 00 00 00 00 00 00 00 0786544 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0790528 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 * 1048576 The bytes in the range from 786532 (512Kb + 256Kb + 100 bytes) to 790527 (512Kb + 256Kb + 4Kb - 1) got corrupted, having now a value of 0x00 instead of 0xb5. This is similar to the problem we had for deduplication that got recently fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files"). Fix this by not allowing such operations to be performed and return the errno -EINVAL to user space. This is what XFS is doing as well at the VFS level. This change however now makes us return -EINVAL instead of -EOPNOTSUPP for cases where the source range maps to an inline extent and the destination range's end is smaller then the destination file's size, since the detection of inline extents is done during the actual process of dropping file extent items (at __btrfs_drop_extents()). Returning the -EINVAL error is done early on and solely based on the input parameters (offsets and length) and destination file's size. This makes us consistent with XFS and anyone else supporting cloning since this case is now checked at a higher level in the VFS and is where the -EINVAL will be returned from starting with kernel 4.20 (the VFS changed was introduced in 4.20-rc1 by commit 07d19dc9fbe9 ("vfs: avoid problematic remapping requests into partial EOF block"). So this change is more geared towards stable kernels, as it's unlikely the new VFS checks get removed intentionally. A test case for fstests follows soon, as well as an update to filter existing tests that expect -EOPNOTSUPP to accept -EINVAL as well. CC: # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6caeb946fc1d..150d3c891815 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3950,9 +3950,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, goto out_unlock; if (len == 0) olen = len = src->i_size - off; - /* if we extend to eof, continue to block boundary */ - if (off + len == src->i_size) + /* + * If we extend to eof, continue to block boundary if and only if the + * destination end offset matches the destination file's size, otherwise + * we would be corrupting data by placing the eof block into the middle + * of a file. + */ + if (off + len == src->i_size) { + if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size) + goto out_unlock; len = ALIGN(src->i_size, bs) - off; + } if (len == 0) { ret = 0; From 989e44d6d2688a9bb162589a719410d6e7898502 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Nov 2018 03:48:54 +0000 Subject: [PATCH 500/812] clockevents/drivers/i8253: Add support for PIT shutdown quirk commit 35b69a420bfb56b7b74cb635ea903db05e357bec upstream. Add support for platforms where pit_shutdown() doesn't work because of a quirk in the PIT emulation. On these platforms setting the counter register to zero causes the PIT to start running again, negating the shutdown. Provide a global variable that controls whether the counter register is zero'ed, which platform specific code can override. Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Cc: "gregkh@linuxfoundation.org" Cc: "devel@linuxdriverproject.org" Cc: "daniel.lezcano@linaro.org" Cc: "virtualization@lists.linux-foundation.org" Cc: "jgross@suse.com" Cc: "akataria@vmware.com" Cc: "olaf@aepfle.de" Cc: "apw@canonical.com" Cc: vkuznets Cc: "jasowang@redhat.com" Cc: "marcelo.cerri@canonical.com" Cc: KY Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1541303219-11142-2-git-send-email-mikelley@microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/i8253.c | 14 ++++++++++++-- include/linux/i8253.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index 0efd36e483ab..60c8a9bd562d 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -19,6 +19,13 @@ DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); +/* + * Handle PIT quirk in pit_shutdown() where zeroing the counter register + * restarts the PIT, negating the shutdown. On platforms with the quirk, + * platform specific code can set this to false. + */ +bool i8253_clear_counter_on_shutdown = true; + #ifdef CONFIG_CLKSRC_I8253 /* * Since the PIT overflows every tick, its not very useful @@ -108,8 +115,11 @@ static int pit_shutdown(struct clock_event_device *evt) raw_spin_lock(&i8253_lock); outb_p(0x30, PIT_MODE); - outb_p(0, PIT_CH0); - outb_p(0, PIT_CH0); + + if (i8253_clear_counter_on_shutdown) { + outb_p(0, PIT_CH0); + outb_p(0, PIT_CH0); + } raw_spin_unlock(&i8253_lock); return 0; diff --git a/include/linux/i8253.h b/include/linux/i8253.h index e6bb36a97519..8336b2f6f834 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,6 +21,7 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; +extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); From 2eefc69471f17b4b65fc8ba8356b9ee3a7c523d0 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 17:11:19 -0400 Subject: [PATCH 501/812] ext4: add missing brelse() update_backups()'s error path commit ea0abbb648452cdb6e1734b702b6330a7448fcf8 upstream. Fixes: ac27a0ec112a ("ext4: initial copy of files from ext3") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.19 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 783280ebc2fe..550eb2dfe357 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1095,8 +1095,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, backup_block, backup_block - ext4_group_first_block_no(sb, group)); BUFFER_TRACE(bh, "get_write_access"); - if ((err = ext4_journal_get_write_access(handle, bh))) + if ((err = ext4_journal_get_write_access(handle, bh))) { + brelse(bh); break; + } lock_buffer(bh); memcpy(bh->b_data, data, size); if (rest) From 88cc2d51d221bf5bbc3622a47b07ef0947cdbfa1 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:22:10 -0400 Subject: [PATCH 502/812] ext4: add missing brelse() in set_flexbg_block_bitmap()'s error path commit cea5794122125bf67559906a0762186cf417099c upstream. Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...") Cc: stable@kernel.org # 3.3 Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 550eb2dfe357..9646d4b325cb 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -442,16 +442,18 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); - if (err) + if (err) { + brelse(bh); return err; + } ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, block - start, count2); ext4_set_bits(bh->b_data, block - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (unlikely(err)) return err; - brelse(bh); } return 0; From 47aa49f64f0f2b8db7de8eba2e89490e8371fd7e Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:50:08 -0400 Subject: [PATCH 503/812] ext4: add missing brelse() add_new_gdb_meta_bg()'s error path commit 61a9c11e5e7a0dab5381afa5d9d4dd5ebf18f7a0 upstream. Fixes: 01f795f9e0d6 ("ext4: add online resizing support for meta_bg ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9646d4b325cb..f87dc9e4999d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -899,6 +899,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb, sizeof(struct buffer_head *), GFP_NOFS); if (!n_group_desc) { + brelse(gdb_bh); err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); @@ -914,8 +915,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb, kvfree(o_group_desc); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); - if (unlikely(err)) - brelse(gdb_bh); return err; } From d507dfb5e4fe3e5b01259055c7a4fcaba114f89e Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:13:17 -0400 Subject: [PATCH 504/812] ext4: avoid potential extra brelse in setup_new_flex_group_blocks() commit 9e4028935cca3f9ef9b6a90df9da6f1f94853536 upstream. Currently bh is set to NULL only during first iteration of for cycle, then this pointer is not cleared after end of using. Therefore rollback after errors can lead to extra brelse(bh) call, decrements bh counter and later trigger an unexpected warning in __brelse() Patch moves brelse() calls in body of cycle to exclude requirement of brelse() call in rollback. Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.3+ Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f87dc9e4999d..37f275375089 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -590,7 +590,6 @@ static int setup_new_flex_group_blocks(struct super_block *sb, bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); - bh = NULL; goto out; } overhead = ext4_group_overhead_blocks(sb, group); @@ -602,9 +601,9 @@ static int setup_new_flex_group_blocks(struct super_block *sb, ext4_mark_bitmap_end(group_data[i].blocks_count, sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); handle_ib: if (bg_flags[i] & EXT4_BG_INODE_UNINIT) @@ -619,18 +618,16 @@ static int setup_new_flex_group_blocks(struct super_block *sb, bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); - bh = NULL; goto out; } ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); } - bh = NULL; /* Mark group tables in block bitmap */ for (j = 0; j < GROUP_TABLE_COUNT; j++) { @@ -661,7 +658,6 @@ static int setup_new_flex_group_blocks(struct super_block *sb, } out: - brelse(bh); err2 = ext4_journal_stop(handle); if (err2 && !err) err = err2; From 36ab0ab9b88d85fcb7b6a86db4b13cbbd52c208a Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 16:20:40 -0500 Subject: [PATCH 505/812] ext4: fix possible inode leak in the retry loop of ext4_resize_fs() commit db6aee62406d9fbb53315fcddd81f1dc271d49fa upstream. Fixes: 1c6bd7173d66 ("ext4: convert file system to meta_bg if needed ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 37f275375089..52c28d37112e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2026,6 +2026,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) n_blocks_count_retry = 0; free_flex_gd(flex_gd); flex_gd = NULL; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } goto retry; } From e85d624f7164b51b3a9c081d0d9d7e3b5187dab8 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 17:01:36 -0500 Subject: [PATCH 506/812] ext4: avoid buffer leak in ext4_orphan_add() after prior errors commit feaf264ce7f8d54582e2f66eb82dd9dd124c94f3 upstream. Fixes: d745a8c20c1f ("ext4: reduce contention on s_orphan_lock") Fixes: 6e3617e579e0 ("ext4: Handle non empty on-disk orphan link") Cc: Dmitry Monakhov Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.34 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a1f1e53d0e25..f6a63029b24d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2830,7 +2830,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) list_del_init(&EXT4_I(inode)->i_orphan); mutex_unlock(&sbi->s_orphan_lock); } - } + } else + brelse(iloc.bh); + jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); From 7f168837b121b47de9ce22201293819d65f5d6ac Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 16:16:01 -0500 Subject: [PATCH 507/812] ext4: fix missing cleanup if ext4_alloc_flex_bg_array() fails while resizing commit f348e2241fb73515d65b5d77dd9c174128a7fbf2 upstream. Fixes: 117fff10d7f1 ("ext4: grow the s_flex_groups array as needed ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 52c28d37112e..82df738e5403 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1990,7 +1990,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) err = ext4_alloc_flex_bg_array(sb, n_group + 1); if (err) - return err; + goto out; err = ext4_mb_alloc_groupinfo(sb, n_group + 1); if (err) From 6371a05a1580651c8cd21bb1e49c4985f702c63d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Nov 2018 17:18:17 -0500 Subject: [PATCH 508/812] ext4: avoid possible double brelse() in add_new_gdb() on error path commit 4f32c38b4662312dd3c5f113d8bdd459887fb773 upstream. Fixes: b40971426a83 ("ext4: add error checking to calls to ...") Reported-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.38 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 82df738e5403..bad13f049fb0 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -844,6 +844,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); + iloc.bh = NULL; goto exit_inode; } brelse(dind); From 761ff77def544b52954334db2923feb1a947cebf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 7 Nov 2018 10:32:53 -0500 Subject: [PATCH 509/812] ext4: fix possible leak of sbi->s_group_desc_leak in error path commit 9e463084cdb22e0b56b2dfbc50461020409a5fd3 upstream. Fixes: bfe0a5f47ada ("ext4: add more mount time checks of the superblock") Reported-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.18 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a3d905abbaa9..cd9cd581fd92 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3731,6 +3731,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_groups_count = blocks_count; sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + ret = -EINVAL; + goto failed_mount; + } db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { @@ -3750,14 +3758,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; goto failed_mount; } - if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != - le32_to_cpu(es->s_inodes_count)) { - ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", - le32_to_cpu(es->s_inodes_count), - ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); - ret = -EINVAL; - goto failed_mount; - } bgl_lock_init(sbi->s_blockgroup_lock); From 0d9902d43fb696d460c29ef4ee0b86ce6ddc9a15 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:07:01 -0500 Subject: [PATCH 510/812] ext4: release bs.bh before re-using in ext4_xattr_block_find() commit 45ae932d246f721e6584430017176cbcadfde610 upstream. bs.bh was taken in previous ext4_xattr_block_find() call, it should be released before re-using Fixes: 7e01c8e5420b ("ext3/4: fix uninitialized bs in ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.26 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 16cf24dc9c34..1022c17fc1a1 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1161,6 +1161,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, error = ext4_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { if (EXT4_I(inode)->i_file_acl && !bs.s.base) { + brelse(bs.bh); + bs.bh = NULL; error = ext4_xattr_block_find(inode, &i, &bs); if (error) goto cleanup; From 27acd8f869ba92e70408168ed2bed504a99b154c Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:10:21 -0500 Subject: [PATCH 511/812] ext4: fix buffer leak in ext4_xattr_move_to_block() on error path commit 6bdc9977fcdedf47118d2caf7270a19f4b6d8a8f upstream. Fixes: 3f2571c1f91f ("ext4: factor out xattr moving") Fixes: 6dd4ee7cab7e ("ext4: Expand extra_inodes space per ...") Reviewed-by: Jan Kara Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.23 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1022c17fc1a1..53679716baca 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1490,6 +1490,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, kfree(buffer); if (is) brelse(is->iloc.bh); + if (bs) + brelse(bs->bh); kfree(is); kfree(bs); brelse(bh); From 954ff0468f8bc17baed4673b2458cf8377f0b9ea Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 22:36:23 -0500 Subject: [PATCH 512/812] ext4: fix buffer leak in __ext4_read_dirblock() on error path commit de59fae0043f07de5d25e02ca360f7d57bfa5866 upstream. Fixes: dc6982ff4db1 ("ext4: refactor code to read directory blocks ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.9 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f6a63029b24d..aa08e129149d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -124,6 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (!is_dx_block && type == INDEX) { ext4_error_inode(inode, func, line, block, "directory leaf block found instead of index block"); + brelse(bh); return ERR_PTR(-EFSCORRUPTED); } if (!ext4_has_metadata_csum(inode->i_sb) || From 5becfca9f9714df49b06562b3b6ab434bbcfd3ae Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Oct 2018 10:21:38 -0500 Subject: [PATCH 513/812] mount: Retest MNT_LOCKED in do_umount commit 25d202ed820ee347edec0bf3bf553544556bf64b upstream. It was recently pointed out that the one instance of testing MNT_LOCKED outside of the namespace_sem is in ksys_umount. Fix that by adding a test inside of do_umount with namespace_sem and the mount_lock held. As it helps to fail fails the existing test is maintained with an additional comment pointing out that it may be racy because the locks are not held. Cc: stable@vger.kernel.org Reported-by: Al Viro Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index b56b50e3da11..416496e2f648 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1584,8 +1584,13 @@ static int do_umount(struct mount *mnt, int flags) namespace_lock(); lock_mount_hash(); - event++; + /* Recheck MNT_LOCKED with the locks held */ + retval = -EINVAL; + if (mnt->mnt.mnt_flags & MNT_LOCKED) + goto out; + + event++; if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); @@ -1599,6 +1604,7 @@ static int do_umount(struct mount *mnt, int flags) retval = 0; } } +out: unlock_mount_hash(); namespace_unlock(); return retval; @@ -1681,7 +1687,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; - if (mnt->mnt.mnt_flags & MNT_LOCKED) + if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ goto dput_and_out; retval = -EPERM; if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) From 6258099fd2c9573cde4e3201bef5ef41a56618c5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Oct 2018 09:04:18 -0500 Subject: [PATCH 514/812] mount: Don't allow copying MNT_UNBINDABLE|MNT_LOCKED mounts commit df7342b240185d58d3d9665c0bbf0a0f5570ec29 upstream. Jonathan Calmels from NVIDIA reported that he's able to bypass the mount visibility security check in place in the Linux kernel by using a combination of the unbindable property along with the private mount propagation option to allow a unprivileged user to see a path which was purposefully hidden by the root user. Reproducer: # Hide a path to all users using a tmpfs root@castiana:~# mount -t tmpfs tmpfs /sys/devices/ root@castiana:~# # As an unprivileged user, unshare user namespace and mount namespace stgraber@castiana:~$ unshare -U -m -r # Confirm the path is still not accessible root@castiana:~# ls /sys/devices/ # Make /sys recursively unbindable and private root@castiana:~# mount --make-runbindable /sys root@castiana:~# mount --make-private /sys # Recursively bind-mount the rest of /sys over to /mnnt root@castiana:~# mount --rbind /sys/ /mnt # Access our hidden /sys/device as an unprivileged user root@castiana:~# ls /mnt/devices/ breakpoint cpu cstate_core cstate_pkg i915 intel_pt isa kprobe LNXSYSTM:00 msr pci0000:00 platform pnp0 power software system tracepoint uncore_arb uncore_cbox_0 uncore_cbox_1 uprobe virtual Solve this by teaching copy_tree to fail if a mount turns out to be both unbindable and locked. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Jonathan Calmels Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 416496e2f648..2d718d552a37 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1765,8 +1765,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, for (s = r; s; s = next_mnt(s, r)) { if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { - s = skip_mnt_tree(s); - continue; + if (s->mnt.mnt_flags & MNT_LOCKED) { + /* Both unbindable and locked. */ + q = ERR_PTR(-EPERM); + goto out; + } else { + s = skip_mnt_tree(s); + continue; + } } if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(s->mnt.mnt_root)) { From f7e6ee2e18cc745f05a9fe678de65ac24a6adcfd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Oct 2018 12:05:11 -0500 Subject: [PATCH 515/812] mount: Prevent MNT_DETACH from disconnecting locked mounts commit 9c8e0a1b683525464a2abe9fb4b54404a50ed2b4 upstream. Timothy Baldwin wrote: > As per mount_namespaces(7) unprivileged users should not be able to look under mount points: > > Mounts that come as a single unit from more privileged mount are locked > together and may not be separated in a less privileged mount namespace. > > However they can: > > 1. Create a mount namespace. > 2. In the mount namespace open a file descriptor to the parent of a mount point. > 3. Destroy the mount namespace. > 4. Use the file descriptor to look under the mount point. > > I have reproduced this with Linux 4.16.18 and Linux 4.18-rc8. > > The setup: > > $ sudo sysctl kernel.unprivileged_userns_clone=1 > kernel.unprivileged_userns_clone = 1 > $ mkdir -p A/B/Secret > $ sudo mount -t tmpfs hide A/B > > > "Secret" is indeed hidden as expected: > > $ ls -lR A > A: > total 0 > drwxrwxrwt 2 root root 40 Feb 12 21:08 B > > A/B: > total 0 > > > The attack revealing "Secret": > > $ unshare -Umr sh -c "exec unshare -m ls -lR /proc/self/fd/4/ 4 /proc/self/fd/4/: > total 0 > drwxr-xr-x 3 root root 60 Feb 12 21:08 B > > /proc/self/fd/4/B: > total 0 > drwxr-xr-x 2 root root 40 Feb 12 21:08 Secret > > /proc/self/fd/4/B/Secret: > total 0 I tracked this down to put_mnt_ns running passing UMOUNT_SYNC and disconnecting all of the mounts in a mount namespace. Fix this by factoring drop_mounts out of drop_collected_mounts and passing 0 instead of UMOUNT_SYNC. There are two possible behavior differences that result from this. - No longer setting UMOUNT_SYNC will no longer set MNT_SYNC_UMOUNT on the vfsmounts being unmounted. This effects the lazy rcu walk by kicking the walk out of rcu mode and forcing it to be a non-lazy walk. - No longer disconnecting locked mounts will keep some mounts around longer as they stay because the are locked to other mounts. There are only two users of drop_collected mounts: audit_tree.c and put_mnt_ns. In audit_tree.c the mounts are private and there are no rcu lazy walks only calls to iterate_mounts. So the changes should have no effect except for a small timing effect as the connected mounts are disconnected. In put_mnt_ns there may be references from process outside the mount namespace to the mounts. So the mounts remaining connected will be the bug fix that is needed. That rcu walks are allowed to continue appears not to be a problem especially as the rcu walk change was about an implementation detail not about semantics. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Timothy Baldwin Tested-by: Timothy Baldwin Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 2d718d552a37..88c5d5bddf74 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1825,7 +1825,7 @@ void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); - umount_tree(real_mount(mnt), UMOUNT_SYNC); + umount_tree(real_mount(mnt), 0); unlock_mount_hash(); namespace_unlock(); } From 4293fbc271e4da53c690a1c913ad3b2252b575ab Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Tue, 30 Oct 2018 15:10:40 -0500 Subject: [PATCH 516/812] sunrpc: correct the computation for page_ptr when truncating commit 5d7a5bcb67c70cbc904057ef52d3fcfeb24420bb upstream. When truncating the encode buffer, the page_ptr is getting advanced, causing the next page to be skipped while encoding. The page is still included in the response, so the response contains a page of bogus data. We need to adjust the page_ptr backwards to ensure we encode the next page into the correct place. We saw this triggered when concurrent directory modifications caused nfsd4_encode_direct_fattr() to return nfserr_noent, and the resulting call to xdr_truncate_encode() corrupted the READDIR reply. Signed-off-by: Frank Sorenson Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xdr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 4439ac4c1b53..9b8d855e4a87 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -639,11 +639,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) WARN_ON_ONCE(xdr->iov); return; } - if (fraglen) { + if (fraglen) xdr->end = head->iov_base + head->iov_len; - xdr->page_ptr--; - } /* (otherwise assume xdr->end is already set) */ + xdr->page_ptr--; head->iov_len = len; buf->len = len; xdr->p = head->iov_base + head->iov_len; From 453e01cc6d80b189b8dae4564dc8680093bbb2ec Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 5 Nov 2018 03:48:25 +0000 Subject: [PATCH 517/812] rtc: hctosys: Add missing range error reporting commit 7ce9a992ffde8ce93d5ae5767362a5c7389ae895 upstream. Fix an issue with the 32-bit range error path in `rtc_hctosys' where no error code is set and consequently the successful preceding call result from `rtc_read_time' is propagated to `rtc_hctosys_ret'. This in turn makes any subsequent call to `hctosys_show' incorrectly report in sysfs that the system time has been set from this RTC while it has not. Set the error to ERANGE then if we can't express the result due to an overflow. Signed-off-by: Maciej W. Rozycki Fixes: b3a5ac42ab18 ("rtc: hctosys: Ensure system time doesn't overflow time_t") Cc: stable@vger.kernel.org # 4.17+ Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/hctosys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c index e79f2a181ad2..b9ec4a16db1f 100644 --- a/drivers/rtc/hctosys.c +++ b/drivers/rtc/hctosys.c @@ -50,8 +50,10 @@ static int __init rtc_hctosys(void) tv64.tv_sec = rtc_tm_to_time64(&tm); #if BITS_PER_LONG == 32 - if (tv64.tv_sec > INT_MAX) + if (tv64.tv_sec > INT_MAX) { + err = -ERANGE; goto err_read; + } #endif err = do_settimeofday64(&tv64); From 6023d16fdb84e849d4aa60c2bc1ea294a217d6cb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Nov 2018 15:52:16 +0100 Subject: [PATCH 518/812] fuse: fix leaked notify reply commit 7fabaf303458fcabb694999d6fa772cc13d4e217 upstream. fuse_request_send_notify_reply() may fail if the connection was reset for some reason (e.g. fs was unmounted). Don't leak request reference in this case. Besides leaking memory, this resulted in fc->num_waiting not being decremented and hence fuse_wait_aborted() left in a hanging and unkillable state. Fixes: 2d45ba381a74 ("fuse: add retrieve request") Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests") Reported-and-tested-by: syzbot+6339eda9cb4ebbc4c37b@syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi Cc: #v2.6.36 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 18e2f54737d1..e566652ac922 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1771,8 +1771,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.args[1].size = total_len; err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); - if (err) + if (err) { fuse_retrieve_end(fc, req); + fuse_put_request(fc, req); + } return err; } From 9546d6609e1095af6a5c8728238234e1b6b4d85c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:56:54 -0700 Subject: [PATCH 519/812] configfs: replace strncpy with memcpy commit 1823342a1f2b47a4e6f5667f67cd28ab6bc4d6cd upstream. gcc 8.1.0 complains: fs/configfs/symlink.c:67:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length fs/configfs/symlink.c: In function 'configfs_get_link': fs/configfs/symlink.c:63:13: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Signed-off-by: Christoph Hellwig Signed-off-by: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- fs/configfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 0525ebc3aea2..66e8c5d58b21 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -64,7 +64,7 @@ static void fill_item_path(struct config_item * item, char * buffer, int length) /* back up enough to print this bus id with '/' */ length -= cur; - strncpy(buffer + length,config_item_name(p),cur); + memcpy(buffer + length, config_item_name(p), cur); *(buffer + --length) = '/'; } } From 575361a2cbe0bfe1ef9bf43a8c017074f913c4e3 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 16 Nov 2018 15:08:04 -0800 Subject: [PATCH 520/812] hugetlbfs: fix kernel BUG at fs/hugetlbfs/inode.c:444! commit 5e41540c8a0f0e98c337dda8b391e5dda0cde7cf upstream. This bug has been experienced several times by the Oracle DB team. The BUG is in remove_inode_hugepages() as follows: /* * If page is mapped, it was faulted in after being * unmapped in caller. Unmap (again) now after taking * the fault mutex. The mutex will prevent faults * until we finish removing the page. * * This race can only happen in the hole punch case. * Getting here in a truncate operation is a bug. */ if (unlikely(page_mapped(page))) { BUG_ON(truncate_op); In this case, the elevated map count is not the result of a race. Rather it was incorrectly incremented as the result of a bug in the huge pmd sharing code. Consider the following: - Process A maps a hugetlbfs file of sufficient size and alignment (PUD_SIZE) that a pmd page could be shared. - Process B maps the same hugetlbfs file with the same size and alignment such that a pmd page is shared. - Process B then calls mprotect() to change protections for the mapping with the shared pmd. As a result, the pmd is 'unshared'. - Process B then calls mprotect() again to chage protections for the mapping back to their original value. pmd remains unshared. - Process B then forks and process C is created. During the fork process, we do dup_mm -> dup_mmap -> copy_page_range to copy page tables. Copying page tables for hugetlb mappings is done in the routine copy_hugetlb_page_range. In copy_hugetlb_page_range(), the destination pte is obtained by: dst_pte = huge_pte_alloc(dst, addr, sz); If pmd sharing is possible, the returned pointer will be to a pte in an existing page table. In the situation above, process C could share with either process A or process B. Since process A is first in the list, the returned pte is a pointer to a pte in process A's page table. However, the check for pmd sharing in copy_hugetlb_page_range is: /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) continue; Since process C is sharing with process A instead of process B, the above test fails. The code in copy_hugetlb_page_range which follows assumes dst_pte points to a huge_pte_none pte. It copies the pte entry from src_pte to dst_pte and increments this map count of the associated page. This is how we end up with an elevated map count. To solve, check the dst_pte entry for huge_pte_none. If !none, this implies PMD sharing so do not copy. Link: http://lkml.kernel.org/r/20181105212315.14125-1-mike.kravetz@oracle.com Fixes: c5c99429fa57 ("fix hugepages leak due to pagetable page sharing") Signed-off-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Cc: Michal Hocko Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Davidlohr Bueso Cc: Prakash Sangappa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1033ff44ac62..b09988d8e498 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3103,7 +3103,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte) int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { - pte_t *src_pte, *dst_pte, entry; + pte_t *src_pte, *dst_pte, entry, dst_entry; struct page *ptepage; unsigned long addr; int cow; @@ -3131,15 +3131,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, break; } - /* If the pagetables are shared don't copy or take references */ - if (dst_pte == src_pte) + /* + * If the pagetables are shared don't copy or take references. + * dst_pte == src_pte is the common case of src/dest sharing. + * + * However, src could have 'unshared' and dst shares with + * another vma. If dst_pte !none, this implies sharing. + * Check here before taking page table lock, and once again + * after taking the lock below. + */ + dst_entry = huge_ptep_get(dst_pte); + if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) continue; dst_ptl = huge_pte_lock(h, dst, dst_pte); src_ptl = huge_pte_lockptr(h, src, src_pte); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); - if (huge_pte_none(entry)) { /* skip none entry */ + dst_entry = huge_ptep_get(dst_pte); + if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) { + /* + * Skip if src entry none. Also, skip in the + * unlikely case dst entry !none as this implies + * sharing with another vma. + */ ; } else if (unlikely(is_hugetlb_entry_migration(entry) || is_hugetlb_entry_hwpoisoned(entry))) { From b026c7ee565994721805503b570cf5b7feed608b Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 5 Oct 2018 15:51:29 -0700 Subject: [PATCH 521/812] mm: migration: fix migration of huge PMD shared pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 017b1660df89f5fb4bfe66c34e35f7d2031100c7 upstream. The page migration code employs try_to_unmap() to try and unmap the source page. This is accomplished by using rmap_walk to find all vmas where the page is mapped. This search stops when page mapcount is zero. For shared PMD huge pages, the page map count is always 1 no matter the number of mappings. Shared mappings are tracked via the reference count of the PMD page. Therefore, try_to_unmap stops prematurely and does not completely unmap all mappings of the source page. This problem can result is data corruption as writes to the original source page can happen after contents of the page are copied to the target page. Hence, data is lost. This problem was originally seen as DB corruption of shared global areas after a huge page was soft offlined due to ECC memory errors. DB developers noticed they could reproduce the issue by (hotplug) offlining memory used to back huge pages. A simple testcase can reproduce the problem by creating a shared PMD mapping (note that this must be at least PUD_SIZE in size and PUD_SIZE aligned (1GB on x86)), and using migrate_pages() to migrate process pages between nodes while continually writing to the huge pages being migrated. To fix, have the try_to_unmap_one routine check for huge PMD sharing by calling huge_pmd_unshare for hugetlbfs huge pages. If it is a shared mapping it will be 'unshared' which removes the page table entry and drops the reference on the PMD page. After this, flush caches and TLB. mmu notifiers are called before locking page tables, but we can not be sure of PMD sharing until page tables are locked. Therefore, check for the possibility of PMD sharing before locking so that notifiers can prepare for the worst possible case. Link: http://lkml.kernel.org/r/20180823205917.16297-2-mike.kravetz@oracle.com [mike.kravetz@oracle.com: make _range_in_vma() a static inline] Link: http://lkml.kernel.org/r/6063f215-a5c8-2f0c-465a-2c515ddc952d@oracle.com Fixes: 39dde65c9940 ("shared page table for hugetlb page") Signed-off-by: Mike Kravetz Acked-by: Kirill A. Shutemov Reviewed-by: Naoya Horiguchi Acked-by: Michal Hocko Cc: Vlastimil Babka Cc: Davidlohr Bueso Cc: Jerome Glisse Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Reviewed-by: Jérôme Glisse Signed-off-by: Greg Kroah-Hartman --- include/linux/hugetlb.h | 14 +++++++++++ include/linux/mm.h | 6 +++++ mm/hugetlb.c | 37 +++++++++++++++++++++++++-- mm/rmap.c | 56 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 2 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 685c262e0be8..3957d99e66ea 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -110,6 +110,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, @@ -132,6 +134,18 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } +static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, + pte_t *ptep) +{ + return 0; +} + +static inline void adjust_range_if_pmd_sharing_possible( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} + #define follow_hugetlb_page(m,v,p,vs,a,b,i,w) ({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) diff --git a/include/linux/mm.h b/include/linux/mm.h index 1f4366567e7d..d4e8077fca96 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2058,6 +2058,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, return vma; } +static inline bool range_in_vma(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + return (vma && vma->vm_start <= start && end <= vma->vm_end); +} + #ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); void vma_set_page_prot(struct vm_area_struct *vma); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b09988d8e498..6f99a0f906bb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4216,12 +4216,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) /* * check on proper vm_flags and page table alignment */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) + if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end)) return true; return false; } +/* + * Determine if start,end range within vma could be mapped by shared pmd. + * If yes, adjust start and end to cover range associated with possible + * shared pmd mappings. + */ +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + unsigned long check_addr = *start; + + if (!(vma->vm_flags & VM_MAYSHARE)) + return; + + for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) { + unsigned long a_start = check_addr & PUD_MASK; + unsigned long a_end = a_start + PUD_SIZE; + + /* + * If sharing is possible, adjust start/end if necessary. + */ + if (range_in_vma(vma, a_start, a_end)) { + if (a_start < *start) + *start = a_start; + if (a_end > *end) + *end = a_end; + } + } +} + /* * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * and returns the corresponding pte. While this is not necessary for the @@ -4318,6 +4346,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; } + +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} #define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ diff --git a/mm/rmap.c b/mm/rmap.c index 1bceb49aa214..488dda209431 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1324,12 +1324,41 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, pte_t pteval; spinlock_t *ptl; int ret = SWAP_AGAIN; + unsigned long sh_address; + bool pmd_sharing_possible = false; + unsigned long spmd_start, spmd_end; enum ttu_flags flags = (enum ttu_flags)arg; /* munlock has nothing to gain from examining un-locked vmas */ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) goto out; + /* + * Only use the range_start/end mmu notifiers if huge pmd sharing + * is possible. In the normal case, mmu_notifier_invalidate_page + * is sufficient as we only unmap a page. However, if we unshare + * a pmd, we will unmap a PUD_SIZE range. + */ + if (PageHuge(page)) { + spmd_start = address; + spmd_end = spmd_start + vma_mmu_pagesize(vma); + + /* + * Check if pmd sharing is possible. If possible, we could + * unmap a PUD_SIZE range. spmd_start/spmd_end will be + * modified if sharing is possible. + */ + adjust_range_if_pmd_sharing_possible(vma, &spmd_start, + &spmd_end); + if (spmd_end - spmd_start != vma_mmu_pagesize(vma)) { + sh_address = address; + + pmd_sharing_possible = true; + mmu_notifier_invalidate_range_start(vma->vm_mm, + spmd_start, spmd_end); + } + } + pte = page_check_address(page, mm, address, &ptl, 0); if (!pte) goto out; @@ -1356,6 +1385,30 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } } + /* + * Call huge_pmd_unshare to potentially unshare a huge pmd. Pass + * sh_address as it will be modified if unsharing is successful. + */ + if (PageHuge(page) && huge_pmd_unshare(mm, &sh_address, pte)) { + /* + * huge_pmd_unshare unmapped an entire PMD page. There is + * no way of knowing exactly which PMDs may be cached for + * this mm, so flush them all. spmd_start/spmd_end cover + * this PUD_SIZE range. + */ + flush_cache_range(vma, spmd_start, spmd_end); + flush_tlb_range(vma, spmd_start, spmd_end); + + /* + * The ref count of the PMD page was dropped which is part + * of the way map counting is done for shared PMDs. When + * there is no other sharing, huge_pmd_unshare returns false + * and we will unmap the actual page and drop map count + * to zero. + */ + goto out_unmap; + } + /* Nuke the page table entry. */ flush_cache_page(vma, address, page_to_pfn(page)); if (should_defer_flush(mm, flags)) { @@ -1450,6 +1503,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK)) mmu_notifier_invalidate_page(mm, address); out: + if (pmd_sharing_possible) + mmu_notifier_invalidate_range_end(vma->vm_mm, + spmd_start, spmd_end); return ret; } From 67dbeda8f4bb1c728683869ee7c35d50f73e76a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 5 Aug 2018 13:48:07 +0100 Subject: [PATCH 522/812] drm/rockchip: Allow driver to be shutdown on reboot/kexec commit 7f3ef5dedb146e3d5063b6845781ad1bb59b92b5 upstream. Leaving the DRM driver enabled on reboot or kexec has the annoying effect of leaving the display generating transactions whilst the IOMMU has been shut down. In turn, the IOMMU driver (which shares its interrupt line with the VOP) starts warning either on shutdown or when entering the secondary kernel in the kexec case (nothing is expected on that front). A cheap way of ensuring that things are nicely shut down is to register a shutdown callback in the platform driver. Signed-off-by: Marc Zyngier Tested-by: Vicente Bergas Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20180805124807.18169-1-marc.zyngier@arm.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index f22e1e1ee64a..d1f3be78c649 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -547,6 +547,11 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev) return 0; } +static void rockchip_drm_platform_shutdown(struct platform_device *pdev) +{ + rockchip_drm_platform_remove(pdev); +} + static const struct of_device_id rockchip_drm_dt_ids[] = { { .compatible = "rockchip,display-subsystem", }, { /* sentinel */ }, @@ -556,6 +561,7 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids); static struct platform_driver rockchip_drm_platform_driver = { .probe = rockchip_drm_platform_probe, .remove = rockchip_drm_platform_remove, + .shutdown = rockchip_drm_platform_shutdown, .driver = { .name = "rockchip-drm", .of_match_table = rockchip_drm_dt_ids, From 8ef21c4005bdeb3c7932837abe571d30a1d36ec8 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Fri, 9 Nov 2018 11:00:12 +0200 Subject: [PATCH 523/812] drm/dp_mst: Check if primary mstb is null commit 23d8003907d094f77cf959228e2248d6db819fa7 upstream. Unfortunately drm_dp_get_mst_branch_device which is called from both drm_dp_mst_handle_down_rep and drm_dp_mst_handle_up_rep seem to rely on that mgr->mst_primary is not NULL, which seem to be wrong as it can be cleared with simultaneous mode set, if probing fails or in other case. mgr->lock mutex doesn't protect against that as it might just get assigned to NULL right before, not simultaneously. There are currently bugs 107738, 108616 bugs which crash in drm_dp_get_mst_branch_device, caused by this issue. v2: Refactored the code, as it was nicely noticed. Fixed Bugzilla bug numbers(second was 108616, but not 108816) and added links. [changed title and added stable cc] Signed-off-by: Lyude Paul Signed-off-by: Stanislav Lisovskiy Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108616 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107738 Link: https://patchwork.freedesktop.org/patch/msgid/20181109090012.24438-1-stanislav.lisovskiy@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 273e05a3c933..5a1bafb5ecbb 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1225,6 +1225,9 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ mutex_lock(&mgr->lock); mstb = mgr->mst_primary; + if (!mstb) + goto out; + for (i = 0; i < lct - 1; i++) { int shift = (i % 2) ? 0 : 4; int port_num = (rad[i / 2] >> shift) & 0xf; From 29a231817b4fc364931b43a35d29d6945dd8f317 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Thu, 25 Oct 2018 11:52:00 -0700 Subject: [PATCH 524/812] drm/i915/hdmi: Add HDMI 2.0 audio clock recovery N values commit 6503493145cba4413ecd3d4d153faeef4a1e9b85 upstream. HDMI 2.0 594Mhz modes were incorrectly selecting 25.200Mhz Automatic N value mode instead of HDMI specification values. V2: Fix 88.2 Hz N value Cc: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Clint Taylor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1540493521-1746-2-git-send-email-clinton.a.taylor@intel.com (cherry picked from commit 5a400aa3c562c4a726b4da286e63c96db905ade1) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_audio.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 4dccd9b003a1..0d738d7870fd 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -76,6 +76,9 @@ static const struct { /* HDMI N/CTS table */ #define TMDS_297M 297000 #define TMDS_296M 296703 +#define TMDS_594M 594000 +#define TMDS_593M 593407 + static const struct { int sample_rate; int clock; @@ -96,6 +99,20 @@ static const struct { { 176400, TMDS_297M, 18816, 247500 }, { 192000, TMDS_296M, 23296, 281250 }, { 192000, TMDS_297M, 20480, 247500 }, + { 44100, TMDS_593M, 8918, 937500 }, + { 44100, TMDS_594M, 9408, 990000 }, + { 48000, TMDS_593M, 5824, 562500 }, + { 48000, TMDS_594M, 6144, 594000 }, + { 32000, TMDS_593M, 5824, 843750 }, + { 32000, TMDS_594M, 3072, 445500 }, + { 88200, TMDS_593M, 17836, 937500 }, + { 88200, TMDS_594M, 18816, 990000 }, + { 96000, TMDS_593M, 11648, 562500 }, + { 96000, TMDS_594M, 12288, 594000 }, + { 176400, TMDS_593M, 35672, 937500 }, + { 176400, TMDS_594M, 37632, 990000 }, + { 192000, TMDS_593M, 23296, 562500 }, + { 192000, TMDS_594M, 24576, 594000 }, }; /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */ From 8e592f4a26addb5d2008548787fbe3c9cc960fd5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Nov 2018 09:27:45 +0100 Subject: [PATCH 525/812] Linux 4.4.164 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4e3179768eea..9382e7e4e750 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 163 +SUBLEVEL = 164 EXTRAVERSION = NAME = Blurry Fish Butt From 8c1741548937903b02e92b6fe2d14d90acedf14d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EB=B0=B0=EC=84=9D=EC=A7=84?= Date: Fri, 9 Nov 2018 16:53:06 -0800 Subject: [PATCH 526/812] flow_dissector: do not dissect l4 ports for fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 62230715fd2453b3ba948c9d83cfb3ada9169169 ] Only first fragment has the sport/dport information, not the following ones. If we want consistent hash for all fragments, we need to ignore ports even for first fragment. This bug is visible for IPv6 traffic, if incoming fragments do not have a flow label, since skb_get_hash() will give different results for first fragment and following ones. It is also visible if any routing rule wants dissection and sport or dport. See commit 5e5d6fed3741 ("ipv6: route: dissect flow in input path if fib rules need it") for details. [edumazet] rewrote the changelog completely. Fixes: 06635a35d13d ("flow_dissect: use programable dissector in skb_flow_dissect and friends") Signed-off-by: 배석진 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 4d14908afaec..697c4212129a 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -480,8 +480,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb, break; } - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) && + !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); From 7876c2d6ced154eb7eff3587d75f02eda3a8f45f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 16 Nov 2018 16:58:19 +0100 Subject: [PATCH 527/812] ip_tunnel: don't force DF when MTU is locked [ Upstream commit 16f7eb2b77b55da816c4e207f3f9440a8cafc00a ] The various types of tunnels running over IPv4 can ask to set the DF bit to do PMTU discovery. However, PMTU discovery is subject to the threshold set by the net.ipv4.route.min_pmtu sysctl, and is also disabled on routes with "mtu lock". In those cases, we shouldn't set the DF bit. This patch makes setting the DF bit conditional on the route's MTU locking state. This issue seems to be older than git history. Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index dbda0565781c..4916d1857b75 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -71,7 +71,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; + iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df; iph->protocol = proto; iph->tos = tos; iph->daddr = dst; From 4dc0c62a488f341fa61458eb415db34bf4723657 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 17 Nov 2018 21:57:02 -0800 Subject: [PATCH 528/812] net-gro: reset skb->pkt_type in napi_reuse_skb() [ Upstream commit 33d9a2c72f086cbf1087b2fd2d1a15aa9df14a7f ] eth_type_trans() assumes initial value for skb->pkt_type is PACKET_HOST. This is indeed the value right after a fresh skb allocation. However, it is possible that GRO merged a packet with a different value (like PACKET_OTHERHOST in case macvlan is used), so we need to make sure napi->skb will have pkt_type set back to PACKET_HOST. Otherwise, valid packets might be dropped by the stack because their pkt_type is not PACKET_HOST. napi_reuse_skb() was added in commit 96e93eab2033 ("gro: Add internal interfaces for VLAN"), but this bug always has been there. Fixes: 96e93eab2033 ("gro: Add internal interfaces for VLAN") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 7366feb8b5b3..e03c1d2f6707 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4481,6 +4481,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + + /* eth_type_trans() assumes pkt_type is PACKET_HOST */ + skb->pkt_type = PACKET_HOST; + skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); From 51d258e4c75bbaf1a190564ea70997a8a33570f0 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Tue, 20 Nov 2018 10:04:04 +0530 Subject: [PATCH 529/812] tg3: Add PHY reset for 5717/5719/5720 in change ring and flow control paths [ Upstream commit 59663e42199c93d1d7314d1446f6782fc4b1eb81 ] This patch has the fix to avoid PHY lockup with 5717/5719/5720 in change ring and flow control paths. This patch solves the RX hang while doing continuous ring or flow control parameters with heavy traffic from peer. Signed-off-by: Siva Reddy Kallam Acked-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ce3a56bea6e6..58102e96ac5c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12379,6 +12379,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e { struct tg3 *tp = netdev_priv(dev); int i, irq_sync = 0, err = 0; + bool reset_phy = false; if ((ering->rx_pending > tp->rx_std_ring_mask) || (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) || @@ -12410,7 +12411,13 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, false); + /* Reset PHY to avoid PHY lock up */ + if (tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) + reset_phy = true; + + err = tg3_restart_hw(tp, reset_phy); if (!err) tg3_netif_start(tp); } @@ -12444,6 +12451,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam { struct tg3 *tp = netdev_priv(dev); int err = 0; + bool reset_phy = false; if (tp->link_config.autoneg == AUTONEG_ENABLE) tg3_warn_mgmt_link_flap(tp); @@ -12534,7 +12542,13 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, false); + /* Reset PHY to avoid PHY lock up */ + if (tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) + reset_phy = true; + + err = tg3_restart_hw(tp, reset_phy); if (!err) tg3_netif_start(tp); } From c37215a94f5ee89ec10575ea28ec20f1e89f3015 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 18 Nov 2018 10:45:30 -0800 Subject: [PATCH 530/812] ipv6: Fix PMTU updates for UDP/raw sockets in presence of VRF [ Upstream commit 7ddacfa564870cdd97275fd87decb6174abc6380 ] Preethi reported that PMTU discovery for UDP/raw applications is not working in the presence of VRF when the socket is not bound to a device. The problem is that ip6_sk_update_pmtu does not consider the L3 domain of the skb device if the socket is not bound. Update the function to set oif to the L3 master device if relevant. Fixes: ca254490c8df ("net: Add VRF support to IPv6 stack") Reported-by: Preethi Ramachandra Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2f6d8f57fdd4..1cb8954885ec 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1420,8 +1420,12 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { - ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + int oif = sk->sk_bound_dev_if; + + if (!oif && skb->dev) + oif = l3mdev_master_ifindex(skb->dev); + + ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); From cbb5fc70e499b794741f431443874be6977ac539 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Fri, 21 Apr 2017 11:20:01 -0700 Subject: [PATCH 531/812] kbuild: Add better clang cross build support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 785f11aa595bc3d4e74096cbd598ada54ecc0d81 upstream. Add cross target to CC if using clang. Also add custom gcc toolchain path for fallback gcc tools. Clang will fallback to using things like ld, as, and libgcc if (respectively) one of the llvm linkers isn't available, the integrated assembler is turned off, or an appropriately cross-compiled version of compiler-rt isn't available. To this end, you can specify the path to this fallback gcc toolchain with GCC_TOOLCHAIN. Signed-off-by: Behan Webster Reviewed-by: Jan-Simon Möller Reviewed-by: Mark Charlebois Signed-off-by: Greg Hackmann Signed-off-by: Matthias Kaehlcke Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Makefile b/Makefile index 9382e7e4e750..7fedb079d7a7 100644 --- a/Makefile +++ b/Makefile @@ -698,6 +698,15 @@ endif KBUILD_CFLAGS += $(stackp-flag) ifeq ($(cc-name),clang) +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET := -target $(notdir $(CROSS_COMPILE:%-=%)) +GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +endif +ifneq ($(GCC_TOOLCHAIN),) +CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) +endif +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,) KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) From 3ebd7941d0bdc85ebfc6eaa58fbf53e0f26e00e3 Mon Sep 17 00:00:00 2001 From: Michael Davidson Date: Tue, 25 Apr 2017 15:47:35 -0700 Subject: [PATCH 532/812] kbuild: clang: add -no-integrated-as to KBUILD_[AC]FLAGS commit a37c45cd82e62a361706b9688a984a3a63957321 upstream. The Linux Kernel relies on GCC's acceptance of inline assembly as an opaque object which will not have any validation performed on the content. The current behaviour in LLVM is to perform validation of the contents by means of parsing the input if the MC layer can handle it. Disable clangs integrated assembler and use the GNU assembler instead. Wording-mostly-from: Saleem Abdulrasool Signed-off-by: Michael Davidson Signed-off-by: Matthias Kaehlcke Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 7fedb079d7a7..0c29f97946b4 100644 --- a/Makefile +++ b/Makefile @@ -719,6 +719,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # See modpost pattern 2 KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) else # These warnings generated too much noise in a regular build. From c7288b457145b8e9a3ffbee1c0fb78afeb9d3693 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 12 Apr 2017 12:43:52 -0700 Subject: [PATCH 533/812] kbuild: Consolidate header generation from ASM offset information commit ebf003f0cfb3705e60d40dedc3ec949176c741af upstream. Largely redundant code is used in different places to generate C headers from offset information extracted from assembly language output. Consolidate the code in Makefile.lib and use this instead. Signed-off-by: Matthias Kaehlcke Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Kbuild | 25 ------------------------- arch/ia64/kernel/Makefile | 26 ++------------------------ scripts/Makefile.lib | 28 ++++++++++++++++++++++++++++ scripts/mod/Makefile | 28 ++-------------------------- 4 files changed, 32 insertions(+), 75 deletions(-) diff --git a/Kbuild b/Kbuild index f55cefd9bf29..f56ed561a284 100644 --- a/Kbuild +++ b/Kbuild @@ -6,31 +6,6 @@ # 3) Generate asm-offsets.h (may need bounds.h and timeconst.h) # 4) Check for missing system calls -# Default sed regexp - multiline due to syntax constraints -define sed-y - "/^->/{s:->#\(.*\):/* \1 */:; \ - s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:->::; p;}" -endef - -# Use filechk to avoid rebuilds when a header changes, but the resulting file -# does not -define filechk_offsets - (set -e; \ - echo "#ifndef $2"; \ - echo "#define $2"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Kbuild"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y); \ - echo ""; \ - echo "#endif" ) -endef - ##### # 1) Generate bounds.h diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 3686d6abafde..9edda5466020 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -50,32 +50,10 @@ CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 # The gate DSO image is built using a special linker script. include $(src)/Makefile.gate -# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config -define sed-y - "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" -endef -quiet_cmd_nr_irqs = GEN $@ -define cmd_nr_irqs - (set -e; \ - echo "#ifndef __ASM_NR_IRQS_H__"; \ - echo "#define __ASM_NR_IRQS_H__"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Kbuild"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y) $<; \ - echo ""; \ - echo "#endif" ) > $@ -endef - # We use internal kbuild rules to avoid the "is up to date" message from make arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c $(Q)mkdir -p $(dir $@) $(call if_changed_dep,cc_s_c) -include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s - $(Q)mkdir -p $(dir $@) - $(call cmd,nr_irqs) +include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s FORCE + $(call filechk,offsets,__ASM_NR_IRQS_H__) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index a2d0e6d32659..18a1c8381ce0 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -388,3 +388,31 @@ quiet_cmd_xzmisc = XZMISC $@ cmd_xzmisc = (cat $(filter-out FORCE,$^) | \ xz --check=crc32 --lzma2=dict=1MiB) > $@ || \ (rm -f $@ ; false) + +# ASM offsets +# --------------------------------------------------------------------------- + +# Default sed regexp - multiline due to syntax constraints +define sed-offsets + "/^->/{s:->#\(.*\):/* \1 */:; \ + s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ + s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ + s:->::; p;}" +endef + +# Use filechk to avoid rebuilds when a header changes, but the resulting file +# does not +define filechk_offsets + (set -e; \ + echo "#ifndef $2"; \ + echo "#define $2"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Kbuild"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-offsets); \ + echo ""; \ + echo "#endif" ) +endef diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index c11212ff3510..e0cb2e4a3b15 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -5,32 +5,8 @@ modpost-objs := modpost.o file2alias.o sumversion.o devicetable-offsets-file := devicetable-offsets.h -define sed-y - "/^->/{s:->#\(.*\):/* \1 */:; \ - s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:->::; p;}" -endef - -quiet_cmd_offsets = GEN $@ -define cmd_offsets - (set -e; \ - echo "#ifndef __DEVICETABLE_OFFSETS_H__"; \ - echo "#define __DEVICETABLE_OFFSETS_H__"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Kbuild"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y) $<; \ - echo ""; \ - echo "#endif" ) > $@ -endef - -$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s - $(call if_changed,offsets) +$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s FORCE + $(call filechk,offsets,__DEVICETABLE_OFFSETS_H__) targets += $(devicetable-offsets-file) devicetable-offsets.s From 9ee9a0eb53d7c3eec581aacf9b8d24e8467f3e09 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Apr 2017 15:21:10 +0900 Subject: [PATCH 534/812] kbuild: consolidate redundant sed script ASM offset generation commit 7dd47b95b0f54f2057d40af6e66d477e3fe95d13 upstream. This part ended up in redundant code after touched by multiple people. [1] Commit 3234282f33b2 ("x86, asm: Fix CFI macro invocations to deal with shortcomings in gas") added parentheses for defined expressions to support old gas for x86. [2] Commit a22dcdb0032c ("x86, asm: Fix ancient-GAS workaround") split the pattern into two to avoid parentheses for non-numeric expressions. [3] Commit 95a2f6f72d37 ("Partially revert patch that encloses asm-offset.h numbers in brackets") removed parentheses from numeric expressions as well because parentheses in MN10300 assembly have a special meaning (pointer access). Apparently, there is a conflict between [1] and [3]. After all, [3] took precedence, and a long time has passed since then. Now, merge the two patterns again because the first one is covered by the other. Signed-off-by: Masahiro Yamada Reviewed-by: Matthias Kaehlcke Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- scripts/Makefile.lib | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 18a1c8381ce0..f2f46d055f20 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -395,7 +395,6 @@ cmd_xzmisc = (cat $(filter-out FORCE,$^) | \ # Default sed regexp - multiline due to syntax constraints define sed-offsets "/^->/{s:->#\(.*\):/* \1 */:; \ - s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ s:->::; p;}" endef From 57c47840248ca79ce57306d018dce30681c2eab7 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Fri, 21 Apr 2017 15:21:11 +0900 Subject: [PATCH 535/812] kbuild: fix asm-offset generation to work with clang commit cf0c3e68aa81f992b0301f62e341b710d385bf68 upstream. KBuild abuses the asm statement to write to a file and clang chokes about these invalid asm statements. Hack it even more by fooling this is actual valid asm code. [masahiro: Import Jeroen's work for U-Boot: http://patchwork.ozlabs.org/patch/375026/ Tweak sed script a little to avoid garbage '#' for GCC case, like #define NR_PAGEFLAGS 23 /* __NR_PAGEFLAGS # */ ] Signed-off-by: Jeroen Hofstee Signed-off-by: Masahiro Yamada Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- include/linux/kbuild.h | 6 +++--- scripts/Makefile.lib | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/kbuild.h b/include/linux/kbuild.h index 22a72198c14b..4e80f3a9ad58 100644 --- a/include/linux/kbuild.h +++ b/include/linux/kbuild.h @@ -2,14 +2,14 @@ #define __LINUX_KBUILD_H #define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val)) -#define BLANK() asm volatile("\n->" : : ) +#define BLANK() asm volatile("\n.ascii \"->\"" : : ) #define OFFSET(sym, str, mem) \ DEFINE(sym, offsetof(struct str, mem)) #define COMMENT(x) \ - asm volatile("\n->#" x) + asm volatile("\n.ascii \"->#" x "\"") #endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index f2f46d055f20..27c56b5874f8 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -393,10 +393,14 @@ cmd_xzmisc = (cat $(filter-out FORCE,$^) | \ # --------------------------------------------------------------------------- # Default sed regexp - multiline due to syntax constraints +# +# Use [:space:] because LLVM's integrated assembler inserts around +# the .ascii directive whereas GCC keeps the as-is. define sed-offsets - "/^->/{s:->#\(.*\):/* \1 */:; \ + 's:^[[:space:]]*\.ascii[[:space:]]*"\(.*\)".*:\1:; \ + /^->/{s:->#\(.*\):/* \1 */:; \ s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:->::; p;}" + s:->::; p;}' endef # Use filechk to avoid rebuilds when a header changes, but the resulting file From f96245c042d6e77b0dd3b48e3a0abb4147f92937 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Apr 2017 07:25:21 +0900 Subject: [PATCH 536/812] kbuild: drop -Wno-unknown-warning-option from clang options commit a0ae981eba8f07dbc74bce38fd3a462b69a5bc8e upstream. Since commit c3f0d0bc5b01 ("kbuild, LLVMLinux: Add -Werror to cc-option to support clang"), cc-option and friends work nicely for clang. However, -Wno-unknown-warning-option makes clang happy with any unknown warning options even if -Werror is specified. Once -Wno-unknown-warning-option is added, any succeeding call of cc-disable-warning is evaluated positive, then unknown warning options are accepted. This should be dropped. Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 - scripts/Makefile.extrawarn | 1 - 2 files changed, 2 deletions(-) diff --git a/Makefile b/Makefile index 0c29f97946b4..0fffa99742ca 100644 --- a/Makefile +++ b/Makefile @@ -708,7 +708,6 @@ endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,) KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index da3386a9d244..abe5f47b1ab0 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -61,7 +61,6 @@ ifeq ($(cc-name),clang) KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides) KBUILD_CFLAGS += $(call cc-disable-warning, unused-value) KBUILD_CFLAGS += $(call cc-disable-warning, format) -KBUILD_CFLAGS += $(call cc-disable-warning, unknown-warning-option) KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare) KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length) KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized) From 045812536e46b13d25f2d252819c20a4a7d2079a Mon Sep 17 00:00:00 2001 From: Mark Charlebois Date: Fri, 31 Mar 2017 22:38:13 +0200 Subject: [PATCH 537/812] kbuild, LLVMLinux: Add -Werror to cc-option to support clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c3f0d0bc5b01ad90c45276952802455750444b4f upstream. Clang will warn about unknown warnings but will not return false unless -Werror is set. GCC will return false if an unknown warning is passed. Adding -Werror make both compiler behave the same. [arnd: it turns out we need the same patch for testing whether -ffunction-sections works right with gcc. I've build tested extensively with this patch applied, so let's just merge this one now.] Signed-off-by: Mark Charlebois Signed-off-by: Behan Webster Reviewed-by: Jan-Simon Möller Signed-off-by: Arnd Bergmann Acked-by: Kees Cook Signed-off-by: Masahiro Yamada [nc: Adjust context due to lack of d26e94149276f] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- scripts/Kbuild.include | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 5897fc3857a0..87ee353b8254 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -112,12 +112,12 @@ as-instr = $(call try-run,\ # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) cc-option = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) + $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) # cc-option-yn # Usage: flag := $(call cc-option-yn,-march=winchip-c6) cc-option-yn = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) + $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) # cc-option-align # Prefix align with either -falign or -malign @@ -127,7 +127,7 @@ cc-option-align = $(subst -functions=0,,\ # cc-disable-warning # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable) cc-disable-warning = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) + $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) # cc-name # Expands to either gcc or clang From c8250381e987589874ef68223446a5cbebac1076 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Mon, 27 Mar 2017 18:19:09 -0700 Subject: [PATCH 538/812] kbuild: use -Oz instead of -Os when using clang commit 6748cb3c299de1ffbe56733647b01dbcc398c419 upstream. This generates smaller resulting object code when compiled with clang. Signed-off-by: Behan Webster Signed-off-by: Matthias Kaehlcke Signed-off-by: Masahiro Yamada [nc: Adjust context due to lack of commit a76bcf557ef4 in linux-4.4.y] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0fffa99742ca..c9e27ba9152d 100644 --- a/Makefile +++ b/Makefile @@ -628,7 +628,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -KBUILD_CFLAGS += -Os +KBUILD_CFLAGS += $(call cc-option,-Oz,-Os) else ifdef CONFIG_PROFILE_ALL_BRANCHES KBUILD_CFLAGS += -O2 From f0ea0fe278c5b87d637be0cae0680b61c183e7bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vin=C3=ADcius=20Tinti?= Date: Mon, 24 Apr 2017 13:04:58 -0700 Subject: [PATCH 539/812] kbuild: Add support to generate LLVM assembly files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 433db3e260bc8134d4a46ddf20b3668937e12556 upstream. Add rules to kbuild in order to generate LLVM assembly files with the .ll extension when using clang. # from c code make CC=clang kernel/pid.ll Signed-off-by: Vinícius Tinti Signed-off-by: Behan Webster Signed-off-by: Matthias Kaehlcke Signed-off-by: Masahiro Yamada [nc: Fix conflicts due to lack of commit 6b90bd4ba40b3 in linux-4.4.y] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- .gitignore | 1 + Makefile | 5 +++++ scripts/Makefile.build | 8 ++++++++ 3 files changed, 14 insertions(+) diff --git a/.gitignore b/.gitignore index fd3a35592543..34fe1346aa87 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ *.lzo *.patch *.gcno +*.ll modules.builtin Module.symvers *.dwo diff --git a/Makefile b/Makefile index c9e27ba9152d..10cbd11f99ee 100644 --- a/Makefile +++ b/Makefile @@ -1307,6 +1307,8 @@ help: @echo ' (default: $$(INSTALL_MOD_PATH)/lib/firmware)' @echo ' dir/ - Build all files in dir and below' @echo ' dir/file.[ois] - Build specified target only' + @echo ' dir/file.ll - Build the LLVM assembly file' + @echo ' (requires compiler support for LLVM assembly generation)' @echo ' dir/file.lst - Build specified mixed source/assembly target only' @echo ' (requires a recent binutils and recent build (System.map))' @echo ' dir/file.ko - Build module including final link' @@ -1482,6 +1484,7 @@ clean: $(clean-dirs) -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \ -o -name '*.symtypes' -o -name 'modules.order' \ -o -name modules.builtin -o -name '.tmp_*.o.*' \ + -o -name '*.ll' \ -o -name '*.gcno' \) -type f -print | xargs rm -f # Generate tags for editors @@ -1585,6 +1588,8 @@ endif $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) %.symtypes: %.c prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) +%.ll: %.c prepare scripts FORCE + $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) # Modules /: prepare scripts FORCE diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 18209917e379..42aef001dfdd 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -175,6 +175,14 @@ cmd_cc_symtypes_c = \ $(obj)/%.symtypes : $(src)/%.c FORCE $(call cmd,cc_symtypes_c) +# LLVM assembly +# Generate .ll files from .c +quiet_cmd_cc_ll_c = CC $(quiet_modtag) $@ + cmd_cc_ll_c = $(CC) $(c_flags) -emit-llvm -S -o $@ $< + +$(obj)/%.ll: $(src)/%.c FORCE + $(call if_changed_dep,cc_ll_c) + # C (.c) files # The C file is compiled and updated dependency information is generated. # (See cmd_cc_o_c + relevant part of rule_cc_o_c) From 89134f0918ec3bf4614ac2f9258543940e611f01 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Feb 2017 18:00:14 +0100 Subject: [PATCH 540/812] modules: mark __inittest/__exittest as __maybe_unused commit 1f318a8bafcfba9f0d623f4870c4e890fd22e659 upstream. clang warns about unused inline functions by default: arch/arm/crypto/aes-cipher-glue.c:68:1: warning: unused function '__inittest' [-Wunused-function] arch/arm/crypto/aes-cipher-glue.c:69:1: warning: unused function '__exittest' [-Wunused-function] As these appear in every single module, let's just disable the warnings by marking the two functions as __maybe_unused. Signed-off-by: Arnd Bergmann Reviewed-by: Miroslav Benes Acked-by: Rusty Russell Signed-off-by: Jessica Yu Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- include/linux/module.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index c9f2f85017ad..dfe5c2e25ba1 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -125,13 +125,13 @@ extern void cleanup_module(void); /* Each module must use one module_init(). */ #define module_init(initfn) \ - static inline initcall_t __inittest(void) \ + static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ int init_module(void) __attribute__((alias(#initfn))); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ - static inline exitcall_t __exittest(void) \ + static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ void cleanup_module(void) __attribute__((alias(#exitfn))); From 0516e87c1eda7ff6f52a5f6c3e9b962d0b60723f Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 21 Apr 2017 14:39:30 -0700 Subject: [PATCH 541/812] kbuild: clang: Disable 'address-of-packed-member' warning commit bfb38988c51e440fd7062ddf3157f7d8b1dd5d70 upstream. clang generates plenty of these warnings in different parts of the code, to an extent that the warnings are little more than noise. Disable the 'address-of-packed-member' warning. Signed-off-by: Matthias Kaehlcke Reviewed-by: Douglas Anderson Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 10cbd11f99ee..2e3cadb1bfc3 100644 --- a/Makefile +++ b/Makefile @@ -711,6 +711,7 @@ KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) # Quiet clang warning: comparison of unsigned expression < 0 is always false KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the From c042dd600f4e89b6e7bdffa00aea4d1d3c1e9686 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 26 Apr 2017 17:11:32 +0100 Subject: [PATCH 542/812] crypto: arm64/sha - avoid non-standard inline asm tricks commit f4857f4c2ee9aa4e2aacac1a845352b00197fb57 upstream. Replace the inline asm which exports struct offsets as ELF symbols with proper const variables exposing the same values. This works around an issue with Clang which does not interpret the "i" (or "I") constraints in the same way as GCC. Signed-off-by: Ard Biesheuvel Tested-by: Matthias Kaehlcke Signed-off-by: Herbert Xu Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/sha1-ce-core.S | 6 ++++-- arch/arm64/crypto/sha1-ce-glue.c | 11 +++-------- arch/arm64/crypto/sha2-ce-core.S | 6 ++++-- arch/arm64/crypto/sha2-ce-glue.c | 13 +++++-------- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index c98e7e849f06..8550408735a0 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -82,7 +82,8 @@ ENTRY(sha1_ce_transform) ldr dgb, [x0, #16] /* load sha1_ce_state::finalize */ - ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize] + ldr_l w4, sha1_ce_offsetof_finalize, x4 + ldr w4, [x0, x4] /* load input */ 0: ld1 {v8.4s-v11.4s}, [x1], #64 @@ -132,7 +133,8 @@ CPU_LE( rev32 v11.16b, v11.16b ) * the padding is handled by the C code in that case. */ cbz x4, 3f - ldr x4, [x0, #:lo12:sha1_ce_offsetof_count] + ldr_l w4, sha1_ce_offsetof_count, x4 + ldr x4, [x0, x4] movi v9.2d, #0 mov x8, #0x80000000 movi v10.2d, #0 diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index aefda9868627..ea319c055f5d 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -17,9 +17,6 @@ #include #include -#define ASM_EXPORT(sym, val) \ - asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); - MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -32,6 +29,9 @@ struct sha1_ce_state { asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, int blocks); +const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); +const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize); + static int sha1_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -52,11 +52,6 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, struct sha1_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); - ASM_EXPORT(sha1_ce_offsetof_count, - offsetof(struct sha1_ce_state, sst.count)); - ASM_EXPORT(sha1_ce_offsetof_finalize, - offsetof(struct sha1_ce_state, finalize)); - /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 01cfee066837..679c6c002f4f 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -88,7 +88,8 @@ ENTRY(sha2_ce_transform) ld1 {dgav.4s, dgbv.4s}, [x0] /* load sha256_ce_state::finalize */ - ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] + ldr_l w4, sha256_ce_offsetof_finalize, x4 + ldr w4, [x0, x4] /* load input */ 0: ld1 {v16.4s-v19.4s}, [x1], #64 @@ -136,7 +137,8 @@ CPU_LE( rev32 v19.16b, v19.16b ) * the padding is handled by the C code in that case. */ cbz x4, 3f - ldr x4, [x0, #:lo12:sha256_ce_offsetof_count] + ldr_l w4, sha256_ce_offsetof_count, x4 + ldr x4, [x0, x4] movi v17.2d, #0 mov x8, #0x80000000 movi v18.2d, #0 diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 7cd587564a41..0ed9486f75dd 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -17,9 +17,6 @@ #include #include -#define ASM_EXPORT(sym, val) \ - asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); - MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -32,6 +29,11 @@ struct sha256_ce_state { asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, int blocks); +const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, + sst.count); +const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state, + finalize); + static int sha256_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -52,11 +54,6 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, struct sha256_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); - ASM_EXPORT(sha256_ce_offsetof_count, - offsetof(struct sha256_ce_state, sst.count)); - ASM_EXPORT(sha256_ce_offsetof_finalize, - offsetof(struct sha256_ce_state, finalize)); - /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. From c2d14540ebbd0522b8226348909d13065544af94 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Aug 2017 20:49:36 +0100 Subject: [PATCH 543/812] efi/libstub/arm64: Force 'hidden' visibility for section markers commit 0426a4e68f18d75515414361de9e3e1445d2644e upstream. To prevent the compiler from emitting absolute references to the section markers when running in PIC mode, override the visibility to 'hidden' for all contents of asm/sections.h Tested-by: Matthias Kaehlcke Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170818194947.19347-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar [nc: Fix conflict due to lack of commit 42b55734030c1 in linux-4.4.y] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/arm64-stub.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 78dfbd34b6bf..044efb779ed1 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -9,9 +9,17 @@ * published by the Free Software Foundation. * */ + +/* + * To prevent the compiler from emitting GOT-indirected (and thus absolute) + * references to the section markers, override their visibility as 'hidden' + */ +#pragma GCC visibility push(hidden) +#include +#pragma GCC visibility pop + #include #include -#include efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, unsigned long *image_addr, From ba0523881cac21c5cdc477390ff3e3ef9104437e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Aug 2017 20:49:37 +0100 Subject: [PATCH 544/812] efi/libstub/arm64: Set -fpie when building the EFI stub commit 91ee5b21ee026c49e4e7483de69b55b8b47042be upstream. Clang may emit absolute symbol references when building in non-PIC mode, even when using the default 'small' code model, which is already mostly position independent to begin with, due to its use of adrp/add pairs that have a relative range of +/- 4 GB. The remedy is to pass the -fpie flag, which can be done safely now that the code has been updated to avoid GOT indirections (which may be emitted due to the compiler assuming that the PIC/PIE code may end up in a shared library that is subject to ELF symbol preemption) Passing -fpie when building code that needs to execute at an a priori unknown offset is arguably an improvement in any case, and given that the recent visibility changes allow the PIC build to pass with GCC as well, let's add -fpie for all arm64 builds rather than only for Clang. Tested-by: Matthias Kaehlcke Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170818194947.19347-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 3c0467d3688c..88bd6829a358 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -10,7 +10,7 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ -fPIC -fno-strict-aliasing -mno-red-zone \ -mno-mmx -mno-sse -DDISABLE_BRANCH_PROFILING -cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) +cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic -mno-single-pic-base From 904ccb4cd1e5d4a46d96f008e00bf681b28150cb Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 6 Nov 2017 10:47:54 -0800 Subject: [PATCH 545/812] kbuild: fix linker feature test macros when cross compiling with Clang commit 86a9df597cdd564d2d29c65897bcad42519e3678 upstream. I was not seeing my linker flags getting added when using ld-option when cross compiling with Clang. Upon investigation, this seems to be due to a difference in how GCC vs Clang handle cross compilation. GCC is configured at build time to support one backend, that is implicit when compiling. Clang is explicit via the use of `-target ` and ships with all supported backends by default. GNU Make feature test macros that compile then link will always fail when cross compiling with Clang unless Clang's triple is passed along to the compiler. For example: $ clang -x c /dev/null -c -o temp.o $ aarch64-linux-android/bin/ld -E temp.o aarch64-linux-android/bin/ld: unknown architecture of input file `temp.o' is incompatible with aarch64 output aarch64-linux-android/bin/ld: warning: cannot find entry symbol _start; defaulting to 0000000000400078 $ echo $? 1 $ clang -target aarch64-linux-android- -x c /dev/null -c -o temp.o $ aarch64-linux-android/bin/ld -E temp.o aarch64-linux-android/bin/ld: warning: cannot find entry symbol _start; defaulting to 00000000004002e4 $ echo $? 0 This causes conditional checks that invoke $(CC) without the target triple, then $(LD) on the result, to always fail. Suggested-by: Masahiro Yamada Signed-off-by: Nick Desaulniers Reviewed-by: Matthias Kaehlcke Signed-off-by: Masahiro Yamada [nc: Fix conflicts due to lack of commit 3298b690b21cd in linux-4.4.y Use KBUILD_CFLAGS instead of CC_OPTION_FLAGS because commit d26e94149276f that introduced that variable isn't in 4.4 either] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- scripts/Kbuild.include | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 87ee353b8254..9f634b50e57b 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -147,12 +147,13 @@ cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4)) # cc-ldoption # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) cc-ldoption = $(call try-run,\ - $(CC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) + $(CC) $(1) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) # ld-option # Usage: LDFLAGS += $(call ld-option, -X) ld-option = $(call try-run,\ - $(CC) -x c /dev/null -c -o "$$TMPO" ; $(LD) $(1) "$$TMPO" -o "$$TMP",$(1),$(2)) + $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -x c /dev/null -c -o "$$TMPO"; \ + $(LD) $(LDFLAGS) $(1) "$$TMPO" -o "$$TMP",$(1),$(2)) # ar-option # Usage: KBUILD_ARFLAGS := $(call ar-option,D) From c630d13c98062347d2efd3814c0bef170ad5ce65 Mon Sep 17 00:00:00 2001 From: Chris Fries Date: Tue, 7 Nov 2017 11:46:13 -0800 Subject: [PATCH 546/812] kbuild: Set KBUILD_CFLAGS before incl. arch Makefile commit ae6b289a37890909fea0e4a1666e19377fa0ed2c upstream. Set the clang KBUILD_CFLAGS up before including arch/ Makefiles, so that ld-options (etc.) can work correctly. This fixes errors with clang such as ld-options trying to CC against your host architecture, but LD trying to link against your target architecture. Signed-off-by: Chris Fries Signed-off-by: Nick Desaulniers Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Signed-off-by: Masahiro Yamada [nc: Adjust context] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 64 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index 2e3cadb1bfc3..abae380dc489 100644 --- a/Makefile +++ b/Makefile @@ -612,6 +612,38 @@ endif # $(dot-config) # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux +ifeq ($(cc-name),clang) +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET := -target $(notdir $(CROSS_COMPILE:%-=%)) +GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +endif +ifneq ($(GCC_TOOLCHAIN),) +CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) +endif +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) +KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +# Quiet clang warning: comparison of unsigned expression < 0 is always false +KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) +# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the +# source of a reference will be _MergedGlobals and not on of the whitelisted names. +# See modpost pattern 2 +KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) +KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) +else + +# These warnings generated too much noise in a regular build. +# Use make W=1 to enable them (see scripts/Makefile.build) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) +endif + # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default # values of the respective KBUILD_* variables ARCH_CPPFLAGS := @@ -697,38 +729,6 @@ endif endif KBUILD_CFLAGS += $(stackp-flag) -ifeq ($(cc-name),clang) -ifneq ($(CROSS_COMPILE),) -CLANG_TARGET := -target $(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) -endif -ifneq ($(GCC_TOOLCHAIN),) -CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) -endif -KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) -KBUILD_CFLAGS += $(call cc-disable-warning, gnu) -KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) -# Quiet clang warning: comparison of unsigned expression < 0 is always false -KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) -# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the -# source of a reference will be _MergedGlobals and not on of the whitelisted names. -# See modpost pattern 2 -KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) -KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) -else - -# These warnings generated too much noise in a regular build. -# Use make W=1 to enable them (see scripts/Makefile.build) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) -endif - ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else From 0925fe3d2eb6907dd2bf6e6521be6c8d52045c2d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Nov 2017 21:15:13 +0900 Subject: [PATCH 547/812] kbuild: move cc-option and cc-disable-warning after incl. arch Makefile commit cfe17c9bbe6a673fdafdab179c32b355ed447f66 upstream. Geert reported commit ae6b289a3789 ("kbuild: Set KBUILD_CFLAGS before incl. arch Makefile") broke cross-compilation using a cross-compiler that supports less compiler options than the host compiler. For example, cc1: error: unrecognized command line option "-Wno-unused-but-set-variable" This problem happens on architectures that setup CROSS_COMPILE in their arch/*/Makefile. Move the cc-option and cc-disable-warning back to the original position, but keep the Clang target options untouched. Fixes: ae6b289a3789 ("kbuild: Set KBUILD_CFLAGS before incl. arch Makefile") Reported-by: Geert Uytterhoeven Signed-off-by: Masahiro Yamada Tested-by: Geert Uytterhoeven [nc: Adjust context] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index abae380dc489..6a15a2a851c8 100644 --- a/Makefile +++ b/Makefile @@ -622,26 +622,6 @@ CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) -KBUILD_CFLAGS += $(call cc-disable-warning, gnu) -KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) -# Quiet clang warning: comparison of unsigned expression < 0 is always false -KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) -# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the -# source of a reference will be _MergedGlobals and not on of the whitelisted names. -# See modpost pattern 2 -KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) -KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) -else - -# These warnings generated too much noise in a regular build. -# Use make W=1 to enable them (see scripts/Makefile.build) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) endif # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default @@ -729,6 +709,29 @@ endif endif KBUILD_CFLAGS += $(stackp-flag) +ifeq ($(cc-name),clang) +KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) +KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +# Quiet clang warning: comparison of unsigned expression < 0 is always false +KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) +# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the +# source of a reference will be _MergedGlobals and not on of the whitelisted names. +# See modpost pattern 2 +KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) +KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) +else + +# These warnings generated too much noise in a regular build. +# Use make W=1 to enable them (see scripts/Makefile.extrawarn) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) +endif + ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else From 03e4b23ec9ab053b6cbf39eca2fd821116022665 Mon Sep 17 00:00:00 2001 From: David Lin Date: Fri, 20 Oct 2017 14:09:13 -0700 Subject: [PATCH 548/812] kbuild: clang: fix build failures with sparse check commit bb3f38c3c5b759163e09b9152629cc789731de47 upstream. We should avoid using the space character when passing arguments to clang, because static code analysis check tool such as sparse may misinterpret the arguments followed by spaces as build targets hence cause the build to fail. Signed-off-by: David Lin Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 6a15a2a851c8..0598c61f77ee 100644 --- a/Makefile +++ b/Makefile @@ -614,11 +614,11 @@ all: vmlinux ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_TARGET := -target $(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) endif ifneq ($(GCC_TOOLCHAIN),) -CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) +CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) From 4e0ae28ea0a7b769c596107b6297ce6d1122a214 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 7 Oct 2017 13:23:23 -0700 Subject: [PATCH 549/812] kbuild: clang: remove crufty HOSTCFLAGS commit df16aaac26e92e97ab7234d3f93c953466adc4b5 upstream. When compiling with `make CC=clang HOSTCC=clang`, I was seeing warnings that clang did not recognize -fno-delete-null-pointer-checks for HOSTCC targets. These were added in commit 61163efae020 ("kbuild: LLVMLinux: Add Kbuild support for building kernel with Clang"). Clang does not support -fno-delete-null-pointer-checks, so adding it to HOSTCFLAGS if HOSTCC is clang does not make sense. It's not clear why the other warnings were disabled, and just for HOSTCFLAGS, but I can remove them, add -Werror to HOSTCFLAGS and compile with clang just fine. Suggested-by: Masahiro Yamada Signed-off-by: Nick Desaulniers Signed-off-by: Masahiro Yamada [nc: Adjust context] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Makefile b/Makefile index 0598c61f77ee..50866f8c9450 100644 --- a/Makefile +++ b/Makefile @@ -306,11 +306,6 @@ HOSTCXX = g++ HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 HOSTCXXFLAGS = -O2 -ifeq ($(shell $(HOSTCC) -v 2>&1 | grep -c "clang version"), 1) -HOSTCFLAGS += -Wno-unused-value -Wno-unused-parameter \ - -Wno-missing-field-initializers -fno-delete-null-pointer-checks -endif - # Decide whether to build built-in, modular, or both. # Normally, just do built-in. From cfbabf536f6b3e9f0bdea8cc388906ae90232689 Mon Sep 17 00:00:00 2001 From: Sodagudi Prasad Date: Tue, 6 Feb 2018 15:46:51 -0800 Subject: [PATCH 550/812] kbuild: clang: disable unused variable warnings only when constant commit 0a5f41767444cc3b4fc5573921ab914b4f78baaa upstream. Currently, GCC disables -Wunused-const-variable, but not -Wunused-variable, so warns unused variables if they are non-constant. While, Clang does not warn unused variables at all regardless of the const qualifier because -Wno-unused-const-variable is implied by the stronger option -Wno-unused-variable. Disable -Wunused-const-variable instead of -Wunused-variable so that GCC and Clang work in the same way. Signed-off-by: Prasad Sodagudi Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 50866f8c9450..0b35e1a73a38 100644 --- a/Makefile +++ b/Makefile @@ -706,7 +706,6 @@ KBUILD_CFLAGS += $(stackp-flag) ifeq ($(cc-name),clang) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) @@ -724,9 +723,9 @@ else # These warnings generated too much noise in a regular build. # Use make W=1 to enable them (see scripts/Makefile.extrawarn) KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) endif +KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else From 14e4ca675b1c0a5661bbc9f4ef48c4c1244e6ab3 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 19 Mar 2018 22:12:53 +0100 Subject: [PATCH 551/812] kbuild: set no-integrated-as before incl. arch Makefile commit 0f0e8de334c54c38818a4a5390a39aa09deff5bf upstream. In order to make sure compiler flag detection for ARM works correctly the no-integrated-as flags need to be set before including the arch specific Makefile. Fixes: cfe17c9bbe6a ("kbuild: move cc-option and cc-disable-warning after incl. arch Makefile") Signed-off-by: Stefan Agner Signed-off-by: Masahiro Yamada [nc: Adjust context] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0b35e1a73a38..e399e52fe35e 100644 --- a/Makefile +++ b/Makefile @@ -617,6 +617,8 @@ CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) endif # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default @@ -716,8 +718,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # See modpost pattern 2 KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) else # These warnings generated too much noise in a regular build. From c695cffc9f7cac218249dce3c79c030e56b88170 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 17 Sep 2018 19:31:57 -0700 Subject: [PATCH 552/812] kbuild: allow to use GCC toolchain not in Clang search path commit ef8c4ed9db80261f397f0c0bf723684601ae3b52 upstream. When using a GCC cross toolchain which is not in a compiled in Clang search path, Clang reverts to the system assembler and linker. This leads to assembler or linker errors, depending on which tool is first used for a given architecture. It seems that Clang is not searching $PATH for a matching assembler or linker. Make sure that Clang picks up the correct assembler or linker by passing the cross compilers bin directory as search path. This allows to use Clang provided by distributions with GCC toolchains not in /usr/bin. Link: https://github.com/ClangBuiltLinux/linux/issues/78 Signed-off-by: Stefan Agner Reviewed-and-tested-by: Nick Desaulniers Signed-off-by: Masahiro Yamada [nc: Adjust context] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index e399e52fe35e..f3612502ff8b 100644 --- a/Makefile +++ b/Makefile @@ -610,13 +610,15 @@ all: vmlinux ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) +CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) +GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif -KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) endif From 2b92e27f26f136b9c16a0ccb94a89ff599814675 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 21 Apr 2017 16:00:56 -0700 Subject: [PATCH 553/812] arm64: Disable asm-operand-width warning for clang clang raises 'asm-operand-widths' warnings in inline assembly code when the size of an operand is < 64 bits and the operand width is unspecified. Most warnings are raised in macros, i.e. the datatype of the operand may vary. Signed-off-by: Matthias Kaehlcke nc: I trimmed the original commit message since I'm not a part of CrOS and can't speak on their behalf. To fix these warnings, it requires a fairly intrusive backport of the sysreg conversion that Mark Rutland did in 4.9. I think disabling the warning is smarter, similar to commit d41d0fe374d4 ("turn off -Wattribute-alias") in this tree. Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b6c90e5006e4..c10bb98144b0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -56,6 +56,10 @@ else TEXT_OFFSET := 0x00080000 endif +ifeq ($(cc-name),clang) +KBUILD_CFLAGS += $(call cc-disable-warning, asm-operand-widths) +endif + # KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61) # in 32-bit arithmetic KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ From 68cb70349a5f87e5083ce6014f1a4e9e7b6a0e18 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 13 Apr 2017 10:26:09 -0700 Subject: [PATCH 554/812] x86/kbuild: Use cc-option to enable -falign-{jumps/loops} commit 2c4fd1ac3ff167c91272dc43c7bfd2269ef61557 upstream. clang currently does not support these optimizations, only enable them when they are available. Signed-off-by: Matthias Kaehlcke Cc: Greg Hackmann Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Michael Davidson Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: grundler@chromium.org Link: http://lkml.kernel.org/r/20170413172609.118122-1-mka@chromium.org Signed-off-by: Ingo Molnar Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d2c663aeccba..865c465f2881 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -88,10 +88,10 @@ else KBUILD_CFLAGS += -m64 # Align jump targets to 1 byte, not the default 16 bytes: - KBUILD_CFLAGS += -falign-jumps=1 + KBUILD_CFLAGS += $(call cc-option,-falign-jumps=1) # Pack loops tightly as well: - KBUILD_CFLAGS += -falign-loops=1 + KBUILD_CFLAGS += $(call cc-option,-falign-loops=1) # Don't autogenerate traditional x87 instructions KBUILD_CFLAGS += $(call cc-option,-mno-80387) From 5039cea07546571f364b6edaefbe86213b593dd8 Mon Sep 17 00:00:00 2001 From: Michael Davidson Date: Wed, 15 Mar 2017 15:36:00 -0700 Subject: [PATCH 555/812] crypto, x86: aesni - fix token pasting for clang commit fdb2726f4e61c5e3abc052f547d5a5f6c0dc5504 upstream. aes_ctrby8_avx-x86_64.S uses the C preprocessor for token pasting of character sequences that are not valid preprocessor tokens. While this is allowed when preprocessing assembler files it exposes an incompatibilty between the clang and gcc preprocessors where clang does not strip leading white space from macro parameters, leading to the CONCAT(%xmm, i) macro expansion on line 96 resulting in a token with a space character embedded in it. While this could be resolved by deleting the offending space character, the assembler is perfectly capable of doing the token pasting correctly for itself so we can just get rid of the preprocessor macros. Signed-off-by: Michael Davidson Signed-off-by: Herbert Xu Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index a916c4a61165..5f6a5af9c489 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -65,7 +65,6 @@ #include #include -#define CONCAT(a,b) a##b #define VMOVDQ vmovdqu #define xdata0 %xmm0 @@ -92,8 +91,6 @@ #define num_bytes %r8 #define tmp %r10 -#define DDQ(i) CONCAT(ddq_add_,i) -#define XMM(i) CONCAT(%xmm, i) #define DDQ_DATA 0 #define XDATA 1 #define KEY_128 1 @@ -131,12 +128,12 @@ ddq_add_8: /* generate a unique variable for ddq_add_x */ .macro setddq n - var_ddq_add = DDQ(\n) + var_ddq_add = ddq_add_\n .endm /* generate a unique variable for xmm register */ .macro setxdata n - var_xdata = XMM(\n) + var_xdata = %xmm\n .endm /* club the numeric 'id' to the symbol 'name' */ From ee43aaa95604334e35dba586bbf5910984498d7d Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 1 May 2017 15:47:41 -0700 Subject: [PATCH 556/812] x86/mm/kaslr: Use the _ASM_MUL macro for multiplication to work around Clang incompatibility commit 121843eb02a6e2fa30aefab64bfe183c97230c75 upstream. The constraint "rm" allows the compiler to put mix_const into memory. When the input operand is a memory location then MUL needs an operand size suffix, since Clang can't infer the multiplication width from the operand. Add and use the _ASM_MUL macro which determines the operand size and resolves to the NUL instruction with the corresponding suffix. This fixes the following error when building with clang: CC arch/x86/lib/kaslr.o /tmp/kaslr-dfe1ad.s: Assembler messages: /tmp/kaslr-dfe1ad.s:182: Error: no instruction mnemonic suffix given and no register operands; can't size instruction Signed-off-by: Matthias Kaehlcke Cc: Grant Grundler Cc: Greg Hackmann Cc: Kees Cook Cc: Linus Torvalds Cc: Michael Davidson Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170501224741.133938-1-mka@chromium.org Signed-off-by: Ingo Molnar [nc: Apply to aslr.c in get_random_long as the kaslr shift didn't happen until 4.8 in commit d899a7d146a2] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/aslr.c | 3 ++- arch/x86/include/asm/asm.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 6a9b96b4624d..31dab2135188 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -1,5 +1,6 @@ #include "misc.h" +#include #include #include #include @@ -94,7 +95,7 @@ static unsigned long get_random_long(void) } /* Circular multiply for better bit diffusion */ - asm("mul %3" + asm(_ASM_MUL "%3" : "=a" (random), "=d" (raw) : "a" (random), "rm" (mix_const)); random += raw; diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 21e84a31d211..f3d4f1edc947 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -34,6 +34,7 @@ #define _ASM_ADD __ASM_SIZE(add) #define _ASM_SUB __ASM_SIZE(sub) #define _ASM_XADD __ASM_SIZE(xadd) +#define _ASM_MUL __ASM_SIZE(mul) #define _ASM_AX __ASM_REG(ax) #define _ASM_BX __ASM_REG(bx) From bf6401610af484e94afdd9534e706e9bef5fbe5c Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 21 Jun 2017 16:28:03 -0700 Subject: [PATCH 557/812] kbuild: Add __cc-option macro commit 9f3f1fd299768782465cb32cdf0dd4528d11f26b upstream. cc-option uses KBUILD_CFLAGS and KBUILD_CPPFLAGS when it determines whether an option is supported or not. This is fine for options used to build the kernel itself, however some components like the x86 boot code use a different set of flags. Add the new macro __cc-option which is a more generic version of cc-option with additional parameters. One parameter is the compiler with which the check should be performed, the other the compiler options to be used instead KBUILD_C*FLAGS. Refactor cc-option and hostcc-option to use __cc-option and move hostcc-option to scripts/Kbuild.include. Suggested-by: Arnd Bergmann Suggested-by: Masahiro Yamada Signed-off-by: Matthias Kaehlcke Acked-by: Arnd Bergmann Acked-by: Michal Marek Signed-off-by: Masahiro Yamada [nc: Fix conflicts due to lack of CC_OPTION_CFLAGS and hostcc-option wasn't added until v4.8 so no point including it in this tree] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- scripts/Kbuild.include | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index f3612502ff8b..cef9451148cc 100644 --- a/Makefile +++ b/Makefile @@ -303,7 +303,7 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ HOSTCC = gcc HOSTCXX = g++ -HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 +HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 HOSTCXXFLAGS = -O2 # Decide whether to build built-in, modular, or both. diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 9f634b50e57b..5e9cf7d146f0 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -108,11 +108,16 @@ as-option = $(call try-run,\ as-instr = $(call try-run,\ printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) +# __cc-option +# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) +__cc-option = $(call try-run,\ + $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4)) + # cc-option # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) -cc-option = $(call try-run,\ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) +cc-option = $(call __cc-option, $(CC),\ + $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2)) # cc-option-yn # Usage: flag := $(call cc-option-yn,-march=winchip-c6) From b7c98579f935d1308786d2e5bdd85ce7aef42d43 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 21 Jun 2017 16:28:04 -0700 Subject: [PATCH 558/812] x86/build: Use __cc-option for boot code compiler options commit 032a2c4f65a2f81c93e161a11197ba19bc14a909 upstream. cc-option is used to enable compiler options for the boot code if they are available. The macro uses KBUILD_CFLAGS and KBUILD_CPPFLAGS for the check, however these flags aren't used to build the boot code, in consequence cc-option can yield wrong results. For example -mpreferred-stack-boundary=2 is never set with a 64-bit compiler, since the setting is only valid for 16 and 32-bit binaries. This is also the case for 32-bit kernel builds, because the option -m32 is added to KBUILD_CFLAGS after the assignment of REALMODE_CFLAGS. Use __cc-option instead of cc-option for the boot mode options. The macro receives the compiler options as parameter instead of using KBUILD_C*FLAGS, for the boot code we pass REALMODE_CFLAGS. Also use separate statements for the __cc-option checks instead of performing them in the initial assignment of REALMODE_CFLAGS since the variable is an input of the macro. Signed-off-by: Matthias Kaehlcke Acked-by: Ingo Molnar Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 865c465f2881..be909f26563e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -24,10 +24,11 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ - -mno-mmx -mno-sse \ - $(call cc-option, -ffreestanding) \ - $(call cc-option, -fno-stack-protector) \ - $(call cc-option, -mpreferred-stack-boundary=2) + -mno-mmx -mno-sse + +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding) +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector) +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -mpreferred-stack-boundary=2) export REALMODE_CFLAGS # BITS is used as extension for files which are available in a 32 bit From 397fae4e356f2ebb4baa2fad3c6b170290db5e9c Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 21 Jun 2017 16:28:05 -0700 Subject: [PATCH 559/812] x86/build: Specify stack alignment for clang commit d77698df39a512911586834d303275ea5fda74d0 upstream. For gcc stack alignment is configured with -mpreferred-stack-boundary=N, clang has the option -mstack-alignment=N for that purpose. Use the same alignment as with gcc. If the alignment is not specified clang assumes an alignment of 16 bytes, as required by the standard ABI. However as mentioned in d9b0cde91c60 ("x86-64, gcc: Use -mpreferred-stack-boundary=3 if supported") the standard kernel entry on x86-64 leaves the stack on an 8-byte boundary, as a consequence clang will keep the stack misaligned. Signed-off-by: Matthias Kaehlcke Acked-by: Ingo Molnar Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index be909f26563e..914b1e87d1a6 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -11,6 +11,14 @@ else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif +# For gcc stack alignment is specified with -mpreferred-stack-boundary, +# clang has the option -mstack-alignment for that purpose. +ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) + cc_stack_align_opt := -mpreferred-stack-boundary +else ifneq ($(call cc-option, -mstack-alignment=4),) + cc_stack_align_opt := -mstack-alignment +endif + # How to compile the 16-bit code. Note we always compile for -march=i386; # that way we can complain to the user if the CPU is insufficient. # @@ -28,7 +36,7 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector) -REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -mpreferred-stack-boundary=2) +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align_opt)=2) export REALMODE_CFLAGS # BITS is used as extension for files which are available in a 32 bit @@ -65,8 +73,10 @@ ifeq ($(CONFIG_X86_32),y) # with nonstandard options KBUILD_CFLAGS += -fno-pic - # prevent gcc from keeping the stack 16 byte aligned - KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) + # Align the stack to the register width instead of using the default + # alignment of 16 bytes. This reduces stack usage and the number of + # alignment instructions. + KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align_opt)=2) # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use # a lot more stack due to the lack of sharing of stacklots: @@ -98,8 +108,14 @@ else KBUILD_CFLAGS += $(call cc-option,-mno-80387) KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) - # Use -mpreferred-stack-boundary=3 if supported. - KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) + # By default gcc and clang use a stack alignment of 16 bytes for x86. + # However the standard kernel entry on x86-64 leaves the stack on an + # 8-byte boundary. If the compiler isn't informed about the actual + # alignment it will generate extra alignment instructions for the + # default alignment which keep the stack *mis*aligned. + # Furthermore an alignment to the register width reduces stack usage + # and the number of alignment instructions. + KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align_opt)=3) # Use -mskip-rax-setup if supported. KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) From 5f9c9bdde17a68dc889a8191020c73f55d2ea7f9 Mon Sep 17 00:00:00 2001 From: Michael Davidson Date: Mon, 24 Jul 2017 16:51:55 -0700 Subject: [PATCH 560/812] x86/boot: #undef memcpy() et al in string.c commit 18d5e6c34a8eda438d5ad8b3b15f42dab01bf05d upstream. undef memcpy() and friends in boot/string.c so that the functions defined here will have the correct names, otherwise we end up up trying to redefine __builtin_memcpy() etc. Surprisingly, GCC allows this (and, helpfully, discards the __builtin_ prefix from the function name when compiling it), but clang does not. Adding these #undef's appears to preserve what I assume was the original intent of the code. Signed-off-by: Michael Davidson Signed-off-by: Matthias Kaehlcke Acked-by: H. Peter Anvin Cc: Arnd Bergmann Cc: Bernhard.Rosenkranzer@linaro.org Cc: Greg Hackmann Cc: Kees Cook Cc: Linus Torvalds Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170724235155.79255-1-mka@chromium.org Signed-off-by: Ingo Molnar Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/string.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 06ceddb3a22e..1d56adea8a7c 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -16,6 +16,15 @@ #include "ctype.h" #include "string.h" +/* + * Undef these macros so that the functions that we provide + * here will have the correct names regardless of how string.h + * may have chosen to #define them. + */ +#undef memcpy +#undef memset +#undef memcmp + int memcmp(const void *s1, const void *s2, size_t len) { u8 diff; From 7b76d79043d0b095864ab09527781c0b4b05fe8e Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 16 Aug 2017 17:47:40 -0700 Subject: [PATCH 561/812] x86/build: Fix stack alignment for CLang commit 8f91869766c00622b2eaa8ee567db4f333b78c1a upstream. Commit: d77698df39a5 ("x86/build: Specify stack alignment for clang") intended to use the same stack alignment for clang as with gcc. The two compilers use different options to configure the stack alignment (gcc: -mpreferred-stack-boundary=n, clang: -mstack-alignment=n). The above commit assumes that the clang option uses the same parameter type as gcc, i.e. that the alignment is specified as 2^n. However clang interprets the value of this option literally to use an alignment of n, in consequence the stack remains misaligned. Change the values used with -mstack-alignment to be the actual alignment instead of a power of two. cc-option isn't used here with the typical pattern of KBUILD_CFLAGS += $(call cc-option ...). The reason is that older gcc versions don't support the -mpreferred-stack-boundary option, since cc-option doesn't verify whether the alternative option is valid it would incorrectly select the clang option -mstack-alignment.. Signed-off-by: Matthias Kaehlcke Cc: Arnd Bergmann Cc: Bernhard.Rosenkranzer@linaro.org Cc: Greg Hackmann Cc: Kees Cook Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Michael Davidson Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephen Hines Cc: Thomas Gleixner Cc: dianders@chromium.org Link: http://lkml.kernel.org/r/20170817004740.170588-1-mka@chromium.org Signed-off-by: Ingo Molnar Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 914b1e87d1a6..6c698e49dff5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -14,9 +14,11 @@ endif # For gcc stack alignment is specified with -mpreferred-stack-boundary, # clang has the option -mstack-alignment for that purpose. ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align_opt := -mpreferred-stack-boundary -else ifneq ($(call cc-option, -mstack-alignment=4),) - cc_stack_align_opt := -mstack-alignment + cc_stack_align4 := -mpreferred-stack-boundary=2 + cc_stack_align8 := -mpreferred-stack-boundary=3 +else ifneq ($(call cc-option, -mstack-alignment=16),) + cc_stack_align4 := -mstack-alignment=4 + cc_stack_align8 := -mstack-alignment=8 endif # How to compile the 16-bit code. Note we always compile for -march=i386; @@ -36,7 +38,7 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector) -REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align_opt)=2) +REALMODE_CFLAGS += $(cc_stack_align4) export REALMODE_CFLAGS # BITS is used as extension for files which are available in a 32 bit @@ -76,7 +78,7 @@ ifeq ($(CONFIG_X86_32),y) # Align the stack to the register width instead of using the default # alignment of 16 bytes. This reduces stack usage and the number of # alignment instructions. - KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align_opt)=2) + KBUILD_CFLAGS += $(cc_stack_align4) # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use # a lot more stack due to the lack of sharing of stacklots: @@ -115,7 +117,7 @@ else # default alignment which keep the stack *mis*aligned. # Furthermore an alignment to the register width reduces stack usage # and the number of alignment instructions. - KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align_opt)=3) + KBUILD_CFLAGS += $(cc_stack_align8) # Use -mskip-rax-setup if supported. KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) From 88aead985c3f8f9528b34900c133ad17d76e8d34 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 17 Aug 2017 11:20:47 -0700 Subject: [PATCH 562/812] x86/build: Use cc-option to validate stack alignment parameter commit 9e8730b178a2472fca3123e909d6e69cc8127778 upstream. With the following commit: 8f91869766c0 ("x86/build: Fix stack alignment for CLang") cc-option is only used to determine the name of the stack alignment option supported by the compiler, but not to verify that the actual parameter