From b96e7dacf24315a84f71ba0f15a603ba5f82b010 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 20 Jun 2013 13:13:59 -0700 Subject: [PATCH 0001/1368] libceph: Fix NULL pointer dereference in auth client code commit 2cb33cac622afde897aa02d3dcd9fbba8bae839e upstream. A malicious monitor can craft an auth reply message that could cause a NULL function pointer dereference in the client's kernel. To prevent this, the auth_none protocol handler needs an empty ceph_auth_client_ops->build_request() function. CVE-2013-1059 Signed-off-by: Tyler Hicks Reported-by: Chanam Park Reviewed-by: Seth Arnold Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_none.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 925ca583c09c..8c93fa8d81bc 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -39,6 +39,11 @@ static int should_authenticate(struct ceph_auth_client *ac) return xi->starting; } +static int build_request(struct ceph_auth_client *ac, void *buf, void *end) +{ + return 0; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -106,6 +111,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .destroy = destroy, .is_authenticated = is_authenticated, .should_authenticate = should_authenticate, + .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, From bd4d4d8725c31dee623999ff7904ee36490ae884 Mon Sep 17 00:00:00 2001 From: majianpeng Date: Wed, 19 Jun 2013 14:58:10 +0800 Subject: [PATCH 0002/1368] ceph: fix sleeping function called from invalid context. commit a1dc1937337a93e699eaa56968b7de6e1a9e77cf upstream. [ 1121.231883] BUG: sleeping function called from invalid context at kernel/rwsem.c:20 [ 1121.231935] in_atomic(): 1, irqs_disabled(): 0, pid: 9831, name: mv [ 1121.231971] 1 lock held by mv/9831: [ 1121.231973] #0: (&(&ci->i_ceph_lock)->rlock){+.+...},at:[] ceph_getxattr+0x58/0x1d0 [ceph] [ 1121.231998] CPU: 3 PID: 9831 Comm: mv Not tainted 3.10.0-rc6+ #215 [ 1121.232000] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080015 11/09/2011 [ 1121.232027] ffff88006d355a80 ffff880092f69ce0 ffffffff8168348c ffff880092f69cf8 [ 1121.232045] ffffffff81070435 ffff88006d355a20 ffff880092f69d20 ffffffff816899ba [ 1121.232052] 0000000300000004 ffff8800b76911d0 ffff88006d355a20 ffff880092f69d68 [ 1121.232056] Call Trace: [ 1121.232062] [] dump_stack+0x19/0x1b [ 1121.232067] [] __might_sleep+0xe5/0x110 [ 1121.232071] [] down_read+0x2a/0x98 [ 1121.232080] [] ceph_vxattrcb_layout+0x60/0xf0 [ceph] [ 1121.232088] [] ceph_getxattr+0x9f/0x1d0 [ceph] [ 1121.232093] [] vfs_getxattr+0xa8/0xd0 [ 1121.232097] [] getxattr+0xab/0x1c0 [ 1121.232100] [] ? final_putname+0x22/0x50 [ 1121.232104] [] ? kmem_cache_free+0xb0/0x260 [ 1121.232107] [] ? final_putname+0x22/0x50 [ 1121.232110] [] ? trace_hardirqs_on+0xd/0x10 [ 1121.232114] [] ? sysret_check+0x1b/0x56 [ 1121.232120] [] SyS_fgetxattr+0x6c/0xc0 [ 1121.232125] [] system_call_fastpath+0x16/0x1b [ 1121.232129] BUG: scheduling while atomic: mv/9831/0x10000002 [ 1121.232154] 1 lock held by mv/9831: [ 1121.232156] #0: (&(&ci->i_ceph_lock)->rlock){+.+...}, at: [] ceph_getxattr+0x58/0x1d0 [ceph] I think move the ci->i_ceph_lock down is safe because we can't free ceph_inode_info at there. Signed-off-by: Jianpeng Ma Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- fs/ceph/xattr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9b6b2b6dd164..be661d8f532a 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -675,17 +675,18 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, if (!ceph_is_valid_xattr(name)) return -ENODATA; - spin_lock(&ci->i_ceph_lock); - dout("getxattr %p ver=%lld index_ver=%lld\n", inode, - ci->i_xattrs.version, ci->i_xattrs.index_version); /* let's see if a virtual xattr was requested */ vxattr = ceph_match_vxattr(inode, name); if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { err = vxattr->getxattr_cb(ci, value, size); - goto out; + return err; } + spin_lock(&ci->i_ceph_lock); + dout("getxattr %p ver=%lld index_ver=%lld\n", inode, + ci->i_xattrs.version, ci->i_xattrs.index_version); + if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { goto get_xattr; From 1b7927f047e3d71b46e36c186903fafbc9a47bf3 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Fri, 28 Jun 2013 13:13:16 -0700 Subject: [PATCH 0003/1368] libceph: fix invalid unsigned->signed conversion for timespec encoding commit 8b8cf8917f9b5d74e04f281272d8719ce335a497 upstream. __kernel_time_t is a long, which cannot hold a U32_MAX on 32-bit architectures. Just drop this check as it has limited value. This fixes a crash like: [ 957.905812] kernel BUG at /srv/autobuild-ceph/gitbuilder.git/build/include/linux/ceph/decode.h:164! [ 957.914849] Internal error: Oops - BUG: 0 [#1] SMP ARM [ 957.919978] Modules linked in: rbd libceph libcrc32c ipmi_devintf ipmi_si ipmi_msghandler nfsd nfs_acl auth_rpcgss nfs fscache lockd sunrpc [ 957.932547] CPU: 1 Tainted: G W (3.9.0-ceph-19bb6a83-highbank #1) [ 957.939881] PC is at ceph_osdc_build_request+0x8c/0x4f8 [libceph] [ 957.945967] LR is at 0xec520904 [ 957.949103] pc : [] lr : [] psr: 20000153 [ 957.949103] sp : ec753df8 ip : 00000001 fp : ec53e100 [ 957.960571] r10: ebef25c0 r9 : ec5fa400 r8 : ecbcc000 [ 957.965788] r7 : 00000000 r6 : 00000000 r5 : ffffffff r4 : 00000020 [ 957.972307] r3 : 51cc8143 r2 : ec520900 r1 : ec753e58 r0 : ec520908 [ 957.978827] Flags: nzCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment user [ 957.986039] Control: 10c5387d Table: 2c59c04a DAC: 00000015 [ 957.991777] Process rbd (pid: 2138, stack limit = 0xec752238) [ 957.997514] Stack: (0xec753df8 to 0xec754000) [ 958.001864] 3de0: 00000001 00000001 [ 958.010032] 3e00: 00000001 bf139744 ecbcc000 ec55a0a0 00000024 00000000 ebef25c0 fffffffe [ 958.018204] 3e20: ffffffff 00000000 00000000 00000001 ec5fa400 ebef25c0 ec53e100 bf166b68 [ 958.026377] 3e40: 00000000 0000220f fffffffe ffffffff ec753e58 bf13ff24 51cc8143 05b25ed2 [ 958.034548] 3e60: 00000001 00000000 00000000 bf1688d4 00000001 00000000 00000000 00000000 [ 958.042720] 3e80: 00000001 00000060 ec5fa400 ed53d200 ed439600 ed439300 00000001 00000060 [ 958.050888] 3ea0: ec5fa400 ed53d200 00000000 bf16a320 00000000 ec53e100 00000040 ec753eb8 [ 958.059059] 3ec0: ec51df00 ed53d7c0 ed53d200 ed53d7c0 00000000 ed53d7c0 ec5fa400 bf16ed70 [ 958.067230] 3ee0: 00000000 00000060 00000002 ed53d200 00000000 bf16acf4 ed53d7c0 ec752000 [ 958.075402] 3f00: ed980e50 e954f5d8 00000000 00000060 ed53d240 ed53d258 ec753f80 c04f44a8 [ 958.083574] 3f20: edb7910c ec664700 01ade920 c02e4c44 00000060 c016b3dc ec51de40 01adfb84 [ 958.091745] 3f40: 00000060 ec752000 ec753f80 ec752000 00000060 c0108444 00000007 ec51de48 [ 958.099914] 3f60: ed0eb8c0 00000000 00000000 ec51de40 01adfb84 00000001 00000060 c0108858 [ 958.108085] 3f80: 00000000 00000000 51cc8143 00000060 01adfb84 00000007 00000004 c000dd68 [ 958.116257] 3fa0: 00000000 c000dbc0 00000060 01adfb84 00000007 01adfb84 00000060 01adfb80 [ 958.124429] 3fc0: 00000060 01adfb84 00000007 00000004 beded1a8 00000000 01adf2f0 01ade920 [ 958.132599] 3fe0: 00000000 beded180 b6811324 b6811334 800f0010 00000007 2e7f5821 2e7f5c21 [ 958.140815] [] (ceph_osdc_build_request+0x8c/0x4f8 [libceph]) from [] (rbd_osd_req_format_write+0x50/0x7c [rbd]) [ 958.152739] [] (rbd_osd_req_format_write+0x50/0x7c [rbd]) from [] (rbd_dev_header_watch_sync+0xe0/0x204 [rbd]) [ 958.164486] [] (rbd_dev_header_watch_sync+0xe0/0x204 [rbd]) from [] (rbd_dev_image_probe+0x23c/0x850 [rbd]) [ 958.175967] [] (rbd_dev_image_probe+0x23c/0x850 [rbd]) from [] (rbd_add+0x3c0/0x918 [rbd]) [ 958.185975] [] (rbd_add+0x3c0/0x918 [rbd]) from [] (bus_attr_store+0x20/0x2c) [ 958.194850] [] (bus_attr_store+0x20/0x2c) from [] (sysfs_write_file+0x168/0x198) [ 958.203984] [] (sysfs_write_file+0x168/0x198) from [] (vfs_write+0x9c/0x170) [ 958.212768] [] (vfs_write+0x9c/0x170) from [] (sys_write+0x3c/0x70) [ 958.220768] [] (sys_write+0x3c/0x70) from [] (ret_fast_syscall+0x0/0x30) [ 958.229199] Code: e59d1058 e5913000 e3530000 ba000114 (e7f001f2) Signed-off-by: Josh Durgin Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/decode.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 379f71508995..0442c3d800f0 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -160,11 +160,6 @@ static inline void ceph_decode_timespec(struct timespec *ts, static inline void ceph_encode_timespec(struct ceph_timespec *tv, const struct timespec *ts) { - BUG_ON(ts->tv_sec < 0); - BUG_ON(ts->tv_sec > (__kernel_time_t)U32_MAX); - BUG_ON(ts->tv_nsec < 0); - BUG_ON(ts->tv_nsec > (long)U32_MAX); - tv->tv_sec = cpu_to_le32((u32)ts->tv_sec); tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec); } From 2842e87389ad1af50afd3fb89b7832aae7f3a7c0 Mon Sep 17 00:00:00 2001 From: Jonathan Salwan Date: Wed, 3 Jul 2013 15:01:13 -0700 Subject: [PATCH 0004/1368] drivers/cdrom/cdrom.c: use kzalloc() for failing hardware commit 542db01579fbb7ea7d1f7bb9ddcef1559df660b2 upstream. In drivers/cdrom/cdrom.c mmc_ioctl_cdrom_read_data() allocates a memory area with kmalloc in line 2885. 2885 cgc->buffer = kmalloc(blocksize, GFP_KERNEL); 2886 if (cgc->buffer == NULL) 2887 return -ENOMEM; In line 2908 we can find the copy_to_user function: 2908 if (!ret && copy_to_user(arg, cgc->buffer, blocksize)) The cgc->buffer is never cleaned and initialized before this function. If ret = 0 with the previous basic block, it's possible to display some memory bytes in kernel space from userspace. When we read a block from the disk it normally fills the ->buffer but if the drive is malfunctioning there is a chance that it would only be partially filled. The result is an leak information to userspace. Signed-off-by: Dan Carpenter Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Jonathan Salwan Cc: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index d620b4495745..8a3aff724d98 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2882,7 +2882,7 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, if (lba < 0) return -EINVAL; - cgc->buffer = kmalloc(blocksize, GFP_KERNEL); + cgc->buffer = kzalloc(blocksize, GFP_KERNEL); if (cgc->buffer == NULL) return -ENOMEM; From 0b72ac9665b63dd029675ea601cb46defd2c974d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 3 Jul 2013 10:06:28 +0930 Subject: [PATCH 0005/1368] module: do percpu allocation after uniqueness check. No, really! commit 8d8022e8aba85192e937f1f0f7450e256d66ae5c upstream. v3.8-rc1-5-g1fb9341 was supposed to stop parallel kvm loads exhausting percpu memory on large machines: Now we have a new state MODULE_STATE_UNFORMED, we can insert the module into the list (and thus guarantee its uniqueness) before we allocate the per-cpu region. In my defence, it didn't actually say the patch did this. Just that we "can". This patch actually *does* it. Signed-off-by: Rusty Russell Tested-by: Jim Hull Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index cab4bce49c23..fa53db8aadeb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2927,7 +2927,6 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) { /* Module within temporary copy. */ struct module *mod; - Elf_Shdr *pcpusec; int err; mod = setup_load_info(info, flags); @@ -2942,17 +2941,10 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) err = module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, mod); if (err < 0) - goto out; + return ERR_PTR(err); - pcpusec = &info->sechdrs[info->index.pcpu]; - if (pcpusec->sh_size) { - /* We have a special allocation for this section. */ - err = percpu_modalloc(mod, - pcpusec->sh_size, pcpusec->sh_addralign); - if (err) - goto out; - pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC; - } + /* We will do a special allocation for per-cpu sections later. */ + info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any @@ -2963,17 +2955,22 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) /* Allocate and move to the final place */ err = move_module(mod, info); if (err) - goto free_percpu; + return ERR_PTR(err); /* Module has been copied to its final place now: return it. */ mod = (void *)info->sechdrs[info->index.mod].sh_addr; kmemleak_load_module(mod, info); return mod; +} -free_percpu: - percpu_modfree(mod); -out: - return ERR_PTR(err); +static int alloc_module_percpu(struct module *mod, struct load_info *info) +{ + Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu]; + if (!pcpusec->sh_size) + return 0; + + /* We have a special allocation for this section. */ + return percpu_modalloc(mod, pcpusec->sh_size, pcpusec->sh_addralign); } /* mod is no longer valid after this! */ @@ -3237,6 +3234,11 @@ static int load_module(struct load_info *info, const char __user *uargs, } #endif + /* To avoid stressing percpu allocator, do this once we're unique. */ + err = alloc_module_percpu(mod, info); + if (err) + goto unlink_mod; + /* Now module is in final location, initialize linked lists, etc. */ err = module_unload_init(mod); if (err) From e64c7e14f5dcb917a06fb39d5331ab88433fcaa7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 6 Jun 2013 13:52:21 -0700 Subject: [PATCH 0006/1368] charger-manager: Ensure event is not used as format string commit 3594f4c0d7bc51e3a7e6d73c44e368ae079e42f3 upstream. The exposed interface for cm_notify_event() could result in the event msg string being parsed as a format string. Make sure it is only used as a literal string. Signed-off-by: Kees Cook Cc: Anton Vorontsov Cc: David Woodhouse Signed-off-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- drivers/power/charger-manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c index fefc39fe42be..98de1ddce458 100644 --- a/drivers/power/charger-manager.c +++ b/drivers/power/charger-manager.c @@ -450,7 +450,7 @@ static void uevent_notify(struct charger_manager *cm, const char *event) strncpy(env_str, event, UEVENT_BUF_SIZE); kobject_uevent(&cm->dev->kobj, KOBJ_CHANGE); - dev_info(cm->dev, event); + dev_info(cm->dev, "%s", event); } /** From 70621db0945635044214766b42e2dfca0d673929 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 4 Jul 2013 18:42:29 +0200 Subject: [PATCH 0007/1368] hpfs: better test for errors commit 3ebacb05044f82c5f0bb456a894eb9dc57d0ed90 upstream. The test if bitmap access is out of bound could errorneously pass if the device size is divisible by 16384 sectors and we are asking for one bitmap after the end. Check for invalid size in the superblock. Invalid size could cause integer overflows in the rest of the code. Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hpfs/map.c | 3 ++- fs/hpfs/super.c | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index 4acb19d78359..803d3da3a0fe 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -17,7 +17,8 @@ __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; - if (hpfs_sb(s)->sb_chk) if (bmp_block * 16384 > hpfs_sb(s)->sb_fs_size) { + unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; + if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) { hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); return NULL; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a0617e706957..962e90c37aec 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -558,7 +558,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) sbi->sb_cp_table = NULL; sbi->sb_c_bitmap = -1; sbi->sb_max_fwd_alloc = 0xffffff; - + + if (sbi->sb_fs_size >= 0x80000000) { + hpfs_error(s, "invalid size in superblock: %08x", + (unsigned)sbi->sb_fs_size); + goto bail4; + } + /* Load bitmap directory */ if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) goto bail4; From 88ce7cf76ced99962699d0ebb4d47d6a88b94c29 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:01:14 -0700 Subject: [PATCH 0008/1368] block: do not pass disk names as format strings commit ffc8b30866879ed9ba62bd0a86fecdbd51cd3d19 upstream. Disk names may contain arbitrary strings, so they must not be interpreted as format strings. It seems that only md allows arbitrary strings to be used for disk names, but this could allow for a local memory corruption from uid 0 into ring 0. CVE-2013-2851 Signed-off-by: Kees Cook Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- block/genhd.c | 2 +- drivers/block/nbd.c | 3 ++- drivers/scsi/osd/osd_uld.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 20625eed5511..cdeb5277dfd4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -512,7 +512,7 @@ static void register_disk(struct gendisk *disk) ddev->parent = disk->driverfs_dev; - dev_set_name(ddev, disk->disk_name); + dev_set_name(ddev, "%s", disk->disk_name); /* delay uevents, until we scanned partition table */ dev_set_uevent_suppress(ddev, 1); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 037288e7874d..46b35f7acfde 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -714,7 +714,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, else blk_queue_flush(nbd->disk->queue, 0); - thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); + thread = kthread_create(nbd_thread, nbd, "%s", + nbd->disk->disk_name); if (IS_ERR(thread)) { mutex_lock(&nbd->tx_lock); return PTR_ERR(thread); diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c index 0fab6b5c7b82..9d86947d67fe 100644 --- a/drivers/scsi/osd/osd_uld.c +++ b/drivers/scsi/osd/osd_uld.c @@ -485,7 +485,7 @@ static int osd_probe(struct device *dev) oud->class_dev.class = &osd_uld_class; oud->class_dev.parent = dev; oud->class_dev.release = __remove; - error = dev_set_name(&oud->class_dev, disk->disk_name); + error = dev_set_name(&oud->class_dev, "%s", disk->disk_name); if (error) { OSD_ERR("dev_set_name failed => %d\n", error); goto err_put_cdev; From c231b9d04be757bd8a9953737ed2c22ffb987f49 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:01:15 -0700 Subject: [PATCH 0009/1368] crypto: sanitize argument for format string commit 1c8fca1d92e14859159a82b8a380d220139b7344 upstream. The template lookup interface does not provide a way to use format strings, so make sure that the interface cannot be abused accidentally. Signed-off-by: Kees Cook Cc: Herbert Xu Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 6149a6e09643..7a1ae87f1683 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -495,7 +495,8 @@ static struct crypto_template *__crypto_lookup_template(const char *name) struct crypto_template *crypto_lookup_template(const char *name) { - return try_then_request_module(__crypto_lookup_template(name), name); + return try_then_request_module(__crypto_lookup_template(name), "%s", + name); } EXPORT_SYMBOL_GPL(crypto_lookup_template); From 44016289145159dcb2b154b92cfc211ac88783b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 18 Jun 2013 12:58:12 -0700 Subject: [PATCH 0010/1368] MAINTAINERS: add stable_kernel_rules.txt to stable maintainer information commit 7b175c46720f8e6b92801bb634c93d1016f80c62 upstream. This hopefully will help point developers to the proper way that patches should be submitted for inclusion in the stable kernel releases. Reported-by: David Howells Acked-by: David Howells Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad7e322ad17b..48c748080c96 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7667,6 +7667,7 @@ STABLE BRANCH M: Greg Kroah-Hartman L: stable@vger.kernel.org S: Supported +F: Documentation/stable_kernel_rules.txt STAGING SUBSYSTEM M: Greg Kroah-Hartman From ab1842f10b18f847893dad5bfe513f1392cbe797 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 25 Jun 2013 21:19:31 +0800 Subject: [PATCH 0011/1368] futex: Take hugepages into account when generating futex_key commit 13d60f4b6ab5b702dc8d2ee20999f98a93728aec upstream. The futex_keys of process shared futexes are generated from the page offset, the mapping host and the mapping index of the futex user space address. This should result in an unique identifier for each futex. Though this is not true when futexes are located in different subpages of an hugepage. The reason is, that the mapping index for all those futexes evaluates to the index of the base page of the hugetlbfs mapping. So a futex at offset 0 of the hugepage mapping and another one at offset PAGE_SIZE of the same hugepage mapping have identical futex_keys. This happens because the futex code blindly uses page->index. Steps to reproduce the bug: 1. Map a file from hugetlbfs. Initialize pthread_mutex1 at offset 0 and pthread_mutex2 at offset PAGE_SIZE of the hugetlbfs mapping. The mutexes must be initialized as PTHREAD_PROCESS_SHARED because PTHREAD_PROCESS_PRIVATE mutexes are not affected by this issue as their keys solely depend on the user space address. 2. Lock mutex1 and mutex2 3. Create thread1 and in the thread function lock mutex1, which results in thread1 blocking on the locked mutex1. 4. Create thread2 and in the thread function lock mutex2, which results in thread2 blocking on the locked mutex2. 5. Unlock mutex2. Despite the fact that mutex2 got unlocked, thread2 still blocks on mutex2 because the futex_key points to mutex1. To solve this issue we need to take the normal page index of the page which contains the futex into account, if the futex is in an hugetlbfs mapping. In other words, we calculate the normal page mapping index of the subpage in the hugetlbfs mapping. Mappings which are not based on hugetlbfs are not affected and still use page->index. Thanks to Mel Gorman who provided a patch for adding proper evaluation functions to the hugetlbfs code to avoid exposing hugetlbfs specific details to the futex code. [ tglx: Massaged changelog ] Signed-off-by: Zhang Yi Reviewed-by: Jiang Biao Tested-by: Ma Chenggong Reviewed-by: 'Mel Gorman' Acked-by: 'Darren Hart' Cc: 'Peter Zijlstra' Link: http://lkml.kernel.org/r/000101ce71a6%24a83c5880%24f8b50980%24@com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/hugetlb.h | 16 ++++++++++++++++ kernel/futex.c | 3 ++- mm/hugetlb.c | 17 +++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6b4890fa57e7..feaf0c7fb7d8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -358,6 +358,17 @@ static inline int hstate_index(struct hstate *h) return h - hstates; } +pgoff_t __basepage_index(struct page *page); + +/* Return page->index in PAGE_SIZE units */ +static inline pgoff_t basepage_index(struct page *page) +{ + if (!PageCompound(page)) + return page->index; + + return __basepage_index(page); +} + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page_node(h, nid) NULL @@ -378,6 +389,11 @@ static inline unsigned int pages_per_huge_page(struct hstate *h) } #define hstate_index_to_shift(index) 0 #define hstate_index(h) 0 + +static inline pgoff_t basepage_index(struct page *page) +{ + return page->index; +} #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _LINUX_HUGETLB_H */ diff --git a/kernel/futex.c b/kernel/futex.c index b26dcfc02c94..49dacfb45745 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -61,6 +61,7 @@ #include #include #include +#include #include @@ -365,7 +366,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) } else { key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.inode = page_head->mapping->host; - key->shared.pgoff = page_head->index; + key->shared.pgoff = basepage_index(page); } get_futex_key_refs(key); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e2bfbf73a551..5cf99bf8cce2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -690,6 +690,23 @@ int PageHuge(struct page *page) } EXPORT_SYMBOL_GPL(PageHuge); +pgoff_t __basepage_index(struct page *page) +{ + struct page *page_head = compound_head(page); + pgoff_t index = page_index(page_head); + unsigned long compound_idx; + + if (!PageHuge(page_head)) + return page_index(page); + + if (compound_order(page_head) >= MAX_ORDER) + compound_idx = page_to_pfn(page) - page_to_pfn(page_head); + else + compound_idx = page - page_head; + + return (index << compound_order(page_head)) + compound_idx; +} + static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) { struct page *page; From e0896b461ff2761c7a36d223f33d4f6d52c7340f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 15 Jun 2013 09:01:00 -0400 Subject: [PATCH 0012/1368] tty: Reset itty for other pty commit 64e377dcd7d75c241d614458e9619d3445de44ef upstream. Commit 19ffd68f816878aed456d5e87697f43bd9e3bd2b ('pty: Remove redundant itty reset') introduced a regression whereby the other pty's linkage is not cleared on teardown. This triggers a false positive diagnostic in testing. Properly reset the itty linkage. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 6464029e4860..447668213900 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1618,6 +1618,8 @@ static void release_tty(struct tty_struct *tty, int idx) tty_free_termios(tty); tty_driver_remove_tty(tty->driver, tty); tty->port->itty = NULL; + if (tty->link) + tty->link->port->itty = NULL; cancel_work_sync(&tty->port->buf.work); if (tty->link) From 94b86558ab7109fa41e18d33ca510c0bc17b6cba Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 30 Jun 2013 09:03:06 -0700 Subject: [PATCH 0013/1368] Revert "serial: 8250_pci: add support for another kind of NetMos Technology PCI 9835 Multi-I/O Controller" commit 828c6a102b1f2b8583fadc0e779c46b31d448f0b upstream. This reverts commit 8d2f8cd424ca0b99001f3ff4f5db87c4e525f366. As reported by Stefan, this device already works with the parport_serial driver, so the 8250_pci driver should not also try to grab it as well. Reported-by: Stefan Seyfried Cc: Wang YanQing Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 26e3a97ab157..c52948b368d8 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4797,10 +4797,6 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_VENDOR_ID_IBM, 0x0299, 0, 0, pbn_b0_bt_2_115200 }, - { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9835, - 0x1000, 0x0012, - 0, 0, pbn_b0_bt_2_115200 }, - { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9901, 0xA000, 0x1000, 0, 0, pbn_b0_1_115200 }, From 9c42c27677b133bd8c8f7a7d69dd4d00e96ef035 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 19 Jun 2013 16:39:44 -0400 Subject: [PATCH 0014/1368] NFSv4.1 end back channel session draining commit 62f288a02f97bd9f6b2361a6fff709729fe9e110 upstream. We need to ensure that we clear NFS4_SLOT_TBL_DRAINING on the back channel when we're done recovering the session. Regression introduced by commit 774d5f14e (NFSv4.1 Fix a pNFS session draining deadlock) Signed-off-by: Andy Adamson [Trond: Changed order to start back-channel first. Minor code cleanup] Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4state.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1fab140764c4..2c37442ed936 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -228,19 +228,8 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) return status; } -/* - * Back channel returns NFS4ERR_DELAY for new requests when - * NFS4_SESSION_DRAINING is set so there is no work to be done when draining - * is ended. - */ -static void nfs4_end_drain_session(struct nfs_client *clp) +static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl) { - struct nfs4_session *ses = clp->cl_session; - struct nfs4_slot_table *tbl; - - if (ses == NULL) - return; - tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { spin_lock(&tbl->slot_tbl_lock); nfs41_wake_slot_table(tbl); @@ -248,6 +237,16 @@ static void nfs4_end_drain_session(struct nfs_client *clp) } } +static void nfs4_end_drain_session(struct nfs_client *clp) +{ + struct nfs4_session *ses = clp->cl_session; + + if (ses != NULL) { + nfs4_end_drain_slot_table(&ses->bc_slot_table); + nfs4_end_drain_slot_table(&ses->fc_slot_table); + } +} + /* * Signal state manager thread if session fore channel is drained */ From c334daca734bbb11f79742ec699b1cf9d2112117 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 21 Jun 2013 11:48:11 -0400 Subject: [PATCH 0015/1368] nfsd4: fix decoding of compounds across page boundaries commit 247500820ebd02ad87525db5d9b199e5b66f6636 upstream. A freebsd NFSv4.0 client was getting rare IO errors expanding a tarball. A network trace showed the server returning BAD_XDR on the final getattr of a getattr+write+getattr compound. The final getattr started on a page boundary. I believe the Linux client ignores errors on the post-write getattr, and that that's why we haven't seen this before. Reported-by: Rick Macklem Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6cd86e0fe450..582321a978b0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -162,8 +162,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ memcpy(p, argp->p, avail); /* step to next page */ - argp->p = page_address(argp->pagelist[0]); argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; From 096bff23937209645162a7fc5e70246cb5c60336 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Fri, 28 Jun 2013 13:17:18 +0300 Subject: [PATCH 0016/1368] KVM: VMX: mark unusable segment as nonpresent commit 03617c188f41eeeb4223c919ee7e66e5a114f2c6 upstream. Some userspaces do not preserve unusable property. Since usable segment has to be present according to VMX spec we can use present property to amend userspace bug by making unusable segment always nonpresent. vmx_segment_access_rights() already marks nonpresent segment as unusable. Reported-by: Stefan Pietsch Tested-by: Stefan Pietsch Signed-off-by: Gleb Natapov Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 260a91939555..5402c94ab768 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3399,15 +3399,22 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = vmx_read_guest_seg_limit(vmx, seg); var->selector = vmx_read_guest_seg_selector(vmx, seg); ar = vmx_read_guest_seg_ar(vmx, seg); + var->unusable = (ar >> 16) & 1; var->type = ar & 15; var->s = (ar >> 4) & 1; var->dpl = (ar >> 5) & 3; - var->present = (ar >> 7) & 1; + /* + * Some userspaces do not preserve unusable property. Since usable + * segment has to be present according to VMX spec we can use present + * property to amend userspace bug by making unusable segment always + * nonpresent. vmx_segment_access_rights() already marks nonpresent + * segment as unusable. + */ + var->present = !var->unusable; var->avl = (ar >> 12) & 1; var->l = (ar >> 13) & 1; var->db = (ar >> 14) & 1; var->g = (ar >> 15) & 1; - var->unusable = (ar >> 16) & 1; } static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) From 312cf3e4cf531e1f729624464bce7542025f7cbe Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 27 May 2013 19:07:19 +0100 Subject: [PATCH 0017/1368] SCSI: sd: Fix parsing of 'temporary ' cache mode prefix commit 2ee3e26c673e75c05ef8b914f54fadee3d7b9c88 upstream. Commit 39c60a0948cc '[SCSI] sd: fix array cache flushing bug causing performance problems' added temp as a pointer to "temporary " and used sizeof(temp) - 1 as its length. But sizeof(temp) is the size of the pointer, not the size of the string constant. Change temp to a static array so that sizeof() does what was intended. Signed-off-by: Ben Hutchings Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c1c555242d0d..6f6a1b48f998 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -142,7 +142,7 @@ sd_store_cache_type(struct device *dev, struct device_attribute *attr, char *buffer_data; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; - const char *temp = "temporary "; + static const char temp[] = "temporary "; int len; if (sdp->type != TYPE_DISK) From c02527487f0c8feb578c0394ad481a97f26d3bd2 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Mon, 1 Jul 2013 00:40:55 +0200 Subject: [PATCH 0018/1368] cpufreq: Fix cpufreq regression after suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f51e1eb63d9c28cec188337ee656a13be6980cfd upstream. Toralf Förster reported that the cpufreq ondemand governor behaves erratically (doesn't scale well) after a suspend/resume cycle. The problem was that the cpufreq subsystem's idea of the cpu frequencies differed from the actual frequencies set in the hardware after a suspend/resume cycle. Toralf bisected the problem to commit a66b2e5 (cpufreq: Preserve sysfs files across suspend/resume). Among other (harmless) things, that commit skipped the call to cpufreq_update_policy() in the resume path. But cpufreq_update_policy() plays an important role during resume, because it is responsible for checking if the BIOS changed the cpu frequencies behind our back and resynchronize the cpufreq subsystem's knowledge of the cpu frequencies, and update them accordingly. So, restore the call to cpufreq_update_policy() in the resume path to fix the cpufreq regression. Reported-and-tested-by: Toralf Förster Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index fb65decffa28..591b6fb641b2 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -349,6 +349,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpufreq_update_policy(cpu); break; case CPU_DOWN_PREPARE: From 9cafb55474a872531d0b9bf429346912ae5f1153 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 Jul 2013 16:00:27 -0700 Subject: [PATCH 0019/1368] Revert "memcg: avoid dangling reference count in creation failure" commit fa460c2d37870e0a6f94c70e8b76d05ca11b6db0 upstream. This reverts commit e4715f01be697a. mem_cgroup_put is hierarchy aware so mem_cgroup_put(memcg) already drops an additional reference from all parents so the additional mem_cgrroup_put(parent) potentially causes use-after-free. Signed-off-by: Michal Hocko Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Tejun Heo Cc: Glauber Costa Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 194721839cf5..fd79df5d3152 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6303,8 +6303,6 @@ mem_cgroup_css_online(struct cgroup *cont) * call __mem_cgroup_free, so return directly */ mem_cgroup_put(memcg); - if (parent->use_hierarchy) - mem_cgroup_put(parent); } return error; } From cb5d8be972cfbea114ea56fd63ed5ce1644863df Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 13 Jul 2013 11:42:41 -0700 Subject: [PATCH 0020/1368] Linux 3.10.1 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e5e3ba085191..b75cc30af7bb 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 0 +SUBLEVEL = 1 EXTRAVERSION = NAME = Unicycling Gorilla From cfe24e4e36ec19a56bac17691f8721ac44050a6f Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 4 Jul 2013 14:38:48 -0500 Subject: [PATCH 0021/1368] CIFS use sensible file nlink values if unprovided commit 6658b9f70ebca5fc0795b1d6d733996af1e2caa7 upstream. Certain servers may not set the NumberOfLinks field in query file/path info responses. In such a case, cifs_inode_needs_reval() assumes that all regular files are hardlinks and triggers revalidation, leading to excessive and unnecessary network traffic. This change hardcodes cf_nlink (and subsequently i_nlink) when not returned by the server, similar to what already occurs in cifs_mkdir(). Signed-off-by: David Disseldorp Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 20efd81266c6..449b6cf09b09 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -558,6 +558,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_mode &= ~(S_IWUGO); fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); + if (fattr->cf_nlink < 1) { + cifs_dbg(1, "replacing bogus file nlink value %u\n", + fattr->cf_nlink); + fattr->cf_nlink = 1; + } } fattr->cf_uid = cifs_sb->mnt_uid; From 059a1671f28c977fe69a0f86eeb0fa2084fdb3dd Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 11 Jul 2013 11:17:45 +0400 Subject: [PATCH 0022/1368] CIFS: Fix a deadlock when a file is reopened commit 689c3db4d57a73bee6c5ad7797fce7b54d32a87c upstream. If we request reading or writing on a file that needs to be reopened, it causes the deadlock: we are already holding rw semaphore for reading and then we try to acquire it for writing in cifs_relock_file. Fix this by acquiring the semaphore for reading in cifs_relock_file due to we don't make any changes in locks and don't need a write access. Signed-off-by: Pavel Shilovsky Acked-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 48b29d24c9f4..c2934f8701da 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -553,11 +553,10 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - /* we are going to update can_cache_brlcks here - need a write access */ - down_write(&cinode->lock_sem); + down_read(&cinode->lock_sem); if (cinode->can_cache_brlcks) { - /* can cache locks - no need to push them */ - up_write(&cinode->lock_sem); + /* can cache locks - no need to relock */ + up_read(&cinode->lock_sem); return rc; } @@ -568,7 +567,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) else rc = tcon->ses->server->ops->push_mand_locks(cfile); - up_write(&cinode->lock_sem); + up_read(&cinode->lock_sem); return rc; } From 05400bc8db6fe1a0aa902910cdf812a5e645dd7f Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 17 Jun 2013 13:25:49 -0500 Subject: [PATCH 0023/1368] rtlwifi: rtl8192cu: Add new USB ID for TP-Link TL-WN8200ND commit c4d827c5ccc3a49227dbf9d4b248a2e86f388023 upstream. This is a new device for this driver. Reported-by: Tobias Kluge Signed-off-by: Larry Finger Cc: Tobias Kluge Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8192cu/sw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index 826f085c29dd..2bd598526217 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -359,6 +359,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = { {RTL_USB_DEVICE(0x2001, 0x330a, rtl92cu_hal_cfg)}, /*D-Link-Alpha*/ {RTL_USB_DEVICE(0x2019, 0xab2b, rtl92cu_hal_cfg)}, /*Planex -Abocom*/ {RTL_USB_DEVICE(0x20f4, 0x624d, rtl92cu_hal_cfg)}, /*TRENDNet*/ + {RTL_USB_DEVICE(0x2357, 0x0100, rtl92cu_hal_cfg)}, /*TP-Link WN8200ND*/ {RTL_USB_DEVICE(0x7392, 0x7822, rtl92cu_hal_cfg)}, /*Edimax -Edimax*/ {} }; From 85eb7ce1182a97ee66095a4fb1f8eae5171373c2 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 23 Jun 2013 18:14:43 -0500 Subject: [PATCH 0024/1368] rtlwifi: rtl8723ae: Fix typo in firmware names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 73e088ed17c2880a963cc760a78af8a06d4a4d9d upstream. The driver loads its firmware from files rtlwifi/rtl8723fw*.bin, but the MODULE_FIRMWARE macros refer to rtlwifi/RTL8723aefw*.bin. Signed-off-by: Larry Finger Reported-by: Axel Köllhofer Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8723ae/sw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/rtlwifi/rtl8723ae/sw.c index e4c4cdc3eb67..d9ee2efffe5f 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723ae/sw.c @@ -251,7 +251,7 @@ static struct rtl_hal_cfg rtl8723ae_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723ae_pci", - .fw_name = "rtlwifi/rtl8723aefw.bin", + .fw_name = "rtlwifi/rtl8723fw.bin", .ops = &rtl8723ae_hal_ops, .mod_params = &rtl8723ae_mod_params, .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL, @@ -353,8 +353,8 @@ MODULE_AUTHOR("Realtek WlanFAE "); MODULE_AUTHOR("Larry Finger "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Realtek 8723E 802.11n PCI wireless"); -MODULE_FIRMWARE("rtlwifi/rtl8723aefw.bin"); -MODULE_FIRMWARE("rtlwifi/rtl8723aefw_B.bin"); +MODULE_FIRMWARE("rtlwifi/rtl8723fw.bin"); +MODULE_FIRMWARE("rtlwifi/rtl8723fw_B.bin"); module_param_named(swenc, rtl8723ae_mod_params.sw_crypto, bool, 0444); module_param_named(debug, rtl8723ae_mod_params.debug, int, 0444); From 9e8e8fdf3d60235954e1452283b21e955e574b98 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 18 Jun 2013 13:25:05 -0500 Subject: [PATCH 0025/1368] rtlwifi: rtl8192cu: Fix duplicate if test commit 10d0b9030a3f86e1e26c710c7580524d7787d688 upstream. A typo causes routine rtl92cu_phy_rf6052_set_cck_txpower() to test the same condition twice. The problem was found using cppcheck-1.49, and the proper fix was verified against the pre-mac80211 version of the code. This patch was originally included as commit 1288aa4, but was accidentally reverted in a later patch. Reported-by: David Binderman [original report] Reported-by: Andrea Morello [report of accidental reversion] Signed-off-by: Larry Finger Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8192cu/rf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/rf.c b/drivers/net/wireless/rtlwifi/rtl8192cu/rf.c index 953f1a0f8532..2119313a737b 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/rf.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/rf.c @@ -104,7 +104,7 @@ void rtl92cu_phy_rf6052_set_cck_txpower(struct ieee80211_hw *hw, tx_agc[RF90_PATH_A] = 0x10101010; tx_agc[RF90_PATH_B] = 0x10101010; } else if (rtlpriv->dm.dynamic_txhighpower_lvl == - TXHIGHPWRLEVEL_LEVEL1) { + TXHIGHPWRLEVEL_LEVEL2) { tx_agc[RF90_PATH_A] = 0x00000000; tx_agc[RF90_PATH_B] = 0x00000000; } else{ From 5f81e313889bcaf53faec9c05a4c92e0b43823f4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Jul 2013 08:12:38 -0400 Subject: [PATCH 0026/1368] jbd2: move superblock checksum calculation to jbd2_write_superblock() commit fe52d17cdd343ac43c85cf72940a58865b9d3bfb upstream. Some of the functions which modify the jbd2 superblock were not updating the checksum before calling jbd2_write_superblock(). Move the call to jbd2_superblock_csum_set() to jbd2_write_superblock(), so that the checksum is calculated consistently. Signed-off-by: "Theodore Ts'o" Cc: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/journal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 95457576e434..aaa1a3f33b0e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1318,6 +1318,7 @@ static int journal_reset(journal_t *journal) static void jbd2_write_superblock(journal_t *journal, int write_op) { struct buffer_head *bh = journal->j_sb_buffer; + journal_superblock_t *sb = journal->j_superblock; int ret; trace_jbd2_write_superblock(journal, write_op); @@ -1339,6 +1340,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) clear_buffer_write_io_error(bh); set_buffer_uptodate(bh); } + jbd2_superblock_csum_set(journal, sb); get_bh(bh); bh->b_end_io = end_buffer_write_sync; ret = submit_bh(write_op, bh); @@ -1435,7 +1437,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal) jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", journal->j_errno); sb->s_errno = cpu_to_be32(journal->j_errno); - jbd2_superblock_csum_set(journal, sb); read_unlock(&journal->j_state_lock); jbd2_write_superblock(journal, WRITE_SYNC); From 15f26a4c4820d1fb5f1ba979b4fe4d00a2d38b7d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Jul 2013 08:12:40 -0400 Subject: [PATCH 0027/1368] jbd2: fix theoretical race in jbd2__journal_restart commit 39c04153fda8c32e85b51c96eb5511a326ad7609 upstream. Once we decrement transaction->t_updates, if this is the last handle holding the transaction from closing, and once we release the t_handle_lock spinlock, it's possible for the transaction to commit and be released. In practice with normal kernels, this probably won't happen, since the commit happens in a separate kernel thread and it's unlikely this could all happen within the space of a few CPU cycles. On the other hand, with a real-time kernel, this could potentially happen, so save the tid found in transaction->t_tid before we release t_handle_lock. It would require an insane configuration, such as one where the jbd2 thread was set to a very high real-time priority, perhaps because a high priority real-time thread is trying to read or write to a file system. But some people who use real-time kernels have been known to do insane things, including controlling laser-wielding industrial robots. :-) Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 10f524c59ea8..e0c0bc275924 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -517,10 +517,10 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) &transaction->t_outstanding_credits); if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); + tid = transaction->t_tid; spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - tid = transaction->t_tid; need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) From b6a81140efa3b994f3bde4ea48434cfc6e5d4fe9 Mon Sep 17 00:00:00 2001 From: Maarten ter Huurne Date: Mon, 1 Jul 2013 08:12:08 -0400 Subject: [PATCH 0028/1368] ext4: fix corruption when online resizing a fs with 1K block size commit 6ca792edc13c409e8d4eb9001e048264c6a2eb64 upstream. Subtracting the number of the first data block places the superblock backups one block too early, corrupting the file system. When the block size is larger than 1K, the first data block is 0, so the subtraction has no effect and no corruption occurs. Signed-off-by: Maarten ter Huurne Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b27c96d01965..49d3c01eabf8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1656,12 +1656,10 @@ static int ext4_group_extend_no_check(struct super_block *sb, err = err2; if (!err) { - ext4_fsblk_t first_block; - first_block = ext4_group_first_block_no(sb, 0); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, + update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block), 0); } return err; From 5196bcf9844862ad05b2dfe825422e95a519e514 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 1 Jul 2013 08:12:38 -0400 Subject: [PATCH 0029/1368] ext3,ext4: don't mess with dir_file->f_pos in htree_dirblock_to_tree() commit 64cb927371cd2ec43758d8a094a003d27bc3d0dc upstream. Both ext3 and ext4 htree_dirblock_to_tree() is just filling the in-core rbtree for use by call_filldir(). All updates of ->f_pos are done by the latter; bumping it here (on error) is obviously wrong - we might very well have it nowhere near the block we'd found an error in. Signed-off-by: Al Viro Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext3/namei.c | 7 ++----- fs/ext4/namei.c | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 692de13e3596..cea8ecf3e76e 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -576,11 +576,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, (block<i_sb)) +((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse (bh); - return count; + /* silently ignore the rest of the block */ + break; } ext3fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6653fc35ecb7..ab2f6dc44b3a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -918,11 +918,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, bh->b_data, bh->b_size, (block<i_sb)) + ((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse(bh); - return count; + /* silently ignore the rest of the block */ + break; } ext4fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || From 0529b225e14f6187cd7a997ebc438538059a7889 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Jul 2013 08:12:39 -0400 Subject: [PATCH 0030/1368] ext4: check error return from ext4_write_inline_data_end() commit 42c832debbbf819f6c4ad8601baa559c44105ba4 upstream. The function ext4_write_inline_data_end() can return an error. So we need to assign it to a signed integer variable to check for an error return (since copied is an unsigned int). Signed-off-by: "Theodore Ts'o" Cc: Zheng Liu Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6382b89ecbd..c2434f89e9ed 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1118,10 +1118,13 @@ static int ext4_write_end(struct file *file, } } - if (ext4_has_inline_data(inode)) - copied = ext4_write_inline_data_end(inode, pos, len, - copied, page); - else + if (ext4_has_inline_data(inode)) { + ret = ext4_write_inline_data_end(inode, pos, len, + copied, page); + if (ret < 0) + goto errout; + copied = ret; + } else copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); From c1d2dfed42f0d8d7eb7d3fa458f93e1e6fdb7e93 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Tue, 25 Jun 2013 18:53:22 -0700 Subject: [PATCH 0031/1368] pch_uart: Add uart_clk selection for the MinnowBoard commit 29692d05647cb7ecea56242241f77291d5624b95 upstream. Use DMI_BOARD_NAME to determine if we are running on a MinnowBoard and set the uart clock to 50MHz if so. This removes the need to pass the user_uartclk to the kernel at boot time. Signed-off-by: Darren Hart Cc: Jiri Slaby Cc: "H. Peter Anvin" Cc: Peter Waskiewicz Cc: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index 21a7e179edf3..572d48189de9 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -217,6 +217,7 @@ enum { #define FRI2_64_UARTCLK 64000000 /* 64.0000 MHz */ #define FRI2_48_UARTCLK 48000000 /* 48.0000 MHz */ #define NTC1_UARTCLK 64000000 /* 64.0000 MHz */ +#define MINNOW_UARTCLK 50000000 /* 50.0000 MHz */ struct pch_uart_buffer { unsigned char *buf; @@ -398,6 +399,10 @@ static int pch_uart_get_uartclk(void) strstr(cmp, "nanoETXexpress-TT"))) return NTC1_UARTCLK; + cmp = dmi_get_system_info(DMI_BOARD_NAME); + if (cmp && strstr(cmp, "MinnowBoard")) + return MINNOW_UARTCLK; + return DEFAULT_UARTCLK; } From 859cfbcdc4374da93fc1258f18f95849c02e52c4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 20 Jun 2013 16:07:40 -0500 Subject: [PATCH 0032/1368] USB: option,qcserial: move Novatel Gobi1K IDs to qcserial commit a254810a86aaaac4ac6ba44fa934558b042a17a7 upstream. These devices are all Gobi1K devices (according to the Windows INF files) and should be handled by qcserial instead of option. Their network port is handled by qmi_wwan. Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ---- drivers/usb/serial/qcserial.c | 8 +++++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bd4323ddae1a..5dd857de05b0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -159,8 +159,6 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0x9000 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001 #define NOVATELWIRELESS_PRODUCT_E362 0x9010 -#define NOVATELWIRELESS_PRODUCT_G1 0xA001 -#define NOVATELWIRELESS_PRODUCT_G1_M 0xA002 #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 @@ -730,8 +728,6 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC547) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED) }, - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1) }, - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1_M) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) }, /* Novatel Ovation MC551 a.k.a. Verizon USB551L */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index bd794b43898c..c65437cfd4a2 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -35,7 +35,13 @@ static const struct usb_device_id id_table[] = { {DEVICE_G1K(0x04da, 0x250c)}, /* Panasonic Gobi QDL device */ {DEVICE_G1K(0x413c, 0x8172)}, /* Dell Gobi Modem device */ {DEVICE_G1K(0x413c, 0x8171)}, /* Dell Gobi QDL device */ - {DEVICE_G1K(0x1410, 0xa001)}, /* Novatel Gobi Modem device */ + {DEVICE_G1K(0x1410, 0xa001)}, /* Novatel/Verizon USB-1000 */ + {DEVICE_G1K(0x1410, 0xa002)}, /* Novatel Gobi Modem device */ + {DEVICE_G1K(0x1410, 0xa003)}, /* Novatel Gobi Modem device */ + {DEVICE_G1K(0x1410, 0xa004)}, /* Novatel Gobi Modem device */ + {DEVICE_G1K(0x1410, 0xa005)}, /* Novatel Gobi Modem device */ + {DEVICE_G1K(0x1410, 0xa006)}, /* Novatel Gobi Modem device */ + {DEVICE_G1K(0x1410, 0xa007)}, /* Novatel Gobi Modem device */ {DEVICE_G1K(0x1410, 0xa008)}, /* Novatel Gobi QDL device */ {DEVICE_G1K(0x0b05, 0x1776)}, /* Asus Gobi Modem device */ {DEVICE_G1K(0x0b05, 0x1774)}, /* Asus Gobi QDL device */ From f8da12fa870df0d4304a3661352d721d90bd0929 Mon Sep 17 00:00:00 2001 From: UCHINO Satoshi Date: Thu, 23 May 2013 11:10:11 +0900 Subject: [PATCH 0033/1368] usb: gadget: f_mass_storage: add missing memory barrier for thread_wakeup_needed commit d68c277b501889b3a50c179d1c3d704db7947b83 upstream. Without this memory barrier, the file-storage thread may fail to escape from the following while loop, because it may observe new common->thread_wakeup_needed and old bh->state which are updated by the callback functions. /* Wait for the CBW to arrive */ while (bh->state != BUF_STATE_FULL) { rc = sleep_thread(common); if (rc) return rc; } Signed-off-by: UCHINO Satoshi Acked-by: Michal Nazarewicz Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/f_mass_storage.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c index 97666e8b1b95..c35a9ecc576b 100644 --- a/drivers/usb/gadget/f_mass_storage.c +++ b/drivers/usb/gadget/f_mass_storage.c @@ -413,6 +413,7 @@ static int fsg_set_halt(struct fsg_dev *fsg, struct usb_ep *ep) /* Caller must hold fsg->lock */ static void wakeup_thread(struct fsg_common *common) { + smp_wmb(); /* ensure the write of bh->state is complete */ /* Tell the main thread that something has happened */ common->thread_wakeup_needed = 1; if (common->thread_task) @@ -632,6 +633,7 @@ static int sleep_thread(struct fsg_common *common) } __set_current_state(TASK_RUNNING); common->thread_wakeup_needed = 0; + smp_rmb(); /* ensure the latest bh->state is visible */ return rc; } From 5b3e69eafcfc490dcdd0368f680170b24686abe5 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 14 Jun 2013 16:52:07 +0300 Subject: [PATCH 0034/1368] USB: ehci-omap: Tweak PHY initialization sequence commit 4e5c9e6fa2d232a0686d5fe45cd1508484048936 upstream. For PHY mode, the PHYs must be brought out of reset before the EHCI controller is started. This patch fixes the issue where USB devices are not found on Beagleboard/Beagle-xm if USB has been started previously by the bootloader. (e.g. by "usb start" command in u-boot) Tested on Beagleboard, Beagleboard-xm and Pandaboard. Issue present on 3.10 onwards. Reported-by: Tomi Valkeinen Tested-by: Tomi Valkeinen Signed-off-by: Roger Quadros Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-omap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index 16d7150e8557..dda408f2c6e9 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -187,6 +187,12 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) } omap->phy[i] = phy; + + if (pdata->port_mode[i] == OMAP_EHCI_PORT_MODE_PHY) { + usb_phy_init(omap->phy[i]); + /* bring PHY out of suspend */ + usb_phy_set_suspend(omap->phy[i], 0); + } } pm_runtime_enable(dev); @@ -211,13 +217,14 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) } /* - * Bring PHYs out of reset. + * Bring PHYs out of reset for non PHY modes. * Even though HSIC mode is a PHY-less mode, the reset * line exists between the chips and can be modelled * as a PHY device for reset control. */ for (i = 0; i < omap->nports; i++) { - if (!omap->phy[i]) + if (!omap->phy[i] || + pdata->port_mode[i] == OMAP_EHCI_PORT_MODE_PHY) continue; usb_phy_init(omap->phy[i]); From 333db1d07c454f02e008780273df44c1e3102448 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 17 Jun 2013 09:56:33 -0700 Subject: [PATCH 0035/1368] xhci: check for failed dma pool allocation commit 025f880cb2e4d7218d0422d4b07bea1a68959c38 upstream. Fail and free the container context in case dma_pool_alloc() can't allocate the raw context data part of it This patch should be backported to kernels as old as 2.6.31, that contain the commit d115b04818e57bdbc7ccde4d0660b15e33013dc8 "USB: xhci: Support for 64-byte contexts". Signed-off-by: Mathias Nyman Signed-off-by: Sarah Sharp Cc: John Youn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index fbf75e57628b..f2e57a1112c9 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -369,6 +369,10 @@ static struct xhci_container_ctx *xhci_alloc_container_ctx(struct xhci_hcd *xhci ctx->size += CTX_SIZE(xhci->hcc_params); ctx->bytes = dma_pool_alloc(xhci->device_pool, flags, &ctx->dma); + if (!ctx->bytes) { + kfree(ctx); + return NULL; + } memset(ctx->bytes, 0, ctx->size); return ctx; } From a5ea8ca0b2d24ab8beae6b218abeb771d40ae966 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Fri, 21 Jun 2013 13:59:08 +0530 Subject: [PATCH 0036/1368] usb: host: xhci-plat: release mem region while removing module commit 5388a3a5faba8dfa69e5f06c3a415d373c1a4316 upstream. Do a release_mem_region of the hcd resource. Without this the subsequent insertion of module fails in request_mem_region. Signed-off-by: George Cherian Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index df90fe51b4aa..93ad67eca053 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -179,6 +179,7 @@ static int xhci_plat_remove(struct platform_device *dev) usb_remove_hcd(hcd); iounmap(hcd->regs); + release_mem_region(hcd->rsrc_start, hcd->rsrc_len); usb_put_hcd(hcd); kfree(xhci); From 540005dcb39acd12c52d2a580a85ccfd5de76fc1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 18 Jun 2013 13:04:23 +0800 Subject: [PATCH 0037/1368] drivers: hv: switch to use mb() instead of smp_mb() commit 35848f68b07df3f917cb13fc3c134718669f569b upstream. Even if guest were compiled without SMP support, it could not assume that host wasn't. So switch to use mb() instead of smp_mb() to force memory barriers for UP guest. Signed-off-by: Jason Wang Cc: Haiyang Zhang Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/ring_buffer.c | 10 +++++----- drivers/hv/vmbus_drv.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index d6fbb5772b8d..791f45dfc85d 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -32,7 +32,7 @@ void hv_begin_read(struct hv_ring_buffer_info *rbi) { rbi->ring_buffer->interrupt_mask = 1; - smp_mb(); + mb(); } u32 hv_end_read(struct hv_ring_buffer_info *rbi) @@ -41,7 +41,7 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi) u32 write; rbi->ring_buffer->interrupt_mask = 0; - smp_mb(); + mb(); /* * Now check to see if the ring buffer is still empty. @@ -71,7 +71,7 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi) static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi) { - smp_mb(); + mb(); if (rbi->ring_buffer->interrupt_mask) return false; @@ -442,7 +442,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, sizeof(u64)); /* Issue a full memory barrier before updating the write index */ - smp_mb(); + mb(); /* Now, update the write location */ hv_set_next_write_location(outring_info, next_write_location); @@ -549,7 +549,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, /* Make sure all reads are done before we update the read index since */ /* the writer may start writing to the read area once the read index */ /*is updated */ - smp_mb(); + mb(); /* Update the read index */ hv_set_next_read_location(inring_info, next_read_location); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index bf421e0efa1e..4004e54ef05d 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -434,7 +434,7 @@ static void vmbus_on_msg_dpc(unsigned long data) * will not deliver any more messages since there is * no empty slot */ - smp_mb(); + mb(); if (msg->header.message_flags.msg_pending) { /* From 5eb2698932989f855e3427b0860771611f831a69 Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Thu, 6 Jun 2013 10:24:14 +0200 Subject: [PATCH 0038/1368] pcmcia: at91_cf: fix gpio_get_value in at91_cf_get_status commit e39506b466edcda2a7e9d0174d7987ae654137b7 upstream. Commit 80af9e6d (pcmcia at91_cf: fix raw gpio number usage) forgot to change the parameter in gpio_get_value after adding gpio validation. Signed-off-by: Joachim Eastwood Signed-off-by: Nicolas Ferre Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/at91_cf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c index 01463c781847..1b2c6317c772 100644 --- a/drivers/pcmcia/at91_cf.c +++ b/drivers/pcmcia/at91_cf.c @@ -100,9 +100,9 @@ static int at91_cf_get_status(struct pcmcia_socket *s, u_int *sp) int vcc = gpio_is_valid(cf->board->vcc_pin); *sp = SS_DETECT | SS_3VCARD; - if (!rdy || gpio_get_value(rdy)) + if (!rdy || gpio_get_value(cf->board->irq_pin)) *sp |= SS_READY; - if (!vcc || gpio_get_value(vcc)) + if (!vcc || gpio_get_value(cf->board->vcc_pin)) *sp |= SS_POWERON; } else *sp = 0; From 83d0eb79752482bb888fb6d86ceed8971272f8b4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 18 Jun 2013 18:41:10 +0800 Subject: [PATCH 0039/1368] cgroup: fix umount vs cgroup_event_remove() race commit 1c8158eeae0f37d0eee9f1fbe68080df6a408df2 upstream. commit 5db9a4d99b0157a513944e9a44d29c9cec2e91dc Author: Tejun Heo Date: Sat Jul 7 16:08:18 2012 -0700 cgroup: fix cgroup hierarchy umount race This commit fixed a race caused by the dput() in css_dput_fn(), but the dput() in cgroup_event_remove() can also lead to the same BUG(). Signed-off-by: Li Zefan Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a7c9e6ddb979..c6e77ef2a0a6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3726,6 +3726,23 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp, return 0; } +/* + * When dput() is called asynchronously, if umount has been done and + * then deactivate_super() in cgroup_free_fn() kills the superblock, + * there's a small window that vfs will see the root dentry with non-zero + * refcnt and trigger BUG(). + * + * That's why we hold a reference before dput() and drop it right after. + */ +static void cgroup_dput(struct cgroup *cgrp) +{ + struct super_block *sb = cgrp->root->sb; + + atomic_inc(&sb->s_active); + dput(cgrp->dentry); + deactivate_super(sb); +} + /* * Unregister event and free resources. * @@ -3746,7 +3763,7 @@ static void cgroup_event_remove(struct work_struct *work) eventfd_ctx_put(event->eventfd); kfree(event); - dput(cgrp->dentry); + cgroup_dput(cgrp); } /* @@ -4031,12 +4048,8 @@ static void css_dput_fn(struct work_struct *work) { struct cgroup_subsys_state *css = container_of(work, struct cgroup_subsys_state, dput_work); - struct dentry *dentry = css->cgroup->dentry; - struct super_block *sb = dentry->d_sb; - atomic_inc(&sb->s_active); - dput(dentry); - deactivate_super(sb); + cgroup_dput(css->cgroup); } static void init_cgroup_css(struct cgroup_subsys_state *css, From b6891ed4e66b65e5d6bb36964af0d65a08590018 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 25 Jun 2013 11:48:32 -0700 Subject: [PATCH 0040/1368] cgroup: fix RCU accesses to task->cgroups commit 14611e51a57df10240817d8ada510842faf0ec51 upstream. task->cgroups is a RCU pointer pointing to struct css_set. A task switches to a different css_set on cgroup migration but a css_set doesn't change once created and its pointers to cgroup_subsys_states aren't RCU protected. task_subsys_state[_check]() is the macro to acquire css given a task and subsys_id pair. It RCU-dereferences task->cgroups->subsys[] not task->cgroups, so the RCU pointer task->cgroups ends up being dereferenced without read_barrier_depends() after it. It's broken. Fix it by introducing task_css_set[_check]() which does RCU-dereference on task->cgroups. task_subsys_state[_check]() is reimplemented to directly dereference ->subsys[] of the css_set returned from task_css_set[_check](). This removes some of sparse RCU warnings in cgroup. v2: Fixed unbalanced parenthsis and there's no need to use rcu_dereference_raw() when !CONFIG_PROVE_RCU. Both spotted by Li. Signed-off-by: Tejun Heo Reported-by: Fengguang Wu Acked-by: Li Zefan Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup.h | 58 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8bda1294c035..8852d370c720 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -646,22 +646,60 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( return cgrp->subsys[subsys_id]; } -/* - * function to get the cgroup_subsys_state which allows for extra - * rcu_dereference_check() conditions, such as locks used during the - * cgroup_subsys::attach() methods. +/** + * task_css_set_check - obtain a task's css_set with extra access conditions + * @task: the task to obtain css_set for + * @__c: extra condition expression to be passed to rcu_dereference_check() + * + * A task's css_set is RCU protected, initialized and exited while holding + * task_lock(), and can only be modified while holding both cgroup_mutex + * and task_lock() while the task is alive. This macro verifies that the + * caller is inside proper critical section and returns @task's css_set. + * + * The caller can also specify additional allowed conditions via @__c, such + * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU extern struct mutex cgroup_mutex; -#define task_subsys_state_check(task, subsys_id, __c) \ - rcu_dereference_check((task)->cgroups->subsys[(subsys_id)], \ - lockdep_is_held(&(task)->alloc_lock) || \ - lockdep_is_held(&cgroup_mutex) || (__c)) +#define task_css_set_check(task, __c) \ + rcu_dereference_check((task)->cgroups, \ + lockdep_is_held(&(task)->alloc_lock) || \ + lockdep_is_held(&cgroup_mutex) || (__c)) #else -#define task_subsys_state_check(task, subsys_id, __c) \ - rcu_dereference((task)->cgroups->subsys[(subsys_id)]) +#define task_css_set_check(task, __c) \ + rcu_dereference((task)->cgroups) #endif +/** + * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds + * @task: the target task + * @subsys_id: the target subsystem ID + * @__c: extra condition expression to be passed to rcu_dereference_check() + * + * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The + * synchronization rules are the same as task_css_set_check(). + */ +#define task_subsys_state_check(task, subsys_id, __c) \ + task_css_set_check((task), (__c))->subsys[(subsys_id)] + +/** + * task_css_set - obtain a task's css_set + * @task: the task to obtain css_set for + * + * See task_css_set_check(). + */ +static inline struct css_set *task_css_set(struct task_struct *task) +{ + return task_css_set_check(task, false); +} + +/** + * task_subsys_state - obtain css for (task, subsys) + * @task: the target task + * @subsys_id: the target subsystem ID + * + * See task_subsys_state_check(). + */ static inline struct cgroup_subsys_state * task_subsys_state(struct task_struct *task, int subsys_id) { From fa8223db1d69e6de9c7c7210804e8bb284067954 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 29 Jun 2013 13:24:16 +0200 Subject: [PATCH 0041/1368] parisc: document the shadow registers commit a83f58bcb24003b9de2364de7c829a263423ead7 upstream. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- Documentation/parisc/registers | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/parisc/registers b/Documentation/parisc/registers index dd3caddd1ad9..10c7d1730f5d 100644 --- a/Documentation/parisc/registers +++ b/Documentation/parisc/registers @@ -77,6 +77,14 @@ PSW default E value 0 Shadow Registers used by interruption handler code TOC enable bit 1 +========================================================================= + +The PA-RISC architecture defines 7 registers as "shadow registers". +Those are used in RETURN FROM INTERRUPTION AND RESTORE instruction to reduce +the state save and restore time by eliminating the need for general register +(GR) saves and restores in interruption handlers. +Shadow registers are the GRs 1, 8, 9, 16, 17, 24, and 25. + ========================================================================= Register usage notes, originally from John Marvin, with some additional notes from Randolph Chung. From 8e700e9586394b9769fd93293482e9a950e24ba4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 4 Jul 2013 22:34:11 +0200 Subject: [PATCH 0042/1368] parisc: Fix gcc miscompilation in pa_memcpy() commit 5b879d78bc0818aa710f5d4d9abbfc2aca075cc3 upstream. When running the LTP testsuite one may hit this kernel BUG() with the write06 testcase: kernel BUG at mm/filemap.c:2023! CPU: 1 PID: 8614 Comm: writev01 Not tainted 3.10.0-rc7-64bit-c3000+ #6 IASQ: 0000000000000000 0000000000000000 IAOQ: 00000000401e6e84 00000000401e6e88 IIR: 03ffe01f ISR: 0000000010340000 IOR: 000001fbe0380820 CPU: 1 CR30: 00000000bef80000 CR31: ffffffffffffffff ORIG_R28: 00000000bdc192c0 IAOQ[0]: iov_iter_advance+0x3c/0xc0 IAOQ[1]: iov_iter_advance+0x40/0xc0 RP(r2): generic_file_buffered_write+0x204/0x3f0 Backtrace: [<00000000401e764c>] generic_file_buffered_write+0x204/0x3f0 [<00000000401eab24>] __generic_file_aio_write+0x244/0x448 [<00000000401eadc0>] generic_file_aio_write+0x98/0x150 [<000000004024f460>] do_sync_readv_writev+0xc0/0x130 [<000000004025037c>] compat_do_readv_writev+0x12c/0x340 [<00000000402505f8>] compat_writev+0x68/0xa0 [<0000000040251d88>] compat_SyS_writev+0x98/0xf8 Reason for this crash is a gcc miscompilation in the fault handlers of pa_memcpy() which return the fault address instead of the copied bytes. Since this seems to be a generic problem with gcc-4.7.x (and below), it's better to simplify the fault handlers in pa_memcpy to avoid this problem. Here is a simple reproducer for the problem: int main(int argc, char **argv) { int fd, nbytes; struct iovec wr_iovec[] = { { "TEST STRING ",32}, { (char*)0x40005000,32} }; // random memory. fd = open(DATA_FILE, O_RDWR | O_CREAT, 0666); nbytes = writev(fd, wr_iovec, 2); printf("return value = %d, errno %d (%s)\n", nbytes, errno, strerror(errno)); return 0; } In addition, John David Anglin wrote: There is no gcc PR as pa_memcpy is not legitimate C code. There is an implicit assumption that certain variables will contain correct values when an exception occurs and the code randomly jumps to one of the exception blocks. There is no guarantee of this. If a PR was filed, it would likely be marked as invalid. Signed-off-by: Helge Deller Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/lib/memcpy.c | 79 +++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index a49cc812df8a..ac4370b1ca40 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -2,6 +2,7 @@ * Optimized memory copy routines. * * Copyright (C) 2004 Randolph Chung + * Copyright (C) 2013 Helge Deller * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -153,17 +154,21 @@ static inline void prefetch_dst(const void *addr) #define prefetch_dst(addr) do { } while(0) #endif +#define PA_MEMCPY_OK 0 +#define PA_MEMCPY_LOAD_ERROR 1 +#define PA_MEMCPY_STORE_ERROR 2 + /* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words * per loop. This code is derived from glibc. */ -static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len) +static inline unsigned long copy_dstaligned(unsigned long dst, + unsigned long src, unsigned long len) { /* gcc complains that a2 and a3 may be uninitialized, but actually * they cannot be. Initialize a2/a3 to shut gcc up. */ register unsigned int a0, a1, a2 = 0, a3 = 0; int sh_1, sh_2; - struct exception_data *d; /* prefetch_src((const void *)src); */ @@ -197,7 +202,7 @@ static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src goto do2; case 0: if (len == 0) - return 0; + return PA_MEMCPY_OK; /* a3 = ((unsigned int *) src)[0]; a0 = ((unsigned int *) src)[1]; */ ldw(s_space, 0, src, a3, cda_ldw_exc); @@ -256,42 +261,35 @@ static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src preserve_branch(handle_load_error); preserve_branch(handle_store_error); - return 0; + return PA_MEMCPY_OK; handle_load_error: __asm__ __volatile__ ("cda_ldw_exc:\n"); - d = &__get_cpu_var(exception_data); - DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n", - o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src); - return o_len * 4 - d->fault_addr + o_src; + return PA_MEMCPY_LOAD_ERROR; handle_store_error: __asm__ __volatile__ ("cda_stw_exc:\n"); - d = &__get_cpu_var(exception_data); - DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n", - o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst); - return o_len * 4 - d->fault_addr + o_dst; + return PA_MEMCPY_STORE_ERROR; } -/* Returns 0 for success, otherwise, returns number of bytes not transferred. */ -static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) +/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR. + * In case of an access fault the faulty address can be read from the per_cpu + * exception data struct. */ +static unsigned long pa_memcpy_internal(void *dstp, const void *srcp, + unsigned long len) { register unsigned long src, dst, t1, t2, t3; register unsigned char *pcs, *pcd; register unsigned int *pws, *pwd; register double *pds, *pdd; - unsigned long ret = 0; - unsigned long o_dst, o_src, o_len; - struct exception_data *d; + unsigned long ret; src = (unsigned long)srcp; dst = (unsigned long)dstp; pcs = (unsigned char *)srcp; pcd = (unsigned char *)dstp; - o_dst = dst; o_src = src; o_len = len; - /* prefetch_src((const void *)srcp); */ if (len < THRESHOLD) @@ -401,7 +399,7 @@ static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) len--; } - return 0; + return PA_MEMCPY_OK; unaligned_copy: /* possibly we are aligned on a word, but not on a double... */ @@ -438,8 +436,7 @@ static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) src = (unsigned long)pcs; } - ret = copy_dstaligned(dst, src, len / sizeof(unsigned int), - o_dst, o_src, o_len); + ret = copy_dstaligned(dst, src, len / sizeof(unsigned int)); if (ret) return ret; @@ -454,17 +451,41 @@ static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) handle_load_error: __asm__ __volatile__ ("pmc_load_exc:\n"); - d = &__get_cpu_var(exception_data); - DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n", - o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src); - return o_len - d->fault_addr + o_src; + return PA_MEMCPY_LOAD_ERROR; handle_store_error: __asm__ __volatile__ ("pmc_store_exc:\n"); + return PA_MEMCPY_STORE_ERROR; +} + + +/* Returns 0 for success, otherwise, returns number of bytes not transferred. */ +static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) +{ + unsigned long ret, fault_addr, reference; + struct exception_data *d; + + ret = pa_memcpy_internal(dstp, srcp, len); + if (likely(ret == PA_MEMCPY_OK)) + return 0; + + /* if a load or store fault occured we can get the faulty addr */ d = &__get_cpu_var(exception_data); - DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n", - o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst); - return o_len - d->fault_addr + o_dst; + fault_addr = d->fault_addr; + + /* error in load or store? */ + if (ret == PA_MEMCPY_LOAD_ERROR) + reference = (unsigned long) srcp; + else + reference = (unsigned long) dstp; + + DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n", + ret, len, fault_addr, reference); + + if (fault_addr >= reference) + return len - (fault_addr - reference); + else + return len; } #ifdef __KERNEL__ From 002b98a8495089379af21689688ad575d90bd8d4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 2 Jul 2013 12:12:10 -0700 Subject: [PATCH 0043/1368] slab: fix init_lock_keys commit 0f8f8094d28eb53368ac09186ea6b3a324cc7d44 upstream. Some architectures (e.g. powerpc built with CONFIG_PPC_256K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=11) get PAGE_SHIFT + MAX_ORDER > 26. In 3.10 kernels, CONFIG_LOCKDEP=y with PAGE_SHIFT + MAX_ORDER > 26 makes init_lock_keys() dereference beyond kmalloc_caches[26]. This leads to an unbootable system (kernel panic at initializing SLAB) if one of kmalloc_caches[26...PAGE_SHIFT+MAX_ORDER-1] is not NULL. Fix this by making sure that init_lock_keys() does not dereference beyond kmalloc_caches[26] arrays. Signed-off-by: Christoph Lameter Reported-by: Tetsuo Handa Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Pekka Enberg Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 8ccd296c6d9c..bd88411595b9 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -565,7 +565,7 @@ static void init_node_lock_keys(int q) if (slab_state < UP) return; - for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { + for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) { struct kmem_cache_node *n; struct kmem_cache *cache = kmalloc_caches[i]; From ad5def802da90a988d243d8426478bf23f54aad0 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 29 Jun 2013 16:42:12 -0400 Subject: [PATCH 0044/1368] parisc: Ensure volatile space register %sr1 is not clobbered commit e8d8fc219f9a0e63e7fb927881e6f4db8e7d34df upstream. I still see the occasional random segv on rp3440. Looking at one of these (a code 15), it appeared the problem must be with the cache handling of anonymous pages. Reviewing this, I noticed that the space register %sr1 might be being clobbered when we flush an anonymous page. Register %sr1 is used for TLB purges in a couple of places. These purges are needed on PA8800 and PA8900 processors to ensure cache consistency of flushed cache lines. The solution here is simply to move the %sr1 load into the TLB lock region needed to ensure that one purge executes at a time on SMP systems. This was already the case for one use. After a few days of operation, I haven't had a random segv on my rp3440. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/tlbflush.h | 5 +++-- arch/parisc/kernel/cache.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 5273da991e06..9d086a599fa0 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -63,13 +63,14 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - unsigned long flags; + unsigned long flags, sid; /* For one page, it's not worth testing the split_tlb variable */ mb(); - mtsp(vma->vm_mm->context,1); + sid = vma->vm_mm->context; purge_tlb_start(flags); + mtsp(sid, 1); pdtlb(addr); pitlb(addr); purge_tlb_end(flags); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 65fb4cbc3a0f..2e65aa54bd10 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -440,8 +440,8 @@ void __flush_tlb_range(unsigned long sid, unsigned long start, else { unsigned long flags; - mtsp(sid, 1); purge_tlb_start(flags); + mtsp(sid, 1); if (split_tlb) { while (npages--) { pdtlb(start); From 286f3bf473da48489bb539c08088fbb8dc64a35c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 18 Jun 2013 23:21:25 +0200 Subject: [PATCH 0045/1368] parisc: fix LMMIO mismatch between PAT length and MASK register commit dac76f1be5beaea4af9afe85fb475c73de0b8731 upstream. The LMMIO length reported by PAT and the length given by the LBA MASK register are not consistent. This leads e.g. to a not-working ATI FireGL card with the radeon DRM driver since the memory can't be mapped. Fix this by correctly adjusting the resource sizes. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/lba_pci.c | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index 1f05913ae677..19f6f70c67d3 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -613,6 +613,54 @@ truncate_pat_collision(struct resource *root, struct resource *new) return 0; /* truncation successful */ } +/* + * extend_lmmio_len: extend lmmio range to maximum length + * + * This is needed at least on C8000 systems to get the ATI FireGL card + * working. On other systems we will currently not extend the lmmio space. + */ +static unsigned long +extend_lmmio_len(unsigned long start, unsigned long end, unsigned long lba_len) +{ + struct resource *tmp; + + pr_debug("LMMIO mismatch: PAT length = 0x%lx, MASK register = 0x%lx\n", + end - start, lba_len); + + lba_len = min(lba_len+1, 256UL*1024*1024); /* limit to 256 MB */ + + pr_debug("LBA: lmmio_space [0x%lx-0x%lx] - original\n", start, end); + + if (boot_cpu_data.cpu_type < mako) { + pr_info("LBA: Not a C8000 system - not extending LMMIO range.\n"); + return end; + } + + end += lba_len; + if (end < start) /* fix overflow */ + end = -1ULL; + + pr_debug("LBA: lmmio_space [0x%lx-0x%lx] - current\n", start, end); + + /* first overlap */ + for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling) { + pr_debug("LBA: testing %pR\n", tmp); + if (tmp->start == start) + continue; /* ignore ourself */ + if (tmp->end < start) + continue; + if (tmp->start > end) + continue; + if (end >= tmp->start) + end = tmp->start - 1; + } + + pr_info("LBA: lmmio_space [0x%lx-0x%lx] - new\n", start, end); + + /* return new end */ + return end; +} + #else #define truncate_pat_collision(r,n) (0) #endif @@ -994,6 +1042,14 @@ lba_pat_resources(struct parisc_device *pa_dev, struct lba_device *lba_dev) case PAT_LMMIO: /* used to fix up pre-initialized MEM BARs */ if (!lba_dev->hba.lmmio_space.flags) { + unsigned long lba_len; + + lba_len = ~READ_REG32(lba_dev->hba.base_addr + + LBA_LMMIO_MASK); + if ((p->end - p->start) != lba_len) + p->end = extend_lmmio_len(p->start, + p->end, lba_len); + sprintf(lba_dev->hba.lmmio_name, "PCI%02x LMMIO", (int)lba_dev->hba.bus_num.start); From a2ebe4443fd55755cae5834e3f3021d3572cc463 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 29 Jun 2013 22:08:03 +0200 Subject: [PATCH 0046/1368] parisc: optimize mtsp(0,sr) inline assembly commit 92b59929825d67db575043a76651865d16873b36 upstream. If the value which should be moved into a space register is zero, we can optimize the inline assembly to become "mtsp %r0,%srX". Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/special_insns.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index d306b75bc77f..e1509308899f 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -32,9 +32,12 @@ static inline void set_eiem(unsigned long val) cr; \ }) -#define mtsp(gr, cr) \ - __asm__ __volatile__("mtsp %0,%1" \ +#define mtsp(val, cr) \ + { if (__builtin_constant_p(val) && ((val) == 0)) \ + __asm__ __volatile__("mtsp %%r0,%0" : : "i" (cr) : "memory"); \ + else \ + __asm__ __volatile__("mtsp %0,%1" \ : /* no outputs */ \ - : "r" (gr), "i" (cr) : "memory") + : "r" (val), "i" (cr) : "memory"); } #endif /* __PARISC_SPECIAL_INSNS_H */ From 587562a77c7ab453d16f6b6bb02951bd66c44982 Mon Sep 17 00:00:00 2001 From: Zach Bobroff Date: Fri, 7 Jun 2013 13:02:50 +0100 Subject: [PATCH 0047/1368] x86, efi: retry ExitBootServices() on failure commit d3768d885c6ccbf8a137276843177d76c49033a7 upstream. ExitBootServices is absolutely supposed to return a failure if any ExitBootServices event handler changes the memory map. Basically the get_map loop should run again if ExitBootServices returns an error the first time. I would say it would be fair that if ExitBootServices gives an error the second time then Linux would be fine in returning control back to BIOS. The second change is the following line: again: size += sizeof(*mem_map) * 2; Originally you were incrementing it by the size of one memory map entry. The issue here is all related to the low_alloc routine you are using. In this routine you are making allocations to get the memory map itself. Doing this allocation or allocations can affect the memory map by more than one record. [ mfleming - changelog, code style ] Signed-off-by: Zach Bobroff Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/eboot.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c205035a6b96..d606463aa6d6 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -992,18 +992,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params, efi_memory_desc_t *mem_map; efi_status_t status; __u32 desc_version; + bool called_exit = false; u8 nr_entries; int i; size = sizeof(*mem_map) * 32; again: - size += sizeof(*mem_map); + size += sizeof(*mem_map) * 2; _size = size; status = low_alloc(size, 1, (unsigned long *)&mem_map); if (status != EFI_SUCCESS) return status; +get_map: status = efi_call_phys5(sys_table->boottime->get_memory_map, &size, mem_map, &key, &desc_size, &desc_version); if (status == EFI_BUFFER_TOO_SMALL) { @@ -1029,8 +1031,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params, /* Might as well exit boot services now */ status = efi_call_phys2(sys_table->boottime->exit_boot_services, handle, key); - if (status != EFI_SUCCESS) - goto free_mem_map; + if (status != EFI_SUCCESS) { + /* + * ExitBootServices() will fail if any of the event + * handlers change the memory map. In which case, we + * must be prepared to retry, but only once so that + * we're guaranteed to exit on repeated failures instead + * of spinning forever. + */ + if (called_exit) + goto free_mem_map; + + called_exit = true; + goto get_map; + } /* Historic? */ boot_params->alt_mem_k = 32 * 1024; From ac9349c428f94f636e3ca4d5d92cf048dd819518 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Tue, 18 Oct 2011 22:42:59 +0200 Subject: [PATCH 0048/1368] xen/time: remove blocked time accounting from xen "clockchip" commit 0b0c002c340e78173789f8afaa508070d838cf3d upstream. ... because the "clock_event_device framework" already accounts for idle time through the "event_handler" function pointer in xen_timer_interrupt(). The patch is intended as the completion of [1]. It should fix the double idle times seen in PV guests' /proc/stat [2]. It should be orthogonal to stolen time accounting (the removed code seems to be isolated). The approach may be completely misguided. [1] https://lkml.org/lkml/2011/10/6/10 [2] http://lists.xensource.com/archives/html/xen-devel/2010-08/msg01068.html John took the time to retest this patch on top of v3.10 and reported: "idle time is correctly incremented for pv and hvm for the normal case, nohz=off and nohz=idle." so lets put this patch in. Signed-off-by: Laszlo Ersek Signed-off-by: John Haxby Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/time.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 3d88bfdf9e1c..13e8935e2eab 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -36,9 +36,8 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); /* snapshots of runstate info */ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); -/* unused ns of stolen and blocked time */ +/* unused ns of stolen time */ static DEFINE_PER_CPU(u64, xen_residual_stolen); -static DEFINE_PER_CPU(u64, xen_residual_blocked); /* return an consistent snapshot of 64-bit time/counter value */ static u64 get64(const u64 *p) @@ -115,7 +114,7 @@ static void do_stolen_accounting(void) { struct vcpu_runstate_info state; struct vcpu_runstate_info *snap; - s64 blocked, runnable, offline, stolen; + s64 runnable, offline, stolen; cputime_t ticks; get_runstate_snapshot(&state); @@ -125,7 +124,6 @@ static void do_stolen_accounting(void) snap = &__get_cpu_var(xen_runstate_snapshot); /* work out how much time the VCPU has not been runn*ing* */ - blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; @@ -141,17 +139,6 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __this_cpu_write(xen_residual_stolen, stolen); account_steal_ticks(ticks); - - /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. */ - blocked += __this_cpu_read(xen_residual_blocked); - - if (blocked < 0) - blocked = 0; - - ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); - __this_cpu_write(xen_residual_blocked, blocked); - account_idle_ticks(ticks); } /* Get the TSC speed from Xen */ From 9ceb896c679bf0fb6c8c968cf006ec8593052f37 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 10 Jun 2013 16:48:09 -0400 Subject: [PATCH 0049/1368] xen/pcifront: Deal with toolstack missing 'XenbusStateClosing' state. commit 098b1aeaf4d6149953b8f1f8d55c21d85536fbff upstream. There are two tool-stack that can instruct the Xen PCI frontend and backend to change states: 'xm' (Python code with a daemon), and 'xl' (C library - does not keep state changes). With the 'xm', the path to disconnect a single PCI device (xm pci-detach ) is: 4(Connected)->7(Reconfiguring*)-> 8(Reconfigured)-> 4(Connected)->5(Closing*). The * is for states that the tool-stack sets. For 'xl', it is similar: 4(Connected)->7(Reconfiguring*)-> 8(Reconfigured)-> 4(Connected) Both of them also tear down the XenBus structure, so the backend state ends up going in the 3(Initialised) and calls pcifront_xenbus_remove. When a PCI device is plugged back in (xm pci-attach ) both of them follow the same pattern: 2(InitWait*), 3(Initialized*), 4(Connected*)->4(Connected). [xen-pcifront ignores the 2,3 state changes and only acts when 4 (Connected) has been reached] Note that this is for a _single_ PCI device. If there were two PCI devices and only one was disconnected 'xm' would show the same state changes. The problem is that git commit 3d925320e9e2de162bd138bf97816bda8c3f71be ("xen/pcifront: Use Xen-SWIOTLB when initting if required") introduced a mechanism to initialize the SWIOTLB when the Xen PCI front moves to Connected state. It also had some aggressive seatbelt code check that would warn the user if one tried to change to Connected state without hitting first the Closing state: pcifront pci-0: PCI frontend already installed! However, that code can be relaxed and we can continue on working even if the frontend is instructed to be the 'Connected' state with no devices and then gets tickled to be in 'Connected' state again. In other words, this 4(Connected)->5(Closing)->4(Connected) state was expected, while 4(Connected)->.... anything but 5(Closing)->4(Connected) was not. This patch removes that aggressive check and allows Xen pcifront to work with the 'xl' toolstack (for one or more PCI devices) and with 'xm' toolstack (for more than two PCI devices). Acked-by: Bjorn Helgaas Cc: linux-pci@vger.kernel.org [v2: Added in the description about two PCI devices] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/pci/xen-pcifront.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 966abc6054d7..f7197a790341 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -678,10 +678,9 @@ static int pcifront_connect_and_init_dma(struct pcifront_device *pdev) if (!pcifront_dev) { dev_info(&pdev->xdev->dev, "Installing PCI frontend\n"); pcifront_dev = pdev; - } else { - dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); + } else err = -EEXIST; - } + spin_unlock(&pcifront_dev_lock); if (!err && !swiotlb_nr_tbl()) { @@ -848,7 +847,7 @@ static int pcifront_try_connect(struct pcifront_device *pdev) goto out; err = pcifront_connect_and_init_dma(pdev); - if (err) { + if (err && err != -EEXIST) { xenbus_dev_fatal(pdev->xdev, err, "Error setting up PCI Frontend"); goto out; From 3dc8601b54d7efa6ccf94b5159dd79b179a965dc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 28 Jun 2013 02:40:30 +0100 Subject: [PATCH 0050/1368] genirq: Fix can_request_irq() for IRQs without an action commit 2779db8d37d4b542d9ca2575f5f178dbeaca6c86 upstream. Commit 02725e7471b8 ('genirq: Use irq_get/put functions'), inadvertently changed can_request_irq() to return 0 for IRQs that have no action. This causes pcibios_lookup_irq() to select only IRQs that already have an action with IRQF_SHARED set, or to fail if there are none. Change can_request_irq() to return 1 for IRQs that have no action (if the first two conditions are met). Reported-by: Bjarni Ingi Gislason Tested-by: Bjarni Ingi Gislason (against 3.2) Signed-off-by: Ben Hutchings Cc: 709647@bugs.debian.org Link: http://bugs.debian.org/709647 Link: http://lkml.kernel.org/r/1372383630.23847.40.camel@deadeye.wl.decadent.org.uk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index fa17855ca65a..dc4db3228dcd 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -555,9 +555,9 @@ int can_request_irq(unsigned int irq, unsigned long irqflags) return 0; if (irq_settings_can_request(desc)) { - if (desc->action) - if (irqflags & desc->action->flags & IRQF_SHARED) - canrequest =1; + if (!desc->action || + irqflags & desc->action->flags & IRQF_SHARED) + canrequest = 1; } irq_put_desc_unlock(desc, flags); return canrequest; From e64c0641beee09a0d9870efc5ea40eb06b6529cb Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 3 Jul 2013 15:06:45 -0700 Subject: [PATCH 0051/1368] drivers/rtc/rtc-rv3029c2.c: fix disabling AIE irq commit 29ecd78c0fd6ee05f2c6b07b23823a6ae43c13ff upstream. In the disable AIE irq code path, current code passes "1" to enable parameter of rv3029c2_rtc_i2c_alarm_set_irq(). Thus it does not disable AIE irq. Signed-off-by: Axel Lin Acked-by: Heiko Schocher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-rv3029c2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c index 5032c24ec159..9100a3401de1 100644 --- a/drivers/rtc/rtc-rv3029c2.c +++ b/drivers/rtc/rtc-rv3029c2.c @@ -310,7 +310,7 @@ static int rv3029c2_rtc_i2c_set_alarm(struct i2c_client *client, dev_dbg(&client->dev, "alarm IRQ armed\n"); } else { /* disable AIE irq */ - ret = rv3029c2_rtc_i2c_alarm_set_irq(client, 1); + ret = rv3029c2_rtc_i2c_alarm_set_irq(client, 0); if (ret) return ret; From 623c1cba04991c15d594a25fd31d7480d931aa57 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 5 Jun 2013 02:27:50 +0000 Subject: [PATCH 0052/1368] ACPI: Add CMOS RTC Operation Region handler support commit 2fa97feb4406c546b52e35b6b6c50cb8f63425d2 upstream. On HP Folio 13-2000, the BIOS defines a CMOS RTC Operation Region and the EC's _REG methord accesses that region. Thus an appropriate address space handler must be registered for that region before the EC driver is loaded. Introduce a mechanism for adding CMOS RTC address space handlers. Register an ACPI scan handler for CMOS RTC devices such that, when a device of that kind is detected during an ACPI namespace scan, a common CMOS RTC operation region address space handler will be installed for it. References: https://bugzilla.kernel.org/show_bug.cgi?id=54621 Reported-and-tested-by: Stefan Nagy Signed-off-by: Lan Tianyu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/Makefile | 1 + drivers/acpi/acpi_cmos_rtc.c | 92 ++++++++++++++++++++++++++++++++++++ drivers/acpi/internal.h | 5 ++ drivers/acpi/scan.c | 1 + 4 files changed, 99 insertions(+) create mode 100644 drivers/acpi/acpi_cmos_rtc.c diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 536562c626a2..97c949abfabb 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -43,6 +43,7 @@ acpi-y += acpi_platform.o acpi-y += power.o acpi-y += event.o acpi-y += sysfs.o +acpi-$(CONFIG_X86) += acpi_cmos_rtc.o acpi-$(CONFIG_DEBUG_FS) += debugfs.o acpi-$(CONFIG_ACPI_NUMA) += numa.o acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o diff --git a/drivers/acpi/acpi_cmos_rtc.c b/drivers/acpi/acpi_cmos_rtc.c new file mode 100644 index 000000000000..84190ed89c04 --- /dev/null +++ b/drivers/acpi/acpi_cmos_rtc.c @@ -0,0 +1,92 @@ +/* + * ACPI support for CMOS RTC Address Space access + * + * Copyright (C) 2013, Intel Corporation + * Authors: Lan Tianyu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#define PREFIX "ACPI: " + +ACPI_MODULE_NAME("cmos rtc"); + +static const struct acpi_device_id acpi_cmos_rtc_ids[] = { + { "PNP0B00" }, + { "PNP0B01" }, + { "PNP0B02" }, + {} +}; + +static acpi_status +acpi_cmos_rtc_space_handler(u32 function, acpi_physical_address address, + u32 bits, u64 *value64, + void *handler_context, void *region_context) +{ + int i; + u8 *value = (u8 *)&value64; + + if (address > 0xff || !value64) + return AE_BAD_PARAMETER; + + if (function != ACPI_WRITE && function != ACPI_READ) + return AE_BAD_PARAMETER; + + spin_lock_irq(&rtc_lock); + + for (i = 0; i < DIV_ROUND_UP(bits, 8); ++i, ++address, ++value) + if (function == ACPI_READ) + *value = CMOS_READ(address); + else + CMOS_WRITE(*value, address); + + spin_unlock_irq(&rtc_lock); + + return AE_OK; +} + +static int acpi_install_cmos_rtc_space_handler(struct acpi_device *adev, + const struct acpi_device_id *id) +{ + acpi_status status; + + status = acpi_install_address_space_handler(adev->handle, + ACPI_ADR_SPACE_CMOS, + &acpi_cmos_rtc_space_handler, + NULL, NULL); + if (ACPI_FAILURE(status)) { + pr_err(PREFIX "Error installing CMOS-RTC region handler\n"); + return -ENODEV; + } + + return 0; +} + +static void acpi_remove_cmos_rtc_space_handler(struct acpi_device *adev) +{ + if (ACPI_FAILURE(acpi_remove_address_space_handler(adev->handle, + ACPI_ADR_SPACE_CMOS, &acpi_cmos_rtc_space_handler))) + pr_err(PREFIX "Error removing CMOS-RTC region handler\n"); +} + +static struct acpi_scan_handler cmos_rtc_handler = { + .ids = acpi_cmos_rtc_ids, + .attach = acpi_install_cmos_rtc_space_handler, + .detach = acpi_remove_cmos_rtc_space_handler, +}; + +void __init acpi_cmos_rtc_init(void) +{ + acpi_scan_add_handler(&cmos_rtc_handler); +} diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index c610a76d92c4..63a08549bfa2 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -50,6 +50,11 @@ void acpi_memory_hotplug_init(void); #else static inline void acpi_memory_hotplug_init(void) {} #endif +#ifdef CONFIG_X86 +void acpi_cmos_rtc_init(void); +#else +static inline void acpi_cmos_rtc_init(void) {} +#endif void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug, const char *name); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 27da63061e11..14807e53ccae 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2040,6 +2040,7 @@ int __init acpi_scan_init(void) acpi_pci_link_init(); acpi_platform_init(); acpi_lpss_init(); + acpi_cmos_rtc_init(); acpi_container_init(); acpi_memory_hotplug_init(); acpi_dock_init(); From 93ede471186348e7a82b13a879213fd661c951be Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 5 Jun 2013 02:27:51 +0000 Subject: [PATCH 0053/1368] ACPI / EC: Add HP Folio 13 to ec_dmi_table in order to skip DSDT scan commit eff9a4b62b14cf0d9913e3caf1f26f8b7a6105c9 upstream. HP Folio 13's BIOS defines CMOS RTC Operation Region and the EC's _REG method will access that region. To allow the CMOS RTC region handler to be installed before the EC _REG method is first invoked, add ec_skip_dsdt_scan() as HP Folio 13's callback to ec_dmi_table. References: https://bugzilla.kernel.org/show_bug.cgi?id=54621 Reported-and-tested-by: Stefan Nagy Signed-off-by: Lan Tianyu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index edc00818c803..80403c1a89f8 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -983,6 +983,10 @@ static struct dmi_system_id __initdata ec_dmi_table[] = { ec_enlarge_storm_threshold, "CLEVO hardware", { DMI_MATCH(DMI_SYS_VENDOR, "CLEVO Co."), DMI_MATCH(DMI_PRODUCT_NAME, "M720T/M730T"),}, NULL}, + { + ec_skip_dsdt_scan, "HP Folio 13", { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13"),}, NULL}, {}, }; From 22a5fdfc36cd18abfd421699e9076307fa54dae4 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Sat, 8 Jun 2013 00:59:18 +0000 Subject: [PATCH 0054/1368] ACPICA: Do not use extended sleep registers unless HW-reduced bit is set commit 7cec7048fe22e3e92389da2cd67098f6c4284e7f upstream. Previous implementation incorrectly used the ACPI 5.0 extended sleep registers if they were simply populated. This caused problems on some non-HW-reduced machines. As per the ACPI spec, they should only be used if the HW-reduced bit is set. Lv Zheng, ACPICA BZ 1020. Reported-by: Daniel Rowe References: https://bugzilla.kernel.org/show_bug.cgi?id=54181 References: https://bugs.acpica.org/show_bug.cgi?id=1020 Bisected-by: Brint E. Kriebel Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/hwxfsleep.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c index 35eebdac0f9d..09b06e2feff8 100644 --- a/drivers/acpi/acpica/hwxfsleep.c +++ b/drivers/acpi/acpica/hwxfsleep.c @@ -240,12 +240,14 @@ static acpi_status acpi_hw_sleep_dispatch(u8 sleep_state, u32 function_id) &acpi_sleep_dispatch[function_id]; #if (!ACPI_REDUCED_HARDWARE) - /* * If the Hardware Reduced flag is set (from the FADT), we must - * use the extended sleep registers + * use the extended sleep registers (FADT). Note: As per the ACPI + * specification, these extended registers are to be used for HW-reduced + * platforms only. They are not general-purpose replacements for the + * legacy PM register sleep support. */ - if (acpi_gbl_reduced_hardware || acpi_gbl_FADT.sleep_control.address) { + if (acpi_gbl_reduced_hardware) { status = sleep_functions->extended_function(sleep_state); } else { /* Legacy sleep */ From 2757f8d630ae099abc7d51b7b09de76a33876bf4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Jul 2013 13:22:11 +0200 Subject: [PATCH 0055/1368] ACPI / PM: Fix corner case in acpi_bus_update_power() commit 91bdad0b6237c25a7bf8fd4604d0cc64a2005a23 upstream. The role of acpi_bus_update_power() is to update the given ACPI device object's power.state field to reflect the current physical state of the device (as inferred from the configuration of power resources and _PSC, if available). For this purpose it calls acpi_device_set_power() that should update the power resources' reference counters and set power.state as appropriate. However, that doesn't work if the "new" state is D1, D2 or D3hot and the the current value of power.state means D3cold, because in that case acpi_device_set_power() will refuse to transition the device from D3cold to non-D0. To address this problem, make acpi_bus_update_power() call acpi_power_transition() directly to update the power resources' reference counters and only use acpi_device_set_power() to put the device into D0 if the current physical state of it cannot be determined. Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_pm.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 31c217a42839..553527c2532b 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -324,14 +324,27 @@ int acpi_bus_update_power(acpi_handle handle, int *state_p) if (result) return result; - if (state == ACPI_STATE_UNKNOWN) + if (state == ACPI_STATE_UNKNOWN) { state = ACPI_STATE_D0; - - result = acpi_device_set_power(device, state); - if (!result && state_p) + result = acpi_device_set_power(device, state); + if (result) + return result; + } else { + if (device->power.flags.power_resources) { + /* + * We don't need to really switch the state, bu we need + * to update the power resources' reference counters. + */ + result = acpi_power_transition(device, state); + if (result) + return result; + } + device->power.state = state; + } + if (state_p) *state_p = state; - return result; + return 0; } EXPORT_SYMBOL_GPL(acpi_bus_update_power); From 391c2e14c6442ac370afee2674d3e0c7409b319d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 1 Jul 2013 11:46:27 -0700 Subject: [PATCH 0056/1368] HID: apple: Add support for the 2013 Macbook Air commit 9d9a04ee758b4c1fcc7586d065cdde7a7607e156 upstream. This patch adds keyboard support for MacbookAir6,2 as WELLSPRING8 (0x0291, 0x0292, 0x0293). The touchpad is handled in a separate bcm5974 patch, as usual. Reported-and-tested-by: Brad Ford Signed-off-by: Henrik Rydberg Signed-off-by: Jiri Kosina Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-apple.c | 6 ++++++ drivers/hid/hid-core.c | 6 ++++++ drivers/hid/hid-ids.h | 3 +++ 3 files changed, 15 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index feae88b53fcd..c7710b5c69af 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -524,6 +524,12 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO), + .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS), + .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 264f55099940..402f48689943 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1547,6 +1547,9 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) }, @@ -2179,6 +2182,9 @@ static const struct hid_device_id hid_mouse_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, { } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 38535c9243d5..216888538c50 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -135,6 +135,9 @@ #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS 0x023b #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI 0x0255 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO 0x0256 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0291 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0292 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0293 #define USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY 0x030a #define USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY 0x030b #define USB_DEVICE_ID_APPLE_IRCONTROL 0x8240 From f7019bfd8c1cc896f194009dff643fc73eefdd74 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 1 Jul 2013 11:47:51 -0700 Subject: [PATCH 0057/1368] Input: bcm5974 - add support for the 2013 MacBook Air commit 148c1c8ad3c4170186ebe6ea5900adde27d2a0e7 upstream. The June 2013 Macbook Air (13'') has a new trackpad protocol; four new values are inserted in the header, and the mode switch is no longer needed. This patch adds support for the new devices. Reported-and-tested-by: Brad Ford Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/bcm5974.c | 36 +++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 2baff1b79a55..4ef4d5e198ae 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -88,6 +88,10 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI 0x0259 #define USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO 0x025a #define USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS 0x025b +/* MacbookAir6,2 (unibody, June 2013) */ +#define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0291 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0292 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0293 #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ @@ -145,6 +149,10 @@ static const struct usb_device_id bcm5974_table[] = { BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS), + /* MacbookAir6,2 */ + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_JIS), /* Terminating entry */ {} }; @@ -172,15 +180,18 @@ struct bt_data { /* trackpad header types */ enum tp_type { TYPE1, /* plain trackpad */ - TYPE2 /* button integrated in trackpad */ + TYPE2, /* button integrated in trackpad */ + TYPE3 /* additional header fields since June 2013 */ }; /* trackpad finger data offsets, le16-aligned */ #define FINGER_TYPE1 (13 * sizeof(__le16)) #define FINGER_TYPE2 (15 * sizeof(__le16)) +#define FINGER_TYPE3 (19 * sizeof(__le16)) /* trackpad button data offsets */ #define BUTTON_TYPE2 15 +#define BUTTON_TYPE3 23 /* list of device capability bits */ #define HAS_INTEGRATED_BUTTON 1 @@ -400,6 +411,19 @@ static const struct bcm5974_config bcm5974_config_table[] = { { SN_COORD, -150, 6730 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, + { + USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI, + USB_DEVICE_ID_APPLE_WELLSPRING8_ISO, + USB_DEVICE_ID_APPLE_WELLSPRING8_JIS, + HAS_INTEGRATED_BUTTON, + 0, sizeof(struct bt_data), + 0x83, TYPE3, FINGER_TYPE3, FINGER_TYPE3 + SIZEOF_ALL_FINGERS, + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4620, 5140 }, + { SN_COORD, -150, 6600 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } + }, {} }; @@ -557,6 +581,9 @@ static int report_tp_state(struct bcm5974 *dev, int size) input_report_key(input, BTN_LEFT, ibt); } + if (c->tp_type == TYPE3) + input_report_key(input, BTN_LEFT, dev->tp_data[BUTTON_TYPE3]); + input_sync(input); return 0; @@ -572,9 +599,14 @@ static int report_tp_state(struct bcm5974 *dev, int size) static int bcm5974_wellspring_mode(struct bcm5974 *dev, bool on) { - char *data = kmalloc(8, GFP_KERNEL); int retval = 0, size; + char *data; + /* Type 3 does not require a mode switch */ + if (dev->cfg.tp_type == TYPE3) + return 0; + + data = kmalloc(8, GFP_KERNEL); if (!data) { dev_err(&dev->intf->dev, "out of memory\n"); retval = -ENOMEM; From 14a1d6afc622566a7dcb961a183010ec83bea009 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 3 Jul 2013 15:00:42 -0700 Subject: [PATCH 0058/1368] arch: c6x: mm: include "asm/uaccess.h" to pass compiling commit fe74650166dd6905b0cf66594eb79222dc9d7109 upstream. Need include "asm/uaccess.h" to pass compiling. The related error (with allmodconfig): arch/c6x/mm/init.c: In function `paging_init': arch/c6x/mm/init.c:46:2: error: implicit declaration of function `set_fs' [-Werror=implicit-function-declaration] arch/c6x/mm/init.c:46:9: error: `KERNEL_DS' undeclared (first use in this function) arch/c6x/mm/init.c:46:9: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Chen Gang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/c6x/mm/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index a9fcd89b251b..b74ccb5a7690 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -18,6 +18,7 @@ #include #include +#include /* * ZERO_PAGE is a special page that is used for zero-initialized From 1926bf8ae44d80c9f50103f11fc4f17e2e2bf684 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 3 Jul 2013 15:01:03 -0700 Subject: [PATCH 0059/1368] ocfs2: xattr: fix inlined xattr reflink commit ef962df057aaafd714f5c22ba3de1be459571fdf upstream. Inlined xattr shared free space of inode block with inlined data or data extent record, so the size of the later two should be adjusted when inlined xattr is enabled. See ocfs2_xattr_ibody_init(). But this isn't done well when reflink. For inode with inlined data, its max inlined data size is adjusted in ocfs2_duplicate_inline_data(), no problem. But for inode with data extent record, its record count isn't adjusted. Fix it, or data extent record and inlined xattr may overwrite each other, then cause data corruption or xattr failure. One panic caused by this bug in our test environment is the following: kernel BUG at fs/ocfs2/xattr.c:1435! invalid opcode: 0000 [#1] SMP Pid: 10871, comm: multi_reflink_t Not tainted 2.6.39-300.17.1.el5uek #1 RIP: ocfs2_xa_offset_pointer+0x17/0x20 [ocfs2] RSP: e02b:ffff88007a587948 EFLAGS: 00010283 RAX: 0000000000000000 RBX: 0000000000000010 RCX: 00000000000051e4 RDX: ffff880057092060 RSI: 0000000000000f80 RDI: ffff88007a587a68 RBP: ffff88007a587948 R08: 00000000000062f4 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000010 R13: ffff88007a587a68 R14: 0000000000000001 R15: ffff88007a587c68 FS: 00007fccff7f06e0(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000015cf000 CR3: 000000007aa76000 CR4: 0000000000000660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process multi_reflink_t Call Trace: ocfs2_xa_reuse_entry+0x60/0x280 [ocfs2] ocfs2_xa_prepare_entry+0x17e/0x2a0 [ocfs2] ocfs2_xa_set+0xcc/0x250 [ocfs2] ocfs2_xattr_ibody_set+0x98/0x230 [ocfs2] __ocfs2_xattr_set_handle+0x4f/0x700 [ocfs2] ocfs2_xattr_set+0x6c6/0x890 [ocfs2] ocfs2_xattr_user_set+0x46/0x50 [ocfs2] generic_setxattr+0x70/0x90 __vfs_setxattr_noperm+0x80/0x1a0 vfs_setxattr+0xa9/0xb0 setxattr+0xc3/0x120 sys_fsetxattr+0xa8/0xd0 system_call_fastpath+0x16/0x1b Signed-off-by: Junxiao Bi Reviewed-by: Jie Liu Acked-by: Joel Becker Cc: Mark Fasheh Cc: Sunil Mushran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/xattr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea308c144..5b8d94436105 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6499,6 +6499,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) } new_oi = OCFS2_I(args->new_inode); + /* + * Adjust extent record count to reserve space for extended attribute. + * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). + */ + if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && + !(ocfs2_inode_is_fast_symlink(args->new_inode))) { + struct ocfs2_extent_list *el = &new_di->id2.i_list; + le16_add_cpu(&el->l_count, -(inline_size / + sizeof(struct ocfs2_extent_rec))); + } spin_lock(&new_oi->ip_lock); new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); From 323af551c09ddc7cac1c22486b1419aeb1cccdd5 Mon Sep 17 00:00:00 2001 From: Paul Clements Date: Wed, 3 Jul 2013 15:09:04 -0700 Subject: [PATCH 0060/1368] nbd: correct disconnect behavior commit c378f70adbc1bbecd9e6db145019f14b2f688c7c upstream. Currently, when a disconnect is requested by the user (via NBD_DISCONNECT ioctl) the return from NBD_DO_IT is undefined (it is usually one of several error codes). This means that nbd-client does not know if a manual disconnect was performed or whether a network error occurred. Because of this, nbd-client's persist mode (which tries to reconnect after error, but not after manual disconnect) does not always work correctly. This change fixes this by causing NBD_DO_IT to always return 0 if a user requests a disconnect. This means that nbd-client can correctly either persist the connection (if an error occurred) or disconnect (if the user requested it). Signed-off-by: Paul Clements Acked-by: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 7 ++++++- include/linux/nbd.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 46b35f7acfde..cf1576d54363 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -623,8 +623,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (!nbd->sock) return -EINVAL; + nbd->disconnect = 1; + nbd_send_req(nbd, &sreq); - return 0; + return 0; } case NBD_CLEAR_SOCK: { @@ -654,6 +656,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd->sock = SOCKET_I(inode); if (max_part > 0) bdev->bd_invalidated = 1; + nbd->disconnect = 0; /* we're connected now */ return 0; } else { fput(file); @@ -743,6 +746,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, set_capacity(nbd->disk, 0); if (max_part > 0) ioctl_by_bdev(bdev, BLKRRPART, 0); + if (nbd->disconnect) /* user requested, ignore socket errors */ + return 0; return nbd->harderror; } diff --git a/include/linux/nbd.h b/include/linux/nbd.h index 4871170a04a0..ae4981ebd18e 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -41,6 +41,7 @@ struct nbd_device { u64 bytesize; pid_t pid; /* pid of nbd-client, if attached */ int xmit_timeout; + int disconnect; /* a disconnect has been requested by user */ }; #endif From c723caeecf4240abbdafbc4ccd96ef08828a2d4f Mon Sep 17 00:00:00 2001 From: Xudong Hao Date: Fri, 31 May 2013 12:21:29 +0800 Subject: [PATCH 0061/1368] PCI: Finish SR-IOV VF setup before adding the device commit fbf33f516bdbcc2ab1ba1e54dfb720b0cfaa6874 upstream. Commit 4f535093cf "PCI: Put pci_dev in device tree as early as possible" moves device registering from pci_bus_add_devices() to pci_device_add(). That causes problems for virtual functions because device_add(&virtfn->dev) is called before setting the virtfn->is_virtfn flag, which then causes Xen to report PCI virtual functions as PCI physical functions. Fix it by setting virtfn->is_virtfn before calling pci_device_add(). [Jiang Liu]: Move the setting of virtfn->is_virtfn ahead further for better readability and modify changelog. Signed-off-by: Xudong Hao Signed-off-by: Jiang Liu Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/iov.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index c93071d428f5..a971a6f6268d 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -92,6 +92,8 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); pci_setup_device(virtfn); virtfn->dev.parent = dev->dev.parent; + virtfn->physfn = pci_dev_get(dev); + virtfn->is_virtfn = 1; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = dev->resource + PCI_IOV_RESOURCES + i; @@ -113,9 +115,6 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) pci_device_add(virtfn, virtfn->bus); mutex_unlock(&iov->dev->sriov->lock); - virtfn->physfn = pci_dev_get(dev); - virtfn->is_virtfn = 1; - rc = pci_bus_add_device(virtfn); sprintf(buf, "virtfn%u", id); rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); From 65a1fb23bc5b2f21b44f8b81c89ccda808b3b321 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 01:10:08 +0800 Subject: [PATCH 0062/1368] PCI: Fix refcount issue in pci_create_root_bus() error recovery path commit 343df771e671d821478dd3ef525a0610b808dbf8 upstream. After calling device_register(&bridge->dev), the bridge is reference- counted, and it is illegal to call kfree() on it except in the release function. [bhelgaas: changelog, use put_device() after device_register() failure] Signed-off-by: Jiang Liu Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 70f10fa3c1b2..ea37072e8bf2 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1703,12 +1703,16 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, bridge->dev.release = pci_release_bus_bridge_dev; dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus); error = pcibios_root_bridge_prepare(bridge); - if (error) - goto bridge_dev_reg_err; + if (error) { + kfree(bridge); + goto err_out; + } error = device_register(&bridge->dev); - if (error) - goto bridge_dev_reg_err; + if (error) { + put_device(&bridge->dev); + goto err_out; + } b->bridge = get_device(&bridge->dev); device_enable_async_suspend(b->bridge); pci_set_bus_of_node(b); @@ -1764,8 +1768,6 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, class_dev_reg_err: put_device(&bridge->dev); device_unregister(&bridge->dev); -bridge_dev_reg_err: - kfree(bridge); err_out: kfree(b); return NULL; From f4ab9ece03dfc4b15c63b5fd5506183554976903 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 13 Jun 2013 11:45:59 +0300 Subject: [PATCH 0063/1368] iwlwifi: pcie: fix race in queue unmapping commit b967613d7e7c7bad176f5627c55e2d8c5aa2480e upstream. When a queue is disabled, it frees all its entries. Later, the op_mode might still get notifications from the firmware that triggers to free entries in the tx queue. The transport should be prepared for these races and know to ignore reclaim calls on queues that have been disabled and whose entries have been freed. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/tx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index c5e30294c5ac..51fca425def7 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -576,9 +576,12 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) spin_lock_bh(&txq->lock); while (q->write_ptr != q->read_ptr) { + IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", + txq_id, q->read_ptr); iwl_pcie_txq_free_tfd(trans, txq); q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd); } + txq->active = false; spin_unlock_bh(&txq->lock); } @@ -927,6 +930,12 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, spin_lock_bh(&txq->lock); + if (!txq->active) { + IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n", + txq_id, ssn); + goto out; + } + if (txq->q.read_ptr == tfd_num) goto out; @@ -1103,6 +1112,7 @@ void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, int fifo, (fifo << SCD_QUEUE_STTS_REG_POS_TXF) | (1 << SCD_QUEUE_STTS_REG_POS_WSL) | SCD_QUEUE_STTS_REG_MSK); + trans_pcie->txq[txq_id].active = true; IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d on FIFO %d WrPtr: %d\n", txq_id, fifo, ssn & 0xff); } From d26cb648c5bf93d2e17b54d3dd55523e68d625ce Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 13 Jun 2013 13:10:00 +0300 Subject: [PATCH 0064/1368] iwlwifi: pcie: wake the queue if stopped when being unmapped commit 8a487b1a7432b20ff3f82387a8ce7555a964b44e upstream. When the queue is unmapped while it was so loaded that mac80211's was stopped, we need to wake the queue after having freed all the packets in the queue. Not doing so can result in weird stuff like: * run lots of traffic (mac80211's queue gets stopped) * RFKILL * de-assert RFKILL * no traffic Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index 51fca425def7..48acfc620191 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -583,6 +583,9 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) } txq->active = false; spin_unlock_bh(&txq->lock); + + /* just in case - this queue may have been stopped */ + iwl_wake_queue(trans, txq); } /* From 7d31ea0d281c8443c8637c78822f56388f4cf82d Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Mon, 3 Jun 2013 18:24:10 +0800 Subject: [PATCH 0065/1368] ahci: Add AMD CZ SATA device ID commit fafe5c3d82a470d73de53e6b08eb4e28d974d895 upstream. To add AMD CZ SATA controller device ID of IDE mode. [bhelgaas: drop pci_ids.h update] Signed-off-by: Shane Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 1 + drivers/pci/quirks.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 2b50dfdf1cfc..fe1a889c6910 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -310,6 +310,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* AMD */ { PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */ + { PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */ /* AMD is using RAID class only for ahci controllers */ { PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci }, diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 7d68aeebf56b..df4655c5c138 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1022,6 +1022,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE, quirk_amd_ide_mode); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE, quirk_amd_ide_mode); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x7900, quirk_amd_ide_mode); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, 0x7900, quirk_amd_ide_mode); /* * Serverworks CSB5 IDE does not fully support native mode From 21e8e04d6f137aeab872fd9e0ef356dd1126deb4 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Wed, 19 Jun 2013 16:36:45 -0700 Subject: [PATCH 0066/1368] ahci: AHCI-mode SATA patch for Intel Coleto Creek DeviceIDs commit 1cfc7df3de10c40ed459e13cce6de616023bf41c upstream. This patch adds the AHCI-mode SATA DeviceIDs for the Intel Coleto Creek PCH. Signed-off-by: Seth Heasley Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index fe1a889c6910..b112625482ca 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -291,6 +291,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x8d64), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x8d66), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x8d6e), board_ahci }, /* Wellsburg RAID */ + { PCI_VDEVICE(INTEL, 0x23a3), board_ahci }, /* Coleto Creek AHCI */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From f4f391e86e14bcbc0ddcbca8a52b81cac7bf0c7c Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Sat, 8 Jun 2013 16:00:16 +0800 Subject: [PATCH 0067/1368] ahci: remove pmp link online check in FBS EH commit 912b9ac683b112615d5605686f1dc086402ce9f7 upstream. ata_link_online() check in ahci_error_intr() is unnecessary, it should be removed otherwise may lead to lockup with FBS enabled PMP. http://marc.info/?l=linux-ide&m=137050421603272&w=2 Reported-by: Yu Liu Signed-off-by: Shane Huang Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index a70ff154f586..7b9bdd822c62 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1560,8 +1560,7 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat) u32 fbs = readl(port_mmio + PORT_FBS); int pmp = fbs >> PORT_FBS_DWE_OFFSET; - if ((fbs & PORT_FBS_SDE) && (pmp < ap->nr_pmp_links) && - ata_link_online(&ap->pmp_link[pmp])) { + if ((fbs & PORT_FBS_SDE) && (pmp < ap->nr_pmp_links)) { link = &ap->pmp_link[pmp]; fbs_need_dec = true; } From d2f5933432d677d224f83cc865c52f14a4785eec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 21 May 2013 20:43:50 +0200 Subject: [PATCH 0068/1368] timer: Fix jiffies wrap behavior of round_jiffies_common() commit 9e04d3804d3ac97d8c03a41d78d0f0674b5d01e1 upstream. Direct compare of jiffies related values does not work in the wrap around case. Replace it with time_is_after_jiffies(). Signed-off-by: Bart Van Assche Cc: Arjan van de Ven Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/519BC066.5080600@acm.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/timer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/timer.c b/kernel/timer.c index 15ffdb3f1948..15bc1b41021d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -149,9 +149,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu, /* now that we have rounded, subtract the extra skew again */ j -= cpu * 3; - if (j <= jiffies) /* rounding ate our timeout entirely; */ - return original; - return j; + /* + * Make sure j is still in the future. Otherwise return the + * unmodified value. + */ + return time_is_after_jiffies(j) ? j : original; } /** From 8049d11b3af40dc43b45017a1646ee6bd02287a1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 20 May 2013 11:26:50 -0400 Subject: [PATCH 0069/1368] Btrfs: fix estale with btrfs send commit 139f807a1eba1e484941a98fb93ee32ad859a6a1 upstream. This fixes bugzilla 57491. If we take a snapshot of a fs with a unlink ongoing and then try to send that root we will run into problems. When comparing with a parent root we will search the parents and the send roots commit_root, which if we've just created the snapshot will include the file that needs to be evicted by the orphan cleanup. So when we find a changed extent we will try and copy that info into the send stream, but when we lookup the inode we use the normal root, which no longer has the inode because the orphan cleanup deleted it. The best solution I have for this is to check our otransid with the generation of the commit root and if they match just commit the transaction again, that way we get the changes from the orphan cleanup. With this patch the reproducer I made for this bugzilla no longer returns ESTALE when trying to do the send. Thanks, Reported-by: Chris Wilson Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index ff40f1c00ce3..ba9690b9ae24 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4579,6 +4579,41 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) send_root = BTRFS_I(file_inode(mnt_file))->root; fs_info = send_root->fs_info; + /* + * This is done when we lookup the root, it should already be complete + * by the time we get here. + */ + WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); + + /* + * If we just created this root we need to make sure that the orphan + * cleanup has been done and committed since we search the commit root, + * so check its commit root transid with our otransid and if they match + * commit the transaction to make sure everything is updated. + */ + down_read(&send_root->fs_info->extent_commit_sem); + if (btrfs_header_generation(send_root->commit_root) == + btrfs_root_otransid(&send_root->root_item)) { + struct btrfs_trans_handle *trans; + + up_read(&send_root->fs_info->extent_commit_sem); + + trans = btrfs_attach_transaction_barrier(send_root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) { + ret = PTR_ERR(trans); + goto out; + } + /* ENOENT means theres no transaction */ + } else { + ret = btrfs_commit_transaction(trans, send_root); + if (ret) + goto out; + } + } else { + up_read(&send_root->fs_info->extent_commit_sem); + } + arg = memdup_user(arg_, sizeof(*arg)); if (IS_ERR(arg)) { ret = PTR_ERR(arg); From c701343cd0444bd9440a4236331e80f45c75ece2 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 29 Jun 2013 23:15:19 -0400 Subject: [PATCH 0070/1368] Btrfs: hold the tree mod lock in __tree_mod_log_rewind commit f1ca7e98a67da618d8595866e0860308525154da upstream. We need to hold the tree mod log lock in __tree_mod_log_rewind since we walk forward in the tree mod entries, otherwise we'll end up with random entries and trip the BUG_ON() at the front of __tree_mod_log_rewind. This fixes the panics people were seeing when running find /whatever -type f -exec btrfs fi defrag {} \; Thansk, Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 02fae7f7e42c..2543d11b7d8b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1161,8 +1161,8 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, * time_seq). */ static void -__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, - struct tree_mod_elem *first_tm) +__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, + u64 time_seq, struct tree_mod_elem *first_tm) { u32 n; struct rb_node *next; @@ -1172,6 +1172,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, unsigned long p_size = sizeof(struct btrfs_key_ptr); n = btrfs_header_nritems(eb); + tree_mod_log_read_lock(fs_info); while (tm && tm->seq >= time_seq) { /* * all the operations are recorded with the operator used for @@ -1226,6 +1227,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, if (tm->index != first_tm->index) break; } + tree_mod_log_read_unlock(fs_info); btrfs_set_header_nritems(eb, n); } @@ -1274,7 +1276,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, extent_buffer_get(eb_rewin); btrfs_tree_read_lock(eb_rewin); - __tree_mod_log_rewind(eb_rewin, time_seq, tm); + __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); WARN_ON(btrfs_header_nritems(eb_rewin) > BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root)); @@ -1350,7 +1352,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) btrfs_set_header_generation(eb, old_generation); } if (tm) - __tree_mod_log_rewind(eb, time_seq, tm); + __tree_mod_log_rewind(root->fs_info, eb, time_seq, tm); else WARN_ON(btrfs_header_level(eb) != 0); WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root)); From 7a44bf654d595a523f01664f781633861fc7262d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 1 Jul 2013 16:10:16 -0400 Subject: [PATCH 0071/1368] Btrfs: only do the tree_mod_log_free_eb if this is our last ref commit 7fb7d76f96bfcbea25007d190ba828b18e13d29d upstream. There is another bug in the tree mod log stuff in that we're calling tree_mod_log_free_eb every single time a block is cow'ed. The problem with this is that if this block is shared by multiple snapshots we will call this multiple times per block, so if we go to rewind the mod log for this block we'll BUG_ON() in __tree_mod_log_rewind because we try to rewind a free twice. We only want to call tree_mod_log_free_eb if we are actually freeing the block. With this patch I no longer hit the panic in __tree_mod_log_rewind. Thanks, Reviewed-by: Jan Schmidt Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2543d11b7d8b..7fb054ba1b60 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1089,7 +1089,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); - tree_mod_log_free_eb(root->fs_info, buf); + if (last_ref) + tree_mod_log_free_eb(root->fs_info, buf); btrfs_free_tree_block(trans, root, buf, parent_start, last_ref); } From e47438e6af1377ae8b29d2c0262513f54f17adcd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 May 2013 19:33:42 -0400 Subject: [PATCH 0072/1368] ext4: fix data offset overflow on 32-bit archs in ext4_inline_data_fiemap() commit eaf3793728d07d995f1e74250b2d0005f7ae98b5 upstream. On 32-bit archs when sector_t is defined as 32-bit the logic computing data offset in ext4_inline_data_fiemap(). Fix that by properly typing the shifted value. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3e2bf873e8a8..33331b4c2178 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1842,7 +1842,7 @@ int ext4_inline_data_fiemap(struct inode *inode, if (error) goto out; - physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; + physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; physical += offsetof(struct ext4_inode, i_block); length = i_size_read(inode); From 80747f06d94394d0c2bc384515abe0ba460e58f4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 May 2013 19:37:56 -0400 Subject: [PATCH 0073/1368] ext4: fix overflows in SEEK_HOLE, SEEK_DATA implementations commit e7293fd146846e2a44d29e0477e0860c60fb856b upstream. ext4_lblk_t is just u32 so multiplying it by blocksize can easily overflow for files larger than 4 GB. Fix that by properly typing the block offsets before shifting. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Reviewed-by: Zheng Liu Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b1b4d51b5d86..b19f0a457f32 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -312,7 +312,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, blkbits = inode->i_sb->s_blocksize_bits; startoff = *offset; lastoff = startoff; - endoff = (map->m_lblk + map->m_len) << blkbits; + endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; index = startoff >> PAGE_CACHE_SHIFT; end = endoff >> PAGE_CACHE_SHIFT; @@ -457,7 +457,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { if (last != start) - dataoff = last << blkbits; + dataoff = (loff_t)last << blkbits; break; } @@ -468,7 +468,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) ext4_es_find_delayed_extent_range(inode, last, last, &es); if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { if (last != start) - dataoff = last << blkbits; + dataoff = (loff_t)last << blkbits; break; } @@ -486,7 +486,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) } last++; - dataoff = last << blkbits; + dataoff = (loff_t)last << blkbits; } while (last <= end); mutex_unlock(&inode->i_mutex); @@ -540,7 +540,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { last += ret; - holeoff = last << blkbits; + holeoff = (loff_t)last << blkbits; continue; } @@ -551,7 +551,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) ext4_es_find_delayed_extent_range(inode, last, last, &es); if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { last = es.es_lblk + es.es_len; - holeoff = last << blkbits; + holeoff = (loff_t)last << blkbits; continue; } @@ -566,7 +566,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) &map, &holeoff); if (!unwritten) { last += ret; - holeoff = last << blkbits; + holeoff = (loff_t)last << blkbits; continue; } } From b9ea84239c8098c367680278d1daa4d69ae88719 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 May 2013 19:38:56 -0400 Subject: [PATCH 0074/1368] ext4: fix data offset overflow in ext4_xattr_fiemap() on 32-bit archs commit a60697f411eb365fb09e639e6f183fe33d1eb796 upstream. On 32-bit architectures with 32-bit sector_t computation of data offset in ext4_xattr_fiemap() can overflow resulting in reporting bogus data location. Fix the problem by typing block number to proper type before shifting. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bc0f1910b9cf..e49da585739f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4659,7 +4659,7 @@ static int ext4_xattr_fiemap(struct inode *inode, error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - physical = iloc.bh->b_blocknr << blockbits; + physical = (__u64)iloc.bh->b_blocknr << blockbits; offset = EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize; physical += offset; @@ -4667,7 +4667,7 @@ static int ext4_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); } else { /* external block */ - physical = EXT4_I(inode)->i_file_acl << blockbits; + physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; } From bb39c83ce5453f7b702865d7fe5cc283689d8810 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 May 2013 19:39:56 -0400 Subject: [PATCH 0075/1368] ext4: fix overflow when counting used blocks on 32-bit architectures commit 8af8eecc1331dbf5e8c662022272cf667e213da5 upstream. The arithmetics adding delalloc blocks to the number of used blocks in ext4_getattr() can easily overflow on 32-bit archs as we first multiply number of blocks by blocksize and then divide back by 512. Make the arithmetics more clever and also use proper type (unsigned long long instead of unsigned long). Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c2434f89e9ed..ccbfbbb12dc5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4808,7 +4808,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode; - unsigned long delalloc_blocks; + unsigned long long delalloc_blocks; inode = dentry->d_inode; generic_fillattr(inode, stat); @@ -4826,7 +4826,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), EXT4_I(inode)->i_reserved_data_blocks); - stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); return 0; } From d860925657644b00b0fed703c647edcf2aa27975 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 5 Jul 2013 23:11:16 -0400 Subject: [PATCH 0076/1368] ext4: fix ext4_get_group_number() commit 960fd856fdc3b08b3638f3f9b6b4bfceb77660c7 upstream. The function ext4_get_group_number() was introduced as an optimization in commit bd86298e60b8. Unfortunately, this commit incorrectly calculate the group number for file systems with a 1k block size (when s_first_data_block is 1 instead of zero). This could cause the following kernel BUG: [ 568.877799] ------------[ cut here ]------------ [ 568.877833] kernel BUG at fs/ext4/mballoc.c:3728! [ 568.877840] Oops: Exception in kernel mode, sig: 5 [#1] [ 568.877845] SMP NR_CPUS=32 NUMA pSeries [ 568.877852] Modules linked in: binfmt_misc [ 568.877861] CPU: 1 PID: 3516 Comm: fs_mark Not tainted 3.10.0-03216-g7c6809f-dirty #1 [ 568.877867] task: c0000001fb0b8000 ti: c0000001fa954000 task.ti: c0000001fa954000 [ 568.877873] NIP: c0000000002f42a4 LR: c0000000002f4274 CTR: c000000000317ef8 [ 568.877879] REGS: c0000001fa956ed0 TRAP: 0700 Not tainted (3.10.0-03216-g7c6809f-dirty) [ 568.877884] MSR: 8000000000029032 CR: 24000428 XER: 00000000 [ 568.877902] SOFTE: 1 [ 568.877905] CFAR: c0000000002b5464 [ 568.877908] GPR00: 0000000000000001 c0000001fa957150 c000000000c6a408 c0000001fb588000 GPR04: 0000000000003fff c0000001fa9571c0 c0000001fa9571c4 000138098c50625f GPR08: 1301200000000000 0000000000000002 0000000000000001 0000000000000000 GPR12: 0000000024000422 c00000000f33a300 0000000000008000 c0000001fa9577f0 GPR16: c0000001fb7d0100 c000000000c29190 c0000000007f46e8 c000000000a14672 GPR20: 0000000000000001 0000000000000008 ffffffffffffffff 0000000000000000 GPR24: 0000000000000100 c0000001fa957278 c0000001fdb2bc78 c0000001fa957288 GPR28: 0000000000100100 c0000001fa957288 c0000001fb588000 c0000001fdb2bd10 [ 568.877993] NIP [c0000000002f42a4] .ext4_mb_release_group_pa+0xec/0x1c0 [ 568.877999] LR [c0000000002f4274] .ext4_mb_release_group_pa+0xbc/0x1c0 [ 568.878004] Call Trace: [ 568.878008] [c0000001fa957150] [c0000000002f4274] .ext4_mb_release_group_pa+0xbc/0x1c0 (unreliable) [ 568.878017] [c0000001fa957200] [c0000000002fb070] .ext4_mb_discard_lg_preallocations+0x394/0x444 [ 568.878025] [c0000001fa957340] [c0000000002fb45c] .ext4_mb_release_context+0x33c/0x734 [ 568.878032] [c0000001fa957440] [c0000000002fbcf8] .ext4_mb_new_blocks+0x4a4/0x5f4 [ 568.878039] [c0000001fa957510] [c0000000002ef56c] .ext4_ext_map_blocks+0xc28/0x1178 [ 568.878047] [c0000001fa957640] [c0000000002c1a94] .ext4_map_blocks+0x2c8/0x490 [ 568.878054] [c0000001fa957730] [c0000000002c536c] .ext4_writepages+0x738/0xc60 [ 568.878062] [c0000001fa957950] [c000000000168a78] .do_writepages+0x5c/0x80 [ 568.878069] [c0000001fa9579d0] [c00000000015d1c4] .__filemap_fdatawrite_range+0x88/0xb0 [ 568.878078] [c0000001fa957aa0] [c00000000015d23c] .filemap_write_and_wait_range+0x50/0xfc [ 568.878085] [c0000001fa957b30] [c0000000002b8edc] .ext4_sync_file+0x220/0x3c4 [ 568.878092] [c0000001fa957be0] [c0000000001f849c] .vfs_fsync_range+0x64/0x80 [ 568.878098] [c0000001fa957c70] [c0000000001f84f0] .vfs_fsync+0x38/0x4c [ 568.878105] [c0000001fa957d00] [c0000000001f87f4] .do_fsync+0x54/0x90 [ 568.878111] [c0000001fa957db0] [c0000000001f8894] .SyS_fsync+0x28/0x3c [ 568.878120] [c0000001fa957e30] [c000000000009c88] syscall_exit+0x0/0x7c [ 568.878125] Instruction dump: [ 568.878130] 60000000 813d0034 81610070 38000000 7f8b4800 419e001c 813f007c 7d2bfe70 [ 568.878144] 7d604a78 7c005850 54000ffe 7c0007b4 <0b000000> e8a10076 e87f0090 7fa4eb78 [ 568.878160] ---[ end trace 594d911d9654770b ]--- In addition fix the STD_GROUP optimization so that it works for bigalloc file systems as well. Signed-off-by: "Theodore Ts'o" Reported-by: Li Zhong Reviewed-by: Lukas Czerner Signed-off-by: Greg Kroah-Hartman --- fs/ext4/balloc.c | 4 ++-- fs/ext4/super.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d0f13eada0ed..3742e4c85723 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb, ext4_group_t group; if (test_opt2(sb, STD_GROUP_SIZE)) - group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + - block) >> + group = (block - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >> (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3); else ext4_get_group_no_and_offset(sb, block, &group, NULL); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94cc84db7c9a..9751333d8030 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3586,10 +3586,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); - /* Do we have standard group size of blocksize * 8 blocks ? */ - if (sbi->s_blocks_per_group == blocksize << 3) - set_opt2(sb, STD_GROUP_SIZE); - for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; @@ -3659,6 +3655,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } + /* Do we have standard group size of clustersize * 8 blocks ? */ + if (sbi->s_blocks_per_group == clustersize << 3) + set_opt2(sb, STD_GROUP_SIZE); + /* * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache. From bbb1d9216c9d8f5ac0aca90719512deae7f989ea Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 11 Jul 2013 18:54:37 -0400 Subject: [PATCH 0077/1368] ext4: don't show usrquota/grpquota twice in /proc/mounts commit ad065dd01662ae22138899e6b1c8eeb3a529964f upstream. We now print mount options in a generic fashion in ext4_show_options(), so we shouldn't be explicitly printing the {usr,grp}quota options in ext4_show_quota_options(). Without this patch, /proc/mounts can look like this: /dev/vdb /vdb ext4 rw,relatime,quota,usrquota,data=ordered,usrquota 0 0 ^^^^^^^^ ^^^^^^^^ Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9751333d8030..6681c037b99d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1684,12 +1684,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - - if (test_opt(sb, USRQUOTA)) - seq_puts(seq, ",usrquota"); - - if (test_opt(sb, GRPQUOTA)) - seq_puts(seq, ",grpquota"); #endif } From 7557d8d5a46aaa4d0ec98bffdeedf9f3c7464088 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 13 Jul 2013 00:40:35 -0400 Subject: [PATCH 0078/1368] ext4: don't allow ext4_free_blocks() to fail due to ENOMEM commit e7676a704ee0a1ef71a6b23760b5a8f6896cb1a1 upstream. The filesystem should not be marked inconsistent if ext4_free_blocks() is not able to allocate memory. Unfortunately some callers (most notably ext4_truncate) don't have a way to reflect an error back up to the VFS. And even if we did, most userspace applications won't deal with most system calls returning ENOMEM anyway. Reported-by: Nagachandra P Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index def84082a9a9..59c6750b894f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4735,11 +4735,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed */ + retry: new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); if (!new_entry) { - ext4_mb_unload_buddy(&e4b); - err = -ENOMEM; - goto error_return; + /* + * We use a retry loop because + * ext4_free_blocks() is not allowed to fail. + */ + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; } new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; From c9f3f7f79f3ee852f253bdb1dbbb43d79bcaa6c2 Mon Sep 17 00:00:00 2001 From: Jed Davis Date: Thu, 20 Jun 2013 10:16:29 +0100 Subject: [PATCH 0079/1368] ARM: 7765/1: perf: Record the user-mode PC in the call chain. commit c5f927a6f62196226915f12194c9d0df4e2210d7 upstream. With this change, we no longer lose the innermost entry in the user-mode part of the call chain. See also the x86 port, which includes the ip. It's possible to partially work around this problem by post-processing the data to use the PERF_SAMPLE_IP value, but this works only if the CPU wasn't in the kernel when the sample was taken. Signed-off-by: Jed Davis Signed-off-by: Will Deacon Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/perf_event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 8c3094d0f7b7..d9f5cd4e533f 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -569,6 +569,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) return; } + perf_callchain_store(entry, regs->ARM_pc); tail = (struct frame_tail __user *)regs->ARM_fp - 1; while ((entry->nr < PERF_MAX_STACK_DEPTH) && From b7dc4032cd44843ea93119adb00a5f15b7b05943 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 12:06:19 +0100 Subject: [PATCH 0080/1368] ARM: 7767/1: let the ASID allocator handle suspended animation commit ae120d9edfe96628f03d87634acda0bfa7110632 upstream. When a CPU is running a process, the ASID for that process is held in a per-CPU variable (the "active ASIDs" array). When the ASID allocator handles a rollover, it copies the active ASIDs into a "reserved ASIDs" array to ensure that a process currently running on another CPU will continue to run unaffected. The active array is zero-ed to indicate that a rollover occurred. Because of this mechanism, a reserved ASID is only remembered for a single rollover. A subsequent rollover will completely refill the reserved ASIDs array. In a severely oversubscribed environment where a CPU can be prevented from running for extended periods of time (think virtual machines), the above has a horrible side effect: [P{a} denotes process P running with ASID a] CPU-0 CPU-1 A{x} [active = ] [suspended] runs B{y} [active = ] [rollover: active = <0 0> reserved = ] runs B{y} [active = <0 y> reserved = ] [rollover: active = <0 0> reserved = <0 y>] runs C{x} [active = <0 x>] [resumes] runs A{x} At that stage, both A and C have the same ASID, with deadly consequences. The fix is to preserve reserved ASIDs across rollovers if the CPU doesn't have an active ASID when the rollover occurs. Acked-by: Will Deacon Acked-by: Catalin Carinas Signed-off-by: Marc Zyngier Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/context.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 2ac37372ef52..8e12fcbb2c63 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -128,6 +128,15 @@ static void flush_context(unsigned int cpu) asid = 0; } else { asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); __set_bit(ASID_TO_IDX(asid), asid_map); } per_cpu(reserved_asids, i) = asid; From 4aa6022129a8b5e0b0e42815521071ce7a766a84 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 12:06:55 +0100 Subject: [PATCH 0081/1368] ARM: 7768/1: prevent risks of out-of-bound access in ASID allocator commit b8e4a4740fa2b17c0a447b3ab783b3dc10702e27 upstream. On a CPU that never ran anything, both the active and reserved ASID fields are set to zero. In this case the ASID_TO_IDX() macro will return -1, which is not a very useful value to index a bitmap. Instead of trying to offset the ASID so that ASID #1 is actually bit 0 in the asid_map bitmap, just always ignore bit 0 and start the search from bit 1. This makes the code a bit more readable, and without risk of OoB access. Acked-by: Will Deacon Acked-by: Catalin Marinas Reported-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/context.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 8e12fcbb2c63..83e09058f96f 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -39,10 +39,7 @@ * non 64-bit operations. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) -#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) - -#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) -#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) +#define NUM_USER_ASIDS ASID_FIRST_VERSION static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); @@ -137,7 +134,7 @@ static void flush_context(unsigned int cpu) */ if (asid == 0) asid = per_cpu(reserved_asids, i); - __set_bit(ASID_TO_IDX(asid), asid_map); + __set_bit(asid & ~ASID_MASK, asid_map); } per_cpu(reserved_asids, i) = asid; } @@ -176,17 +173,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs - * as requiring flushes. + * as requiring flushes. We always count from ASID #1, + * as we reserve ASID #0 to switch via TTBR0 and indicate + * rollover events. */ - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); if (asid == NUM_USER_ASIDS) { generation = atomic64_add_return(ASID_FIRST_VERSION, &asid_generation); flush_context(cpu); - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); } __set_bit(asid, asid_map); - asid = generation | IDX_TO_ASID(asid); + asid |= generation; cpumask_clear(mm_cpumask(mm)); } From e6a01df4cd3a38e8dcc05d763c0f793b2f30c5f2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 12:07:27 +0100 Subject: [PATCH 0082/1368] ARM: 7769/1: Cortex-A15: fix erratum 798181 implementation commit 0d0752bca1f9a91fb646647aa4abbb21156f316c upstream. Looking into the active_asids array is not enough, as we also need to look into the reserved_asids array (they both represent processes that are currently running). Also, not holding the ASID allocator lock is racy, as another CPU could schedule that process and trigger a rollover, making the erratum workaround miss an IPI. Exposing this outside of context.c is a little ugly on the side, so let's define a new entry point that the erratum workaround can call to obtain the cpumask. Acked-by: Will Deacon Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/mmu_context.h | 10 +++++++++- arch/arm/kernel/smp_tlb.c | 18 ++---------------- arch/arm/mm/context.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index a7b85e0d0cc1..dc90203c6ddb 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -27,7 +27,15 @@ void __check_vmalloc_seq(struct mm_struct *mm); void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); #define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) -DECLARE_PER_CPU(atomic64_t, active_asids); +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask); +#else /* !CONFIG_ARM_ERRATA_798181 */ +static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ +} +#endif /* CONFIG_ARM_ERRATA_798181 */ #else /* !CONFIG_CPU_HAS_ASID */ diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 9a52a07aa40e..a98b62dca2fa 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void) static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) { - int cpu, this_cpu; + int this_cpu; cpumask_t mask = { CPU_BITS_NONE }; if (!erratum_a15_798181()) @@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) dummy_flush_tlb_a15_erratum(); this_cpu = get_cpu(); - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - /* - * We only need to send an IPI if the other CPUs are running - * the same ASID as the one being invalidated. There is no - * need for locking around the active_asids check since the - * switch_mm() function has at least one dmb() (as required by - * this workaround) in case a context switch happens on - * another CPU after the condition below. - */ - if (atomic64_read(&mm->context.id) == - atomic64_read(&per_cpu(active_asids, cpu))) - cpumask_set_cpu(cpu, &mask); - } + a15_erratum_get_cpumask(this_cpu, mm, &mask); smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); put_cpu(); } diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 83e09058f96f..eeab06ebd06e 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -45,10 +45,37 @@ static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ + int cpu; + unsigned long flags; + u64 context_id, asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. + */ + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); + } + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + #ifdef CONFIG_ARM_LPAE static void cpu_set_reserved_ttbr0(void) { From 766f6d2a32311196679b7b8771d60391bc8a4e9b Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 1 Jul 2013 09:53:30 +0100 Subject: [PATCH 0083/1368] ARM: 7778/1: smp_twd: twd_update_frequency need be run on all online CPUs commit cbbe6f82b489e7ceba4ad7c833bd3a76cd0084cb upstream. When the local timer freq changed, the twd_update_frequency function should be run all the CPUs include itself, otherwise, the twd freq will not get updated and the local timer will not run correcttly. smp_call_function will run functions on all other CPUs, but not include himself, this is not correct,use on_each_cpu instead to fix this issue. Acked-by: Linus Walleij Cc: Linus Walleij Cc: Rob Herring Cc: Shawn Guo Cc: Arnd Bergmann Acked-by: Shawn Guo Signed-off-by: Jason Liu Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/smp_twd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 90525d9d290b..f6fd1d4398c6 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -120,7 +120,7 @@ static int twd_rate_change(struct notifier_block *nb, * changing cpu. */ if (flags == POST_RATE_CHANGE) - smp_call_function(twd_update_frequency, + on_each_cpu(twd_update_frequency, (void *)&cnd->new_rate, 1); return NOTIFY_OK; From 74d22408355c5fafd6d6e4bd79b7aeeee0d845a9 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 18 Apr 2013 18:34:06 +0100 Subject: [PATCH 0084/1368] ARM: dts: imx: cpus/cpu nodes dts updates commit 7925e89f54fc49bcd1e73f0a65c4a3eb35b9cfb1 upstream. This patch updates the in-kernel dts files according to the latest cpus and cpu bindings updates for ARM. Signed-off-by: Lorenzo Pieralisi Acked-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx23.dtsi | 8 ++++++-- arch/arm/boot/dts/imx28.dtsi | 8 ++++++-- arch/arm/boot/dts/imx6dl.dtsi | 2 ++ arch/arm/boot/dts/imx6q.dtsi | 4 ++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index 73fd7d0887b5..587ceef81e45 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -23,8 +23,12 @@ aliases { }; cpus { - cpu@0 { - compatible = "arm,arm926ejs"; + #address-cells = <0>; + #size-cells = <0>; + + cpu { + compatible = "arm,arm926ej-s"; + device_type = "cpu"; }; }; diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 600f7cb51f3e..4c10a1968c0e 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -32,8 +32,12 @@ aliases { }; cpus { - cpu@0 { - compatible = "arm,arm926ejs"; + #address-cells = <0>; + #size-cells = <0>; + + cpu { + compatible = "arm,arm926ej-s"; + device_type = "cpu"; }; }; diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi index 5bcdf3a90bb3..62dc78126795 100644 --- a/arch/arm/boot/dts/imx6dl.dtsi +++ b/arch/arm/boot/dts/imx6dl.dtsi @@ -18,12 +18,14 @@ cpus { cpu@0 { compatible = "arm,cortex-a9"; + device_type = "cpu"; reg = <0>; next-level-cache = <&L2>; }; cpu@1 { compatible = "arm,cortex-a9"; + device_type = "cpu"; reg = <1>; next-level-cache = <&L2>; }; diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi index 21e675848bd1..dc54a72a3bcd 100644 --- a/arch/arm/boot/dts/imx6q.dtsi +++ b/arch/arm/boot/dts/imx6q.dtsi @@ -18,6 +18,7 @@ cpus { cpu@0 { compatible = "arm,cortex-a9"; + device_type = "cpu"; reg = <0>; next-level-cache = <&L2>; operating-points = < @@ -39,18 +40,21 @@ cpu@0 { cpu@1 { compatible = "arm,cortex-a9"; + device_type = "cpu"; reg = <1>; next-level-cache = <&L2>; }; cpu@2 { compatible = "arm,cortex-a9"; + device_type = "cpu"; reg = <2>; next-level-cache = <&L2>; }; cpu@3 { compatible = "arm,cortex-a9"; + device_type = "cpu"; reg = <3>; next-level-cache = <&L2>; }; From b0edd4bfb0850da39eb6a07aa3a87c526d4ca83b Mon Sep 17 00:00:00 2001 From: Takanari Hayama Date: Mon, 1 Jul 2013 16:38:53 +0900 Subject: [PATCH 0085/1368] ARM: shmobile: r8a73a4: Fix resources for SCIFB0 commit f820b60582f75e73e83b8505d7e48fe59770f558 upstream. Fix base address and IRQ resources associated with SCIFB0. This bug was introduced by e481a528901d0cd18b5b5fcbdc55207ea3b6ef68 ("ARM: shmobile: r8a73a4 SCIF support V3") which was included in v3.10. Signed-off-by: Takanari Hayama Acked-by: Magnus Damm [ horms+renesas@verge.net.au: Add information about commit and version this bug was added in ] Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-shmobile/setup-r8a73a4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c index c5a75a7a508f..7f45c2edbca9 100644 --- a/arch/arm/mach-shmobile/setup-r8a73a4.c +++ b/arch/arm/mach-shmobile/setup-r8a73a4.c @@ -62,7 +62,7 @@ enum { SCIFA0, SCIFA1, SCIFB0, SCIFB1, SCIFB2, SCIFB3 }; static const struct plat_sci_port scif[] = { SCIFA_DATA(SCIFA0, 0xe6c40000, gic_spi(144)), /* SCIFA0 */ SCIFA_DATA(SCIFA1, 0xe6c50000, gic_spi(145)), /* SCIFA1 */ - SCIFB_DATA(SCIFB0, 0xe6c50000, gic_spi(145)), /* SCIFB0 */ + SCIFB_DATA(SCIFB0, 0xe6c20000, gic_spi(148)), /* SCIFB0 */ SCIFB_DATA(SCIFB1, 0xe6c30000, gic_spi(149)), /* SCIFB1 */ SCIFB_DATA(SCIFB2, 0xe6ce0000, gic_spi(150)), /* SCIFB2 */ SCIFB_DATA(SCIFB3, 0xe6cf0000, gic_spi(151)), /* SCIFB3 */ From 65e9f76985abdf9198deae2231648d7e5408caf0 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 1 Jul 2013 20:48:07 +0900 Subject: [PATCH 0086/1368] ARM: shmobile: emev2 GIO3 resource fix commit 1eb14ea1e6bcd11d6d0ba937fc39808bb4d3453e upstream. Fix GIO3 base addresses for EMEV2. This bug was introduced by 088efd9273b5076a0aead479aa31f1066d182b3e ("mach-shmobile: Emma Mobile EV2 GPIO support V3") which was included in v3.5. Signed-off-by: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-shmobile/setup-emev2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-shmobile/setup-emev2.c b/arch/arm/mach-shmobile/setup-emev2.c index 899a86c31ec9..1ccddd228112 100644 --- a/arch/arm/mach-shmobile/setup-emev2.c +++ b/arch/arm/mach-shmobile/setup-emev2.c @@ -287,14 +287,14 @@ static struct gpio_em_config gio3_config = { static struct resource gio3_resources[] = { [0] = { .name = "GIO_096", - .start = 0xe0050100, - .end = 0xe005012b, + .start = 0xe0050180, + .end = 0xe00501ab, .flags = IORESOURCE_MEM, }, [1] = { .name = "GIO_096", - .start = 0xe0050140, - .end = 0xe005015f, + .start = 0xe00501c0, + .end = 0xe00501df, .flags = IORESOURCE_MEM, }, [2] = { From 9131e72c764bd7ea9554392edab26a368a765c4d Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 9 Jul 2013 09:52:55 +0100 Subject: [PATCH 0087/1368] ARM: mm: fix boot on SA1110 Assabet commit 319e0b4f02f73983c03a2ca38595fc6367929edf upstream. Commit 83db0384 (mm/ARM: use common help functions to free reserved pages) broke booting on the Assabet by trying to convert a PFN to a virtual address using the __va() macro. This macro takes the physical address, not a PFN. Fix this. Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9a5cdc01fcdf..0ecc43fd6229 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -600,7 +600,7 @@ void __init mem_init(void) #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL); + free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, 0, NULL); #endif free_highpages(); From c14a4eaf8a43a295176942c7a43a999b4557f7c3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 3 Jul 2013 15:00:43 -0700 Subject: [PATCH 0088/1368] drivers/dma/pl330.c: fix locking in pl330_free_chan_resources() commit da331ba8e9c5de72a27e50f71105395bba6eebe0 upstream. tasklet_kill() may sleep so call it before taking pch->lock. Fixes following lockup: BUG: scheduling while atomic: cat/2383/0x00000002 Modules linked in: unwind_backtrace+0x0/0xfc __schedule_bug+0x4c/0x58 __schedule+0x690/0x6e0 sys_sched_yield+0x70/0x78 tasklet_kill+0x34/0x8c pl330_free_chan_resources+0x24/0x88 dma_chan_put+0x4c/0x50 [...] BUG: spinlock lockup suspected on CPU#0, swapper/0/0 lock: 0xe52aa04c, .magic: dead4ead, .owner: cat/2383, .owner_cpu: 1 unwind_backtrace+0x0/0xfc do_raw_spin_lock+0x194/0x204 _raw_spin_lock_irqsave+0x20/0x28 pl330_tasklet+0x2c/0x5a8 tasklet_action+0xfc/0x114 __do_softirq+0xe4/0x19c irq_exit+0x98/0x9c handle_IPI+0x124/0x16c gic_handle_irq+0x64/0x68 __irq_svc+0x40/0x70 cpuidle_wrap_enter+0x4c/0xa0 cpuidle_enter_state+0x18/0x68 cpuidle_idle_call+0xac/0xe0 cpu_idle+0xac/0xf0 Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Acked-by: Jassi Brar Cc: Vinod Koul Cc: Tomasz Figa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index a17553f7c028..7ec82f0667eb 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2485,10 +2485,10 @@ static void pl330_free_chan_resources(struct dma_chan *chan) struct dma_pl330_chan *pch = to_pchan(chan); unsigned long flags; - spin_lock_irqsave(&pch->lock, flags); - tasklet_kill(&pch->task); + spin_lock_irqsave(&pch->lock, flags); + pl330_release_channel(pch->pl330_chid); pch->pl330_chid = NULL; From 71a429cf00c7ff8da3fc3356617987e746055c00 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 Jul 2013 16:00:29 -0700 Subject: [PATCH 0089/1368] memcg, kmem: fix reference count handling on the error path commit f37a96914d1aea10fed8d9af10251f0b9caea31b upstream. mem_cgroup_css_online calls mem_cgroup_put if memcg_init_kmem fails. This is not correct because only memcg_propagate_kmem takes an additional reference while mem_cgroup_sockets_init is allowed to fail as well (although no current implementation fails) but it doesn't take any reference. This all suggests that it should be memcg_propagate_kmem that should clean up after itself so this patch moves mem_cgroup_put over there. Unfortunately this is not that easy (as pointed out by Li Zefan) because memcg_kmem_mark_dead marks the group dead (KMEM_ACCOUNTED_DEAD) if it is marked active (KMEM_ACCOUNTED_ACTIVE) which is the case even if memcg_propagate_kmem fails so the additional reference is dropped in that case in kmem_cgroup_destroy which means that the reference would be dropped two times. The easiest way then would be to simply remove mem_cgrroup_put from mem_cgroup_css_online and rely on kmem_cgroup_destroy doing the right thing. Signed-off-by: Michal Hocko Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Tejun Heo Cc: Glauber Costa Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fd79df5d3152..15b040904dc3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6296,14 +6296,6 @@ mem_cgroup_css_online(struct cgroup *cont) error = memcg_init_kmem(memcg, &mem_cgroup_subsys); mutex_unlock(&memcg_create_mutex); - if (error) { - /* - * We call put now because our (and parent's) refcnts - * are already in place. mem_cgroup_put() will internally - * call __mem_cgroup_free, so return directly - */ - mem_cgroup_put(memcg); - } return error; } From 7642257802319a58c118649e20ad5c7ee0d663f3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 3 Jul 2013 15:02:40 -0700 Subject: [PATCH 0090/1368] mm/memory-hotplug: fix lowmem count overflow when offline pages commit cea27eb2a202959783f81254c48c250ddd80e129 upstream. The logic for the memory-remove code fails to correctly account the Total High Memory when a memory block which contains High Memory is offlined as shown in the example below. The following patch fixes it. Before logic memory remove: MemTotal: 7603740 kB MemFree: 6329612 kB Buffers: 94352 kB Cached: 872008 kB SwapCached: 0 kB Active: 626932 kB Inactive: 519216 kB Active(anon): 180776 kB Inactive(anon): 222944 kB Active(file): 446156 kB Inactive(file): 296272 kB Unevictable: 0 kB Mlocked: 0 kB HighTotal: 7294672 kB HighFree: 5704696 kB LowTotal: 309068 kB LowFree: 624916 kB After logic memory remove: MemTotal: 7079452 kB MemFree: 5805976 kB Buffers: 94372 kB Cached: 872000 kB SwapCached: 0 kB Active: 626936 kB Inactive: 519236 kB Active(anon): 180780 kB Inactive(anon): 222944 kB Active(file): 446156 kB Inactive(file): 296292 kB Unevictable: 0 kB Mlocked: 0 kB HighTotal: 7294672 kB HighFree: 5181024 kB LowTotal: 4294752076 kB LowFree: 624952 kB [mhocko@suse.cz: fix CONFIG_HIGHMEM=n build] Signed-off-by: Wanpeng Li Reviewed-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c3edb624fccf..2ee0fd313f03 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6142,6 +6142,10 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) list_del(&page->lru); rmv_page_order(page); zone->free_area[order].nr_free--; +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page)) + totalhigh_pages -= 1 << order; +#endif for (i = 0; i < (1 << order); i++) SetPageReserved((page+i)); pfn += (1 << order); From e885d8b8a8ff435d4fcb66c7376e349c4658621d Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 25 Jun 2013 14:03:16 -0500 Subject: [PATCH 0091/1368] Handle big endianness in NTLM (ntlmv2) authentication commit fdf96a907c1fbb93c633e2b7ede3b8df26d6a4c0 upstream. This is RH bug 970891 Uppercasing of username during calculation of ntlmv2 hash fails because UniStrupr function does not handle big endian wchars. Also fix a comment in the same code to reflect its correct usage. [To make it easier for stable (rather than require 2nd patch) fixed this patch of Shirish's to remove endian warning generated by sparse -- steve f.] Reported-by: steve Signed-off-by: Shirish Pargaonkar Reviewed-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_unicode.h | 8 ++++---- fs/cifs/cifsencrypt.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 4fb097468e21..fe8d6276410a 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -327,14 +327,14 @@ UniToupper(register wchar_t uc) /* * UniStrupr: Upper case a unicode string */ -static inline wchar_t * -UniStrupr(register wchar_t *upin) +static inline __le16 * +UniStrupr(register __le16 *upin) { - register wchar_t *up; + register __le16 *up; up = upin; while (*up) { /* For all characters */ - *up = UniToupper(*up); + *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); up++; } return upin; /* Return input pointer */ diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 71436d1fca13..f59d0d58258e 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -414,7 +414,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, int rc = 0; int len; char nt_hash[CIFS_NTHASH_SIZE]; - wchar_t *user; + __le16 *user; wchar_t *domain; wchar_t *server; @@ -439,7 +439,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, return rc; } - /* convert ses->user_name to unicode and uppercase */ + /* convert ses->user_name to unicode */ len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); if (user == NULL) { @@ -448,7 +448,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, } if (len) { - len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp); + len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { memset(user, '\0', 2); From 4791df938fe7aa27a9fe50e4582efd583a045541 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 2 Apr 2013 12:24:37 +0200 Subject: [PATCH 0092/1368] UBIFS: correct mount message commit beadadfa5467e09e36891f39cae1f5d8d3bbf17e upstream. When mounting an UBIFS R/W volume, we have the message: UBIFS: mounted UBI device 0, volume 1, name "rootfs"(null) With this patch, we'll have: UBIFS: mounted UBI device 0, volume 1, name "rootfs" Which is, I think, what was intended. Signed-off-by: Richard Genoud Signed-off-by: Artem Bityutskiy Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f21acf0ef01f..879b9976c12b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1412,7 +1412,7 @@ static int mount_ubifs(struct ubifs_info *c) ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", c->vi.ubi_num, c->vi.vol_id, c->vi.name, - c->ro_mount ? ", R/O mode" : NULL); + c->ro_mount ? ", R/O mode" : ""); x = (long long)c->main_lebs * c->leb_size; y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", From 0f4a56e16d5fc9028b62ba529177a3109513e111 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 21 Jul 2013 18:23:38 -0700 Subject: [PATCH 0093/1368] Linux 3.10.2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b75cc30af7bb..43367309fc78 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Unicycling Gorilla From b101957a5871ec616e18a8a6f0330d0e06a05754 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 24 Jun 2013 15:47:22 +1000 Subject: [PATCH 0094/1368] powerpc/hw_brk: Fix setting of length for exact mode breakpoints commit b0b0aa9c7faf94e92320eabd8a1786c7747e40a8 upstream. The smallest match region for both the DABR and DAWR is 8 bytes, so the kernel needs to filter matches when users want to look at regions smaller than this. Currently we set the length of PPC_BREAKPOINT_MODE_EXACT breakpoints to 8. This is wrong as in exact mode we should only match on 1 address, hence the length should be 1. This ensures that the kernel will filter out any exact mode hardware breakpoint matches on any addresses other than the requested one. Signed-off-by: Michael Neuling Reported-by: Edjunior Barbosa Machado Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 98c2fc198712..64f7bd5b1b0f 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1449,7 +1449,9 @@ static long ppc_set_hwdebug(struct task_struct *child, */ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) { len = bp_info->addr2 - bp_info->addr; - } else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { + } else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + len = 1; + else { ptrace_put_breakpoints(child); return -EINVAL; } From 277b5ae153a96d594ae6d61b244b936d6f77ff56 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 24 Jun 2013 15:47:23 +1000 Subject: [PATCH 0095/1368] powerpc/hw_brk: Fix clearing of extraneous IRQ commit 540e07c67efe42ef6b6be4f1956931e676d58a15 upstream. In 9422de3 "powerpc: Hardware breakpoints rewrite to handle non DABR breakpoint registers" we changed the way we mark extraneous irqs with this: - info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) && - (dar - bp->attr.bp_addr < bp->attr.bp_len)); + if (!((bp->attr.bp_addr <= dar) && + (dar - bp->attr.bp_addr < bp->attr.bp_len))) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; Unfortunately this is bogus as it never clears extraneous IRQ if it's already set. This correctly clears extraneous IRQ before possibly setting it. Signed-off-by: Michael Neuling Reported-by: Edjunior Barbosa Machado Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/hw_breakpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index a949bdfc9623..1150ae7c22c3 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -250,6 +250,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) * we still need to single-step the instruction, but we don't * generate an event. */ + info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; if (!((bp->attr.bp_addr <= dar) && (dar - bp->attr.bp_addr < bp->attr.bp_len))) info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; From 3b743326ed2edd5e118950874f7bdaed5759f977 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 1 Jul 2013 14:19:50 +1000 Subject: [PATCH 0096/1368] powerpc/hw_brk: Fix off by one error when validating DAWR region end commit e2a800beaca1f580945773e57d1a0e7cd37b1056 upstream. The Data Address Watchpoint Register (DAWR) on POWER8 can take a 512 byte range but this range must not cross a 512 byte boundary. Unfortunately we were off by one when calculating the end of the region, hence we were not allowing some breakpoint regions which were actually valid. This fixes this error. Signed-off-by: Michael Neuling Reported-by: Edjunior Barbosa Machado Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1150ae7c22c3..f0b47d1a6b0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -176,7 +176,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ if ((bp->attr.bp_addr >> 10) != - ((bp->attr.bp_addr + bp->attr.bp_len) >> 10)) + ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10)) return -EINVAL; } if (info->len > From e544a74525b9accf4919822196f29cd967e7e5c0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 1 Jul 2013 17:54:09 +1000 Subject: [PATCH 0097/1368] powerpc/powernv: Fix iommu initialization again commit 74251fe21bfa9310ddba9e0436d1fcf389e602ee upstream. So because those things always end up in trainwrecks... In 7846de406 we moved back the iommu initialization earlier, essentially undoing 37f02195b which was causing us endless trouble... except that in the meantime we had merged 959c9bdd58 (to workaround the original breakage) which is now ... broken :-) This fixes it by doing a partial revert of the latter (we keep the ppc_md. path which will be needed in the hotplug case, which happens also during some EEH error recovery situations). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci-ioda.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9c9d15e4cdf2..7816beff1db8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -441,6 +441,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev set_iommu_table_base(&pdev->dev, &pe->tce32_table); } +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, &pe->tce32_table); + if (dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, u64 *startp, u64 *endp) { @@ -596,6 +607,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } iommu_init_table(tbl, phb->hose->node); + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); + return; fail: /* XXX Failure: Try to fallback to 64-bit only ? */ @@ -667,6 +683,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } iommu_init_table(tbl, phb->hose->node); + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); + return; fail: if (pe->tce32_seg >= 0) From d6ea4422c89da1b08029d4db10635942a3b29b2a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:15 +1000 Subject: [PATCH 0098/1368] powerpc/tm: Fix writing top half of MSR on 32 bit signals commit 1d25f11fdbcc5390d68efd98c28900bfd29b264c upstream. The MSR TM controls are in the top 32 bits of the MSR hence on 32 bit signals, we stick the top half of the MSR in the checkpointed signal context so that the user can access it. Unfortunately, we don't currently write anything to the checkpointed signal context when coming in a from a non transactional process and hence the top MSR bits can contain junk. This updates the 32 bit signal handling code to always write something to the top MSR bits so that users know if the process is transactional or not and the kernel can use it on signal return. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 201385c3a1ae..5bc819f50af6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, * altivec/spe instructions at some point. */ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - int sigret, int ctx_has_vsx_region) + struct mcontext __user *tm_frame, int sigret, + int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -475,6 +476,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (__put_user(msr, &frame->mc_gregs[PT_MSR])) return 1; + /* We need to write 0 the MSR top 32 bits in the tm frame so that we + * can check it on the restore to see if TM is active + */ + if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) + return 1; + if (sigret) { /* Set up the sigreturn trampoline: li r0,sigret; sc */ if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) @@ -952,6 +959,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; + struct mcontext __user *tm_frame = NULL; void __user *addr; unsigned long newsp = 0; int sigret; @@ -985,23 +993,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_frame = &rt_sf->uc_transact.uc_mcontext; if (MSR_TM_ACTIVE(regs->msr)) { - if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, - &rt_sf->uc_transact.uc_mcontext, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret)) goto badframe; } else #endif - if (save_user_regs(regs, frame, sigret, 1)) + { + if (save_user_regs(regs, frame, tm_frame, sigret, 1)) goto badframe; + } regs->link = tramp; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) - || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), - &rt_sf->uc_transact.uc_regs)) + || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; } else @@ -1170,7 +1179,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) - || save_user_regs(regs, mctx, 0, ctx_has_vsx_region) + || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) || put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked) || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) return -EFAULT; @@ -1392,6 +1401,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, { struct sigcontext __user *sc; struct sigframe __user *frame; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; int sigret; unsigned long tramp; @@ -1425,6 +1435,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; if (MSR_TM_ACTIVE(regs->msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, sigret)) @@ -1432,8 +1443,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } else #endif - if (save_user_regs(regs, &frame->mctx, sigret, 1)) + { + if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) goto badframe; + } regs->link = tramp; From 743834135bc07e3bb9d6166607b874443fe1537c Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:16 +1000 Subject: [PATCH 0099/1368] powerpc/tm: Fix 32 bit non-rt signals commit fee55450710dff32a13ae30b4129ec7b5a4b44d0 upstream. Currently sys_sigreturn() is TM unaware. Therefore, if we take a 32 bit signal without SIGINFO (non RT) inside a transaction, on signal return we don't restore the signal frame correctly. This checks if the signal frame being restoring is an active transaction, and if so, it copies the additional state to ptregs so it can be restored. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 5bc819f50af6..fa81462f6987 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1494,16 +1494,22 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, struct pt_regs *regs) { + struct sigframe __user *sf; struct sigcontext __user *sc; struct sigcontext sigctx; struct mcontext __user *sr; void __user *addr; sigset_t set; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct mcontext __user *mcp, *tm_mcp; + unsigned long msr_hi; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sc = &sf->sctx; addr = sc; if (copy_from_user(&sigctx, sc, sizeof(sigctx))) goto badframe; @@ -1520,11 +1526,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, #endif set_current_blocked(&set); - sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); - addr = sr; - if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) - || restore_user_regs(regs, sr, 1)) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + mcp = (struct mcontext __user *)&sf->mctx; + tm_mcp = (struct mcontext __user *)&sf->mctx_transact; + if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) goto badframe; + if (MSR_TM_ACTIVE(msr_hi<<32)) { + if (!cpu_has_feature(CPU_FTR_TM)) + goto badframe; + if (restore_tm_user_regs(regs, mcp, tm_mcp)) + goto badframe; + } else +#endif + { + sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); + addr = sr; + if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) + || restore_user_regs(regs, sr, 1)) + goto badframe; + } set_thread_flag(TIF_RESTOREALL); return 0; From bc8ae5222e9e42582bd32028c638a5b4517e69e2 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:17 +1000 Subject: [PATCH 0100/1368] powerpc/tm: Fix restoration of MSR on 32bit signal return commit 2c27a18f8736da047bef2b997bdd48efc667e3c9 upstream. Currently we clear out the MSR TM bits on signal return assuming that the signal should never return to an active transaction. This is bogus as the user may do this. It's most likely the transaction will be doomed due to a treclaim but that's a problem for the HW not the kernel. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel. That code has now gone but we didn't correctly fix up this part of the signals code which still makes the assumption that it must be returning to a suspended transaction. This pulls out both MSR TM bits from the user supplied context rather than just setting TM suspend. We pull out only the bits needed to ensure the user can't do anything dangerous to the MSR. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index fa81462f6987..364cb1e7300e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -754,7 +754,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { long err; - unsigned long msr; + unsigned long msr, msr_hi; #ifdef CONFIG_VSX int i; #endif @@ -859,8 +859,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; + /* Get the top half of the MSR */ + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + return 1; + /* Pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { From f6ff89fc47b05a061017200128ce154ae7165469 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:18 +1000 Subject: [PATCH 0101/1368] powerpc/tm: Fix return of 32bit rt signals to active transactions commit 55e4341850ac56e63a3eefe9583a9000042164fa upstream. Currently we only restore signals which are transactionally suspended but it's possible that the transaction can be restored even when it's active. Most likely this will result in a transactional rollback by the hardware as the transaction will have been doomed by an earlier treclaim. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel. That code has now gone but we didn't correctly fix up this part of the signals code which still makes assumptions based on having software rollback. This changes the signal return code to always restore both contexts on 32 bit rt signal return. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 364cb1e7300e..0f83122e6676 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1245,7 +1245,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) goto bad; - if (MSR_TM_SUSPENDED(msr_hi<<32)) { + if (MSR_TM_ACTIVE(msr_hi<<32)) { /* We only recheckpoint on return if we're * transaction. */ From 81bcd526fea6383be0361b1827fcac656d276d57 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Sun, 9 Jun 2013 21:23:19 +1000 Subject: [PATCH 0102/1368] powerpc/tm: Fix return of active 64bit signals commit 87b4e5393af77f5cba124638f19f6c426e210aec upstream. Currently we only restore signals which are transactionally suspended but it's possible that the transaction can be restored even when it's active. Most likely this will result in a transactional rollback by the hardware as the transaction will have been doomed by an earlier treclaim. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel. That code has now gone but we didn't correctly fix up this part of the signals code which still makes assumptions based on having software rollback. This changes the signal return code to always restore both contexts on 64 bit signal return. It also ensures that the MSR TM bits are properly restored from the signal context which they are not currently. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/signal_64.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 345947367ec0..887e99d85bc2 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -410,6 +410,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + /* pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); /* The following non-GPR non-FPR non-VR state is also checkpointed: */ @@ -505,8 +509,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this: */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { @@ -654,7 +656,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; - if (MSR_TM_SUSPENDED(msr)) { + if (MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; if (__get_user(uc_transact, &uc->uc_link)) From 497f0957430403bc4e3a0c776da3907fe769b64f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Jun 2013 17:47:54 +1000 Subject: [PATCH 0103/1368] powerpc: Remove unreachable relocation on exception handlers commit 1d567cb4bd42d560a7621cac6f6aebe87343689e upstream. We have relocation on exception handlers defined for h_data_storage and h_instr_storage. However we will never take relocation on exceptions for these because they can only come from a guest, and we never take relocation on exceptions when we transition from guest to host. We also have a handler for hmi_exception (Hypervisor Maintenance) which is defined in the architecture to never be delivered with relocation on, see see v2.07 Book III-S section 6.5. So remove the handlers, leaving a branch to self just to be double extra paranoid. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 40e4a17c8ba0..0a9fdea2fc0f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -793,14 +793,10 @@ system_call_relon_pSeries: STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) . = 0x4e00 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_data_storage_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e20 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_instr_storage_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e40 SET_SCRATCH0(r13) @@ -808,9 +804,7 @@ system_call_relon_pSeries: b emulation_assist_relon_hv . = 0x4e60 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e80 SET_SCRATCH0(r13) @@ -1180,14 +1174,8 @@ tm_unavailable_common: __end_handlers: /* Equivalents to the above handlers for relocation-on interrupt vectors */ - STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00) - STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20) STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) - STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) From 77d8caacd2924ca7cbb989c30709503e93c7f026 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Jun 2013 17:47:55 +1000 Subject: [PATCH 0104/1368] powerpc: Remove KVMTEST from RELON exception handlers commit c9f69518e5f08170bc857984a077f693d63171df upstream. KVMTEST is a macro which checks whether we are taking an exception from guest context, if so we branch out of line and eventually call into the KVM code to handle the switch. When running real guests on bare metal (HV KVM) the hardware ensures that we never take a relocation on exception when transitioning from guest to host. For PR KVM we disable relocation on exceptions ourself in kvmppc_core_init_vm(), as of commit a413f47 "Disable relocation on exceptions whenever PR KVM is active". So convert all the RELON macros to use NOTEST, and drop the remaining KVM_HANDLER() definitions we have for 0xe40 and 0xe80. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/exception-64s.h | 8 ++++---- arch/powerpc/kernel/exceptions-64s.S | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 46793b58a761..07ca627e52c0 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -358,12 +358,12 @@ label##_relon_pSeries: \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_STD, KVMTEST_PR, vec) + EXC_STD, NOTEST, vec) #define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ .globl label##_relon_pSeries; \ label##_relon_pSeries: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD) #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ @@ -374,12 +374,12 @@ label##_relon_hv: \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_HV, KVMTEST, vec) + EXC_HV, NOTEST, vec) #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ .globl label##_relon_hv; \ label##_relon_hv: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV) /* This associate vector numbers with bits in paca->irq_happened */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0a9fdea2fc0f..6bd676391a6d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1175,9 +1175,7 @@ __end_handlers: /* Equivalents to the above handlers for relocation-on interrupt vectors */ STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) From 8e0af91a12c135f6c22e472660f3400d46ed78ac Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Jun 2013 17:47:56 +1000 Subject: [PATCH 0105/1368] powerpc: Rename and flesh out the facility unavailable exception handler commit 021424a1fce335e05807fd770eb8e1da30a63eea upstream. The exception at 0xf60 is not the TM (Transactional Memory) unavailable exception, it is the "Facility Unavailable Exception", rename it as such. Flesh out the handler to acknowledge the fact that it can be called for many reasons, one of which is TM being unavailable. Use STD_EXCEPTION_COMMON() for the exception body, for some reason we had it open-coded, I've checked the generated code is identical. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 21 ++++++------------ arch/powerpc/kernel/traps.c | 33 +++++++++++++++++++++------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6bd676391a6d..d55a63c3559a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -341,10 +341,11 @@ vsx_unavailable_pSeries_1: EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_pSeries +facility_unavailable_trampoline: . = 0xf60 SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b tm_unavailable_pSeries + b facility_unavailable_pSeries #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) @@ -522,7 +523,7 @@ denorm_done: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) - STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) /* @@ -829,11 +830,11 @@ vsx_unavailable_relon_pSeries_1: EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_relon_pSeries -tm_unavailable_relon_pSeries_1: +facility_unavailable_relon_trampoline: . = 0x4f60 SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b tm_unavailable_relon_pSeries + b facility_unavailable_relon_pSeries STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION @@ -1159,15 +1160,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl .vsx_unavailable_exception b .ret_from_except - .align 7 - .globl tm_unavailable_common -tm_unavailable_common: - EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN) - bl .save_nvgprs - DISABLE_INTS - addi r3,r1,STACK_FRAME_OVERHEAD - bl .tm_unavailable_exception - b .ret_from_except + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) .align 7 .globl __end_handlers @@ -1180,7 +1173,7 @@ __end_handlers: STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c0e5caf8ccc7..2053bbd26a06 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1282,25 +1282,42 @@ void vsx_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } -void tm_unavailable_exception(struct pt_regs *regs) +void facility_unavailable_exception(struct pt_regs *regs) { + static char *facility_strings[] = { + "FPU", + "VMX/VSX", + "DSCR", + "PMU SPRs", + "BHRB", + "TM", + "AT", + "EBB", + "TAR", + }; + char *facility; + u64 value; + + value = mfspr(SPRN_FSCR) >> 56; + /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - /* Currently we never expect a TMU exception. Catch - * this and kill the process! - */ - printk(KERN_EMERG "Unexpected TM unavailable exception at %lx " - "(msr %lx)\n", - regs->nip, regs->msr); + if (value < ARRAY_SIZE(facility_strings)) + facility = facility_strings[value]; + else + facility = "unknown"; + + pr_err("Facility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", + facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return; } - die("Unexpected TM unavailable exception", regs, SIGABRT); + die("Unexpected facility unavailable exception", regs, SIGABRT); } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM From d24966cf890f3b22e379654ba70a38ad9a67d9db Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Jun 2013 17:47:57 +1000 Subject: [PATCH 0106/1368] powerpc: Wire up the HV facility unavailable exception commit b14b6260efeee6eb8942c6e6420e31281892acb6 upstream. Similar to the facility unavailble exception, except the facilities are controlled by HFSCR. Adapt the facility_unavailable_exception() so it can be called for either the regular or Hypervisor facility unavailable exceptions. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 15 +++++++++++++++ arch/powerpc/kernel/traps.c | 16 ++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d55a63c3559a..4e00d223b2e3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -347,6 +347,12 @@ facility_unavailable_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b facility_unavailable_pSeries +hv_facility_unavailable_trampoline: + . = 0xf80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b facility_unavailable_hv + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) @@ -525,6 +531,8 @@ denorm_done: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) + STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -836,6 +844,12 @@ facility_unavailable_relon_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b facility_unavailable_relon_pSeries +hv_facility_unavailable_relon_trampoline: + . = 0x4f80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b facility_unavailable_relon_hv + STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION . = 0x5500 @@ -1174,6 +1188,7 @@ __end_handlers: STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) + STD_RELON_EXCEPTION_HV_OOL(0xf80, facility_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2053bbd26a06..e4f205a209d2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1295,10 +1295,18 @@ void facility_unavailable_exception(struct pt_regs *regs) "EBB", "TAR", }; - char *facility; + char *facility, *prefix; u64 value; - value = mfspr(SPRN_FSCR) >> 56; + if (regs->trap == 0xf60) { + value = mfspr(SPRN_FSCR); + prefix = ""; + } else { + value = mfspr(SPRN_HFSCR); + prefix = "Hypervisor "; + } + + value = value >> 56; /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) @@ -1309,8 +1317,8 @@ void facility_unavailable_exception(struct pt_regs *regs) else facility = "unknown"; - pr_err("Facility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", - facility, regs->nip, regs->msr); + pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", + prefix, facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); From 0288917da5e378ea1fc0290f824d85ac9e07570d Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 20 Mar 2013 14:30:12 +0800 Subject: [PATCH 0107/1368] powerpc/smp: Section mismatch from smp_release_cpus to __initdata spinning_secondaries commit 8246aca7058f3f2c2ae503081777965cd8df7b90 upstream. the smp_release_cpus is a normal funciton and called in normal environments, but it calls the __initdata spinning_secondaries. need modify spinning_secondaries to match smp_release_cpus. the related warning: (the linker report boot_paca.33377, but it should be spinning_secondaries) ----------------------------------------------------------------------------- WARNING: arch/powerpc/kernel/built-in.o(.text+0x23176): Section mismatch in reference from the function .smp_release_cpus() to the variable .init.data:boot_paca.33377 The function .smp_release_cpus() references the variable __initdata boot_paca.33377. This is often because .smp_release_cpus lacks a __initdata annotation or the annotation of boot_paca.33377 is wrong. WARNING: arch/powerpc/kernel/built-in.o(.text+0x231fe): Section mismatch in reference from the function .smp_release_cpus() to the variable .init.data:boot_paca.33377 The function .smp_release_cpus() references the variable __initdata boot_paca.33377. This is often because .smp_release_cpus lacks a __initdata annotation or the annotation of boot_paca.33377 is wrong. ----------------------------------------------------------------------------- Signed-off-by: Chen Gang Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e379d3fd1694..389fb8077cc9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -76,7 +76,7 @@ #endif int boot_cpuid = 0; -int __initdata spinning_secondaries; +int spinning_secondaries; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions From 910a165889c9e0c79cb96883db3948c5eea204e2 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 24 Jun 2013 22:08:05 -0500 Subject: [PATCH 0108/1368] powerpc/numa: Do not update sysfs cpu registration from invalid context commit dd023217e17e72b46fb4d49c7734c426938c3dba upstream. The topology update code that updates the cpu node registration in sysfs should not be called while in stop_machine(). The register/unregister calls take a lock and may sleep. This patch moves these calls outside of the call to stop_machine(). Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/numa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 88c0425dc0a8..2859a1f52279 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1433,11 +1433,9 @@ static int update_cpu_topology(void *data) if (cpu != update->cpu) continue; - unregister_cpu_under_node(update->cpu, update->old_nid); unmap_cpu_from_node(update->cpu); map_cpu_to_node(update->cpu, update->new_nid); vdso_getcpu_init(); - register_cpu_under_node(update->cpu, update->new_nid); } return 0; @@ -1485,6 +1483,9 @@ int arch_update_cpu_topology(void) stop_machine(update_cpu_topology, &updates[0], &updated_cpus); for (ud = &updates[0]; ud; ud = ud->next) { + unregister_cpu_under_node(ud->cpu, ud->old_nid); + register_cpu_under_node(ud->cpu, ud->new_nid); + dev = get_cpu_device(ud->cpu); if (dev) kobject_uevent(&dev->kobj, KOBJ_CHANGE); From a9514fe520175d13cba23cf3c9dbba4df9691c86 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:10 +1000 Subject: [PATCH 0109/1368] powerpc/perf: Check that events only include valid bits on Power8 commit d8bec4c9cd58f6d3679e09b7293851fb92ad7557 upstream. A mistake we have made in the past is that we pull out the fields we need from the event code, but don't check that there are no unknown bits set. This means that we can't ever assign meaning to those unknown bits in future. Although we have once again failed to do this at release, it is still early days for Power8 so I think we can still slip this in and get away with it. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/power8-pmu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f7d1c4fff303..84cdc6d892e3 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -109,6 +109,16 @@ #define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) #define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ +#define EVENT_VALID_MASK \ + ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_PSEL_MASK) + /* MMCRA IFM bits - POWER8 */ #define POWER8_MMCRA_IFM1 0x0000000040000000UL #define POWER8_MMCRA_IFM2 0x0000000080000000UL @@ -212,6 +222,9 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long mask = value = 0; + if (event & ~EVENT_VALID_MASK) + return -1; + pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; From 8cf3478f19143d4e2ece4947603bff7dbd360a36 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:11 +1000 Subject: [PATCH 0110/1368] powerpc/perf: Rework disable logic in pmu_disable() commit 378a6ee99e4a431ec84e4e61893445c041c93007 upstream. In pmu_disable() we disable the PMU by setting the FC (Freeze Counters) bit in MMCR0. In order to do this we have to read/modify/write MMCR0. It's possible that we read a value from MMCR0 which has PMAO (PMU Alert Occurred) set. When we write that value back it will cause an interrupt to occur. We will then end up in the PMU interrupt handler even though we are supposed to have just disabled the PMU. We can avoid this by making sure we never write PMAO back. We should not lose interrupts because when the PMU is re-enabled the overflowed values will cause another interrupt. We also reorder the clearing of SAMPLE_ENABLE so that is done after the PMU is frozen. Otherwise there is a small window between the clearing of SAMPLE_ENABLE and the setting of FC where we could take an interrupt and incorrectly see SAMPLE_ENABLE not set. This would for example change the logic in perf_read_regs(). Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 29c6482890c8..1ab306815ff3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -75,6 +75,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_FCHV 0 #define MMCR0_PMCjCE MMCR0_PMCnCE +#define MMCR0_PMAO 0 #define SPRN_MMCRA SPRN_MMCR2 #define MMCRA_SAMPLE_ENABLE 0 @@ -852,7 +853,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; - unsigned long flags; + unsigned long flags, val; if (!ppmu) return; @@ -860,9 +861,6 @@ static void power_pmu_disable(struct pmu *pmu) cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) { - cpuhw->disabled = 1; - cpuhw->n_added = 0; - /* * Check if we ever enabled the PMU on this cpu. */ @@ -871,6 +869,21 @@ static void power_pmu_disable(struct pmu *pmu) cpuhw->pmcs_enabled = 1; } + /* + * Set the 'freeze counters' bit, clear PMAO. + */ + val = mfspr(SPRN_MMCR0); + val |= MMCR0_FC; + val &= ~MMCR0_PMAO; + + /* + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the events etc. + * before we return. + */ + write_mmcr0(cpuhw, val); + mb(); + /* * Disable instruction sampling if it was enabled */ @@ -880,14 +893,8 @@ static void power_pmu_disable(struct pmu *pmu) mb(); } - /* - * Set the 'freeze counters' bit. - * The barrier is to make sure the mtspr has been - * executed and the PMU has frozen the events - * before we return. - */ - write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); - mb(); + cpuhw->disabled = 1; + cpuhw->n_added = 0; } local_irq_restore(flags); } From 8f6c5b6c1264c6cec9b04848d0744aac0853d641 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:12 +1000 Subject: [PATCH 0111/1368] powerpc/perf: Freeze PMC5/6 if we're not using them commit 7a7a41f9d5b28ac3a916b057a7d3cd3f435ee9a6 upstream. On Power8 we can freeze PMC5 and 6 if we're not using them. Normally they run all the time. As noticed by Anshuman, we should unfreeze them when we disable the PMU as there are legacy tools which expect them to run all the time. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/perf/core-book3s.c | 5 +++-- arch/powerpc/perf/power8-pmu.c | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 4a9e408644fe..362142b69d5b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -626,6 +626,7 @@ #define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ #define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */ #define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */ +#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ #define MMCR0_FCTI 0x00000008UL /* freeze counters in tags inactive mode */ #define MMCR0_FCTA 0x00000004UL /* freeze counters in tags active mode */ #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 1ab306815ff3..3d566ee896e2 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -75,6 +75,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_FCHV 0 #define MMCR0_PMCjCE MMCR0_PMCnCE +#define MMCR0_FC56 0 #define MMCR0_PMAO 0 #define SPRN_MMCRA SPRN_MMCR2 @@ -870,11 +871,11 @@ static void power_pmu_disable(struct pmu *pmu) } /* - * Set the 'freeze counters' bit, clear PMAO. + * Set the 'freeze counters' bit, clear PMAO/FC56. */ val = mfspr(SPRN_MMCR0); val |= MMCR0_FC; - val &= ~MMCR0_PMAO; + val &= ~(MMCR0_PMAO | MMCR0_FC56); /* * The barrier is to make sure the mtspr has been diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 84cdc6d892e3..d59f5b2d4c2f 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -391,6 +391,10 @@ static int power8_compute_mmcr(u64 event[], int n_ev, if (pmc_inuse & 0x7c) mmcr[0] |= MMCR0_PMCjCE; + /* If we're not using PMC 5 or 6, freeze them */ + if (!(pmc_inuse & 0x60)) + mmcr[0] |= MMCR0_FC56; + mmcr[1] = mmcr1; mmcr[2] = mmcra; From b26eb91187fdfa37b304b30003f799899e6373c9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:13 +1000 Subject: [PATCH 0112/1368] powerpc/perf: Use existing out label in power_pmu_enable() commit 0a48843d6c5114cfa4a9540ee4d6af87628cec01 upstream. In power_pmu_enable() we can use the existing out label to reduce the number of return paths. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3d566ee896e2..af4b4b1a691f 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -919,12 +919,13 @@ static void power_pmu_enable(struct pmu *pmu) if (!ppmu) return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); - if (!cpuhw->disabled) { - local_irq_restore(flags); - return; - } + if (!cpuhw->disabled) + goto out; + cpuhw->disabled = 0; /* From 382b9efb7be2d07e51f0a491ecd0a985e1ceb86e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Jun 2013 18:15:14 +1000 Subject: [PATCH 0113/1368] powerpc/perf: Don't enable if we have zero events commit 4ea355b5368bde0574c12430df53334c4be3bdcf upstream. In power_pmu_enable() we still enable the PMU even if we have zero events. This should have no effect but doesn't make much sense. Instead just return after telling the hypervisor that we are not using the PMCs. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index af4b4b1a691f..d3ee2e50a3a6 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -926,6 +926,11 @@ static void power_pmu_enable(struct pmu *pmu) if (!cpuhw->disabled) goto out; + if (cpuhw->n_events == 0) { + ppc_set_pmu_inuse(0); + goto out; + } + cpuhw->disabled = 0; /* @@ -937,8 +942,6 @@ static void power_pmu_enable(struct pmu *pmu) if (!cpuhw->n_added) { mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - if (cpuhw->n_events == 0) - ppc_set_pmu_inuse(0); goto out_enable; } From 9d3ce4af3be0235d4cf41ea9fd774205a32e58a2 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Fri, 12 Jul 2013 03:45:37 +0530 Subject: [PATCH 0114/1368] cpufreq: Revert commit a66b2e to fix suspend/resume regression commit aae760ed21cd690fe8a6db9f3a177ad55d7e12ab upstream. commit a66b2e (cpufreq: Preserve sysfs files across suspend/resume) has unfortunately caused several things in the cpufreq subsystem to break subtly after a suspend/resume cycle. The intention of that patch was to retain the file permissions of the cpufreq related sysfs files across suspend/resume. To achieve that, the commit completely removed the calls to cpufreq_add_dev() and __cpufreq_remove_dev() during suspend/resume transitions. But the problem is that those functions do 2 kinds of things: 1. Low-level initialization/tear-down that are critical to the correct functioning of cpufreq-core. 2. Kobject and sysfs related initialization/teardown. Ideally we should have reorganized the code to cleanly separate these two responsibilities, and skipped only the sysfs related parts during suspend/resume. Since we skipped the entire callbacks instead (which also included some CPU and cpufreq-specific critical components), cpufreq subsystem started behaving erratically after suspend/resume. So revert the commit to fix the regression. We'll revisit and address the original goal of that commit separately, since it involves quite a bit of careful code reorganization and appears to be non-trivial. (While reverting the commit, note that another commit f51e1eb (cpufreq: Fix cpufreq regression after suspend/resume) already reverted part of the original set of changes. So revert only the remaining ones). Signed-off-by: Srivatsa S. Bhat Acked-by: Viresh Kumar Tested-by: Paul Bolle Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 4 +++- drivers/cpufreq/cpufreq_stats.c | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2d53f47d1747..178fe7a69056 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1837,13 +1837,15 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, if (dev) { switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpufreq_add_dev(dev, NULL); break; case CPU_DOWN_PREPARE: - case CPU_UP_CANCELED_FROZEN: + case CPU_DOWN_PREPARE_FROZEN: __cpufreq_remove_dev(dev, NULL); break; case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: cpufreq_add_dev(dev, NULL); break; } diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 591b6fb641b2..bfd6273fd873 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -353,13 +353,11 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb, cpufreq_update_policy(cpu); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: cpufreq_stats_free_sysfs(cpu); break; case CPU_DEAD: - cpufreq_stats_free_table(cpu); - break; - case CPU_UP_CANCELED_FROZEN: - cpufreq_stats_free_sysfs(cpu); + case CPU_DEAD_FROZEN: cpufreq_stats_free_table(cpu); break; } From 916f4dbc2a827677212a3bf3ffcc22745fa6e0b1 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 16 Jul 2013 22:46:48 +0200 Subject: [PATCH 0115/1368] cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression commit e8d05276f236ee6435e78411f62be9714e0b9377 upstream. commit 2f7021a8 "cpufreq: protect 'policy->cpus' from offlining during __gov_queue_work()" caused a regression in CPU hotplug, because it lead to a deadlock between cpufreq governor worker thread and the CPU hotplug writer task. Lockdep splat corresponding to this deadlock is shown below: [ 60.277396] ====================================================== [ 60.277400] [ INFO: possible circular locking dependency detected ] [ 60.277407] 3.10.0-rc7-dbg-01385-g241fd04-dirty #1744 Not tainted [ 60.277411] ------------------------------------------------------- [ 60.277417] bash/2225 is trying to acquire lock: [ 60.277422] ((&(&j_cdbs->work)->work)){+.+...}, at: [] flush_work+0x5/0x280 [ 60.277444] but task is already holding lock: [ 60.277449] (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2b/0x60 [ 60.277465] which lock already depends on the new lock. [ 60.277472] the existing dependency chain (in reverse order) is: [ 60.277477] -> #2 (cpu_hotplug.lock){+.+.+.}: [ 60.277490] [] lock_acquire+0xa4/0x200 [ 60.277503] [] mutex_lock_nested+0x67/0x410 [ 60.277514] [] get_online_cpus+0x3c/0x60 [ 60.277522] [] gov_queue_work+0x2a/0xb0 [ 60.277532] [] cs_dbs_timer+0xc1/0xe0 [ 60.277543] [] process_one_work+0x1cd/0x6a0 [ 60.277552] [] worker_thread+0x121/0x3a0 [ 60.277560] [] kthread+0xdb/0xe0 [ 60.277569] [] ret_from_fork+0x7c/0xb0 [ 60.277580] -> #1 (&j_cdbs->timer_mutex){+.+...}: [ 60.277592] [] lock_acquire+0xa4/0x200 [ 60.277600] [] mutex_lock_nested+0x67/0x410 [ 60.277608] [] cs_dbs_timer+0x8d/0xe0 [ 60.277616] [] process_one_work+0x1cd/0x6a0 [ 60.277624] [] worker_thread+0x121/0x3a0 [ 60.277633] [] kthread+0xdb/0xe0 [ 60.277640] [] ret_from_fork+0x7c/0xb0 [ 60.277649] -> #0 ((&(&j_cdbs->work)->work)){+.+...}: [ 60.277661] [] __lock_acquire+0x1766/0x1d30 [ 60.277669] [] lock_acquire+0xa4/0x200 [ 60.277677] [] flush_work+0x3d/0x280 [ 60.277685] [] __cancel_work_timer+0x8a/0x120 [ 60.277693] [] cancel_delayed_work_sync+0x13/0x20 [ 60.277701] [] cpufreq_governor_dbs+0x529/0x6f0 [ 60.277709] [] cs_cpufreq_governor_dbs+0x17/0x20 [ 60.277719] [] __cpufreq_governor+0x48/0x100 [ 60.277728] [] __cpufreq_remove_dev.isra.14+0x80/0x3c0 [ 60.277737] [] cpufreq_cpu_callback+0x38/0x4c [ 60.277747] [] notifier_call_chain+0x5d/0x110 [ 60.277759] [] __raw_notifier_call_chain+0xe/0x10 [ 60.277768] [] _cpu_down+0x88/0x330 [ 60.277779] [] cpu_down+0x36/0x50 [ 60.277788] [] store_online+0x98/0xd0 [ 60.277796] [] dev_attr_store+0x18/0x30 [ 60.277806] [] sysfs_write_file+0xdb/0x150 [ 60.277818] [] vfs_write+0xbd/0x1f0 [ 60.277826] [] SyS_write+0x4c/0xa0 [ 60.277834] [] tracesys+0xd0/0xd5 [ 60.277842] other info that might help us debug this: [ 60.277848] Chain exists of: (&(&j_cdbs->work)->work) --> &j_cdbs->timer_mutex --> cpu_hotplug.lock [ 60.277864] Possible unsafe locking scenario: [ 60.277869] CPU0 CPU1 [ 60.277873] ---- ---- [ 60.277877] lock(cpu_hotplug.lock); [ 60.277885] lock(&j_cdbs->timer_mutex); [ 60.277892] lock(cpu_hotplug.lock); [ 60.277900] lock((&(&j_cdbs->work)->work)); [ 60.277907] *** DEADLOCK *** [ 60.277915] 6 locks held by bash/2225: [ 60.277919] #0: (sb_writers#6){.+.+.+}, at: [] vfs_write+0x1c3/0x1f0 [ 60.277937] #1: (&buffer->mutex){+.+.+.}, at: [] sysfs_write_file+0x3c/0x150 [ 60.277954] #2: (s_active#61){.+.+.+}, at: [] sysfs_write_file+0xc3/0x150 [ 60.277972] #3: (x86_cpu_hotplug_driver_mutex){+.+...}, at: [] cpu_hotplug_driver_lock+0x17/0x20 [ 60.277990] #4: (cpu_add_remove_lock){+.+.+.}, at: [] cpu_down+0x22/0x50 [ 60.278007] #5: (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2b/0x60 [ 60.278023] stack backtrace: [ 60.278031] CPU: 3 PID: 2225 Comm: bash Not tainted 3.10.0-rc7-dbg-01385-g241fd04-dirty #1744 [ 60.278037] Hardware name: Acer Aspire 5741G /Aspire 5741G , BIOS V1.20 02/08/2011 [ 60.278042] ffffffff8204e110 ffff88014df6b9f8 ffffffff815b3d90 ffff88014df6ba38 [ 60.278055] ffffffff815b0a8d ffff880150ed3f60 ffff880150ed4770 3871c4002c8980b2 [ 60.278068] ffff880150ed4748 ffff880150ed4770 ffff880150ed3f60 ffff88014df6bb00 [ 60.278081] Call Trace: [ 60.278091] [] dump_stack+0x19/0x1b [ 60.278101] [] print_circular_bug+0x2b6/0x2c5 [ 60.278111] [] __lock_acquire+0x1766/0x1d30 [ 60.278123] [] ? __kernel_text_address+0x58/0x80 [ 60.278134] [] lock_acquire+0xa4/0x200 [ 60.278142] [] ? flush_work+0x5/0x280 [ 60.278151] [] flush_work+0x3d/0x280 [ 60.278159] [] ? flush_work+0x5/0x280 [ 60.278169] [] ? mark_held_locks+0x94/0x140 [ 60.278178] [] ? __cancel_work_timer+0x77/0x120 [ 60.278188] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 60.278196] [] __cancel_work_timer+0x8a/0x120 [ 60.278206] [] cancel_delayed_work_sync+0x13/0x20 [ 60.278214] [] cpufreq_governor_dbs+0x529/0x6f0 [ 60.278225] [] cs_cpufreq_governor_dbs+0x17/0x20 [ 60.278234] [] __cpufreq_governor+0x48/0x100 [ 60.278244] [] __cpufreq_remove_dev.isra.14+0x80/0x3c0 [ 60.278255] [] cpufreq_cpu_callback+0x38/0x4c [ 60.278265] [] notifier_call_chain+0x5d/0x110 [ 60.278275] [] __raw_notifier_call_chain+0xe/0x10 [ 60.278284] [] _cpu_down+0x88/0x330 [ 60.278292] [] ? cpu_hotplug_driver_lock+0x17/0x20 [ 60.278302] [] cpu_down+0x36/0x50 [ 60.278311] [] store_online+0x98/0xd0 [ 60.278320] [] dev_attr_store+0x18/0x30 [ 60.278329] [] sysfs_write_file+0xdb/0x150 [ 60.278337] [] vfs_write+0xbd/0x1f0 [ 60.278347] [] ? fget_light+0x320/0x4b0 [ 60.278355] [] SyS_write+0x4c/0xa0 [ 60.278364] [] tracesys+0xd0/0xd5 [ 60.280582] smpboot: CPU 1 is now offline The intention of that commit was to avoid warnings during CPU hotplug, which indicated that offline CPUs were getting IPIs from the cpufreq governor's work items. But the real root-cause of that problem was commit a66b2e5 (cpufreq: Preserve sysfs files across suspend/resume) because it totally skipped all the cpufreq callbacks during CPU hotplug in the suspend/resume path, and hence it never actually shut down the cpufreq governor's worker threads during CPU offline in the suspend/resume path. Reflecting back, the reason why we never suspected that commit as the root-cause earlier, was that the original issue was reported with just the halt command and nobody had brought in suspend/resume to the equation. The reason for _that_ in turn, as it turns out, is that earlier halt/shutdown was being done by disabling non-boot CPUs while tasks were frozen, just like suspend/resume.... but commit cf7df378a (reboot: migrate shutdown/reboot to boot cpu) which came somewhere along that very same time changed that logic: shutdown/halt no longer takes CPUs offline. Thus, the test-cases for reproducing the bug were vastly different and thus we went totally off the trail. Overall, it was one hell of a confusion with so many commits affecting each other and also affecting the symptoms of the problems in subtle ways. Finally, now since the original problematic commit (a66b2e5) has been completely reverted, revert this intermediate fix too (2f7021a8), to fix the CPU hotplug deadlock. Phew! Reported-by: Sergey Senozhatsky Reported-by: Bartlomiej Zolnierkiewicz Signed-off-by: Srivatsa S. Bhat Tested-by: Peter Wu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_governor.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index dc9b72e25c1a..5af40ad82d23 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "cpufreq_governor.h" @@ -181,10 +180,8 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, if (!all_cpus) { __gov_queue_work(smp_processor_id(), dbs_data, delay); } else { - get_online_cpus(); for_each_cpu(i, policy->cpus) __gov_queue_work(i, dbs_data, delay); - put_online_cpus(); } } EXPORT_SYMBOL_GPL(gov_queue_work); From 88c0a794e5d9bcc29926e636cd1d6eb5c9dcb235 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Jul 2013 15:37:12 +0100 Subject: [PATCH 0116/1368] arm64: mm: don't treat user cache maintenance faults as writes commit db6f41063cbdb58b14846e600e6bc3f4e4c2e888 upstream. On arm64, cache maintenance faults appear as data aborts with the CM bit set in the ESR. The WnR bit, usually used to distinguish between faulting loads and stores, always reads as 1 and (slightly confusingly) the instructions are treated as reads by the architecture. This patch fixes our fault handling code to treat cache maintenance faults in the same way as loads. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/fault.c | 46 +++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1426468b77f3..f51d669c8ebd 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -152,25 +152,8 @@ void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) #define ESR_CM (1 << 8) #define ESR_LNX_EXEC (1 << 24) -/* - * Check that the permissions on the VMA allow for the fault which occurred. - * If we encountered a write fault, we must have write permission, otherwise - * we allow any permission. - */ -static inline bool access_error(unsigned int esr, struct vm_area_struct *vma) -{ - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; - - if (esr & ESR_WRITE) - mask = VM_WRITE; - if (esr & ESR_LNX_EXEC) - mask = VM_EXEC; - - return vma->vm_flags & mask ? false : true; -} - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int esr, unsigned int flags, + unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) { struct vm_area_struct *vma; @@ -188,12 +171,17 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, * it. */ good_area: - if (access_error(esr, vma)) { + /* + * Check that the permissions on the VMA allow for the fault which + * occurred. If we encountered a write or exec fault, we must have + * appropriate permissions, otherwise we allow any permission. + */ + if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags); check_stack: if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) @@ -208,9 +196,15 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - bool write = (esr & ESR_WRITE) && !(esr & ESR_CM); - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (esr & ESR_LNX_EXEC) { + vm_flags = VM_EXEC; + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { + vm_flags = VM_WRITE; + mm_flags |= FAULT_FLAG_WRITE; + } tsk = current; mm = tsk->mm; @@ -248,7 +242,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, #endif } - fault = __do_page_fault(mm, addr, esr, flags, tsk); + fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); /* * If we need to retry but a fatal signal is pending, handle the @@ -265,7 +259,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, */ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, @@ -280,7 +274,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of * starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; goto retry; } } From 8b68eefae0dbd8cdddb54af08c2b424f6da6f6ba Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 1 Jul 2013 15:20:00 +0100 Subject: [PATCH 0117/1368] iio: Fix iio_channel_has_info commit 1c297a66654a3295ae87e2b7f3724d214eb2b5ec upstream. Since the info_mask split, iio_channel_has_info() is not working correctly. info_mask_separate and info_mask_shared_by_type, it is not possible to compare them directly with the iio_chan_info_enum enum. Correct that bit using the BIT() macro. Signed-off-by: Alexandre Belloni Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 8d171f427632..3d35b7023591 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -211,8 +211,8 @@ struct iio_chan_spec { static inline bool iio_channel_has_info(const struct iio_chan_spec *chan, enum iio_chan_info_enum type) { - return (chan->info_mask_separate & type) | - (chan->info_mask_shared_by_type & type); + return (chan->info_mask_separate & BIT(type)) | + (chan->info_mask_shared_by_type & BIT(type)); } #define IIO_ST(si, rb, sb, sh) \ From 79d295ce5b9c322ebb40b2d7fdb9da4eee83f893 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 1 Jul 2013 17:40:00 +0100 Subject: [PATCH 0118/1368] iio: inkern: fix iio_convert_raw_to_processed_unlocked commit f91d1b63a4e096d3023aaaafec9d9d3aff25997f upstream. When reading IIO_CHAN_INFO_OFFSET, the return value of iio_channel_read() for success will be IIO_VAL*, checking for 0 is not correct. Without this fix the offset applied by iio drivers will be ignored when converting a raw value to one in appropriate base units (e.g mV) in a IIO client drivers that use iio_convert_raw_to_processed including iio-hwmon. Signed-off-by: Alexandre Belloni Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/inkern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 98ddc323add0..0cf5f8e06cfc 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -451,7 +451,7 @@ static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan, int ret; ret = iio_channel_read(chan, &offset, NULL, IIO_CHAN_INFO_OFFSET); - if (ret == 0) + if (ret >= 0) raw64 += offset; scale_type = iio_channel_read(chan, &scale_val, &scale_val2, From c38217e1b806155493c9324345f0f1e6561e55db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Jul 2013 12:54:22 +0200 Subject: [PATCH 0119/1368] ALSA: hda - Fix EAPD vmaster hook for AD1884 & co commit 8f0b3b7e222383a21f7d58bd97d5552b3a5dbced upstream. ad1884_fixup_hp_eapd() tries to set the NID for controlling the speaker EAPD from the pin configuration. But the current code can't work expectedly since it sets spec->eapd_nid before calling the generic parser where the autocfg pins are set up. This patch changes the function to set spec->eapd_nid after the generic parser call while it sets vmaster hook unconditionally. The spec->eapd_nid check is moved in the hook function itself instead. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_analog.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 977b0d878dae..d97f0d61a15b 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -2112,6 +2112,9 @@ static void ad_vmaster_eapd_hook(void *private_data, int enabled) { struct hda_codec *codec = private_data; struct ad198x_spec *spec = codec->spec; + + if (!spec->eapd_nid) + return; snd_hda_codec_update_cache(codec, spec->eapd_nid, 0, AC_VERB_SET_EAPD_BTLENABLE, enabled ? 0x02 : 0x00); @@ -3601,13 +3604,16 @@ static void ad1884_fixup_hp_eapd(struct hda_codec *codec, { struct ad198x_spec *spec = codec->spec; - if (action == HDA_FIXUP_ACT_PRE_PROBE) { + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gen.vmaster_mute.hook = ad_vmaster_eapd_hook; + break; + case HDA_FIXUP_ACT_PROBE: if (spec->gen.autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) spec->eapd_nid = spec->gen.autocfg.line_out_pins[0]; else spec->eapd_nid = spec->gen.autocfg.speaker_pins[0]; - if (spec->eapd_nid) - spec->gen.vmaster_mute.hook = ad_vmaster_eapd_hook; + break; } } From 1d33b1e178d4d298150227227e079dd883f06f57 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Jun 2013 07:55:02 +0200 Subject: [PATCH 0120/1368] ALSA: hda - Fix return value of snd_hda_check_power_state() commit 06ec56d3c60238f27bfa50d245592fccc1b4ef0f upstream. The refactoring by commit 9040d102 introduced the new function snd_hda_check_power_state(). This function is supposed to return true if the state already reached to the target state, but it actually returns false for that. An utterly stupid typo while copy & paste. Fortunately this didn't influence on much behavior because powering up AFG usually powers up the child widgets, too. But the finer power control must have been broken by this bug. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_local.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index e0bf7534fa1f..29ed7d9b27e4 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -667,7 +667,7 @@ snd_hda_check_power_state(struct hda_codec *codec, hda_nid_t nid, if (state & AC_PWRST_ERROR) return true; state = (state >> 4) & 0x0f; - return (state != target_state); + return (state == target_state); } unsigned int snd_hda_codec_eapd_power_filter(struct hda_codec *codec, From 27035caa37556524ef30311438cefaf1a80a5364 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Jun 2013 16:14:22 +0200 Subject: [PATCH 0121/1368] ALSA: hda - Cache the MUX selection for generic HDMI commit bddee96b5d0db869f47b195fe48c614ca824203c upstream. When a selection to a converter MUX is changed in hdmi_pcm_open(), it should be cached so that the given connection can be restored properly at PM resume. We need just to replace the corresponding snd_hda_codec_write() call with snd_hda_codec_write_cache(). Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index e12f7a030c58..c018fb71cafb 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1146,7 +1146,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo, per_cvt->assigned = 1; hinfo->nid = per_cvt->cvt_nid; - snd_hda_codec_write(codec, per_pin->pin_nid, 0, + snd_hda_codec_write_cache(codec, per_pin->pin_nid, 0, AC_VERB_SET_CONNECT_SEL, mux_idx); snd_hda_spdif_ctls_assign(codec, pin_idx, per_cvt->cvt_nid); From 1f563ec40d24e0af579911dfbd6ac8856889bb89 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 19 Jun 2013 07:54:09 +0200 Subject: [PATCH 0122/1368] ALSA: hda - Fix missing Mic Boost controls for VIA codecs commit d045c5dc43d829df9f067d363c3b42b14dacf434 upstream. Some VIA codecs like VT1708S have Mic boost amps in the mic pins but they aren't exposed in the capability bits. In the past driver code, we override the pin caps and create mic boost controls forcibly. While transition to the generic parser, we lost the mic boost controls although the pin caps are still overridden, because the generic parser code checks the widget caps, too. So this patch adds a new helper function to allow the override of the given widget capability bits, and makes VIA codecs driver to add the missing input-amp capability bit. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59861 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_local.h | 8 ++++++++ sound/pci/hda/patch_via.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 29ed7d9b27e4..2e7493ef8ee0 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -562,6 +562,14 @@ static inline unsigned int get_wcaps_channels(u32 wcaps) return chans; } +static inline void snd_hda_override_wcaps(struct hda_codec *codec, + hda_nid_t nid, u32 val) +{ + if (nid >= codec->start_nid && + nid < codec->start_nid + codec->num_nodes) + codec->wcaps[nid - codec->start_nid] = val; +} + u32 query_amp_caps(struct hda_codec *codec, hda_nid_t nid, int direction); int snd_hda_override_amp_caps(struct hda_codec *codec, hda_nid_t nid, int dir, unsigned int caps); diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index e5245544eb52..aed19c3f8466 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -910,6 +910,8 @@ static const struct hda_verb vt1708S_init_verbs[] = { static void override_mic_boost(struct hda_codec *codec, hda_nid_t pin, int offset, int num_steps, int step_size) { + snd_hda_override_wcaps(codec, pin, + get_wcaps(codec, pin) | AC_WCAP_IN_AMP); snd_hda_override_amp_caps(codec, pin, HDA_INPUT, (offset << AC_AMPCAP_OFFSET_SHIFT) | (num_steps << AC_AMPCAP_NUM_STEPS_SHIFT) | From 3a72cf75a1e32aedfd68b4467e2677bf1c0eb5d4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 28 Jun 2013 11:51:32 +0200 Subject: [PATCH 0123/1368] ALSA: hda - Fix the max length of control name in generic parser commit 0c055b3413868227f2e85701c4e6938c9581f0e2 upstream. add_control_with_pfx() in hda_generic.c assumes a shorter name string for the control element, and this resulted in the truncation of the long but valid string like "Headphone Surround Switch" in the middle. This patch aligns the max size to the actual limit of snd_ctl_elem_id, 44. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 4b1524a861f3..24400cffb8f3 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -840,7 +840,7 @@ static int add_control_with_pfx(struct hda_gen_spec *spec, int type, const char *pfx, const char *dir, const char *sfx, int cidx, unsigned long val) { - char name[32]; + char name[44]; snprintf(name, sizeof(name), "%s %s %s", pfx, dir, sfx); if (!add_control(spec, type, name, cidx, val)) return -ENOMEM; From 782e9cac197a38391820b425a4645e5bda7d121b Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Fri, 12 Jul 2013 11:01:37 -0700 Subject: [PATCH 0124/1368] ALSA: hda - Add new GPU codec ID to snd-hda commit d52392b1a80458c0510810789c7db4a39b88022a upstream. Vendor ID 0x10de0060 is used by a yet-to-be-named GPU chip. Reviewed-by: Andy Ritger Signed-off-by: Aaron Plattner Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index c018fb71cafb..496d7f21d3e5 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2536,6 +2536,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = { { .id = 0x10de0043, .name = "GPU 43 HDMI/DP", .patch = patch_generic_hdmi }, { .id = 0x10de0044, .name = "GPU 44 HDMI/DP", .patch = patch_generic_hdmi }, { .id = 0x10de0051, .name = "GPU 51 HDMI/DP", .patch = patch_generic_hdmi }, +{ .id = 0x10de0060, .name = "GPU 60 HDMI/DP", .patch = patch_generic_hdmi }, { .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x11069f80, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, @@ -2588,6 +2589,7 @@ MODULE_ALIAS("snd-hda-codec-id:10de0042"); MODULE_ALIAS("snd-hda-codec-id:10de0043"); MODULE_ALIAS("snd-hda-codec-id:10de0044"); MODULE_ALIAS("snd-hda-codec-id:10de0051"); +MODULE_ALIAS("snd-hda-codec-id:10de0060"); MODULE_ALIAS("snd-hda-codec-id:10de0067"); MODULE_ALIAS("snd-hda-codec-id:10de8001"); MODULE_ALIAS("snd-hda-codec-id:11069f80"); From 409972c1c8678bb0c28eb0417054e2794dd59ec9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Jul 2013 12:17:49 +0200 Subject: [PATCH 0125/1368] ALSA: seq-oss: Initialize MIDI clients asynchronously commit 256ca9c3ad5013ff8a8f165e5a82fab437628c8e upstream. We've got bug reports that the module loading stuck on Debian system with 3.10 kernel. The debugging session revealed that the initial registration of OSS sequencer clients stuck at module loading time, which involves again with request_module() at the init phase. This is triggered only by special --install stuff Debian is using, but it's still not good to have such loops. As a workaround, call the registration part asynchronously. This is a better approach irrespective of the hang fix, in anyway. Reported-and-tested-by: Philipp Matthias Hahn Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/oss/seq_oss_init.c | 16 +++++++++++++--- sound/core/seq/oss/seq_oss_midi.c | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c index e3cb46fef2c7..b3f39b5ed742 100644 --- a/sound/core/seq/oss/seq_oss_init.c +++ b/sound/core/seq/oss/seq_oss_init.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * common variables @@ -60,6 +61,14 @@ static void free_devinfo(void *private); #define call_ctl(type,rec) snd_seq_kernel_client_ctl(system_client, type, rec) +/* call snd_seq_oss_midi_lookup_ports() asynchronously */ +static void async_call_lookup_ports(struct work_struct *work) +{ + snd_seq_oss_midi_lookup_ports(system_client); +} + +static DECLARE_WORK(async_lookup_work, async_call_lookup_ports); + /* * create sequencer client for OSS sequencer */ @@ -85,9 +94,6 @@ snd_seq_oss_create_client(void) system_client = rc; debug_printk(("new client = %d\n", rc)); - /* look up midi devices */ - snd_seq_oss_midi_lookup_ports(system_client); - /* create annoucement receiver port */ memset(port, 0, sizeof(*port)); strcpy(port->name, "Receiver"); @@ -115,6 +121,9 @@ snd_seq_oss_create_client(void) } rc = 0; + /* look up midi devices */ + schedule_work(&async_lookup_work); + __error: kfree(port); return rc; @@ -160,6 +169,7 @@ receive_announce(struct snd_seq_event *ev, int direct, void *private, int atomic int snd_seq_oss_delete_client(void) { + cancel_work_sync(&async_lookup_work); if (system_client >= 0) snd_seq_delete_kernel_client(system_client); diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index 677dc84590c7..862d84893ee8 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -72,7 +72,7 @@ static int send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, * look up the existing ports * this looks a very exhausting job. */ -int __init +int snd_seq_oss_midi_lookup_ports(int client) { struct snd_seq_client_info *clinfo; From 2bc2f7d622af45d07980a83586c95c7530e4f6cc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:57:55 +0200 Subject: [PATCH 0126/1368] ALSA: 6fire: Fix unlocked snd_pcm_stop() call commit 5b9ab3f7324a1b94a5a5a76d44cf92dfeb3b5e80 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/6fire/pcm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c index 40dd50a80f55..8221ff2f209f 100644 --- a/sound/usb/6fire/pcm.c +++ b/sound/usb/6fire/pcm.c @@ -641,17 +641,25 @@ int usb6fire_pcm_init(struct sfire_chip *chip) void usb6fire_pcm_abort(struct sfire_chip *chip) { struct pcm_runtime *rt = chip->pcm; + unsigned long flags; int i; if (rt) { rt->panic = true; - if (rt->playback.instance) + if (rt->playback.instance) { + snd_pcm_stream_lock_irqsave(rt->playback.instance, flags); snd_pcm_stop(rt->playback.instance, SNDRV_PCM_STATE_XRUN); - if (rt->capture.instance) + snd_pcm_stream_unlock_irqrestore(rt->playback.instance, flags); + } + + if (rt->capture.instance) { + snd_pcm_stream_lock_irqsave(rt->capture.instance, flags); snd_pcm_stop(rt->capture.instance, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(rt->capture.instance, flags); + } for (i = 0; i < PCM_N_URBS; i++) { usb_poison_urb(&rt->in_urbs[i].instance); From c9c8dd7d3cacecd3a3f0e6e49c0a0f8f93d3d04e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:58:25 +0200 Subject: [PATCH 0127/1368] ALSA: ua101: Fix unlocked snd_pcm_stop() call commit 9538aa46c2427d6782aa10036c4da4c541605e0e upstream. snd_pcm_stop() must be called in the PCM substream lock context. Acked-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/misc/ua101.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index 6ad617b94732..76d832908fe0 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -613,14 +613,24 @@ static int start_usb_playback(struct ua101 *ua) static void abort_alsa_capture(struct ua101 *ua) { - if (test_bit(ALSA_CAPTURE_RUNNING, &ua->states)) + unsigned long flags; + + if (test_bit(ALSA_CAPTURE_RUNNING, &ua->states)) { + snd_pcm_stream_lock_irqsave(ua->capture.substream, flags); snd_pcm_stop(ua->capture.substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(ua->capture.substream, flags); + } } static void abort_alsa_playback(struct ua101 *ua) { - if (test_bit(ALSA_PLAYBACK_RUNNING, &ua->states)) + unsigned long flags; + + if (test_bit(ALSA_PLAYBACK_RUNNING, &ua->states)) { + snd_pcm_stream_lock_irqsave(ua->playback.substream, flags); snd_pcm_stop(ua->playback.substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(ua->playback.substream, flags); + } } static int set_stream_hw(struct ua101 *ua, struct snd_pcm_substream *substream, From 2306396c952129dcf127d47c0383ae28c7288ed0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:59:33 +0200 Subject: [PATCH 0128/1368] ALSA: pxa2xx: Fix unlocked snd_pcm_stop() call commit 46f6c1aaf790be9ea3c8ddfc8f235a5f677d08e2 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Acked-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/arm/pxa2xx-pcm-lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/arm/pxa2xx-pcm-lib.c b/sound/arm/pxa2xx-pcm-lib.c index 76e0d5695075..823359ed95e1 100644 --- a/sound/arm/pxa2xx-pcm-lib.c +++ b/sound/arm/pxa2xx-pcm-lib.c @@ -166,7 +166,9 @@ void pxa2xx_pcm_dma_irq(int dma_ch, void *dev_id) } else { printk(KERN_ERR "%s: DMA error on channel %d (DCSR=%#x)\n", rtd->params->name, dma_ch, dcsr); + snd_pcm_stream_lock(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(substream); } } EXPORT_SYMBOL(pxa2xx_pcm_dma_irq); From 8268d1c754169393118ae847cf6741d777e976a4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:56:56 +0200 Subject: [PATCH 0129/1368] ALSA: atiixp: Fix unlocked snd_pcm_stop() call commit cc7282b8d5abbd48c81d1465925d464d9e3eaa8f upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/atiixp.c | 2 ++ sound/pci/atiixp_modem.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c index 6e78c6789858..819430ac6b3b 100644 --- a/sound/pci/atiixp.c +++ b/sound/pci/atiixp.c @@ -689,7 +689,9 @@ static void snd_atiixp_xrun_dma(struct atiixp *chip, struct atiixp_dma *dma) if (! dma->substream || ! dma->running) return; snd_printdd("atiixp: XRUN detected (DMA %d)\n", dma->ops->type); + snd_pcm_stream_lock(dma->substream); snd_pcm_stop(dma->substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(dma->substream); } /* diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c index d0bec7ba3b0d..57f41820263f 100644 --- a/sound/pci/atiixp_modem.c +++ b/sound/pci/atiixp_modem.c @@ -638,7 +638,9 @@ static void snd_atiixp_xrun_dma(struct atiixp_modem *chip, if (! dma->substream || ! dma->running) return; snd_printdd("atiixp-modem: XRUN detected (DMA %d)\n", dma->ops->type); + snd_pcm_stream_lock(dma->substream); snd_pcm_stop(dma->substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(dma->substream); } /* From 4b06cd616f646a4257b39a1cc9a10410e7c90088 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:55:57 +0200 Subject: [PATCH 0130/1368] ALSA: asihpi: Fix unlocked snd_pcm_stop() call commit 60478295d6876619f8f47f6d1a5c25eaade69ee3 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/asihpi/asihpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c index fbc17203613c..a471d821c608 100644 --- a/sound/pci/asihpi/asihpi.c +++ b/sound/pci/asihpi/asihpi.c @@ -769,7 +769,10 @@ static void snd_card_asihpi_timer_function(unsigned long data) s->number); ds->drained_count++; if (ds->drained_count > 20) { + unsigned long flags; + snd_pcm_stream_lock_irqsave(s, flags); snd_pcm_stop(s, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(s, flags); continue; } } else { From 1ef08eb2a9a50d9b4dd119bda4f747e1b922a4fc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 17:58:47 +0200 Subject: [PATCH 0131/1368] ALSA: usx2y: Fix unlocked snd_pcm_stop() call commit 5be1efb4c2ed79c3d7c0cbcbecae768377666e84 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/usx2y/usbusx2yaudio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index b37653247ef4..0ce903375eaf 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -273,7 +273,11 @@ static void usX2Y_clients_stop(struct usX2Ydev *usX2Y) struct snd_usX2Y_substream *subs = usX2Y->subs[s]; if (subs) { if (atomic_read(&subs->state) >= state_PRERUNNING) { + unsigned long flags; + + snd_pcm_stream_lock_irqsave(subs->pcm_substream, flags); snd_pcm_stop(subs->pcm_substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(subs->pcm_substream, flags); } for (u = 0; u < NRURBS; u++) { struct urb *urb = subs->urb[u]; From e00354825e75a338ced4f4eb16cf08182c116984 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 22 Jun 2013 16:15:31 -0700 Subject: [PATCH 0132/1368] hwmon: (nct6775) Fix temperature alarm attributes commit b1d2bff6a61140454b9d203519cc686a2e9ef32f upstream. Driver displays wrong alarms for temperature attributes. Turns out that temperature alarm bits are not fixed, but determined by temperature source mapping. To fix the problem, walk through the temperature sources to determine the correct alarm bit associated with a given attribute. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct6775.c | 80 +++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 04638aee9039..2405ab439e80 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -625,6 +625,7 @@ struct nct6775_data { u8 has_fan_min; /* some fans don't have min register */ bool has_fan_div; + u8 num_temp_alarms; /* 2 or 3 */ u8 temp_fixed_num; /* 3 or 6 */ u8 temp_type[NUM_TEMP_FIXED]; s8 temp_offset[NUM_TEMP_FIXED]; @@ -1193,6 +1194,42 @@ show_alarm(struct device *dev, struct device_attribute *attr, char *buf) (unsigned int)((data->alarms >> nr) & 0x01)); } +static int find_temp_source(struct nct6775_data *data, int index, int count) +{ + int source = data->temp_src[index]; + int nr; + + for (nr = 0; nr < count; nr++) { + int src; + + src = nct6775_read_value(data, + data->REG_TEMP_SOURCE[nr]) & 0x1f; + if (src == source) + return nr; + } + return -1; +} + +static ssize_t +show_temp_alarm(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); + struct nct6775_data *data = nct6775_update_device(dev); + unsigned int alarm = 0; + int nr; + + /* + * For temperatures, there is no fixed mapping from registers to alarm + * bits. Alarm bits are determined by the temperature source mapping. + */ + nr = find_temp_source(data, sattr->index, data->num_temp_alarms); + if (nr >= 0) { + int bit = data->ALARM_BITS[nr + TEMP_ALARM_BASE]; + alarm = (data->alarms >> bit) & 0x01; + } + return sprintf(buf, "%u\n", alarm); +} + static SENSOR_DEVICE_ATTR_2(in0_input, S_IRUGO, show_in_reg, NULL, 0, 0); static SENSOR_DEVICE_ATTR_2(in1_input, S_IRUGO, show_in_reg, NULL, 1, 0); static SENSOR_DEVICE_ATTR_2(in2_input, S_IRUGO, show_in_reg, NULL, 2, 0); @@ -1874,22 +1911,18 @@ static struct sensor_device_attribute sda_temp_type[] = { }; static struct sensor_device_attribute sda_temp_alarm[] = { - SENSOR_ATTR(temp1_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE), - SENSOR_ATTR(temp2_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 1), - SENSOR_ATTR(temp3_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 2), - SENSOR_ATTR(temp4_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 3), - SENSOR_ATTR(temp5_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 4), - SENSOR_ATTR(temp6_alarm, S_IRUGO, show_alarm, NULL, - TEMP_ALARM_BASE + 5), + SENSOR_ATTR(temp1_alarm, S_IRUGO, show_temp_alarm, NULL, 0), + SENSOR_ATTR(temp2_alarm, S_IRUGO, show_temp_alarm, NULL, 1), + SENSOR_ATTR(temp3_alarm, S_IRUGO, show_temp_alarm, NULL, 2), + SENSOR_ATTR(temp4_alarm, S_IRUGO, show_temp_alarm, NULL, 3), + SENSOR_ATTR(temp5_alarm, S_IRUGO, show_temp_alarm, NULL, 4), + SENSOR_ATTR(temp6_alarm, S_IRUGO, show_temp_alarm, NULL, 5), + SENSOR_ATTR(temp7_alarm, S_IRUGO, show_temp_alarm, NULL, 6), + SENSOR_ATTR(temp8_alarm, S_IRUGO, show_temp_alarm, NULL, 7), + SENSOR_ATTR(temp9_alarm, S_IRUGO, show_temp_alarm, NULL, 8), + SENSOR_ATTR(temp10_alarm, S_IRUGO, show_temp_alarm, NULL, 9), }; -#define NUM_TEMP_ALARM ARRAY_SIZE(sda_temp_alarm) - static ssize_t show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf) { @@ -3215,13 +3248,11 @@ static void nct6775_device_remove_files(struct device *dev) device_remove_file(dev, &sda_temp_max[i].dev_attr); device_remove_file(dev, &sda_temp_max_hyst[i].dev_attr); device_remove_file(dev, &sda_temp_crit[i].dev_attr); + device_remove_file(dev, &sda_temp_alarm[i].dev_attr); if (!(data->have_temp_fixed & (1 << i))) continue; device_remove_file(dev, &sda_temp_type[i].dev_attr); device_remove_file(dev, &sda_temp_offset[i].dev_attr); - if (i >= NUM_TEMP_ALARM) - continue; - device_remove_file(dev, &sda_temp_alarm[i].dev_attr); } device_remove_file(dev, &sda_caseopen[0].dev_attr); @@ -3419,6 +3450,7 @@ static int nct6775_probe(struct platform_device *pdev) data->auto_pwm_num = 6; data->has_fan_div = true; data->temp_fixed_num = 3; + data->num_temp_alarms = 3; data->ALARM_BITS = NCT6775_ALARM_BITS; @@ -3483,6 +3515,7 @@ static int nct6775_probe(struct platform_device *pdev) data->auto_pwm_num = 4; data->has_fan_div = false; data->temp_fixed_num = 3; + data->num_temp_alarms = 3; data->ALARM_BITS = NCT6776_ALARM_BITS; @@ -3547,6 +3580,7 @@ static int nct6775_probe(struct platform_device *pdev) data->auto_pwm_num = 4; data->has_fan_div = false; data->temp_fixed_num = 6; + data->num_temp_alarms = 2; data->ALARM_BITS = NCT6779_ALARM_BITS; @@ -3897,6 +3931,12 @@ static int nct6775_probe(struct platform_device *pdev) if (err) goto exit_remove; } + if (find_temp_source(data, i, data->num_temp_alarms) >= 0) { + err = device_create_file(dev, + &sda_temp_alarm[i].dev_attr); + if (err) + goto exit_remove; + } if (!(data->have_temp_fixed & (1 << i))) continue; err = device_create_file(dev, &sda_temp_type[i].dev_attr); @@ -3905,12 +3945,6 @@ static int nct6775_probe(struct platform_device *pdev) err = device_create_file(dev, &sda_temp_offset[i].dev_attr); if (err) goto exit_remove; - if (i >= NUM_TEMP_ALARM || - data->ALARM_BITS[TEMP_ALARM_BASE + i] < 0) - continue; - err = device_create_file(dev, &sda_temp_alarm[i].dev_attr); - if (err) - goto exit_remove; } for (i = 0; i < ARRAY_SIZE(sda_caseopen); i++) { From bde3f4bb512c34609becce11f23ed6f89e3f488f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 23 Jun 2013 13:04:04 -0700 Subject: [PATCH 0133/1368] hwmon: (nct6775) Drop unsupported fan alarm attributes for NCT6775 commit 41fa9a944fce1d7efd5ee3d50ac85b92f42dcc3d upstream. NCT6775 does not support alarms for fans 4 and 5. Drop the attributes. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct6775.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 2405ab439e80..99cec1825420 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -199,7 +199,7 @@ static const s8 NCT6775_ALARM_BITS[] = { 0, 1, 2, 3, 8, 21, 20, 16, /* in0.. in7 */ 17, -1, -1, -1, -1, -1, -1, /* in8..in14 */ -1, /* unused */ - 6, 7, 11, 10, 23, /* fan1..fan5 */ + 6, 7, 11, -1, -1, /* fan1..fan5 */ -1, -1, -1, /* unused */ 4, 5, 13, -1, -1, -1, /* temp1..temp6 */ 12, -1 }; /* intrusion0, intrusion1 */ @@ -3877,10 +3877,12 @@ static int nct6775_probe(struct platform_device *pdev) &sda_fan_input[i].dev_attr); if (err) goto exit_remove; - err = device_create_file(dev, - &sda_fan_alarm[i].dev_attr); - if (err) - goto exit_remove; + if (data->ALARM_BITS[FAN_ALARM_BASE + i] >= 0) { + err = device_create_file(dev, + &sda_fan_alarm[i].dev_attr); + if (err) + goto exit_remove; + } if (data->kind != nct6776 && data->kind != nct6779) { err = device_create_file(dev, From fe1ebd057ddfb37a23cc37090d427fb0a68d3000 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 23 Jun 2013 23:25:04 +0400 Subject: [PATCH 0134/1368] libata-zpodd: must use ata_tf_init() commit d0887c43f51c308b01605346e55d906ba858a6f9 upstream. There are some SATA controllers which have both devices 0 and 1 but this module just zeroes out taskfile and sets then ATA_TFLAG_DEVICE (not sure that's needed) which could lead to a wrong device being selected just before issuing command. Thus we should call ata_tf_init() which sets up the device register value properly, like all other users of ata_exec_internal() do... Signed-off-by: Sergei Shtylyov Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-zpodd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index 90b159b740b3..cd8daf47188b 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -32,13 +32,14 @@ struct zpodd { static int eject_tray(struct ata_device *dev) { - struct ata_taskfile tf = {}; + struct ata_taskfile tf; const char cdb[] = { GPCMD_START_STOP_UNIT, 0, 0, 0, 0x02, /* LoEj */ 0, 0, 0, 0, 0, 0, 0, }; + ata_tf_init(dev, &tf); tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; tf.protocol = ATAPI_PROT_NODATA; @@ -52,8 +53,7 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) char buf[16]; unsigned int ret; struct rm_feature_desc *desc = (void *)(buf + 8); - struct ata_taskfile tf = {}; - + struct ata_taskfile tf; char cdb[] = { GPCMD_GET_CONFIGURATION, 2, /* only 1 feature descriptor requested */ 0, 3, /* 3, removable medium feature */ @@ -62,6 +62,7 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) 0, 0, 0, }; + ata_tf_init(dev, &tf); tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; tf.protocol = ATAPI_PROT_PIO; From 64a03b5c4206bb104ea69efed3c8189427fcedfe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 11 Jun 2013 00:11:36 -0700 Subject: [PATCH 0135/1368] libata: skip SRST for all SIMG [34]7x port-multipliers commit 7a87718d92760fc688628ad6a430643dafa16f1f upstream. For some reason, a lot of port-multipliers have issues with softreset. SIMG [34]7x series port-multipliers have been quite erratic in this regard. I recall that it was better with some firmware revisions and the current list of quirks worked fine for a while. I think it got worse with later firmwares or maybe my test coverage wasn't good enough. Anyways, HPA is reporting that his 3726 setup suffers SRST failures and then the PMP gets confused and fails to probe the last port. The hope was that we try to stick to the standard as much as possible and soonish the PMPs and their firmwares will improve in quality, so the quirk list was kept to minimum. Well, it seems like that's never gonna happen. Let's set NO_SRST for all [34]7x PMPs so that whatever remaining userbase of the device suffer the least. Maybe we should do the same for 57xx's but unfortunately I don't have any device left to test and I'm not even sure 57xx's have ever been made widely available, so let's leave those alone for now. Signed-off-by: Tejun Heo Reported-by: "H. Peter Anvin" Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-pmp.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 61c59ee45ce9..1c41722bb7e2 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -389,9 +389,13 @@ static void sata_pmp_quirks(struct ata_port *ap) /* link reports offline after LPM */ link->flags |= ATA_LFLAG_NO_LPM; - /* Class code report is unreliable. */ + /* + * Class code report is unreliable and SRST times + * out under certain configurations. + */ if (link->pmp < 5) - link->flags |= ATA_LFLAG_ASSUME_ATA; + link->flags |= ATA_LFLAG_NO_SRST | + ATA_LFLAG_ASSUME_ATA; /* port 5 is for SEMB device and it doesn't like SRST */ if (link->pmp == 5) @@ -399,20 +403,17 @@ static void sata_pmp_quirks(struct ata_port *ap) ATA_LFLAG_ASSUME_SEMB; } } else if (vendor == 0x1095 && devid == 0x4723) { - /* sil4723 quirks */ - ata_for_each_link(link, ap, EDGE) { - /* link reports offline after LPM */ - link->flags |= ATA_LFLAG_NO_LPM; - - /* class code report is unreliable */ - if (link->pmp < 2) - link->flags |= ATA_LFLAG_ASSUME_ATA; - - /* the config device at port 2 locks up on SRST */ - if (link->pmp == 2) - link->flags |= ATA_LFLAG_NO_SRST | - ATA_LFLAG_ASSUME_ATA; - } + /* + * sil4723 quirks + * + * Link reports offline after LPM. Class code report is + * unreliable. SIMG PMPs never got SRST reliable and the + * config device at port 2 locks up on SRST. + */ + ata_for_each_link(link, ap, EDGE) + link->flags |= ATA_LFLAG_NO_LPM | + ATA_LFLAG_NO_SRST | + ATA_LFLAG_ASSUME_ATA; } else if (vendor == 0x1095 && devid == 0x4726) { /* sil4726 quirks */ ata_for_each_link(link, ap, EDGE) { From b568411d97ec8c91eef5e512b5b2ec5df050c9c1 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Wed, 19 Jun 2013 16:25:37 -0700 Subject: [PATCH 0136/1368] ata_piix: IDE-mode SATA patch for Intel Coleto Creek DeviceIDs commit c7e8695bfa0611b39493a9dfe8bab9f63f9809bd upstream. This patch adds the IDE-mode SATA DeviceIDs for the Intel Coleto Creek PCH. Signed-off-by: Seth Heasley Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ata_piix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 9a8a674e8fac..8eae65905750 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -338,6 +338,8 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller IDE (BayTrail) */ { 0x8086, 0x0F20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_byt }, { 0x8086, 0x0F21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_byt }, + /* SATA Controller IDE (Coleto Creek) */ + { 0x8086, 0x23a6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, { } /* terminate list */ }; From 31c1a76deb262a074b209ad14a35bef7f5b9ab81 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Mon, 3 Jun 2013 08:22:54 -0500 Subject: [PATCH 0137/1368] sata_highbank: increase retry count but shorten duration for Calxeda controller commit ddfef5de3d716f77bad32dbbba6b280158dfd721 upstream. Increase the retry count for the hard reset function to 100 but shorten the time out period to 500 ms. See the comment for ahci_highbank_hardreset for the reasons why those vaulues were chosen. Signed-off-by: Mark Langsdorf Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/sata_highbank.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index b20aa96b958d..c846fd3c5c09 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -196,10 +196,26 @@ static int highbank_initialize_phys(struct device *dev, void __iomem *addr) return 0; } +/* + * The Calxeda SATA phy intermittently fails to bring up a link with Gen3 + * Retrying the phy hard reset can work around the issue, but the drive + * may fail again. In less than 150 out of 15000 test runs, it took more + * than 10 tries for the link to be established (but never more than 35). + * Triple the maximum observed retry count to provide plenty of margin for + * rare events and to guarantee that the link is established. + * + * Also, the default 2 second time-out on a failed drive is too long in + * this situation. The uboot implementation of the same driver function + * uses a much shorter time-out period and never experiences a time out + * issue. Reducing the time-out to 500ms improves the responsiveness. + * The other timing constants were kept the same as the stock AHCI driver. + * This change was also tested 15000 times on 24 drives and none of them + * experienced a time out. + */ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { - const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + static const unsigned long timing[] = { 5, 100, 500}; struct ata_port *ap = link->ap; struct ahci_port_priv *pp = ap->private_data; u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG; @@ -207,7 +223,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, bool online; u32 sstatus; int rc; - int retry = 10; + int retry = 100; ahci_stop_engine(ap); From 5ebc73095cf30bca82ef2983a46756ff26b1a810 Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Mon, 3 Jun 2013 18:24:55 +0800 Subject: [PATCH 0138/1368] i2c-piix4: Add AMD CZ SMBus device ID commit b996ac90f595dda271cbd858b136b45557fc1a57 upstream. To add AMD CZ SMBus controller device ID. [bhelgaas: drop pci_ids.h update] Signed-off-by: Shane Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Tejun Heo Reviewed-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- Documentation/i2c/busses/i2c-piix4 | 2 +- drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-piix4.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4 index 1e6634f54c50..a370b2047cf3 100644 --- a/Documentation/i2c/busses/i2c-piix4 +++ b/Documentation/i2c/busses/i2c-piix4 @@ -13,7 +13,7 @@ Supported adapters: * AMD SP5100 (SB700 derivative found on some server mainboards) Datasheet: Publicly available at the AMD website http://support.amd.com/us/Embedded_TechDocs/44413.pdf - * AMD Hudson-2 + * AMD Hudson-2, CZ Datasheet: Not publicly available * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge Datasheet: Publicly available at the SMSC website http://www.smsc.com diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 631736e2e7ed..4faf02b3657d 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -150,6 +150,7 @@ config I2C_PIIX4 ATI SB700/SP5100 ATI SB800 AMD Hudson-2 + AMD CZ Serverworks OSB4 Serverworks CSB5 Serverworks CSB6 diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 39ab78c1a02c..d05ad590af29 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -22,7 +22,7 @@ Intel PIIX4, 440MX Serverworks OSB4, CSB5, CSB6, HT-1000, HT-1100 ATI IXP200, IXP300, IXP400, SB600, SB700/SP5100, SB800 - AMD Hudson-2 + AMD Hudson-2, CZ SMSC Victory66 Note: we assume there can only be one device, with one or more @@ -522,6 +522,7 @@ static DEFINE_PCI_DEVICE_TABLE(piix4_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x790b) }, { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4) }, { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, From d23067e924e410875602b3427251b88cd18da73f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 18:00:25 +0200 Subject: [PATCH 0139/1368] ASoC: s6000: Fix unlocked snd_pcm_stop() call commit 61be2b9a18ec70f3cbe3deef7a5f77869c71b5ae upstream. snd_pcm_stop() must be called in the PCM substream lock context. Acked-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/soc/s6000/s6000-pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c index 1358c7de2521..d0740a762963 100644 --- a/sound/soc/s6000/s6000-pcm.c +++ b/sound/soc/s6000/s6000-pcm.c @@ -128,7 +128,9 @@ static irqreturn_t s6000_pcm_irq(int irq, void *data) substream->runtime && snd_pcm_running(substream)) { dev_dbg(pcm->dev, "xrun\n"); + snd_pcm_stream_lock(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(substream); ret = IRQ_HANDLED; } From b5aa3fc5730c605f4f6e1dbc45db459cfd59d1ed Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 18:00:01 +0200 Subject: [PATCH 0140/1368] ASoC: atmel: Fix unlocked snd_pcm_stop() call commit 571185717f8d7f2a088a7ac38d94a9ad5fd9da5c upstream. snd_pcm_stop() must be called in the PCM substream lock context. Acked-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/soc/atmel/atmel-pcm-dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/atmel/atmel-pcm-dma.c b/sound/soc/atmel/atmel-pcm-dma.c index 1d38fd0bc4e2..d12826526798 100644 --- a/sound/soc/atmel/atmel-pcm-dma.c +++ b/sound/soc/atmel/atmel-pcm-dma.c @@ -81,7 +81,9 @@ static void atmel_pcm_dma_irq(u32 ssc_sr, /* stop RX and capture: will be enabled again at restart */ ssc_writex(prtd->ssc->regs, SSC_CR, prtd->mask->ssc_disable); + snd_pcm_stream_lock(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(substream); /* now drain RHR and read status to remove xrun condition */ ssc_readx(prtd->ssc->regs, SSC_RHR); From 3bd92a9d1f82e41d781f082d710b16036a0ea25b Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Jul 2013 20:01:03 -0300 Subject: [PATCH 0141/1368] ASoC: sglt5000: Fix SGTL5000_PLL_FRAC_DIV_MASK commit 5c78dfe87ea04b501ee000a7f03b9432ac9d008c upstream. SGTL5000_PLL_FRAC_DIV_MASK is used to mask bits 0-10 (11 bits in total) of register CHIP_PLL_CTRL, so fix the mask to accomodate all this bit range. Reported-by: Oskar Schirmer Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/sgtl5000.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 8a9f43534b79..d3a68bbfea00 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -347,7 +347,7 @@ #define SGTL5000_PLL_INT_DIV_MASK 0xf800 #define SGTL5000_PLL_INT_DIV_SHIFT 11 #define SGTL5000_PLL_INT_DIV_WIDTH 5 -#define SGTL5000_PLL_FRAC_DIV_MASK 0x0700 +#define SGTL5000_PLL_FRAC_DIV_MASK 0x07ff #define SGTL5000_PLL_FRAC_DIV_SHIFT 0 #define SGTL5000_PLL_FRAC_DIV_WIDTH 11 From 509c317d092a551f5c26de05445fc8d5b81cc23d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Jul 2013 16:41:53 +1000 Subject: [PATCH 0142/1368] md/raid10: fix bug which causes all RAID10 reshapes to move no data. commit 1376512065b23f39d5f9a160948f313397dde972 upstream. The recent comment: commit 7e83ccbecd608b971f340e951c9e84cd0343002f md/raid10: Allow skipping recovery when clean arrays are assembled Causes raid10 to skip a recovery in certain cases where it is safe to do so. Unfortunately it also causes a reshape to be skipped which is never safe. The result is that an attempt to reshape a RAID10 will appear to complete instantly, but no data will have been moves so the array will now contain garbage. (If nothing is written, you can recovery by simple performing the reverse reshape which will also complete instantly). Bug was introduced in 3.10, so this is suitable for 3.10-stable. Signed-off-by: NeilBrown Cc: Martin Wilck Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6ddae2501b9a..f2f4a6e4f4f4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2909,14 +2909,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, */ if (mddev->bitmap == NULL && mddev->recovery_cp == MaxSector && + mddev->reshape_position == MaxSector && + !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && + !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && conf->fullsync == 0) { *skipped = 1; - max_sector = mddev->dev_sectors; - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || - test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - max_sector = mddev->resync_max_sectors; - return max_sector - sector_nr; + return mddev->dev_sectors - sector_nr; } skipped: From 4b5c14511c401edf5ea78e2592c7043456deb595 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Jul 2013 15:58:05 +1000 Subject: [PATCH 0143/1368] md/raid10: fix two bugs affecting RAID10 reshape. commit 78eaa0d4cbcdb345992fa3dd22b3bcbb473cc064 upstream. 1/ If a RAID10 is being reshaped to a fewer number of devices and is stopped while this is ongoing, then when the array is reassembled the 'mirrors' array will be allocated too small. This will lead to an access error or memory corruption. 2/ A sanity test for a reshaping RAID10 array is restarted is slightly incorrect. Due to the first bug, this is suitable for any -stable kernel since 3.5 where this code was introduced. Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f2f4a6e4f4f4..d7bad6bc46d8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3531,7 +3531,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) /* FIXME calc properly */ conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks + - max(0,mddev->delta_disks)), + max(0,-mddev->delta_disks)), GFP_KERNEL); if (!conf->mirrors) goto out; @@ -3690,7 +3690,7 @@ static int run(struct mddev *mddev) conf->geo.far_offset == 0) goto out_free_conf; if (conf->prev.far_copies != 1 && - conf->geo.far_offset == 0) + conf->prev.far_offset == 0) goto out_free_conf; } From 2dc04d3333049098691eb652e06d52fbd80771d2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jul 2013 16:50:47 +1000 Subject: [PATCH 0144/1368] md/raid10: fix two problems with RAID10 resync. commit 7bb23c4934059c64cbee2e41d5d24ce122285176 upstream. 1/ When an different between blocks is found, data is copied from one bio to the other. However bv_len is used as the length to copy and this could be zero. So use r10_bio->sectors to calculate length instead. Using bv_len was probably always a bit dubious, but the introduction of bio_advance made it much more likely to be a problem. 2/ When preparing some blocks for sync, we don't set BIO_UPTODATE except on bios that we schedule for a read. This ensures that missing/failed devices don't confuse the loop at the top of sync_request write. Commit 8be185f2c9d54d6 "raid10: Use bio_reset()" removed a loop which set BIO_UPTDATE on all appropriate bios. So we need to re-add that flag. These bugs were introduced in 3.10, so this patch is suitable for 3.10-stable, and can remove a potential for data corruption. Reported-by: Brassow Jonathan Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d7bad6bc46d8..d61eb7ea0d81 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2075,11 +2075,17 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * both 'first' and 'i', so we just compare them. * All vec entries are PAGE_SIZE; */ - for (j = 0; j < vcnt; j++) + int sectors = r10_bio->sectors; + for (j = 0; j < vcnt; j++) { + int len = PAGE_SIZE; + if (sectors < (len / 512)) + len = sectors * 512; if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), page_address(tbio->bi_io_vec[j].bv_page), - fbio->bi_io_vec[j].bv_len)) + len)) break; + sectors -= len/512; + } if (j == vcnt) continue; atomic64_add(r10_bio->sectors, &mddev->resync_mismatches); @@ -3385,6 +3391,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (bio->bi_end_io == end_sync_read) { md_sync_acct(bio->bi_bdev, nr_sectors); + set_bit(BIO_UPTODATE, &bio->bi_flags); generic_make_request(bio); } } From 1c0d08e652c18e3f3198969435fef31941b2eec3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 1 Jul 2013 22:14:10 +0200 Subject: [PATCH 0145/1368] tick: Sanitize broadcast control logic commit 07bd1172902e782f288e4d44b1fde7dec0f08b6f upstream. The recent implementation of a generic dummy timer resulted in a different registration order of per cpu local timers which made the broadcast control logic go belly up. If the dummy timer is the first clock event device which is registered for a CPU, then it is installed, the broadcast timer is initialized and the CPU is marked as broadcast target. If a real clock event device is installed after that, we can fail to take the CPU out of the broadcast mask. In the worst case we end up with two periodic timer events firing for the same CPU. One from the per cpu hardware device and one from the broadcast. Now the problem is that we have no way to distinguish whether the system is in a state which makes broadcasting necessary or the broadcast bit was set due to the nonfunctional dummy timer installment. To solve this we need to keep track of the system state seperately and provide a more detailed decision logic whether we keep the CPU in broadcast mode or not. The old decision logic only clears the broadcast mode, if the newly installed clock event device is not affected by power states. The new logic clears the broadcast mode if one of the following is true: - The new device is not affected by power states. - The system is not in a power state affected mode - The system has switched to oneshot mode. The oneshot broadcast is controlled from the deep idle state. The CPU is not in idle at this point, so it's safe to remove it from the mask. If we clear the broadcast bit for the CPU when a new device is installed, we also shutdown the broadcast device when this was the last CPU in the broadcast mask. If the broadcast bit is kept, then we leave the new device in shutdown state and rely on the broadcast to deliver the timer interrupts via the broadcast ipis. Reported-and-tested-by: Stehle Vincent-B46079 Reviewed-by: Stephen Boyd Cc: John Stultz , Cc: Mark Rutland Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1307012153060.4013@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-broadcast.c | 70 ++++++++++++++++++++++++++++++------ kernel/time/tick-common.c | 3 +- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 20d6fba70652..c389f068aca2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -29,6 +29,7 @@ static struct tick_device tick_broadcast_device; static cpumask_var_t tick_broadcast_mask; +static cpumask_var_t tick_broadcast_on; static cpumask_var_t tmpmask; static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); static int tick_broadcast_force; @@ -123,8 +124,9 @@ static void tick_device_setup_broadcast_func(struct clock_event_device *dev) */ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { + struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; - int ret = 0; + int ret; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -138,20 +140,59 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) dev->event_handler = tick_handle_periodic; tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_broadcast_mask); - tick_broadcast_start_periodic(tick_broadcast_device.evtdev); + tick_broadcast_start_periodic(bc); ret = 1; } else { /* - * When the new device is not affected by the stop - * feature and the cpu is marked in the broadcast mask - * then clear the broadcast bit. + * Clear the broadcast bit for this cpu if the + * device is not power state affected. */ - if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { - int cpu = smp_processor_id(); + if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) cpumask_clear_cpu(cpu, tick_broadcast_mask); - tick_broadcast_clear_oneshot(cpu); - } else { + else tick_device_setup_broadcast_func(dev); + + /* + * Clear the broadcast bit if the CPU is not in + * periodic broadcast on state. + */ + if (!cpumask_test_cpu(cpu, tick_broadcast_on)) + cpumask_clear_cpu(cpu, tick_broadcast_mask); + + switch (tick_broadcast_device.mode) { + case TICKDEV_MODE_ONESHOT: + /* + * If the system is in oneshot mode we can + * unconditionally clear the oneshot mask bit, + * because the CPU is running and therefore + * not in an idle state which causes the power + * state affected device to stop. Let the + * caller initialize the device. + */ + tick_broadcast_clear_oneshot(cpu); + ret = 0; + break; + + case TICKDEV_MODE_PERIODIC: + /* + * If the system is in periodic mode, check + * whether the broadcast device can be + * switched off now. + */ + if (cpumask_empty(tick_broadcast_mask) && bc) + clockevents_shutdown(bc); + /* + * If we kept the cpu in the broadcast mask, + * tell the caller to leave the per cpu device + * in shutdown state. The periodic interrupt + * is delivered by the broadcast device. + */ + ret = cpumask_test_cpu(cpu, tick_broadcast_mask); + break; + default: + /* Nothing to do */ + ret = 0; + break; } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); @@ -281,6 +322,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason) switch (*reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ON: case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) @@ -290,8 +332,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason) tick_broadcast_force = 1; break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (!tick_broadcast_force && - cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { + if (tick_broadcast_force) + break; + cpumask_clear_cpu(cpu, tick_broadcast_on); + if (!tick_device_is_functional(dev)) + break; + if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); @@ -349,6 +395,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) bc = tick_broadcast_device.evtdev; cpumask_clear_cpu(cpu, tick_broadcast_mask); + cpumask_clear_cpu(cpu, tick_broadcast_on); if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { if (bc && cpumask_empty(tick_broadcast_mask)) @@ -792,6 +839,7 @@ bool tick_broadcast_oneshot_available(void) void __init tick_broadcast_init(void) { zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); + zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT); zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); #ifdef CONFIG_TICK_ONESHOT zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5d3fb100bc06..7ce5e5a4a4c5 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -194,7 +194,8 @@ static void tick_setup_device(struct tick_device *td, * When global broadcasting is active, check if the current * device is registered as a placeholder for broadcast mode. * This allows us to handle this x86 misfeature in a generic - * way. + * way. This function also returns !=0 when we keep the + * current active broadcast state for this CPU. */ if (tick_device_uses_broadcast(newdev, cpu)) return; From 084c895d3c9fecf80fe02ba442b90818116b30d2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 1 Jul 2013 22:14:10 +0200 Subject: [PATCH 0146/1368] tick: Prevent uncontrolled switch to oneshot mode commit 1f73a9806bdd07a5106409bbcab3884078bd34fe upstream. When the system switches from periodic to oneshot mode, the broadcast logic causes a possibility that a CPU which has not yet switched to oneshot mode puts its own clock event device into oneshot mode without updating the state and the timer handler. CPU0 CPU1 per cpu tickdev is in periodic mode and switched to broadcast Switch to oneshot mode tick_broadcast_switch_to_oneshot() cpumask_copy(tick_oneshot_broacast_mask, tick_broadcast_mask); broadcast device mode = oneshot Timer interrupt irq_enter() tick_check_oneshot_broadcast() dev->set_mode(ONESHOT); tick_handle_periodic() if (dev->mode == ONESHOT) dev->next_event += period; FAIL. We fail, because dev->next_event contains KTIME_MAX, if the device was in periodic mode before the uncontrolled switch to oneshot happened. We must copy the broadcast bits over to the oneshot mask, because otherwise a CPU which relies on the broadcast would not been woken up anymore after the broadcast device switched to oneshot mode. So we need to verify in tick_check_oneshot_broadcast() whether the CPU has already switched to oneshot mode. If not, leave the device untouched and let the CPU switch controlled into oneshot mode. This is a long standing bug, which was never noticed, because the main user of the broadcast x86 cannot run into that scenario, AFAICT. The nonarchitected timer mess of ARM creates a gazillion of differently broken abominations which trigger the shortcomings of that broadcast code, which better had never been necessary in the first place. Reported-and-tested-by: Stehle Vincent-B46079 Reviewed-by: Stephen Boyd Cc: John Stultz , Cc: Mark Rutland Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1307012153060.4013@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-broadcast.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c389f068aca2..297b90b5277e 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -522,7 +522,15 @@ void tick_check_oneshot_broadcast(int cpu) if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { struct tick_device *td = &per_cpu(tick_cpu_device, cpu); - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); + /* + * We might be in the middle of switching over from + * periodic to oneshot. If the CPU has not yet + * switched over, leave the device alone. + */ + if (td->mode == TICKDEV_MODE_ONESHOT) { + clockevents_set_mode(td->evtdev, + CLOCK_EVT_MODE_ONESHOT); + } } } From f707f7ae37c514a3871741747bfd8c09d7badb05 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 29 May 2013 10:11:17 +0200 Subject: [PATCH 0147/1368] clocksource: dw_apb: Fix error check commit 1a33bd2be705cbb3f57d7223b60baea441039307 upstream. irq_of_parse_and_map() returns 0 on error, while the code checks for NO_IRQ. This breaks on platforms that have NO_IRQ != 0. Signed-off-by: Baruch Siach Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/dw_apb_timer_of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index ab09ed3742ee..6b02eddc5f50 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -44,7 +44,7 @@ static void add_clockevent(struct device_node *event_timer) u32 irq, rate; irq = irq_of_parse_and_map(event_timer, 0); - if (irq == NO_IRQ) + if (irq == 0) panic("No IRQ for clock event timer"); timer_get_base_and_rate(event_timer, &iobase, &rate); From 24e095cdc2d7781e756b052291ef0ca7c9edac93 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Sat, 22 Jun 2013 13:13:25 +0200 Subject: [PATCH 0148/1368] rt2x00: read 5GHz TX power values from the correct offset commit 0a6f3a8ebaf13407523c2c7d575b4ca2debd23ba upstream. The current code uses the same index value both for the channel information array and for the TX power table. The index starts from 14, however the index of the TX power table must start from zero. Fix it, in order to get the correct TX power value for a given channel. The changes in rt61pci.c and rt73usb.c are compile tested only. Signed-off-by: Gabor Juhos Acked-by: Stanislaw Gruszka Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rt2x00/rt2800lib.c | 4 ++-- drivers/net/wireless/rt2x00/rt61pci.c | 3 ++- drivers/net/wireless/rt2x00/rt73usb.c | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 72f32e5caa4d..519914f33b43 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -6056,8 +6056,8 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) default_power2 = rt2x00_eeprom_addr(rt2x00dev, EEPROM_TXPOWER_A2); for (i = 14; i < spec->num_channels; i++) { - info[i].default_power1 = default_power1[i]; - info[i].default_power2 = default_power2[i]; + info[i].default_power1 = default_power1[i - 14]; + info[i].default_power2 = default_power2[i - 14]; } } diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 0dc8180e251b..883a54c8c5bc 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -2825,7 +2825,8 @@ static int rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) tx_power = rt2x00_eeprom_addr(rt2x00dev, EEPROM_TXPOWER_A_START); for (i = 14; i < spec->num_channels; i++) { info[i].max_power = MAX_TXPOWER; - info[i].default_power1 = TXPOWER_FROM_DEV(tx_power[i]); + info[i].default_power1 = + TXPOWER_FROM_DEV(tx_power[i - 14]); } } diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 377e09bb0b81..2bbca183f94a 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -2167,7 +2167,8 @@ static int rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) tx_power = rt2x00_eeprom_addr(rt2x00dev, EEPROM_TXPOWER_A_START); for (i = 14; i < spec->num_channels; i++) { info[i].max_power = MAX_TXPOWER; - info[i].default_power1 = TXPOWER_FROM_DEV(tx_power[i]); + info[i].default_power1 = + TXPOWER_FROM_DEV(tx_power[i - 14]); } } From 0530bd4a6cc3c04d1e621299e05d2da2c99736c4 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Tue, 25 Jun 2013 22:57:29 +0200 Subject: [PATCH 0149/1368] rt2x00: rt2800lib: fix default TX power check for RT55xx commit 0847beb2865f5ef1c8626ec1a37def18f3d6c41a upstream. The code writes the default_power2 value into the TX field of the RFCSR50 register, however the condition in the if statement uses default_power1. Due to this, wrong TX power value might be written into the register. Use the correct value in the condition to fix the issue. Compile tested only. Signed-off-by: Gabor Juhos Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rt2x00/rt2800lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 519914f33b43..705aa3386ee0 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -2392,7 +2392,7 @@ static void rt2800_config_channel_rf55xx(struct rt2x00_dev *rt2x00dev, rt2800_rfcsr_write(rt2x00dev, 49, rfcsr); rt2800_rfcsr_read(rt2x00dev, 50, &rfcsr); - if (info->default_power1 > power_bound) + if (info->default_power2 > power_bound) rt2x00_set_field8(&rfcsr, RFCSR50_TX, power_bound); else rt2x00_set_field8(&rfcsr, RFCSR50_TX, info->default_power2); From e9c25a407d5c75646eb258de1363dccc1fd3ab47 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Mon, 10 Jun 2013 13:49:38 +0530 Subject: [PATCH 0150/1368] ath9k_hw: Assign default xlna config for AR9485 commit 30d5b709da23f4ab9836c7f66d2d2e780a69cf12 upstream. For AR9485 boards with XLNA, the default gpio config is not set correctly, fix this. Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 8 ++++++-- drivers/net/wireless/ath/ath9k/ar9003_phy.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index e6b92ff265fd..25b8bbbe74fe 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -3563,14 +3563,18 @@ static void ar9003_hw_ant_ctrl_apply(struct ath_hw *ah, bool is2ghz) { struct ath9k_hw_capabilities *pCap = &ah->caps; int chain; - u32 regval; + u32 regval, value; static const u32 switch_chain_reg[AR9300_MAX_CHAINS] = { AR_PHY_SWITCH_CHAIN_0, AR_PHY_SWITCH_CHAIN_1, AR_PHY_SWITCH_CHAIN_2, }; - u32 value = ar9003_hw_ant_ctrl_common_get(ah, is2ghz); + if (AR_SREV_9485(ah) && (ar9003_hw_get_rx_gain_idx(ah) == 0)) + ath9k_hw_cfg_output(ah, AR9300_EXT_LNA_CTL_GPIO_AR9485, + AR_GPIO_OUTPUT_MUX_AS_PCIE_ATTENTION_LED); + + value = ar9003_hw_ant_ctrl_common_get(ah, is2ghz); if (AR_SREV_9462(ah) || AR_SREV_9565(ah)) { REG_RMW_FIELD(ah, AR_PHY_SWITCH_COM, diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h index e71774196c01..5013c731f9f6 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h @@ -351,6 +351,8 @@ #define AR_PHY_CCA_NOM_VAL_9330_2GHZ -118 +#define AR9300_EXT_LNA_CTL_GPIO_AR9485 9 + /* * AGC Field Definitions */ From 13311514a74f520989e304b746dc7478335877f9 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Mon, 10 Jun 2013 13:49:39 +0530 Subject: [PATCH 0151/1368] ath9k: Fix noisefloor calibration commit 696df78509d1f81b651dd98ecdc1aecab616db6b upstream. The commits, "ath9k: Fix regression in channelwidth switch at the same channel" "ath9k: Fix invalid noisefloor reading due to channel update" attempted to fix noisefloor calibration when a channel switch happens due to HT20/HT40 bandwidth change. This is causing invalid readings resulting in messages like: "ath: phy16: NF[0] (-45) > MAX (-95), correcting to MAX". This results in an incorrect noise being used initially for reporting the signal level of received packets, until NF calibration is done and the history buffer is updated via the ANI timer, which happens much later. When a bandwidth change happens, it is appropriate to reset the internal history data for the channel. Do this correctly in the reset() routine by checking the "chanmode" variable. Signed-off-by: Sujith Manoharan Cc: Rajkumar Manoharan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hw.c | 3 ++- drivers/net/wireless/ath/ath9k/main.c | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 15dfefcf2d0f..b1d5037bff7f 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1872,7 +1872,8 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, ah->caldata = caldata; if (caldata && (chan->channel != caldata->channel || - chan->channelFlags != caldata->channelFlags)) { + chan->channelFlags != caldata->channelFlags || + chan->chanmode != caldata->chanmode)) { /* Operating channel changed, reset channel calibration data */ memset(caldata, 0, sizeof(*caldata)); ath9k_init_nfcal_hist_buffer(ah, chan); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 5092ecae7706..35ced100c183 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1211,13 +1211,6 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) ath_update_survey_stats(sc); spin_unlock_irqrestore(&common->cc_lock, flags); - /* - * Preserve the current channel values, before updating - * the same channel - */ - if (ah->curchan && (old_pos == pos)) - ath9k_hw_getnf(ah, ah->curchan); - ath9k_cmn_update_ichannel(&sc->sc_ah->channels[pos], curchan, channel_type); From 64ea2992a29aa0bdc4eaf457351b063caafb6655 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Mon, 10 Jun 2013 13:49:40 +0530 Subject: [PATCH 0152/1368] ath9k: Do not assign noise for NULL caldata commit d3bcb7b24bbf09fde8405770e676fe0c11c79662 upstream. ah->noise is maintained globally and not per-channel. This is updated in the reset() routine after the NF history has been filled for the *current channel*, just before switching to the new channel. There is no need to do it inside getnf(), since ah->noise must contain a value for the new channel. Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/calib.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c index 7304e7585009..5e8219a91e25 100644 --- a/drivers/net/wireless/ath/ath9k/calib.c +++ b/drivers/net/wireless/ath/ath9k/calib.c @@ -387,7 +387,6 @@ bool ath9k_hw_getnf(struct ath_hw *ah, struct ath9k_channel *chan) if (!caldata) { chan->noisefloor = nf; - ah->noise = ath9k_hw_getchan_noise(ah, chan); return false; } From 98dcc2946adbe4349ef1ef9b99873b912831edd4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 6 Jun 2013 22:15:55 -0400 Subject: [PATCH 0153/1368] SCSI: sd: Update WRITE SAME heuristics commit 66c28f97120e8a621afd5aa7a31c4b85c547d33d upstream. SATA drives located behind a SAS controller would incorrectly receive WRITE SAME commands. Tweak the heuristics so that: - If REPORT SUPPORTED OPERATION CODES is provided we will use that to choose between WRITE SAME(16), WRITE SAME(10) and disabled. This also fixes an issue with the old code which would issue WRITE SAME(10) despite the command not being whitelisted in REPORT SUPPORTED OPERATION CODES. - If REPORT SUPPORTED OPERATION CODES is not provided we will fall back to WRITE SAME(10) unless the device has an ATA Information VPD page. The assumption is that a SATL which is smart enough to implement WRITE SAME would also provide REPORT SUPPORTED OPERATION CODES. To facilitate the new heuristics scsi_report_opcode() has been modified to so we can distinguish between "operation not supported" and "RSOC not supported". Reported-by: H. Peter Anvin Tested-by: Bernd Schubert Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi.c | 8 ++++---- drivers/scsi/sd.c | 46 +++++++++++++++++++++++++++++++-------------- drivers/scsi/sd.h | 1 + 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 2c0d0ec8150b..3b1ea34e1f5a 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1070,8 +1070,8 @@ EXPORT_SYMBOL_GPL(scsi_get_vpd_page); * @opcode: opcode for command to look up * * Uses the REPORT SUPPORTED OPERATION CODES to look up the given - * opcode. Returns 0 if RSOC fails or if the command opcode is - * unsupported. Returns 1 if the device claims to support the command. + * opcode. Returns -EINVAL if RSOC fails, 0 if the command opcode is + * unsupported and 1 if the device claims to support the command. */ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, unsigned int len, unsigned char opcode) @@ -1081,7 +1081,7 @@ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, int result; if (sdev->no_report_opcodes || sdev->scsi_level < SCSI_SPC_3) - return 0; + return -EINVAL; memset(cmd, 0, 16); cmd[0] = MAINTENANCE_IN; @@ -1097,7 +1097,7 @@ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, if (result && scsi_sense_valid(&sshdr) && sshdr.sense_key == ILLEGAL_REQUEST && (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00) - return 0; + return -EINVAL; if ((buffer[1] & 3) == 3) /* Command supported */ return 1; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6f6a1b48f998..1b1125e67f1e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -442,8 +442,10 @@ sd_store_write_same_blocks(struct device *dev, struct device_attribute *attr, if (max == 0) sdp->no_write_same = 1; - else if (max <= SD_MAX_WS16_BLOCKS) + else if (max <= SD_MAX_WS16_BLOCKS) { + sdp->no_write_same = 0; sdkp->max_ws_blocks = max; + } sd_config_write_same(sdkp); @@ -740,7 +742,6 @@ static void sd_config_write_same(struct scsi_disk *sdkp) { struct request_queue *q = sdkp->disk->queue; unsigned int logical_block_size = sdkp->device->sector_size; - unsigned int blocks = 0; if (sdkp->device->no_write_same) { sdkp->max_ws_blocks = 0; @@ -752,18 +753,20 @@ static void sd_config_write_same(struct scsi_disk *sdkp) * blocks per I/O unless the device explicitly advertises a * bigger limit. */ - if (sdkp->max_ws_blocks == 0) - sdkp->max_ws_blocks = SD_MAX_WS10_BLOCKS; - - if (sdkp->ws16 || sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS) - blocks = min_not_zero(sdkp->max_ws_blocks, - (u32)SD_MAX_WS16_BLOCKS); - else - blocks = min_not_zero(sdkp->max_ws_blocks, - (u32)SD_MAX_WS10_BLOCKS); + if (sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS) + sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS16_BLOCKS); + else if (sdkp->ws16 || sdkp->ws10 || sdkp->device->no_report_opcodes) + sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS10_BLOCKS); + else { + sdkp->device->no_write_same = 1; + sdkp->max_ws_blocks = 0; + } out: - blk_queue_max_write_same_sectors(q, blocks * (logical_block_size >> 9)); + blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks * + (logical_block_size >> 9)); } /** @@ -2635,9 +2638,24 @@ static void sd_read_block_provisioning(struct scsi_disk *sdkp) static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) { - if (scsi_report_opcode(sdkp->device, buffer, SD_BUF_SIZE, - WRITE_SAME_16)) + struct scsi_device *sdev = sdkp->device; + + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY) < 0) { + sdev->no_report_opcodes = 1; + + /* Disable WRITE SAME if REPORT SUPPORTED OPERATION + * CODES is unsupported and the device has an ATA + * Information VPD page (SAT). + */ + if (!scsi_get_vpd_page(sdev, 0x89, buffer, SD_BUF_SIZE)) + sdev->no_write_same = 1; + } + + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME_16) == 1) sdkp->ws16 = 1; + + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME) == 1) + sdkp->ws10 = 1; } static int sd_try_extended_inquiry(struct scsi_device *sdp) diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 2386aeb41fe8..7a049de22051 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -84,6 +84,7 @@ struct scsi_disk { unsigned lbpws : 1; unsigned lbpws10 : 1; unsigned lbpvpd : 1; + unsigned ws10 : 1; unsigned ws16 : 1; }; #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev) From 4e6b18250651a14b053508e30e731247e28e3f2a Mon Sep 17 00:00:00 2001 From: Mahesh Rajashekhara Date: Tue, 18 Jun 2013 17:02:07 +0530 Subject: [PATCH 0154/1368] SCSI: aacraid: Fix for arrays are going offline in the system. System hangs commit c5bebd829dd95602c15f8da8cc50fa938b5e0254 upstream. One of the customer had reported that the set of raid logical arrays will become unavailable (I/O offline) after a long hours of IO stress test. The OS wouldn`t be accessible afterwards and require a hard reset. This driver patch has a fix for race condition between the doorbell and the circular buffer. The driver is modified to do an extra read after clearing the doorbell in case there had been a completion posted during the small timing window. With this fix, we ran IO stress for ~13 days. There were no IO failures. Signed-off-by: Mahesh Rajashekhara Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/src.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 0f56d8d7524f..7e17107643d4 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -93,6 +93,9 @@ static irqreturn_t aac_src_intr_message(int irq, void *dev_id) int send_it = 0; extern int aac_sync_mode; + src_writel(dev, MUnit.ODR_C, bellbits); + src_readl(dev, MUnit.ODR_C); + if (!aac_sync_mode) { src_writel(dev, MUnit.ODR_C, bellbits); src_readl(dev, MUnit.ODR_C); From 88bc5928044239f6902e89798ae4ef75d23ccb3c Mon Sep 17 00:00:00 2001 From: Daniel Hansel Date: Fri, 26 Apr 2013 17:32:14 +0200 Subject: [PATCH 0155/1368] SCSI: zfcp: fix adapter (re)open recovery while link to SAN is down commit f76ccaac4f82c463a037aa4a1e4ccb85c7011814 upstream. FCP device remains in status ERP_FAILED when device is switched online or adapter recovery is triggered while link to SAN is down. When Exchange Configuration Data command returns the FSF status FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE it aborts the exchange process. The only retries are done during the common error recovery procedure (i.e. max. 3 retries with 8sec sleep between) and remains in status ERP_FAILED with QDIO down. This commit reverts the commit 0df138476c8306478d6e726f044868b4bccf411c (zfcp: Fix adapter activation on link down). When FSF status FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE is received the adapter recovery will be finished without any retries. QDIO will be up now and status changes such as LINK UP will be received now. Signed-off-by: Daniel Hansel Signed-off-by: Steffen Maier Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_fsf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c7e148f33b2a..06760e435259 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -563,6 +563,10 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req) fc_host_port_type(shost) = FC_PORTTYPE_UNKNOWN; adapter->hydra_version = 0; + /* avoids adapter shutdown to be able to recognize + * events such as LINK UP */ + atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, + &adapter->status); zfcp_fsf_link_down_info_eval(req, &qtcb->header.fsf_status_qual.link_down_info); break; From b22b8386992d886aa487b57e000b18512e639710 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 26 Apr 2013 17:33:45 +0200 Subject: [PATCH 0156/1368] SCSI: zfcp: block queue limits with data router commit 5fea4291deacd80188b996d2f555fc6a1940e5d4 upstream. Commit 86a9668a8d29ea711613e1cb37efa68e7c4db564 "[SCSI] zfcp: support for hardware data router" reduced the initial block queue limits in the scsi_host_template to the absolute minimum and adjusted them later on. However, the adjustment was too late for the BSG devices of Scsi_Host and fc_host. Therefore, ioctl(..., SG_IO, ...) with request or response size > 4kB to a BSG device of an fc_host or a Scsi_Host fails with EINVAL. As a result, users of such ioctl such as HBA_SendCTPassThru() in libzfcphbaapi return with error HBA_STATUS_ERROR. Initialize the block queue limits in zfcp_scsi_host_template to the greatest common denominator (GCD). While we cannot exploit the slightly enlarged maximum request size with data router, this should be neglectible. Doing so also avoids running into trouble after live guest relocation (LGR) / migration from a data router FCP device to an FCP device that does not support data router. In that case, zfcp would figure out the new limits on adapter recovery, but the fc_host and Scsi_Host (plus in fact all sdevs) still exist with the old and now too large queue limits. It should also OK, not to use half the size as in the DIX case, because fc_host and Scsi_Host do not transport FCP requests including SCSI commands using protection data. Signed-off-by: Steffen Maier Reviewed-by: Martin Peschke Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_scsi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 7b31e3f403f9..7b353647cb90 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -3,7 +3,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2010 + * Copyright IBM Corp. 2002, 2013 */ #define KMSG_COMPONENT "zfcp" @@ -311,8 +311,12 @@ static struct scsi_host_template zfcp_scsi_host_template = { .proc_name = "zfcp", .can_queue = 4096, .this_id = -1, - .sg_tablesize = 1, /* adjusted later */ - .max_sectors = 8, /* adjusted later */ + .sg_tablesize = (((QDIO_MAX_ELEMENTS_PER_BUFFER - 1) + * ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2), + /* GCD, adjusted later */ + .max_sectors = (((QDIO_MAX_ELEMENTS_PER_BUFFER - 1) + * ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2) * 8, + /* GCD, adjusted later */ .dma_boundary = ZFCP_QDIO_SBALE_LEN - 1, .cmd_per_lun = 1, .use_clustering = 1, From 8f425c63ecda1bab9982892e7890041292e2d5ec Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 26 Apr 2013 17:34:54 +0200 Subject: [PATCH 0157/1368] SCSI: zfcp: status read buffers on first adapter open with link down commit 9edf7d75ee5f21663a0183d21f702682d0ef132f upstream. Commit 64deb6efdc5504ce97b5c1c6f281fffbc150bd93 "[SCSI] zfcp: Use status_read_buf_num provided by FCP channel" started using a value returned by the channel but only evaluated the value if the fabric link is up. Commit 8d88cf3f3b9af4713642caeb221b6d6a42019001 "[SCSI] zfcp: Update status read mempool" introduced mempool resizings based on the above value. On setting an FCP device online for the very first time since boot, a new zeroed adapter object is allocated. If the link is down, the number of status read requests remains zero. Since just the config data exchange is incomplete, we proceed with adapter open recovery. However, we unconditionally call mempool_resize with adapter->stat_read_buf_num == 0 in this case. This causes a kernel message "kernel BUG at mm/mempool.c:131!" in process "zfcperp" with last function mempool_resize in Krnl PSW and zfcp_erp_thread in the Call Trace. Don't evaluate channel values which are invalid on link down. The number of status read requests is always valid, evaluated, and set to a positive minimum greater than zero. The adapter open recovery can proceed and the channel has status read buffers to inform us on a future link up event. While we are not aware of any other code path that could result in mempool resize attempts of size zero, we still also initialize the number of status read buffers to be posted to a static minimum number on adapter object allocation. Signed-off-by: Steffen Maier Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_aux.c | 5 ++++- drivers/s390/scsi/zfcp_fsf.c | 23 ++++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index f6adde44f226..3743ac931231 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -3,7 +3,7 @@ * * Module interface and handling of zfcp data structures. * - * Copyright IBM Corp. 2002, 2010 + * Copyright IBM Corp. 2002, 2013 */ /* @@ -23,6 +23,7 @@ * Christof Schmitt * Martin Petermann * Sven Schuetz + * Steffen Maier */ #define KMSG_COMPONENT "zfcp" @@ -415,6 +416,8 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device) adapter->dma_parms.max_segment_size = ZFCP_QDIO_SBALE_LEN; adapter->ccw_device->dev.dma_parms = &adapter->dma_parms; + adapter->stat_read_buf_num = FSF_STATUS_READS_RECOM; + if (!zfcp_scsi_adapter_register(adapter)) return adapter; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 06760e435259..9152999a0707 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -3,7 +3,7 @@ * * Implementation of FSF commands. * - * Copyright IBM Corp. 2002, 2010 + * Copyright IBM Corp. 2002, 2013 */ #define KMSG_COMPONENT "zfcp" @@ -483,12 +483,8 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) fc_host_port_name(shost) = nsp->fl_wwpn; fc_host_node_name(shost) = nsp->fl_wwnn; - fc_host_port_id(shost) = ntoh24(bottom->s_id); - fc_host_speed(shost) = - zfcp_fsf_convert_portspeed(bottom->fc_link_speed); fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3; - adapter->hydra_version = bottom->adapter_type; adapter->timer_ticks = bottom->timer_interval & ZFCP_FSF_TIMER_INT_MASK; adapter->stat_read_buf_num = max(bottom->status_read_buf_num, (u16)FSF_STATUS_READS_RECOM); @@ -496,6 +492,19 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) if (fc_host_permanent_port_name(shost) == -1) fc_host_permanent_port_name(shost) = fc_host_port_name(shost); + zfcp_scsi_set_prot(adapter); + + /* no error return above here, otherwise must fix call chains */ + /* do not evaluate invalid fields */ + if (req->qtcb->header.fsf_status == FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE) + return 0; + + fc_host_port_id(shost) = ntoh24(bottom->s_id); + fc_host_speed(shost) = + zfcp_fsf_convert_portspeed(bottom->fc_link_speed); + + adapter->hydra_version = bottom->adapter_type; + switch (bottom->fc_topology) { case FSF_TOPO_P2P: adapter->peer_d_id = ntoh24(bottom->peer_d_id); @@ -517,8 +526,6 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) return -EIO; } - zfcp_scsi_set_prot(adapter); - return 0; } @@ -569,6 +576,8 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req) &adapter->status); zfcp_fsf_link_down_info_eval(req, &qtcb->header.fsf_status_qual.link_down_info); + if (zfcp_fsf_exchange_config_evaluate(req)) + return; break; default: zfcp_erp_adapter_shutdown(adapter, 0, "fsecdh3"); From 47c00d01fb222baaf8d248f855061da168b29c5d Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Sat, 2 Feb 2013 00:58:20 +0530 Subject: [PATCH 0158/1368] SCSI: mpt2sas: fix firmware failure with wrong task attribute commit 48ba2efc382f94fae16ca8ca011e5961a81ad1ea upstream. When SCSI command is received with task attribute not set, set it to SIMPLE. Previously it is set to untagged. This causes the firmware to fail the commands. Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index c6bdc9267229..a49159561669 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -3994,11 +3994,7 @@ _scsih_qcmd_lck(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *)) else mpi_control |= MPI2_SCSIIO_CONTROL_SIMPLEQ; } else -/* MPI Revision I (UNIT = 0xA) - removed MPI2_SCSIIO_CONTROL_UNTAGGED */ -/* mpi_control |= MPI2_SCSIIO_CONTROL_UNTAGGED; - */ - mpi_control |= (0x500); - + mpi_control |= MPI2_SCSIIO_CONTROL_SIMPLEQ; } else mpi_control |= MPI2_SCSIIO_CONTROL_SIMPLEQ; /* Make sure Device is not raid volume. From 0531603e3a0dbb7c36f2b37833add0e73b8c0c5f Mon Sep 17 00:00:00 2001 From: "Reddy, Sreekanth" Date: Tue, 26 Feb 2013 16:59:59 +0530 Subject: [PATCH 0159/1368] SCSI: mpt2sas: Fix for issue Missing delay not getting set during system bootup commit b0df96a0068daee4f9c2189c29b9053eb6e46b17 upstream. Missing delay is not getting set properly. The reason is that it is not defined in the same file from where it is being invoked. The fix is to move the missing delay module parameter from mpt2sas_base.c to mpt2sas_scsh.c. Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt2sas/mpt2sas_base.c | 13 +++---------- drivers/scsi/mpt2sas/mpt2sas_base.h | 3 +++ drivers/scsi/mpt2sas/mpt2sas_scsih.c | 8 +++++++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index bcb23d28b3e8..c76b18bbacb8 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -80,10 +80,6 @@ static int msix_disable = -1; module_param(msix_disable, int, 0); MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)"); -static int missing_delay[2] = {-1, -1}; -module_param_array(missing_delay, int, NULL, 0); -MODULE_PARM_DESC(missing_delay, " device missing delay , io missing delay"); - static int mpt2sas_fwfault_debug; MODULE_PARM_DESC(mpt2sas_fwfault_debug, " enable detection of firmware fault " "and halt firmware - (default=0)"); @@ -2199,7 +2195,7 @@ _base_display_ioc_capabilities(struct MPT2SAS_ADAPTER *ioc) } /** - * _base_update_missing_delay - change the missing delay timers + * mpt2sas_base_update_missing_delay - change the missing delay timers * @ioc: per adapter object * @device_missing_delay: amount of time till device is reported missing * @io_missing_delay: interval IO is returned when there is a missing device @@ -2210,8 +2206,8 @@ _base_display_ioc_capabilities(struct MPT2SAS_ADAPTER *ioc) * delay, as well as the io missing delay. This should be called at driver * load time. */ -static void -_base_update_missing_delay(struct MPT2SAS_ADAPTER *ioc, +void +mpt2sas_base_update_missing_delay(struct MPT2SAS_ADAPTER *ioc, u16 device_missing_delay, u8 io_missing_delay) { u16 dmd, dmd_new, dmd_orignal; @@ -4407,9 +4403,6 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc) if (r) goto out_free_resources; - if (missing_delay[0] != -1 && missing_delay[1] != -1) - _base_update_missing_delay(ioc, missing_delay[0], - missing_delay[1]); ioc->non_operational_loop = 0; return 0; diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 4caaac13682f..11301974628b 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -1055,6 +1055,9 @@ void mpt2sas_base_validate_event_type(struct MPT2SAS_ADAPTER *ioc, u32 *event_ty void mpt2sas_halt_firmware(struct MPT2SAS_ADAPTER *ioc); +void mpt2sas_base_update_missing_delay(struct MPT2SAS_ADAPTER *ioc, + u16 device_missing_delay, u8 io_missing_delay); + int mpt2sas_port_enable(struct MPT2SAS_ADAPTER *ioc); /* scsih shared API */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index a49159561669..8dbe500c935d 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -101,6 +101,10 @@ static ushort max_sectors = 0xFFFF; module_param(max_sectors, ushort, 0); MODULE_PARM_DESC(max_sectors, "max sectors, range 64 to 32767 default=32767"); +static int missing_delay[2] = {-1, -1}; +module_param_array(missing_delay, int, NULL, 0); +MODULE_PARM_DESC(missing_delay, " device missing delay , io missing delay"); + /* scsi-mid layer global parmeter is max_report_luns, which is 511 */ #define MPT2SAS_MAX_LUN (16895) static int max_lun = MPT2SAS_MAX_LUN; @@ -7299,7 +7303,9 @@ _firmware_event_work(struct work_struct *work) case MPT2SAS_PORT_ENABLE_COMPLETE: ioc->start_scan = 0; - + if (missing_delay[0] != -1 && missing_delay[1] != -1) + mpt2sas_base_update_missing_delay(ioc, missing_delay[0], + missing_delay[1]); dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "port enable: complete " "from worker thread\n", ioc->name)); From ebfc049ce8e32b5ff6bf6fec74775ac021769f37 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Tue, 2 Jul 2013 15:35:13 +0930 Subject: [PATCH 0160/1368] virtio_balloon: leak_balloon(): only tell host if we got pages deflated commit 8c6bab4f3874d31804a00782c48a8f244a0d3cc0 upstream. balloon_page_dequeue() can return NULL. If it does for the first page being freed then leak_balloon() will create a scatter list with len=0. Which in turn seems to generate an invalid virtio request. I didn't get this in practice, I found it by code review. On the other hand, such an invalid virtio request will cause errors in QEMU and fill_balloon() also performs the same check implemented by this commit. This bug was introduced in e2250429. Signed-off-by: Luiz Capitulino Acked-by: Rafael Aquini Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index bd3ae324a1a2..71af7b5abe01 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -191,7 +191,8 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); * is true, we *have* to do it in this order */ - tell_host(vb, vb->deflate_vq); + if (vb->num_pfns != 0) + tell_host(vb, vb->deflate_vq); mutex_unlock(&vb->balloon_lock); release_pages_by_pfn(vb->pfns, vb->num_pfns); } From b84f7b3f934ea3ff0240e866f2e73b34b42c2811 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 9 Jun 2013 18:53:58 +0200 Subject: [PATCH 0161/1368] b43: ensue that BCMA is "y" when B43 is "y" commit 693026ef2e751fd94d2e6c71028e68343cc875d5 upstream. When b43 gets build into the kernel and it should use bcma we have to ensure that bcma was also build into the kernel and not as a module. In this patch this is also done for SSB, although you can not build b43 without ssb support for now. This fixes a build problem reported by Randy Dunlap in 5187EB95.2060605@infradead.org Reported-By: Randy Dunlap Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/b43/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig index 078e6f3477a9..13f91ac9499e 100644 --- a/drivers/net/wireless/b43/Kconfig +++ b/drivers/net/wireless/b43/Kconfig @@ -28,7 +28,7 @@ config B43 config B43_BCMA bool "Support for BCMA bus" - depends on B43 && BCMA + depends on B43 && (BCMA = y || BCMA = B43) default y config B43_BCMA_EXTRA @@ -39,7 +39,7 @@ config B43_BCMA_EXTRA config B43_SSB bool - depends on B43 && SSB + depends on B43 && (SSB = y || SSB = B43) default y # Auto-select SSB PCI-HOST support, if possible From bcaef288b2a3187de17220e86e15b762b3b0808e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 May 2013 01:06:09 +0200 Subject: [PATCH 0162/1368] mac80211: close AP_VLAN interfaces before unregistering all commit 4c8a9d4bfaf7dbc7d2168494904d79d22cc01db7 upstream. Since Eric's commit efe117ab8 ("Speedup ieee80211_remove_interfaces") there's a bug in mac80211 when it unregisters with AP_VLAN interfaces up. If the AP_VLAN interface was registered after the AP it belongs to (which is the typical case) and then we get into this code path, unregister_netdevice_many() will crash because it isn't prepared to deal with interfaces being closed in the middle of it. Exactly this happens though, because we iterate the list, find the AP master this AP_VLAN belongs to and dev_close() the dependent VLANs. After this, unregister_netdevice_many() won't pick up the fact that the AP_VLAN is already down and will do it again, causing a crash. Signed-off-by: Johannes Berg Cc: Eric Dumazet Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 98d20c0f6fed..514e90f470bf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1717,6 +1717,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); + /* + * Close all AP_VLAN interfaces first, as otherwise they + * might be closed while the AP interface they belong to + * is closed, causing unregister_netdevice_many() to crash. + */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + dev_close(sdata->dev); + /* * Close all AP_VLAN interfaces first, as otherwise they * might be closed while the AP interface they belong to From 5df5df87f2b5f5d75825207d61a272b168d8cab6 Mon Sep 17 00:00:00 2001 From: "Bu, Yitian" Date: Mon, 18 Feb 2013 12:53:37 +0000 Subject: [PATCH 0163/1368] printk: Fix rq->lock vs logbuf_lock unlock lock inversion commit dbda92d16f8655044e082930e4e9d244b87fde77 upstream. commit 07354eb1a74d1 ("locking printk: Annotate logbuf_lock as raw") reintroduced a lock inversion problem which was fixed in commit 0b5e1c5255 ("printk: Release console_sem after logbuf_lock"). This happened probably when fixing up patch rejects. Restore the ordering and unlock logbuf_lock before releasing console_sem. Signed-off-by: ybu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/E807E903FE6CBE4D95E420FBFCC273B827413C@nasanexd01h.na.qualcomm.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk.c b/kernel/printk.c index 8212c1aef125..d37d45c90ae6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1369,9 +1369,9 @@ static int console_trylock_for_printk(unsigned int cpu) } } logbuf_cpu = UINT_MAX; + raw_spin_unlock(&logbuf_lock); if (wake) up(&console_sem); - raw_spin_unlock(&logbuf_lock); return retval; } From 3873153a61235a63517547e347db7ab90788486f Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 13 Jun 2013 14:21:51 +0800 Subject: [PATCH 0164/1368] uprobes: Fix return value in error handling path commit fa44063f9ef163c3a4c8d8c0465bb8a056b42035 upstream. When wrong argument is passed into uprobe_events it does not return an error: [root@jovi tracing]# echo 'p:myprobe /bin/bash' > uprobe_events [root@jovi tracing]# The proper response is: [root@jovi tracing]# echo 'p:myprobe /bin/bash' > uprobe_events -bash: echo: write error: Invalid argument Link: http://lkml.kernel.org/r/51B964FF.5000106@huawei.com Signed-off-by: zhangwei(Jovi) Cc: Frederic Weisbecker Cc: Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 32494fb0ee64..d5d0cd368a56 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv) return -EINVAL; } arg = strchr(argv[1], ':'); - if (!arg) + if (!arg) { + ret = -EINVAL; goto fail_address_parse; + } *arg++ = '\0'; filename = argv[1]; From c814208bb3542333629cc3c9e79666e94425899a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Jul 2013 13:44:45 -0400 Subject: [PATCH 0165/1368] svcrpc: fix failures to handle -1 uid's commit 0979292bfa301cb87d936b69af428090d2feea1b upstream. As of f025adf191924e3a75ce80e130afcd2485b53bb8 "sunrpc: Properly decode kuids and kgids in RPC_AUTH_UNIX credentials" any rpc containing a -1 (0xffff) uid or gid would fail with a badcred error. Commit afe3c3fd5392b2f0066930abc5dbd3f4b14a0f13 "svcrpc: fix failures to handle -1 uid's and gid's" fixed part of the problem, but overlooked the gid upcall--the kernel can request supplementary gid's for the -1 uid, but mountd's attempt write a response will get -EINVAL. Symptoms were nfsd failing to reply to the first attempt to use a newly negotiated krb5 context. Reported-by: Sven Geggus Tested-by: Sven Geggus Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcauth_unix.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 06bdf5a1082c..1583c8a4eb7f 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd, if (rv) return -EINVAL; uid = make_kuid(&init_user_ns, id); - if (!uid_valid(uid)) - return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); From 57370589c166cc8747f5e09365407d9dc04d7425 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 11:09:06 -0400 Subject: [PATCH 0166/1368] svcrpc: fix handling of too-short rpc's commit cf3aa02cb4a0c5af5557dd47f15a08a7df33182a upstream. If we detect that an rpc is too short, we abort and close the connection. Except, there's a bug here: we're leaving sk_datalen nonzero without leaving any pages in the sk_pages array. The most likely result of the inconsistency is a subsequent crash in svc_tcp_clear_pages. Also demote the BUG_ON in svc_tcp_clear_pages to a WARN. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0f679df7d072..df74919c81c0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { - BUG_ON(svsk->sk_pages[i] == NULL); + if (svsk->sk_pages[i] == NULL) { + WARN_ON_ONCE(1); + continue; + } put_page(svsk->sk_pages[i]); svsk->sk_pages[i] = NULL; } @@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) + if (svc_sock_reclen(svsk) < 8) { + svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ + } rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; From 369cf4be7588876d071c86d0efb7908e9bba7f6f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 10:55:40 -0400 Subject: [PATCH 0167/1368] svcrpc: don't error out on small tcp fragment commit 1f691b07c5dc51b2055834f58c0f351defd97f27 upstream. Though clients we care about mostly don't do this, it is possible for rpc requests to be sent in multiple fragments. Here we have a sanity check to ensure that the final received rpc isn't too small--except that the number we're actually checking is the length of just the final fragment, not of the whole rpc. So a perfectly legal rpc that's unluckily fragmented could cause the server to close the connection here. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index df74919c81c0..305374d4fb98 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1095,7 +1095,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) { + if (svsk->sk_datalen < 8) { svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ } From a5ed6be766e6ca8f83ef0afea688ef9c760916d3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 3 Jul 2013 16:01:10 +1000 Subject: [PATCH 0168/1368] of: Fix address decoding on Bimini and js2x machines commit 6dd18e4684f3d188277bbbc27545248487472108 upstream. Commit: e38c0a1fbc5803cbacdaac0557c70ac8ca5152e7 of/address: Handle #address-cells > 2 specially broke real time clock access on Bimini, js2x, and similar powerpc machines using the "maple" platform. That code was indirectly relying on the old (broken) behaviour of the translation for the hypertransport to ISA bridge. This fixes it by treating hypertransport as a PCI bus Signed-off-by: Benjamin Herrenschmidt Acked-by: Rob Herring Signed-off-by: Grant Likely Cc: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 04da786c84d2..7c8221d36329 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -106,8 +106,12 @@ static unsigned int of_bus_default_get_flags(const __be32 *addr) static int of_bus_pci_match(struct device_node *np) { - /* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */ - return !strcmp(np->type, "pci") || !strcmp(np->type, "vci"); + /* + * "vci" is for the /chaos bridge on 1st-gen PCI powermacs + * "ht" is hypertransport + */ + return !strcmp(np->type, "pci") || !strcmp(np->type, "vci") || + !strcmp(np->type, "ht"); } static void of_bus_pci_count_cells(struct device_node *np, From f8ee0e2e9db4d11d3b6593d9e2fba464caa0ff0c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 24 Jun 2013 21:33:28 +0200 Subject: [PATCH 0169/1368] drm/i915: Fix up sdvo hpd pins for i965g/gm commit 4f7fd7095d85cd31c86cb9ba87bc301319630ccc upstream. Bspec seems to be full of lies, at least it disagress with reality: Two systems corrobated that SDVO hpd bits are the same as on gen3. v2: Update comment a bit. Tested-by: Chris Wilson Cc: Arthur Ranyan Reported-and-tested-by: Alex Fiestas Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58405 Acked-by: Chris Wilson Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_irq.c | 13 ++----------- drivers/gpu/drm/i915/i915_reg.h | 13 ++++++------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0aa2ef0d2ae0..e5e328691e3a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -70,15 +70,6 @@ static const u32 hpd_status_gen4[] = { [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS }; -static const u32 hpd_status_i965[] = { - [HPD_CRT] = CRT_HOTPLUG_INT_STATUS, - [HPD_SDVO_B] = SDVOB_HOTPLUG_INT_STATUS_I965, - [HPD_SDVO_C] = SDVOC_HOTPLUG_INT_STATUS_I965, - [HPD_PORT_B] = PORTB_HOTPLUG_INT_STATUS, - [HPD_PORT_C] = PORTC_HOTPLUG_INT_STATUS, - [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS -}; - static const u32 hpd_status_i915[] = { /* i915 and valleyview are the same */ [HPD_CRT] = CRT_HOTPLUG_INT_STATUS, [HPD_SDVO_B] = SDVOB_HOTPLUG_INT_STATUS_I915, @@ -2952,13 +2943,13 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT); u32 hotplug_trigger = hotplug_status & (IS_G4X(dev) ? HOTPLUG_INT_STATUS_G4X : - HOTPLUG_INT_STATUS_I965); + HOTPLUG_INT_STATUS_I915); DRM_DEBUG_DRIVER("hotplug event received, stat 0x%08x\n", hotplug_status); if (hotplug_trigger) { if (hotplug_irq_storm_detect(dev, hotplug_trigger, - IS_G4X(dev) ? hpd_status_gen4 : hpd_status_i965)) + IS_G4X(dev) ? hpd_status_gen4 : hpd_status_i915)) i915_hpd_irq_setup(dev); queue_work(dev_priv->wq, &dev_priv->hotplug_work); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2d6b62e42daf..e4e0466aa685 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1691,6 +1691,12 @@ /* SDVO is different across gen3/4 */ #define SDVOC_HOTPLUG_INT_STATUS_G4X (1 << 3) #define SDVOB_HOTPLUG_INT_STATUS_G4X (1 << 2) +/* + * Bspec seems to be seriously misleaded about the SDVO hpd bits on i965g/gm, + * since reality corrobates that they're the same as on gen3. But keep these + * bits here (and the comment!) to help any other lost wanderers back onto the + * right tracks. + */ #define SDVOC_HOTPLUG_INT_STATUS_I965 (3 << 4) #define SDVOB_HOTPLUG_INT_STATUS_I965 (3 << 2) #define SDVOC_HOTPLUG_INT_STATUS_I915 (1 << 7) @@ -1702,13 +1708,6 @@ PORTC_HOTPLUG_INT_STATUS | \ PORTD_HOTPLUG_INT_STATUS) -#define HOTPLUG_INT_STATUS_I965 (CRT_HOTPLUG_INT_STATUS | \ - SDVOB_HOTPLUG_INT_STATUS_I965 | \ - SDVOC_HOTPLUG_INT_STATUS_I965 | \ - PORTB_HOTPLUG_INT_STATUS | \ - PORTC_HOTPLUG_INT_STATUS | \ - PORTD_HOTPLUG_INT_STATUS) - #define HOTPLUG_INT_STATUS_I915 (CRT_HOTPLUG_INT_STATUS | \ SDVOB_HOTPLUG_INT_STATUS_I915 | \ SDVOC_HOTPLUG_INT_STATUS_I915 | \ From 915bec00d92bdceefeff77ab67d32865900b1efe Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 25 Jun 2013 21:53:40 -0700 Subject: [PATCH 0170/1368] drm/i915: Fix context sizes on HSW commit a0de80a0e07032a111230ec92eca563f9d93648d upstream. With updates to the spec, we can actually see the context layout, and how many dwords are allocated. That table suggests we need 70720 bytes per HW context. Rounded up, this is 18 pages. Looking at what lives after the current 4 pages we use, I can't see too much important (mostly it's d3d related), but there are a couple of things which look scary. I am hopeful this can explain some of our odd HSW failures. v2: Make the context only 17 pages. The power context space isn't used ever, and execlists aren't used in our driver, making the actual total 66944 bytes. v3: Add a comment to the code. (Jesse & Paulo) Reported-by: "Azad, Vinit" Reviewed-by: Jesse Barnes Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a1e8ecb6adf6..3bc8a58a8d5f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -113,7 +113,7 @@ static int get_context_size(struct drm_device *dev) case 7: reg = I915_READ(GEN7_CXT_SIZE); if (IS_HASWELL(dev)) - ret = HSW_CXT_TOTAL_SIZE(reg) * 64; + ret = HSW_CXT_TOTAL_SIZE; else ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; break; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e4e0466aa685..80b0a6626a23 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1535,14 +1535,13 @@ GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_GT1_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) -#define HSW_CXT_POWER_SIZE(ctx_reg) ((ctx_reg >> 26) & 0x3f) -#define HSW_CXT_RING_SIZE(ctx_reg) ((ctx_reg >> 23) & 0x7) -#define HSW_CXT_RENDER_SIZE(ctx_reg) ((ctx_reg >> 15) & 0xff) -#define HSW_CXT_TOTAL_SIZE(ctx_reg) (HSW_CXT_POWER_SIZE(ctx_reg) + \ - HSW_CXT_RING_SIZE(ctx_reg) + \ - HSW_CXT_RENDER_SIZE(ctx_reg) + \ - GEN7_CXT_VFSTATE_SIZE(ctx_reg)) - +/* Haswell does have the CXT_SIZE register however it does not appear to be + * valid. Now, docs explain in dwords what is in the context object. The full + * size is 70720 bytes, however, the power context and execlist context will + * never be saved (power context is stored elsewhere, and execlists don't work + * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages. + */ +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) /* * Overlay regs From e69a7ee45662a1b1d698c5d758613ffbe77fd2a9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Jun 2013 16:54:08 +0100 Subject: [PATCH 0171/1368] drm/i915: Only clear write-domains after a successful wait-seqno commit daa13e1ca587bc773c1aae415ed1af6554117bd4 upstream. In the introduction of the non-blocking wait, I cut'n'pasted the wait completion code from normal locked path. Unfortunately, this neglected that the normal path returned early if the wait returned early. The result is that read-only waits may return whilst the GPU is still writing to the bo. Fixes regression from commit 3236f57a0162391f84b93f39fc1882c49a8998c7 [v3.7] Author: Chris Wilson Date: Fri Aug 24 09:35:09 2012 +0100 drm/i915: Use a non-blocking wait for set-to-domain ioctl Signed-off-by: Chris Wilson Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66163 Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9e35dafc5807..34118b0c02d1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1160,7 +1160,8 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, /* Manually manage the write flush as we may have not yet * retired the buffer. */ - if (obj->last_write_seqno && + if (ret == 0 && + obj->last_write_seqno && i915_seqno_passed(seqno, obj->last_write_seqno)) { obj->last_write_seqno = 0; obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; From 657c9009705f8d22faefdecac258312c5621ea51 Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Thu, 27 Jun 2013 08:58:33 +0900 Subject: [PATCH 0172/1368] drm/gem: fix not to assign error value to gem name commit 2e07fb229396f99fc173d8612f0f83ea9de0341b upstream. If idr_alloc() is failed, obj->name can be error value. Also it cleans up duplicated flink processing code. This regression has been introduced in commit 2e928815c1886fe628ed54623aa98d0889cf5509 Author: Tejun Heo Date: Wed Feb 27 17:04:08 2013 -0800 drm: convert to idr_alloc() Signed-off-by: YoungJun Cho Signed-off-by: Seung-Woo Kim Signed-off-by: Kyungmin Park Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_gem.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index cf919e36e8ae..239ef30f4a62 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -453,25 +453,21 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data, spin_lock(&dev->object_name_lock); if (!obj->name) { ret = idr_alloc(&dev->object_name_idr, obj, 1, 0, GFP_NOWAIT); - obj->name = ret; - args->name = (uint64_t) obj->name; - spin_unlock(&dev->object_name_lock); - idr_preload_end(); - if (ret < 0) goto err; - ret = 0; + + obj->name = ret; /* Allocate a reference for the name table. */ drm_gem_object_reference(obj); - } else { - args->name = (uint64_t) obj->name; - spin_unlock(&dev->object_name_lock); - idr_preload_end(); - ret = 0; } + args->name = (uint64_t) obj->name; + ret = 0; + err: + spin_unlock(&dev->object_name_lock); + idr_preload_end(); drm_gem_object_unreference_unlocked(obj); return ret; } From cd2c367cf10eab382c53bb9f39e01726c6f10253 Mon Sep 17 00:00:00 2001 From: Julia Lemire Date: Thu, 27 Jun 2013 13:38:59 -0400 Subject: [PATCH 0173/1368] drm/mgag200: Added resolution and bandwidth limits for various G200e products. commit abbee6238775c6633a3779962e9e5b5cb9823749 upstream. At the larger resolutions, the g200e series sometimes struggles with maintaining a proper output. Problems like flickering or black bands appearing on screen can occur. In order to avoid this, limitations regarding resolutions and bandwidth have been added for the different variations of the g200e series. This code was ported from the old xorg mga driver. Signed-off-by: Julia Lemire Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/mgag200/mgag200_drv.h | 3 +- drivers/gpu/drm/mgag200/mgag200_main.c | 2 +- drivers/gpu/drm/mgag200/mgag200_mode.c | 70 ++++++++++++++++++++++++-- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index bf29b2f4d68d..988911afcc8b 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -198,7 +198,8 @@ struct mga_device { struct ttm_bo_device bdev; } ttm; - u32 reg_1e24; /* SE model number */ + /* SE model number stored in reg 0x1e24 */ + u32 unique_rev_id; }; diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c index 99059237da38..dafe049fb1ae 100644 --- a/drivers/gpu/drm/mgag200/mgag200_main.c +++ b/drivers/gpu/drm/mgag200/mgag200_main.c @@ -176,7 +176,7 @@ static int mgag200_device_init(struct drm_device *dev, /* stash G200 SE model number for later use */ if (IS_G200_SE(mdev)) - mdev->reg_1e24 = RREG32(0x1e24); + mdev->unique_rev_id = RREG32(0x1e24); ret = mga_vram_init(mdev); if (ret) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index ee66badc8bb6..99e07b688ea8 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1008,7 +1008,7 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, if (IS_G200_SE(mdev)) { - if (mdev->reg_1e24 >= 0x02) { + if (mdev->unique_rev_id >= 0x02) { u8 hi_pri_lvl; u32 bpp; u32 mb; @@ -1038,7 +1038,7 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, WREG8(MGAREG_CRTCEXT_DATA, hi_pri_lvl); } else { WREG8(MGAREG_CRTCEXT_INDEX, 0x06); - if (mdev->reg_1e24 >= 0x01) + if (mdev->unique_rev_id >= 0x01) WREG8(MGAREG_CRTCEXT_DATA, 0x03); else WREG8(MGAREG_CRTCEXT_DATA, 0x04); @@ -1410,6 +1410,32 @@ static int mga_vga_get_modes(struct drm_connector *connector) return ret; } +static uint32_t mga_vga_calculate_mode_bandwidth(struct drm_display_mode *mode, + int bits_per_pixel) +{ + uint32_t total_area, divisor; + int64_t active_area, pixels_per_second, bandwidth; + uint64_t bytes_per_pixel = (bits_per_pixel + 7) / 8; + + divisor = 1024; + + if (!mode->htotal || !mode->vtotal || !mode->clock) + return 0; + + active_area = mode->hdisplay * mode->vdisplay; + total_area = mode->htotal * mode->vtotal; + + pixels_per_second = active_area * mode->clock * 1000; + do_div(pixels_per_second, total_area); + + bandwidth = pixels_per_second * bytes_per_pixel * 100; + do_div(bandwidth, divisor); + + return (uint32_t)(bandwidth); +} + +#define MODE_BANDWIDTH MODE_BAD + static int mga_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { @@ -1421,7 +1447,45 @@ static int mga_vga_mode_valid(struct drm_connector *connector, int bpp = 32; int i = 0; - /* FIXME: Add bandwidth and g200se limitations */ + if (IS_G200_SE(mdev)) { + if (mdev->unique_rev_id == 0x01) { + if (mode->hdisplay > 1600) + return MODE_VIRTUAL_X; + if (mode->vdisplay > 1200) + return MODE_VIRTUAL_Y; + if (mga_vga_calculate_mode_bandwidth(mode, bpp) + > (24400 * 1024)) + return MODE_BANDWIDTH; + } else if (mdev->unique_rev_id >= 0x02) { + if (mode->hdisplay > 1920) + return MODE_VIRTUAL_X; + if (mode->vdisplay > 1200) + return MODE_VIRTUAL_Y; + if (mga_vga_calculate_mode_bandwidth(mode, bpp) + > (30100 * 1024)) + return MODE_BANDWIDTH; + } + } else if (mdev->type == G200_WB) { + if (mode->hdisplay > 1280) + return MODE_VIRTUAL_X; + if (mode->vdisplay > 1024) + return MODE_VIRTUAL_Y; + if (mga_vga_calculate_mode_bandwidth(mode, + bpp > (31877 * 1024))) + return MODE_BANDWIDTH; + } else if (mdev->type == G200_EV && + (mga_vga_calculate_mode_bandwidth(mode, bpp) + > (32700 * 1024))) { + return MODE_BANDWIDTH; + } else if (mode->type == G200_EH && + (mga_vga_calculate_mode_bandwidth(mode, bpp) + > (37500 * 1024))) { + return MODE_BANDWIDTH; + } else if (mode->type == G200_ER && + (mga_vga_calculate_mode_bandwidth(mode, + bpp) > (55000 * 1024))) { + return MODE_BANDWIDTH; + } if (mode->crtc_hdisplay > 2048 || mode->crtc_hsync_start > 4096 || mode->crtc_hsync_end > 4096 || mode->crtc_htotal > 4096 || From ef0cfe2f8ee92be33f891c238d310abfaca7e0e3 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 11 Jun 2013 10:50:30 +0200 Subject: [PATCH 0174/1368] drm/nouveau: use vmalloc for pgt allocation commit d005f51eb93d71cd40ebd11dd377453fa8c8a42a upstream. Page tables on nv50 take 48kB, which can be hard to allocate in one piece. Let's use vmalloc. Signed-off-by: Marcin Slusarz Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/core/subdev/vm/base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c index 77c67fc970e6..e66fb77131bc 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c @@ -362,7 +362,7 @@ nouveau_vm_create(struct nouveau_vmmgr *vmm, u64 offset, u64 length, vm->fpde = offset >> (vmm->pgt_bits + 12); vm->lpde = (offset + length - 1) >> (vmm->pgt_bits + 12); - vm->pgt = kcalloc(vm->lpde - vm->fpde + 1, sizeof(*vm->pgt), GFP_KERNEL); + vm->pgt = vzalloc((vm->lpde - vm->fpde + 1) * sizeof(*vm->pgt)); if (!vm->pgt) { kfree(vm); return -ENOMEM; @@ -371,7 +371,7 @@ nouveau_vm_create(struct nouveau_vmmgr *vmm, u64 offset, u64 length, ret = nouveau_mm_init(&vm->mm, mm_offset >> 12, mm_length >> 12, block >> 12); if (ret) { - kfree(vm->pgt); + vfree(vm->pgt); kfree(vm); return ret; } @@ -446,7 +446,7 @@ nouveau_vm_del(struct nouveau_vm *vm) } nouveau_mm_fini(&vm->mm); - kfree(vm->pgt); + vfree(vm->pgt); kfree(vm); } From 9f7fbcd61507b130afb4ec6418ce8513295d4df4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Jun 2013 10:41:03 -0400 Subject: [PATCH 0175/1368] drm/radeon: fix AVI infoframe generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f100380ecd8287b0909d3c5694784adc46e78a4a upstream. - remove adding 2 to checksum, this is incorrect. This was incorrectly introduced in: 92db7f6c860b8190571a9dc1fcbc16d003422fe8 http://lists.freedesktop.org/archives/dri-devel/2011-December/017717.html However, the off by 2 was due to adding the version twice. From the examples in the URL above: [Rafał Miłecki][RV620] fglrx: 0x7454: 00 A8 5E 79 R600_HDMI_VIDEOINFOFRAME_0 0x7458: 00 28 00 10 R600_HDMI_VIDEOINFOFRAME_1 0x745C: 00 48 00 28 R600_HDMI_VIDEOINFOFRAME_2 0x7460: 02 00 00 48 R600_HDMI_VIDEOINFOFRAME_3 =================== (0x82 + 0x2 + 0xD) + 0x1F8 = 0x289 -0x289 = 0x77 However, the payload sum is not 0x1f8, it's 0x1f6. 00 + A8 + 5E + 00 + 00 + 28 + 00 + 10 + 00 + 48 + 00 + 28 + 00 + 48 = 0x1f6 Bits 25:24 of HDMI_VIDEOINFOFRAME_3 are the packet version, not part of the payload. So the total would be: (0x82 + 0x2 + 0xD) + 0x1f6 = 0x287 -0x287 = 0x79 - properly emit the AVI infoframe version. This was not being emitted previous which is probably what caused the issue above. This should fix blank screen when HDMI audio is enabled on certain monitors. Signed-off-by: Alex Deucher Cc: Rafał Miłecki Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen_hdmi.c | 11 ++--------- drivers/gpu/drm/radeon/r600_hdmi.c | 11 ++--------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index ed7c8a768092..b9c6f7675e59 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -128,14 +128,7 @@ static void evergreen_hdmi_update_avi_infoframe(struct drm_encoder *encoder, struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; uint32_t offset = dig->afmt->offset; uint8_t *frame = buffer + 3; - - /* Our header values (type, version, length) should be alright, Intel - * is using the same. Checksum function also seems to be OK, it works - * fine for audio infoframe. However calculated value is always lower - * by 2 in comparison to fglrx. It breaks displaying anything in case - * of TVs that strictly check the checksum. Hack it manually here to - * workaround this issue. */ - frame[0x0] += 2; + uint8_t *header = buffer; WREG32(AFMT_AVI_INFO0 + offset, frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24)); @@ -144,7 +137,7 @@ static void evergreen_hdmi_update_avi_infoframe(struct drm_encoder *encoder, WREG32(AFMT_AVI_INFO2 + offset, frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24)); WREG32(AFMT_AVI_INFO3 + offset, - frame[0xC] | (frame[0xD] << 8)); + frame[0xC] | (frame[0xD] << 8) | (header[1] << 24)); } static void evergreen_audio_set_dto(struct drm_encoder *encoder, u32 clock) diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 456750a0daa5..e73b2a73494a 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -133,14 +133,7 @@ static void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder, struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; uint32_t offset = dig->afmt->offset; uint8_t *frame = buffer + 3; - - /* Our header values (type, version, length) should be alright, Intel - * is using the same. Checksum function also seems to be OK, it works - * fine for audio infoframe. However calculated value is always lower - * by 2 in comparison to fglrx. It breaks displaying anything in case - * of TVs that strictly check the checksum. Hack it manually here to - * workaround this issue. */ - frame[0x0] += 2; + uint8_t *header = buffer; WREG32(HDMI0_AVI_INFO0 + offset, frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24)); @@ -149,7 +142,7 @@ static void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder, WREG32(HDMI0_AVI_INFO2 + offset, frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24)); WREG32(HDMI0_AVI_INFO3 + offset, - frame[0xC] | (frame[0xD] << 8)); + frame[0xC] | (frame[0xD] << 8) | (header[1] << 24)); } /* From 5c49b1cfa41e42003b481326af66806e5f1a46b9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Jun 2013 09:57:07 -0400 Subject: [PATCH 0176/1368] drm/radeon: add backlight quirk for hybrid mac commit 80101790670385a85aca35ecae4b89e3f2fceecc upstream. Mac laptops with multiple GPUs apparently use the gmux driver for backlight control. Don't register a radeon backlight interface. We may need to add other pci ids for other hybrid mac laptops. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=65377 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 8406c8251fbf..4120d355cadd 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -186,6 +186,13 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, u8 backlight_level; char bl_name[16]; + /* Mac laptops with multiple GPUs use the gmux driver for backlight + * so don't register a backlight device + */ + if ((rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) && + (rdev->pdev->device == 0x6741)) + return; + if (!radeon_encoder->enc_priv) return; From 3913338881a1e4898176da0008c3df80aabdc3dd Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 3 Jul 2013 03:06:02 -0400 Subject: [PATCH 0177/1368] drm/nva3/disp: Fix HDMI audio regression commit bf03d1b293cc556df53545e318110505014d805e upstream. This is the nva3 counterpart to commit beba44b17 (drm/nv84/disp: Fix HDMI audio regression). The regression happened as a result of refactoring in commit 8e9e3d2de (drm/nv84/disp: move hdmi control into core). Reported-and-tested-by: Max Baldwin Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c index f065fc248adf..db8c6fd46278 100644 --- a/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c +++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdminva3.c @@ -55,6 +55,10 @@ nva3_hdmi_ctrl(struct nv50_disp_priv *priv, int head, int or, u32 data) nv_wr32(priv, 0x61c510 + soff, 0x00000000); nv_mask(priv, 0x61c500 + soff, 0x00000001, 0x00000001); + nv_mask(priv, 0x61c5d0 + soff, 0x00070001, 0x00010001); /* SPARE, HW_CTS */ + nv_mask(priv, 0x61c568 + soff, 0x00010101, 0x00000000); /* ACR_CTRL, ?? */ + nv_mask(priv, 0x61c578 + soff, 0x80000000, 0x80000000); /* ACR_0441_ENABLE */ + /* ??? */ nv_mask(priv, 0x61733c, 0x00100000, 0x00100000); /* RESETF */ nv_mask(priv, 0x61733c, 0x10000000, 0x10000000); /* LOOKUP_EN */ From 64dd8fc12c34e39e82b77109bb5b328ad5b24268 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 2 Jul 2013 14:44:12 +0100 Subject: [PATCH 0178/1368] drm/nv50-/disp: Use output specific mask in interrupt commit 378f2bcdf7c971453d11580936dc0ffe845f5880 upstream. The commit commit 476e84e126171d809f9c0b5d97137f5055f95ca8 Author: Ben Skeggs Date: Mon Feb 11 09:24:23 2013 +1000 drm/nv50-/disp: initial supervisor support for off-chip encoders changed the write mask in one of the interrupt functions for on-chip encoders, causing a regression in certain VGA dual-head setups. This commit reintroduces the mask thus resolving the regression Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66129 Reported-and-Tested-by: Yves-Alexis CC: Ben Skeggs Signed-off-by: Emil Velikov Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/core/engine/disp/nv50.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c index 6a38402fa56c..5680d3eb11ca 100644 --- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c @@ -1107,6 +1107,7 @@ nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head) u32 pclk = nv_rd32(priv, 0x610ad0 + (head * 0x540)) & 0x3fffff; u32 hval, hreg = 0x614200 + (head * 0x800); u32 oval, oreg; + u32 mask; u32 conf = exec_clkcmp(priv, head, 0xff, pclk, &outp); if (conf != ~0) { if (outp.location == 0 && outp.type == DCB_OUTPUT_DP) { @@ -1133,6 +1134,7 @@ nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head) oreg = 0x614280 + (ffs(outp.or) - 1) * 0x800; oval = 0x00000000; hval = 0x00000000; + mask = 0xffffffff; } else if (!outp.location) { if (outp.type == DCB_OUTPUT_DP) @@ -1140,14 +1142,16 @@ nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head) oreg = 0x614300 + (ffs(outp.or) - 1) * 0x800; oval = (conf & 0x0100) ? 0x00000101 : 0x00000000; hval = 0x00000000; + mask = 0x00000707; } else { oreg = 0x614380 + (ffs(outp.or) - 1) * 0x800; oval = 0x00000001; hval = 0x00000001; + mask = 0x00000707; } nv_mask(priv, hreg, 0x0000000f, hval); - nv_mask(priv, oreg, 0x00000707, oval); + nv_mask(priv, oreg, mask, oval); } } From ed2f079599f3d9e5e27a516771d3d53afa7e7773 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Jun 2013 14:33:19 -0600 Subject: [PATCH 0179/1368] iommu/amd: Only unmap large pages from the first pte commit 60d0ca3cfd199b6612bbbbf4999a3470dad38bb1 upstream. If we use a large mapping, the expectation is that only unmaps from the first pte in the superpage are supported. Unmaps from offsets into the superpage should fail (ie. return zero sized unmap). In the current code, unmapping from an offset clears the size of the full mapping starting from an offset. For instance, if we map a 16k physically contiguous range at IOVA 0x0 with a large page, then attempt to unmap 4k at offset 12k, 4 ptes are cleared (12k - 28k) and the unmap returns 16k unmapped. This potentially incorrectly clears valid mappings and confuses drivers like VFIO that use the unmap size to release pinned pages. Fix by refusing to unmap from offsets into the page. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 21d02b0d907c..a3c338942f10 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1484,6 +1484,10 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, /* Large PTE found which maps this address */ unmap_size = PTE_PAGE_SIZE(*pte); + + /* Only unmap from the first pte in the page */ + if ((unmap_size - 1) & bus_addr) + break; count = PAGE_SIZE_PTE_COUNT(unmap_size); for (i = 0; i < count; i++) pte[i] = 0ULL; @@ -1493,7 +1497,7 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped += unmap_size; } - BUG_ON(!is_power_of_2(unmapped)); + BUG_ON(unmapped && !is_power_of_2(unmapped)); return unmapped; } From 3ea8ad44cea35afc010942b448472fc993f24722 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 9 Jun 2013 04:52:11 +0400 Subject: [PATCH 0180/1368] xtensa: adjust boot parameters address when INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is selected commit c5a771d0678f9613e9f89cf1a5bdcfa5b08b225b upstream. The virtual address of boot parameters chain is passed to the kernel via a2 register. Adjust it in case it is remapped during MMUv3 -> MMUv2 mapping change, i.e. when it is in the first 128M. Also fix interpretation of initrd and FDT addresses passed in the boot parameters: these are physical addresses. Reported-by: Baruch Siach Signed-off-by: Max Filippov Signed-off-by: Chris Zankel Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/head.S | 9 +++++++++ arch/xtensa/kernel/setup.c | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index ef12c0e6fa25..7d740ebbe198 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -68,6 +68,15 @@ _SetupMMU: #ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX initialize_mmu +#if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY + rsr a2, excsave1 + movi a3, 0x08000000 + bgeu a2, a3, 1f + movi a3, 0xd0000000 + add a2, a2, a3 + wsr a2, excsave1 +1: +#endif #endif .end no-absolute-literals diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 6dd25ecde3f5..14c6c3a6f04b 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -152,8 +152,8 @@ static int __init parse_tag_initrd(const bp_tag_t* tag) { meminfo_t* mi; mi = (meminfo_t*)(tag->data); - initrd_start = (void*)(mi->start); - initrd_end = (void*)(mi->end); + initrd_start = __va(mi->start); + initrd_end = __va(mi->end); return 0; } @@ -164,7 +164,7 @@ __tagtable(BP_TAG_INITRD, parse_tag_initrd); static int __init parse_tag_fdt(const bp_tag_t *tag) { - dtb_start = (void *)(tag->data[0]); + dtb_start = __va(tag->data[0]); return 0; } From d8724a91bc631690af241588936971214e55927e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Jul 2013 17:40:13 +0200 Subject: [PATCH 0181/1368] thermal: cpu_cooling: fix stub function commit e8d39240d635ed9bcaddbec898b1c9f063c5dbb2 upstream. The function stub for cpufreq_cooling_get_level introduced in 57df81069 "Thermal: exynos: fix cooling state translation" is not syntactically correct C and needs to be fixed to avoid this error: In file included from drivers/thermal/db8500_thermal.c:20:0: include/linux/cpu_cooling.h: In function 'cpufreq_cooling_get_level': include/linux/cpu_cooling.h:57:1: error: parameter name omitted unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int) ^ include/linux/cpu_cooling.h:57:1: error: parameter name omitted Signed-off-by: Arnd Bergmann Acked-by: Eduardo Valentin Cc: Zhang Rui Cc: Amit Daniel kachhap Signed-off-by: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu_cooling.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 282e27028418..a5d52eea8232 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -41,7 +41,7 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus); */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); -unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int); +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus) @@ -54,7 +54,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) return; } static inline -unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int) +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) { return THERMAL_CSTATE_INVALID; } From d6d10b1dceba6ed2b3d74207b2a454c2f4427785 Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 12 Jun 2013 17:28:33 +0000 Subject: [PATCH 0182/1368] MIPS: Octeon: Don't clobber bootloader data structures. commit d949b4fe6d23dd92b5fa48cbf7af90ca32beed2e upstream. Commit abe77f90dc (MIPS: Octeon: Add kexec and kdump support) added a bootmem region for the kernel image itself. The problem is that this is rounded up to a 0x100000 boundary, which is memory that may not be owned by the kernel. Depending on the kernel's configuration based size, this 'extra' memory may contain data passed from the bootloader to the kernel itself, which if clobbered makes the kernel crash in various ways. The fix: Quit rounding the size up, so that we only use memory assigned to the kernel. Signed-off-by: David Daney Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5449/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 01b1b3f94feb..1e1e18c5a534 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -996,7 +996,7 @@ void __init plat_mem_setup(void) cvmx_bootmem_unlock(); /* Add the memory region for the kernel. */ kernel_start = (unsigned long) _text; - kernel_size = ALIGN(_end - _text, 0x100000); + kernel_size = _end - _text; /* Adjust for physical offset. */ kernel_start &= ~0xffffffff80000000ULL; From df3b055c92883b04459c54888b4bbb169ccb1b62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 18:02:38 +0200 Subject: [PATCH 0183/1368] staging: line6: Fix unlocked snd_pcm_stop() call commit 86f0b5b86d142b9323432fef078a6cf0fb5dda74 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- drivers/staging/line6/pcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/line6/pcm.c b/drivers/staging/line6/pcm.c index 02f77d74809f..a7856bad3cc6 100644 --- a/drivers/staging/line6/pcm.c +++ b/drivers/staging/line6/pcm.c @@ -385,8 +385,11 @@ static int snd_line6_pcm_free(struct snd_device *device) */ static void pcm_disconnect_substream(struct snd_pcm_substream *substream) { - if (substream->runtime && snd_pcm_running(substream)) + if (substream->runtime && snd_pcm_running(substream)) { + snd_pcm_stream_lock_irq(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED); + snd_pcm_stream_unlock_irq(substream); + } } /* From f38bac3d6d1fc1a726e4381f0423dcd85885b3d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jul 2013 17:44:10 +0200 Subject: [PATCH 0184/1368] perf: Clone child context from parent context pmu commit 734df5ab549ca44f40de0f07af1c8803856dfb18 upstream. Currently when the child context for inherited events is created, it's based on the pmu object of the first event of the parent context. This is wrong for the following scenario: - HW context having HW and SW event - HW event got removed (closed) - SW event stays in HW context as the only event and its pmu is used to clone the child context The issue starts when the cpu context object is touched based on the pmu context object (__get_cpu_context). In this case the HW context will work with SW cpu context ending up with following WARN below. Fixing this by using parent context pmu object to clone from child context. Addresses the following warning reported by Vince Weaver: [ 2716.472065] ------------[ cut here ]------------ [ 2716.476035] WARNING: at kernel/events/core.c:2122 task_ctx_sched_out+0x3c/0x) [ 2716.476035] Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs locn [ 2716.476035] CPU: 0 PID: 3164 Comm: perf_fuzzer Not tainted 3.10.0-rc4 #2 [ 2716.476035] Hardware name: AOpen DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, BI2 [ 2716.476035] 0000000000000000 ffffffff8102e215 0000000000000000 ffff88011fc18 [ 2716.476035] ffff8801175557f0 0000000000000000 ffff880119fda88c ffffffff810ad [ 2716.476035] ffff880119fda880 ffffffff810af02a 0000000000000009 ffff880117550 [ 2716.476035] Call Trace: [ 2716.476035] [] ? warn_slowpath_common+0x5b/0x70 [ 2716.476035] [] ? task_ctx_sched_out+0x3c/0x5f [ 2716.476035] [] ? perf_event_exit_task+0xbf/0x194 [ 2716.476035] [] ? do_exit+0x3e7/0x90c [ 2716.476035] [] ? __do_fault+0x359/0x394 [ 2716.476035] [] ? do_group_exit+0x66/0x98 [ 2716.476035] [] ? get_signal_to_deliver+0x479/0x4ad [ 2716.476035] [] ? __perf_event_task_sched_out+0x230/0x2d1 [ 2716.476035] [] ? do_signal+0x3c/0x432 [ 2716.476035] [] ? ctx_sched_in+0x43/0x141 [ 2716.476035] [] ? perf_event_context_sched_in+0x7a/0x90 [ 2716.476035] [] ? __perf_event_task_sched_in+0x31/0x118 [ 2716.476035] [] ? mmdrop+0xd/0x1c [ 2716.476035] [] ? finish_task_switch+0x7d/0xa6 [ 2716.476035] [] ? do_notify_resume+0x20/0x5d [ 2716.476035] [] ? retint_signal+0x3d/0x78 [ 2716.476035] ---[ end trace 827178d8a5966c3d ]--- Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1373384651-6109-1-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b391907d5352..01d4fe68e57b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7228,7 +7228,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * child. */ - child_ctx = alloc_perf_context(event->pmu, child); + child_ctx = alloc_perf_context(parent_ctx->pmu, child); if (!child_ctx) return -ENOMEM; From b2412679ab3e923437e2ee109560c151c9b0cedc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jul 2013 17:44:11 +0200 Subject: [PATCH 0185/1368] perf: Remove WARN_ON_ONCE() check in __perf_event_enable() for valid scenario commit 06f417968beac6e6b614e17b37d347aa6a6b1d30 upstream. The '!ctx->is_active' check has a valid scenario, so there's no need for the warning. The reason is that there's a time window between the 'ctx->is_active' check in the perf_event_enable() function and the __perf_event_enable() function having: - IRQs on - ctx->lock unlocked where the task could be killed and 'ctx' deactivated by perf_event_exit_task(), ending up with the warning below. So remove the WARN_ON_ONCE() check and add comments to explain it all. This addresses the following warning reported by Vince Weaver: [ 324.983534] ------------[ cut here ]------------ [ 324.984420] WARNING: at kernel/events/core.c:1953 __perf_event_enable+0x187/0x190() [ 324.984420] Modules linked in: [ 324.984420] CPU: 19 PID: 2715 Comm: nmi_bug_snb Not tainted 3.10.0+ #246 [ 324.984420] Hardware name: Supermicro X8DTN/X8DTN, BIOS 4.6.3 01/08/2010 [ 324.984420] 0000000000000009 ffff88043fce3ec8 ffffffff8160ea0b ffff88043fce3f00 [ 324.984420] ffffffff81080ff0 ffff8802314fdc00 ffff880231a8f800 ffff88043fcf7860 [ 324.984420] 0000000000000286 ffff880231a8f800 ffff88043fce3f10 ffffffff8108103a [ 324.984420] Call Trace: [ 324.984420] [] dump_stack+0x19/0x1b [ 324.984420] [] warn_slowpath_common+0x70/0xa0 [ 324.984420] [] warn_slowpath_null+0x1a/0x20 [ 324.984420] [] __perf_event_enable+0x187/0x190 [ 324.984420] [] remote_function+0x40/0x50 [ 324.984420] [] generic_smp_call_function_single_interrupt+0xbe/0x130 [ 324.984420] [] smp_call_function_single_interrupt+0x27/0x40 [ 324.984420] [] call_function_single_interrupt+0x6f/0x80 [ 324.984420] [] ? _raw_spin_unlock_irqrestore+0x41/0x70 [ 324.984420] [] perf_event_exit_task+0x14d/0x210 [ 324.984420] [] ? switch_task_namespaces+0x24/0x60 [ 324.984420] [] do_exit+0x2b6/0xa40 [ 324.984420] [] ? _raw_spin_unlock_irq+0x2c/0x30 [ 324.984420] [] do_group_exit+0x49/0xc0 [ 324.984420] [] get_signal_to_deliver+0x254/0x620 [ 324.984420] [] do_signal+0x57/0x5a0 [ 324.984420] [] ? __do_page_fault+0x2a4/0x4e0 [ 324.984420] [] ? retint_restore_args+0xe/0xe [ 324.984420] [] ? retint_signal+0x11/0x84 [ 324.984420] [] do_notify_resume+0x65/0x80 [ 324.984420] [] retint_signal+0x46/0x84 [ 324.984420] ---[ end trace 442ec2f04db3771a ]--- Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Suggested-by: Peter Zijlstra Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1373384651-6109-2-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 01d4fe68e57b..cb8744a1b120 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1761,7 +1761,16 @@ static int __perf_event_enable(void *info) struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; - if (WARN_ON_ONCE(!ctx->is_active)) + /* + * There's a time window between 'ctx->is_active' check + * in perf_event_enable function and this place having: + * - IRQs on + * - ctx->lock unlocked + * + * where the task could be killed and 'ctx' deactivated + * by perf_event_exit_task. + */ + if (!ctx->is_active) return -EINVAL; raw_spin_lock(&ctx->lock); From 65e303d786e20460c3d67d362f989f59944fb744 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Jul 2013 11:08:33 +0200 Subject: [PATCH 0186/1368] perf: Fix perf_lock_task_context() vs RCU commit 058ebd0eba3aff16b144eabf4510ed9510e1416e upstream. Jiri managed to trigger this warning: [] ====================================================== [] [ INFO: possible circular locking dependency detected ] [] 3.10.0+ #228 Tainted: G W [] ------------------------------------------------------- [] p/6613 is trying to acquire lock: [] (rcu_node_0){..-...}, at: [] rcu_read_unlock_special+0xa7/0x250 [] [] but task is already holding lock: [] (&ctx->lock){-.-...}, at: [] perf_lock_task_context+0xd9/0x2c0 [] [] which lock already depends on the new lock. [] [] the existing dependency chain (in reverse order) is: [] [] -> #4 (&ctx->lock){-.-...}: [] -> #3 (&rq->lock){-.-.-.}: [] -> #2 (&p->pi_lock){-.-.-.}: [] -> #1 (&rnp->nocb_gp_wq[1]){......}: [] -> #0 (rcu_node_0){..-...}: Paul was quick to explain that due to preemptible RCU we cannot call rcu_read_unlock() while holding scheduler (or nested) locks when part of the read side critical section was preemptible. Therefore solve it by making the entire RCU read side non-preemptible. Also pull out the retry from under the non-preempt to play nice with RT. Reported-by: Jiri Olsa Helped-out-by: Paul E. McKenney Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index cb8744a1b120..e76e4959908c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -761,8 +761,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; - rcu_read_lock(); retry: + /* + * One of the few rules of preemptible RCU is that one cannot do + * rcu_read_unlock() while holding a scheduler (or nested) lock when + * part of the read side critical section was preemptible -- see + * rcu_read_unlock_special(). + * + * Since ctx->lock nests under rq->lock we must ensure the entire read + * side critical section is non-preemptible. + */ + preempt_disable(); + rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* @@ -778,6 +788,8 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) raw_spin_lock_irqsave(&ctx->lock, *flags); if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); + rcu_read_unlock(); + preempt_enable(); goto retry; } @@ -787,6 +799,7 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) } } rcu_read_unlock(); + preempt_enable(); return ctx; } From e6929efa3320454d9a572300a4eb97a576c7d556 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jun 2013 10:58:31 -0400 Subject: [PATCH 0187/1368] tracing: Failed to create system directory commit 6e94a780374ed31b280f939d4757e8d7858dff16 upstream. Running the following: # cd /sys/kernel/debug/tracing # echo p:i do_sys_open > kprobe_events # echo p:j schedule >> kprobe_events # cat kprobe_events p:kprobes/i do_sys_open p:kprobes/j schedule # echo p:i do_sys_open >> kprobe_events # cat kprobe_events p:kprobes/j schedule p:kprobes/i do_sys_open # ls /sys/kernel/debug/tracing/events/kprobes/ enable filter j Notice that the 'i' is missing from the kprobes directory. The console produces: "Failed to create system directory kprobes" This is because kprobes passes in a allocated name for the system and the ftrace event subsystem saves off that name instead of creating a duplicate for it. But the kprobes may free the system name making the pointer to it invalid. This bug was introduced by 92edca073c37 "tracing: Use direct field, type and system names" which switched from using kstrdup() on the system name in favor of just keeping apointer to it, as the internal ftrace event system names are static and exist for the life of the computer being booted. Instead of reverting back to duplicating system names again, we can use core_kernel_data() to determine if the passed in name was allocated or static. Then use the MSB of the ref_count to be a flag to keep track if the name was allocated or not. Then we can still save from having to duplicate strings that will always exist, but still copy the ones that may be freed. Reported-by: "zhangwei(Jovi)" Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 41 +++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 27963e2bf4bf..bf56c1d07df3 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields); static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; +#define SYSTEM_FL_FREE_NAME (1 << 31) + +static inline int system_refcount(struct event_subsystem *system) +{ + return system->ref_count & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_inc(struct event_subsystem *system) +{ + return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_dec(struct event_subsystem *system) +{ + return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; +} + /* Double loops, do not use break, only goto's work */ #define do_for_each_event_file(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ @@ -349,8 +366,8 @@ static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; - WARN_ON_ONCE(system->ref_count == 0); - if (--system->ref_count) + WARN_ON_ONCE(system_refcount(system) == 0); + if (system_refcount_dec(system)) return; list_del(&system->list); @@ -359,13 +376,15 @@ static void __put_system(struct event_subsystem *system) kfree(filter->filter_string); kfree(filter); } + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); } static void __get_system(struct event_subsystem *system) { - WARN_ON_ONCE(system->ref_count == 0); - system->ref_count++; + WARN_ON_ONCE(system_refcount(system) == 0); + system_refcount_inc(system); } static void __get_system_dir(struct ftrace_subsystem_dir *dir) @@ -379,7 +398,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ - WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); + WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); __put_system(dir->subsystem); if (!--dir->ref_count) @@ -1279,7 +1298,15 @@ create_new_subsystem(const char *name) return NULL; system->ref_count = 1; - system->name = name; + + /* Only allocate if dynamic (kprobes and modules) */ + if (!core_kernel_data((unsigned long)name)) { + system->ref_count |= SYSTEM_FL_FREE_NAME; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) + goto out_free; + } else + system->name = name; system->filter = NULL; @@ -1292,6 +1319,8 @@ create_new_subsystem(const char *name) return system; out_free: + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); return NULL; } From 86515381237a7f3c88a4a36e3e4d32d58635b972 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Wed, 10 Apr 2013 11:26:23 +0800 Subject: [PATCH 0188/1368] tracing: Fix irqs-off tag display in syscall tracing commit 11034ae9c20f4057a6127fc965906417978e69b2 upstream. All syscall tracing irqs-off tags are wrong, the syscall enter entry doesn't disable irqs. [root@jovi tracing]#echo "syscalls:sys_enter_open" > set_event [root@jovi tracing]# cat trace # tracer: nop # # entries-in-buffer/entries-written: 13/13 #P:2 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | irqbalance-513 [000] d... 56115.496766: sys_open(filename: 804e1a6, flags: 0, mode: 1b6) irqbalance-513 [000] d... 56115.497008: sys_open(filename: 804e1bb, flags: 0, mode: 1b6) sendmail-771 [000] d... 56115.827982: sys_open(filename: b770e6d1, flags: 0, mode: 1b6) The reason is syscall tracing doesn't record irq_flags into buffer. The proper display is: [root@jovi tracing]#echo "syscalls:sys_enter_open" > set_event [root@jovi tracing]# cat trace # tracer: nop # # entries-in-buffer/entries-written: 14/14 #P:2 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | irqbalance-514 [001] .... 46.213921: sys_open(filename: 804e1a6, flags: 0, mode: 1b6) irqbalance-514 [001] .... 46.214160: sys_open(filename: 804e1bb, flags: 0, mode: 1b6) <...>-920 [001] .... 47.307260: sys_open(filename: 4e82a0c5, flags: 80000, mode: 0) Link: http://lkml.kernel.org/r/1365564393-10972-3-git-send-email-jovi.zhangwei@huawei.com Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_syscalls.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8f2ac73c7a5f..322e16461072 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; int size; @@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, 0, 0); + sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) return; @@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) if (!filter_current_check_discard(buffer, sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->exit_event->event.type, sizeof(*entry), 0, 0); + sys_data->exit_event->event.type, sizeof(*entry), + irq_flags, pc); if (!event) return; @@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!filter_current_check_discard(buffer, sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static int reg_event_syscall_enter(struct ftrace_event_file *file, From b7f15519edb2e3c3d7d07d6a0780a4386ef23085 Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Mon, 1 Jul 2013 15:31:24 -0700 Subject: [PATCH 0189/1368] tracing: Make trace_marker use the correct per-instance buffer commit 2d71619c59fac95a5415a326162fa046161b938c upstream. The trace_marker file was present for each new instance created, but it added the trace mark to the global trace buffer instead of to the instance's buffer. Link: http://lkml.kernel.org/r/1372717885-4543-2-git-send-email-azl@google.com Signed-off-by: Alexander Z Lam Cc: David Sharp Cc: Vaibhav Nagarnaik Cc: Alexander Z Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e71a8be4a6ee..b90c993462be 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4328,6 +4328,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { unsigned long addr = (unsigned long)ubuf; + struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; struct ring_buffer *buffer; struct print_entry *entry; @@ -4387,7 +4388,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* possible \n added */ - buffer = global_trace.trace_buffer.buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (!event) { From 9713f78568d0053621530fb9cf06756394b4403c Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Mon, 1 Jul 2013 19:37:54 -0700 Subject: [PATCH 0190/1368] tracing: Protect ftrace_trace_arrays list in trace_events.c commit a82274151af2b075163e3c42c828529dee311487 upstream. There are multiple places where the ftrace_trace_arrays list is accessed in trace_events.c without the trace_types_lock held. Link: http://lkml.kernel.org/r/1372732674-22726-1-git-send-email-azl@google.com Signed-off-by: Alexander Z Lam Cc: Vaibhav Nagarnaik Cc: David Sharp Cc: Alexander Z Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 2 ++ kernel/trace/trace_events.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b90c993462be..10d3f0871b48 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -240,7 +240,7 @@ static struct tracer *trace_types __read_mostly; /* * trace_types_lock is used to protect the trace_types list. */ -static DEFINE_MUTEX(trace_types_lock); +DEFINE_MUTEX(trace_types_lock); /* * serialize the access of the ring buffer diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 20572ed88c5c..7944b9294599 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -224,6 +224,8 @@ enum { extern struct list_head ftrace_trace_arrays; +extern struct mutex trace_types_lock; + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index bf56c1d07df3..f82d92dbd614 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1011,6 +1011,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) int ret; /* Make sure the system still exists */ + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { @@ -1026,6 +1027,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) } exit_loop: mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); if (!system) return -ENODEV; @@ -1620,6 +1622,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call, int trace_add_event_call(struct ftrace_event_call *call) { int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); ret = __register_event(call, NULL); @@ -1627,11 +1630,13 @@ int trace_add_event_call(struct ftrace_event_call *call) __add_event_to_tracers(call, NULL); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return ret; } /* - * Must be called under locking both of event_mutex and trace_event_sem. + * Must be called under locking of trace_types_lock, event_mutex and + * trace_event_sem. */ static void __trace_remove_event_call(struct ftrace_event_call *call) { @@ -1643,11 +1648,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) /* Remove an event_call */ void trace_remove_event_call(struct ftrace_event_call *call) { + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); __trace_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); } #define for_each_event(event, start, end) \ @@ -1791,6 +1798,7 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); switch (val) { case MODULE_STATE_COMING: @@ -1801,6 +1809,7 @@ static int trace_module_notify(struct notifier_block *self, break; } mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return 0; } From 59d8f48855856c5e2e112bab78f1b1e6a14c216b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Jul 2013 22:50:29 -0400 Subject: [PATCH 0191/1368] tracing: Add trace_array_get/put() to handle instance refs better commit ff451961a8b2a17667a7bfa39c86fb9b351445db upstream. Commit a695cb58162 "tracing: Prevent deleting instances when they are being read" tried to fix a race between deleting a trace instance and reading contents of a trace file. But it wasn't good enough. The following could crash the kernel: # cd /sys/kernel/debug/tracing/instances # ( while :; do mkdir foo; rmdir foo; done ) & # ( while :; do cat foo/trace &> /dev/null; done ) & Luckily this can only be done by root user, but it should be fixed regardless. The problem is that a delete of the file can happen after the reader starts to open the file but before it grabs the trace_types_mutex. The solution is to validate the trace array before using it. If the trace array does not exist in the list of trace arrays, then it returns -ENODEV. There's a possibility that a trace_array could be deleted and a new one created and the open would open its file instead. But that is very minor as it will just return the data of the new trace array, it may confuse the user but it will not crash the system. As this can only be done by root anyway, the race will only occur if root is deleting what its trying to read at the same time. Reported-by: Alexander Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 83 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10d3f0871b48..9d076a1ffa0b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -193,6 +193,37 @@ static struct trace_array global_trace; LIST_HEAD(ftrace_trace_arrays); +int trace_array_get(struct trace_array *this_tr) +{ + struct trace_array *tr; + int ret = -ENODEV; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr == this_tr) { + tr->ref++; + ret = 0; + break; + } + } + mutex_unlock(&trace_types_lock); + + return ret; +} + +static void __trace_array_put(struct trace_array *this_tr) +{ + WARN_ON(!this_tr->ref); + this_tr->ref--; +} + +void trace_array_put(struct trace_array *this_tr) +{ + mutex_lock(&trace_types_lock); + __trace_array_put(this_tr); + mutex_unlock(&trace_types_lock); +} + int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event) @@ -2768,10 +2799,9 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file, bool snapshot) +__tracing_open(struct trace_array *tr, struct trace_cpu *tc, + struct inode *inode, struct file *file, bool snapshot) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; struct trace_iterator *iter; int cpu; @@ -2850,8 +2880,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) tracing_iter_reset(iter, cpu); } - tr->ref++; - mutex_unlock(&trace_types_lock); return iter; @@ -2881,17 +2909,20 @@ static int tracing_release(struct inode *inode, struct file *file) struct trace_array *tr; int cpu; - if (!(file->f_mode & FMODE_READ)) + /* Writes do not use seq_file, need to grab tr from inode */ + if (!(file->f_mode & FMODE_READ)) { + struct trace_cpu *tc = inode->i_private; + + trace_array_put(tc->tr); return 0; + } iter = m->private; tr = iter->tr; + trace_array_put(tr); mutex_lock(&trace_types_lock); - WARN_ON(!tr->ref); - tr->ref--; - for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); @@ -2910,20 +2941,23 @@ static int tracing_release(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter->buffer_iter); seq_release_private(inode, file); + return 0; } static int tracing_open(struct inode *inode, struct file *file) { + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - if (tc->cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->trace_buffer); else @@ -2931,12 +2965,16 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, false); + iter = __tracing_open(tr, tc, inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; } + + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -4512,12 +4550,16 @@ struct ftrace_buffer_info { static int tracing_snapshot_open(struct inode *inode, struct file *file) { struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; struct seq_file *m; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, true); + iter = __tracing_open(tr, tc, inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { @@ -4530,13 +4572,16 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) kfree(m); return -ENOMEM; } - iter->tr = tc->tr; + iter->tr = tr; iter->trace_buffer = &tc->tr->max_buffer; iter->cpu_file = tc->cpu; m->private = iter; file->private_data = m; } + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -4617,9 +4662,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, static int tracing_snapshot_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; + int ret; + + ret = tracing_release(inode, file); if (file->f_mode & FMODE_READ) - return tracing_release(inode, file); + return ret; /* If write only, the seq_file is just a stub */ if (m) @@ -4864,8 +4912,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); - WARN_ON(!iter->tr->ref); - iter->tr->ref--; + __trace_array_put(iter->tr); if (info->spare) ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); From 6492334c86dfb441af456337dc3217c2a430f141 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Jul 2013 23:34:22 -0400 Subject: [PATCH 0192/1368] tracing: Get trace_array ref counts when accessing trace files commit 7b85af63034818e43aee6c1d7bf1c7c6796a9073 upstream. When a trace file is opened that may access a trace array, it must increment its ref count to prevent it from being deleted. Reported-by: Alexander Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 121 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 112 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9d076a1ffa0b..0b936d806659 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2902,6 +2902,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } +/* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +int tracing_open_generic_tr(struct inode *inode, struct file *filp) +{ + struct trace_array *tr = inode->i_private; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + +int tracing_open_generic_tc(struct inode *inode, struct file *filp) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + static int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; @@ -2945,6 +2982,32 @@ static int tracing_release(struct inode *inode, struct file *file) return 0; } +static int tracing_release_generic_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + return 0; +} + +static int tracing_release_generic_tc(struct inode *inode, struct file *file) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + trace_array_put(tr); + return 0; +} + +static int tracing_single_release_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return single_release(inode, file); +} + static int tracing_open(struct inode *inode, struct file *file) { struct trace_cpu *tc = inode->i_private; @@ -3331,9 +3394,14 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static int tracing_trace_options_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + return single_open(file, tracing_trace_options_show, inode->i_private); } @@ -3341,7 +3409,7 @@ static const struct file_operations tracing_iter_fops = { .open = tracing_trace_options_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_trace_options_write, }; @@ -3829,6 +3897,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + mutex_lock(&trace_types_lock); /* create a buffer to store the information to pass to userspace */ @@ -3881,6 +3952,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) fail: kfree(iter->trace); kfree(iter); + __trace_array_put(tr); mutex_unlock(&trace_types_lock); return ret; } @@ -3888,6 +3960,8 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; mutex_lock(&trace_types_lock); @@ -3901,6 +3975,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter); + trace_array_put(tr); + return 0; } @@ -4358,6 +4434,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); + trace_array_put(tr); + return 0; } @@ -4534,10 +4612,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, static int tracing_clock_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + int ret; + if (tracing_disabled) return -ENODEV; - return single_open(file, tracing_clock_show, inode->i_private); + if (trace_array_get(tr)) + return -ENODEV; + + ret = single_open(file, tracing_clock_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } struct ftrace_buffer_info { @@ -4733,34 +4821,38 @@ static const struct file_operations tracing_pipe_fops = { }; static const struct file_operations tracing_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tc, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tc, }; static const struct file_operations tracing_total_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_total_entries_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_free_buffer_fops = { + .open = tracing_open_generic_tr, .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .write = tracing_mark_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_clock_write, }; @@ -4788,13 +4880,19 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) struct trace_cpu *tc = inode->i_private; struct trace_array *tr = tc->tr; struct ftrace_buffer_info *info; + int ret; if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) + if (!info) { + trace_array_put(tr); return -ENOMEM; + } mutex_lock(&trace_types_lock); @@ -4812,7 +4910,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_unlock(&trace_types_lock); - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret < 0) + trace_array_put(tr); + + return ret; } static unsigned int @@ -5707,9 +5809,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, } static const struct file_operations rb_simple_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = rb_simple_read, .write = rb_simple_write, + .release = tracing_release_generic_tr, .llseek = default_llseek, }; From 68cebd265c91873277cf100e7ac1d047c6598ddf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Jul 2013 14:48:23 -0400 Subject: [PATCH 0193/1368] tracing: Fix race between deleting buffer and setting events commit 2a6c24afab70dbcfee49f4c76e1511eec1a3298b upstream. While analyzing the code, I discovered that there's a potential race between deleting a trace instance and setting events. There are a few races that can occur if events are being traced as the buffer is being deleted. Mostly the problem comes with freeing the descriptor used by the trace event callback. To prevent problems like this, the events are disabled before the buffer is deleted. The problem with the current solution is that the event_mutex is let go between disabling the events and freeing the files, which means that the events could be enabled again while the freeing takes place. Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f82d92dbd614..32b9895af239 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -415,14 +415,14 @@ static void put_system(struct ftrace_subsystem_dir *dir) /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ -static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, - const char *sub, const char *event, int set) +static int +__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) { struct ftrace_event_file *file; struct ftrace_event_call *call; int ret = -EINVAL; - mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; @@ -448,6 +448,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, ret = 0; } + + return ret; +} + +static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) +{ + int ret; + + mutex_lock(&event_mutex); + ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); mutex_unlock(&event_mutex); return ret; @@ -2367,11 +2378,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) int event_trace_del_tracer(struct trace_array *tr) { - /* Disable any running events */ - __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); - mutex_lock(&event_mutex); + /* Disable any running events */ + __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); + down_write(&trace_event_sem); __trace_remove_event_dirs(tr); debugfs_remove_recursive(tr->event_dir); From fc82a11a9ce5ddc1cc49ac7bb2a099b9b18b85c0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Jul 2013 15:30:53 -0400 Subject: [PATCH 0194/1368] tracing: Add trace_array_get/put() to event handling commit 8e2e2fa47129532a30cff6c25a47078dc97d9260 upstream. Commit a695cb58162 "tracing: Prevent deleting instances when they are being read" tried to fix a race between deleting a trace instance and reading contents of a trace file. But it wasn't good enough. The following could crash the kernel: # cd /sys/kernel/debug/tracing/instances # ( while :; do mkdir foo; rmdir foo; done ) & # ( while :; do echo 1 > foo/events/sched/sched_switch 2> /dev/null; done ) & Luckily this can only be done by root user, but it should be fixed regardless. The problem is that a delete of the file can happen after the write to the event is opened, but before the enabling happens. The solution is to make sure the trace_array is available before succeeding in opening for write, and incerment the ref counter while opened. Now the instance can be deleted when the events are writing to the buffer, but the deletion of the instance will disable all events before the instance is actually deleted. Reported-by: Alexander Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 3 ++ kernel/trace/trace_events.c | 57 +++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7944b9294599..51b44483eb78 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -226,6 +226,9 @@ extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; +extern int trace_array_get(struct trace_array *tr); +extern void trace_array_put(struct trace_array *tr); + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 32b9895af239..6dfd48b5d1c0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -412,6 +412,35 @@ static void put_system(struct ftrace_subsystem_dir *dir) mutex_unlock(&event_mutex); } +/* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +static int tracing_open_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + ret = tracing_open_generic(inode, filp); + if (ret < 0) + trace_array_put(tr); + return ret; +} + +static int tracing_release_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + + trace_array_put(tr); + + return 0; +} + /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ @@ -1046,9 +1075,17 @@ static int subsystem_open(struct inode *inode, struct file *filp) /* Some versions of gcc think dir can be uninitialized here */ WARN_ON(!dir); - ret = tracing_open_generic(inode, filp); - if (ret < 0) + /* Still need to increment the ref count of the system */ + if (trace_array_get(tr) < 0) { put_system(dir); + return -ENODEV; + } + + ret = tracing_open_generic(inode, filp); + if (ret < 0) { + trace_array_put(tr); + put_system(dir); + } return ret; } @@ -1059,16 +1096,23 @@ static int system_tr_open(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; int ret; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); - if (!dir) + if (!dir) { + trace_array_put(tr); return -ENOMEM; + } dir->tr = tr; ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); kfree(dir); + } filp->private_data = dir; @@ -1079,6 +1123,8 @@ static int subsystem_release(struct inode *inode, struct file *file) { struct ftrace_subsystem_dir *dir = file->private_data; + trace_array_put(dir->tr); + /* * If dir->subsystem is NULL, then this is a temporary * descriptor that was made for a trace_array to enable @@ -1206,9 +1252,10 @@ static const struct file_operations ftrace_set_event_fops = { }; static const struct file_operations ftrace_enable_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_file, .read = event_enable_read, .write = event_enable_write, + .release = tracing_release_generic_file, .llseek = default_llseek, }; From 81a464838675307c9fc7909fc840f248ed2caed5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 25 Jul 2013 15:16:45 -0700 Subject: [PATCH 0195/1368] Linux 3.10.3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43367309fc78..b5485529b6bf 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Unicycling Gorilla From 9fa65e09730a2e63c5b45d008f269b2b271b74bc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 28 Jun 2013 16:04:02 +0200 Subject: [PATCH 0196/1368] writeback: Fix periodic writeback after fs mount commit a5faeaf9109578e65e1a32e2a3e76c8b47e7dcb6 upstream. Code in blkdev.c moves a device inode to default_backing_dev_info when the last reference to the device is put and moves the device inode back to its bdi when the first reference is acquired. This includes moving to wb.b_dirty list if the device inode is dirty. The code however doesn't setup timer to wake corresponding flusher thread and while wb.b_dirty list is non-empty __mark_inode_dirty() will not set it up either. Thus periodic writeback is effectively disabled until a sync(2) call which can lead to unexpected data loss in case of crash or power failure. Fix the problem by setting up a timer for periodic writeback in case we add the first dirty inode to wb.b_dirty list in bdev_inode_switch_bdi(). Reported-by: Bert De Jonghe Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 2091db8cdd78..85f5c85ec91c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { struct backing_dev_info *old = inode->i_data.backing_dev_info; + bool wakeup_bdi = false; if (unlikely(dst == old)) /* deadlock avoidance */ return; bdi_lock_two(&old->wb, &dst->wb); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) + if (inode->i_state & I_DIRTY) { + if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) + wakeup_bdi = true; list_move(&inode->i_wb_list, &dst->wb.b_dirty); + } spin_unlock(&inode->i_lock); spin_unlock(&old->wb.list_lock); spin_unlock(&dst->wb.list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ From b692b29868295c504e1e9c306762aa89ab3997a2 Mon Sep 17 00:00:00 2001 From: Olivier DANET Date: Wed, 10 Jul 2013 13:56:10 -0700 Subject: [PATCH 0197/1368] sparc32: vm_area_struct access for old Sun SPARCs. upstream commit 961246b4ed8da3bcf4ee1eb9147f341013553e3c. Commit e4c6bfd2d79d063017ab19a18915f0bc759f32d9 ("mm: rearrange vm_area_struct for fewer cache misses") changed the layout of the vm_area_struct structure, it broke several SPARC32 assembly routines which used numerical constants for accessing the vm_mm field. This patch defines the VMA_VM_MM constant to replace the immediate values. Signed-off-by: Olivier DANET Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/asm-offsets.c | 2 ++ arch/sparc/mm/hypersparc.S | 8 ++++---- arch/sparc/mm/swift.S | 8 ++++---- arch/sparc/mm/tsunami.S | 6 +++--- arch/sparc/mm/viking.S | 10 +++++----- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index 961b87f99e69..f76389a32342 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c @@ -49,6 +49,8 @@ int foo(void) DEFINE(AOFF_task_thread, offsetof(struct task_struct, thread)); BLANK(); DEFINE(AOFF_mm_context, offsetof(struct mm_struct, context)); + BLANK(); + DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); /* DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); */ return 0; diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S index 44aad32eeb4e..969f96450f69 100644 --- a/arch/sparc/mm/hypersparc.S +++ b/arch/sparc/mm/hypersparc.S @@ -74,7 +74,7 @@ hypersparc_flush_cache_mm_out: /* The things we do for performance... */ hypersparc_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 #ifndef CONFIG_SMP ld [%o0 + AOFF_mm_context], %g1 cmp %g1, -1 @@ -163,7 +163,7 @@ hypersparc_flush_cache_range_out: */ /* Verified, my ass... */ hypersparc_flush_cache_page: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %g2 #ifndef CONFIG_SMP cmp %g2, -1 @@ -284,7 +284,7 @@ hypersparc_flush_tlb_mm_out: sta %g5, [%g1] ASI_M_MMUREGS hypersparc_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -307,7 +307,7 @@ hypersparc_flush_tlb_range_out: sta %g5, [%g1] ASI_M_MMUREGS hypersparc_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/swift.S b/arch/sparc/mm/swift.S index c801c3953a00..5d2b88d39424 100644 --- a/arch/sparc/mm/swift.S +++ b/arch/sparc/mm/swift.S @@ -105,7 +105,7 @@ swift_flush_cache_mm_out: .globl swift_flush_cache_range swift_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 sub %o2, %o1, %o2 sethi %hi(4096), %o3 cmp %o2, %o3 @@ -116,7 +116,7 @@ swift_flush_cache_range: .globl swift_flush_cache_page swift_flush_cache_page: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 70: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -219,7 +219,7 @@ swift_flush_sig_insns: .globl swift_flush_tlb_range .globl swift_flush_tlb_all swift_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 swift_flush_tlb_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -233,7 +233,7 @@ swift_flush_tlb_all_out: .globl swift_flush_tlb_page swift_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S index 4e55e8f76648..bf10a345fa8b 100644 --- a/arch/sparc/mm/tsunami.S +++ b/arch/sparc/mm/tsunami.S @@ -24,7 +24,7 @@ /* Sliiick... */ tsunami_flush_cache_page: tsunami_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 tsunami_flush_cache_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -46,7 +46,7 @@ tsunami_flush_sig_insns: /* More slick stuff... */ tsunami_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 tsunami_flush_tlb_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -65,7 +65,7 @@ tsunami_flush_tlb_out: /* This one can be done in a fine grained manner... */ tsunami_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S index bf8ee0613ae7..852257fcc82b 100644 --- a/arch/sparc/mm/viking.S +++ b/arch/sparc/mm/viking.S @@ -108,7 +108,7 @@ viking_mxcc_flush_page: viking_flush_cache_page: viking_flush_cache_range: #ifndef CONFIG_SMP - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 #endif viking_flush_cache_mm: #ifndef CONFIG_SMP @@ -148,7 +148,7 @@ viking_flush_tlb_mm: #endif viking_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -173,7 +173,7 @@ viking_flush_tlb_range: #endif viking_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -239,7 +239,7 @@ sun4dsmp_flush_tlb_range: tst %g5 bne 3f mov SRMMU_CTX_REG, %g1 - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4 @@ -265,7 +265,7 @@ sun4dsmp_flush_tlb_page: tst %g5 bne 2f mov SRMMU_CTX_REG, %g1 - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 and %o1, PAGE_MASK, %o1 From ce08aa04215ea3309c47677f55d3b52d31d6a97a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Jun 2013 22:46:04 +0200 Subject: [PATCH 0198/1368] ipv6: only apply anti-spoofing checks to not-pointopoint tunnels [ Upstream commit 5c29fb12e8fb8a8105ea048cb160fd79a85a52bb ] Because of commit 218774dc341f219bfcf940304a081b121a0e8099 ("ipv6: add anti-spoofing checks for 6to4 and 6rd") the sit driver dropped packets for 2002::/16 destinations and sources even when configured to work as a tunnel with fixed endpoint. We may only apply the 6rd/6to4 anti-spoofing checks if the device is not in pointopoint mode. This was an oversight from me in the above commit, sorry. Thanks to Roman Mamedov for reporting this! Reported-by: Roman Mamedov Cc: David Miller Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 335363478bbf..60df36d15390 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -589,7 +589,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel->dev->stats.rx_errors++; goto out; } - } else { + } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { if (is_spoofed_6rd(tunnel, iph->saddr, &ipv6_hdr(skb)->saddr) || is_spoofed_6rd(tunnel, iph->daddr, From ac294f13d331cb3e315fd922ae505619f853818b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jun 2013 02:37:42 -0700 Subject: [PATCH 0199/1368] neighbour: fix a race in neigh_destroy() [ Upstream commit c9ab4d85de222f3390c67aedc9c18a50e767531e ] There is a race in neighbour code, because neigh_destroy() uses skb_queue_purge(&neigh->arp_queue) without holding neighbour lock, while other parts of the code assume neighbour rwlock is what protects arp_queue Convert all skb_queue_purge() calls to the __skb_queue_purge() variant Use __skb_queue_head_init() instead of skb_queue_head_init() to make clear we do not use arp_queue.lock And hold neigh->lock in neigh_destroy() to close the race. Reported-by: Joe Jin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5c56b217b999..ce90b0264db5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - skb_queue_purge(&n->arp_queue); + __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device if (!n) goto out_entries; - skb_queue_head_init(&n->arp_queue); + __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); seqlock_init(&n->ha_lock); n->updated = n->used = now; @@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh) if (neigh_del_timer(neigh)) pr_warn("Impossible event\n"); - skb_queue_purge(&neigh->arp_queue); + write_lock_bh(&neigh->lock); + __skb_queue_purge(&neigh->arp_queue); + write_unlock_bh(&neigh->lock); neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) @@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh) neigh->ops->error_report(neigh, skb); write_lock(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } @@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, write_lock_bh(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } out: From ea4c218f2c3e67f1bca8078ff1f80d7e0c2ab791 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 28 Jun 2013 12:13:52 -0400 Subject: [PATCH 0200/1368] x25: Fix broken locking in ioctl error paths. [ Upstream commit 4ccb93ce7439b63c31bc7597bfffd13567fa483d ] Two of the x25 ioctl cases have error paths that break out of the function without unlocking the socket, leading to this warning: ================================================ [ BUG: lock held when returning to user space! ] 3.10.0-rc7+ #36 Not tainted ------------------------------------------------ trinity-child2/31407 is leaving the kernel with locks still held! 1 lock held by trinity-child2/31407: #0: (sk_lock-AF_X25){+.+.+.}, at: [] x25_ioctl+0x8a/0x740 [x25] Signed-off-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 37ca9694aabe..22c88d2e6846 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1583,11 +1583,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) - break; - clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + if (sk->sk_state == TCP_CLOSE) { + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + rc = 0; + } release_sock(sk); - rc = 0; break; } @@ -1595,14 +1595,15 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) - break; + goto out_sendcallaccpt_release; /* must call accptapprv above */ if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) - break; + goto out_sendcallaccpt_release; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; - release_sock(sk); rc = 0; +out_sendcallaccpt_release: + release_sock(sk); break; } From 51778da544ca660615ea02b09d395430159b4d0c Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 Jun 2013 00:15:51 +0800 Subject: [PATCH 0201/1368] net: Swap ver and type in pppoe_hdr [ Upstream commit b1a5a34bd0b8767ea689e68f8ea513e9710b671e ] Ver and type in pppoe_hdr should be swapped as defined by RFC2516 section-4. Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/if_pppox.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index 0b46fd57c8f6..e36a4aecd311 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -135,11 +135,11 @@ struct pppoe_tag { struct pppoe_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 ver : 4; __u8 type : 4; + __u8 ver : 4; #elif defined(__BIG_ENDIAN_BITFIELD) - __u8 type : 4; __u8 ver : 4; + __u8 type : 4; #else #error "Please fix " #endif From 3d3fa8bca30d8fd9cf5b95bd3eb9d7e2a50dec1c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Jun 2013 12:02:59 +0800 Subject: [PATCH 0202/1368] gre: fix a regression in ioctl [ Upstream commit 6c734fb8592f6768170e48e7102cb2f0a1bb9759 ] When testing GRE tunnel, I got: # ip tunnel show get tunnel gre0 failed: Invalid argument get tunnel gre1 failed: Invalid argument This is a regression introduced by commit c54419321455631079c7d ("GRE: Refactor GRE tunneling code.") because previously we only check the parameters for SIOCADDTUNNEL and SIOCCHGTUNNEL, after that commit, the check is moved for all commands. So, just check for SIOCADDTUNNEL and SIOCCHGTUNNEL. After this patch I got: # ip tunnel show gre0: gre/ip remote any local any ttl inherit nopmtudisc gre1: gre/ip remote 192.168.122.101 local 192.168.122.45 ttl inherit Signed-off-by: Cong Wang Cc: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a83591492dd..855004f08325 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -503,10 +503,11 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + return -EINVAL; } p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); From da04e7df0494a9a6ccfe2aada8a262e38c284914 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Jun 2013 13:00:57 +0800 Subject: [PATCH 0203/1368] vti: remove duplicated code to fix a memory leak [ Upstream commit ab6c7a0a43c2eaafa57583822b619b22637b49c7 ] vti module allocates dev->tstats twice: in vti_fb_tunnel_init() and in vti_tunnel_init(), this lead to a memory leak of dev->tstats. Just remove the duplicated operations in vti_fb_tunnel_init(). (candidate for -stable) Signed-off-by: Cong Wang Cc: Stephen Hemminger Cc: Saurabh Mohan Acked-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_vti.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c118f6b576bb..17cc0ffa8c0d 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -606,17 +606,10 @@ static int __net_init vti_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - iph->version = 4; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - dev_hold(dev); rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); return 0; From 36bddbad5049d3f3916abc9f594526d47ad3ab84 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Sat, 29 Jun 2013 21:30:49 +0800 Subject: [PATCH 0204/1368] ipv6,mcast: always hold idev->lock before mca_lock [ Upstream commit 8965779d2c0e6ab246c82a405236b1fb2adae6b2, with some bits from commit b7b1bfce0bb68bd8f6e62a28295922785cc63781 ("ipv6: split duplicate address detection and router solicitation timer") to get the __ipv6_get_lladdr() used by this patch. ] dingtianhong reported the following deadlock detected by lockdep: ====================================================== [ INFO: possible circular locking dependency detected ] 3.4.24.05-0.1-default #1 Not tainted ------------------------------------------------------- ksoftirqd/0/3 is trying to acquire lock: (&ndev->lock){+.+...}, at: [] ipv6_get_lladdr+0x74/0x120 but task is already holding lock: (&mc->mca_lock){+.+...}, at: [] mld_send_report+0x40/0x150 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mc->mca_lock){+.+...}: [] validate_chain+0x637/0x730 [] __lock_acquire+0x2f7/0x500 [] lock_acquire+0x114/0x150 [] rt_spin_lock+0x4a/0x60 [] igmp6_group_added+0x3b/0x120 [] ipv6_mc_up+0x38/0x60 [] ipv6_find_idev+0x3d/0x80 [] addrconf_notify+0x3d5/0x4b0 [] notifier_call_chain+0x3f/0x80 [] raw_notifier_call_chain+0x11/0x20 [] call_netdevice_notifiers+0x32/0x60 [] __dev_notify_flags+0x34/0x80 [] dev_change_flags+0x40/0x70 [] do_setlink+0x237/0x8a0 [] rtnl_newlink+0x3ec/0x600 [] rtnetlink_rcv_msg+0x160/0x310 [] netlink_rcv_skb+0x89/0xb0 [] rtnetlink_rcv+0x27/0x40 [] netlink_unicast+0x140/0x180 [] netlink_sendmsg+0x33e/0x380 [] sock_sendmsg+0x112/0x130 [] __sys_sendmsg+0x44e/0x460 [] sys_sendmsg+0x44/0x70 [] system_call_fastpath+0x16/0x1b -> #0 (&ndev->lock){+.+...}: [] check_prev_add+0x3de/0x440 [] validate_chain+0x637/0x730 [] __lock_acquire+0x2f7/0x500 [] lock_acquire+0x114/0x150 [] rt_read_lock+0x42/0x60 [] ipv6_get_lladdr+0x74/0x120 [] mld_newpack+0xb6/0x160 [] add_grhead+0xab/0xc0 [] add_grec+0x3ab/0x460 [] mld_send_report+0x5a/0x150 [] igmp6_timer_handler+0x4e/0xb0 [] call_timer_fn+0xca/0x1d0 [] run_timer_softirq+0x1df/0x2e0 [] handle_pending_softirqs+0xf7/0x1f0 [] __do_softirq_common+0x7b/0xf0 [] __thread_do_softirq+0x1af/0x210 [] run_ksoftirqd+0xe1/0x1f0 [] kthread+0xae/0xc0 [] kernel_thread_helper+0x4/0x10 actually we can just hold idev->lock before taking pmc->mca_lock, and avoid taking idev->lock again when iterating idev->addr_list, since the upper callers of mld_newpack() already take read_lock_bh(&idev->lock). Reported-by: dingtianhong Cc: dingtianhong Cc: Hideaki YOSHIFUJI Cc: David S. Miller Cc: Hannes Frederic Sowa Tested-by: Ding Tianhong Tested-by: Chen Weilong Signed-off-by: Cong Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/addrconf.h | 3 +++ net/ipv6/addrconf.c | 28 ++++++++++++++++++---------- net/ipv6/mcast.c | 18 ++++++++++-------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 21f702704f24..01b1a1ad77d2 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -86,6 +86,9 @@ extern int ipv6_dev_get_saddr(struct net *net, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); +extern int __ipv6_get_lladdr(struct inet6_dev *idev, + struct in6_addr *addr, + unsigned char banned_flags); extern int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4ab4c38958c6..fb8c94c4ab86 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1448,6 +1448,23 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, } EXPORT_SYMBOL(ipv6_dev_get_saddr); +int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, + unsigned char banned_flags) +{ + struct inet6_ifaddr *ifp; + int err = -EADDRNOTAVAIL; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + *addr = ifp->addr; + err = 0; + break; + } + } + return err; +} + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) { @@ -1457,17 +1474,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - struct inet6_ifaddr *ifp; - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (ifp->scope == IFA_LINK && - !(ifp->flags & banned_flags)) { - *addr = ifp->addr; - err = 0; - break; - } - } + err = __ipv6_get_lladdr(idev, addr, banned_flags); read_unlock_bh(&idev->lock); } rcu_read_unlock(); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bfa6cc36ef2a..c3998c2bbc5a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1343,8 +1343,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct net_device *dev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) { + struct net_device *dev = idev->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; @@ -1369,7 +1370,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_reserve(skb, hlen); - if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { + if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* : * use unspecified address as the source address * when a valid link-local address is not available. @@ -1465,7 +1466,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr; if (!skb) - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(pmc->idev, dev->mtu); if (!skb) return NULL; pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); @@ -1485,7 +1486,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) { - struct net_device *dev = pmc->idev->dev; + struct inet6_dev *idev = pmc->idev; + struct net_device *dev = idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; @@ -1514,7 +1516,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); } } first = 1; @@ -1541,7 +1543,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); first = 1; scount = 0; } @@ -1596,8 +1598,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) struct sk_buff *skb = NULL; int type; + read_lock_bh(&idev->lock); if (!pmc) { - read_lock_bh(&idev->lock); for (pmc=idev->mc_list; pmc; pmc=pmc->next) { if (pmc->mca_flags & MAF_NOREPORT) continue; @@ -1609,7 +1611,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); } else { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) @@ -1619,6 +1620,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } + read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } From edb42cad3392ba6a1cd3a688cfb7af16d6b2137f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 2 Jul 2013 10:57:33 -0700 Subject: [PATCH 0205/1368] ip_tunnels: Use skb-len to PMTU check. [ Upstream commit 23a3647bc4f93bac3776c66dc2c7f7f68b3cd662 ] In path mtu check, ip header total length works for gre device but not for gre-tap device. Use skb len which is consistent for all tunneling types. This is old bug in gre. This also fixes mtu calculation bug introduced by commit c54419321455631079c7d (GRE: Refactor GRE tunneling code). Reported-by: Timo Teras Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 97 ++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 7fa8f08fa7ae..d05bd02886ed 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -486,6 +486,53 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); +static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + struct rtable *rt, __be16 df) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int pkt_size = skb->len - tunnel->hlen; + int mtu; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr) - tunnel->hlen; + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && + (df & htons(IP_DF)) && mtu < pkt_size) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + return -E2BIG; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < pkt_size) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -E2BIG; + } + } +#endif + return 0; +} + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params) { @@ -499,7 +546,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int mtu; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -579,50 +625,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - df = tnl_params->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (inner_iph->frag_off&htons(IP_DF)); - - if (!skb_is_gso(skb) && - (inner_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(inner_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + ip_rt_put(rt); + goto tx_error; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && - mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - - if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && - mtu < skb->len) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } - } -#endif if (tunnel->err_count > 0) { if (time_before(jiffies, @@ -646,6 +653,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP)) + df |= (inner_iph->frag_off&htons(IP_DF)); + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) + rt->dst.header_len; if (max_headroom > dev->needed_headroom) { From ed7f614ab8c605aadfa163d468a980642b3929da Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 2 Jul 2013 09:02:07 +0800 Subject: [PATCH 0206/1368] l2tp: add missing .owner to struct pppox_proto [ Upstream commit e1558a93b61962710733dc8c11a2bc765607f1cd ] Add missing .owner of struct pppox_proto. This prevents the module from being removed from underneath its users. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 8dec6876dc50..5ebee2ded9e9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1793,7 +1793,8 @@ static const struct proto_ops pppol2tp_ops = { static const struct pppox_proto pppol2tp_proto = { .create = pppol2tp_create, - .ioctl = pppol2tp_ioctl + .ioctl = pppol2tp_ioctl, + .owner = THIS_MODULE, }; #ifdef CONFIG_L2TP_V3 From 0e7eadef8d9f5c3ebb664afcbaaad031321bfa58 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 2 Jul 2013 14:49:34 +0800 Subject: [PATCH 0207/1368] ipip: fix a regression in ioctl [ Upstream commit 3b7b514f44bff05d26a6499c4d4fac2a83938e6e ] This is a regression introduced by commit fd58156e456d9f68fe0448 (IPIP: Use ip-tunneling code.) Similar to GRE tunnel, previously we only check the parameters for SIOCADDTUNNEL and SIOCCHGTUNNEL, after that commit, the check is moved for all commands. So, just check for SIOCADDTUNNEL and SIOCCHGTUNNEL. Also, the check for i_key, o_key etc. is suspicious too, which did not exist before, reset them before passing to ip_tunnel_ioctl(). Signed-off-by: Cong Wang Cc: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipip.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 77bfcce64fe5..7cfc45624b6d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -240,11 +240,13 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - return -EINVAL; - if (p.i_key || p.o_key || p.i_flags || p.o_flags) - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + } + + p.i_key = p.o_key = p.i_flags = p.o_flags = 0; if (p.iph.ttl) p.iph.frag_off |= htons(IP_DF); From 07243c3d963dc56facf2512f51783448f5d9cf6f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 1 Jul 2013 20:21:30 +0200 Subject: [PATCH 0208/1368] ipv6: call udp_push_pending_frames when uncorking a socket with AF_INET pending data [ Upstream commit 8822b64a0fa64a5dd1dfcf837c5b0be83f8c05d1 ] We accidentally call down to ip6_push_pending_frames when uncorking pending AF_INET data on a ipv6 socket. This results in the following splat (from Dave Jones): skbuff: skb_under_panic: text:ffffffff816765f6 len:48 put:40 head:ffff88013deb6df0 data:ffff88013deb6dec tail:0x2c end:0xc0 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:126! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: dccp_ipv4 dccp 8021q garp bridge stp dlci mpoa snd_seq_dummy sctp fuse hidp tun bnep nfnetlink scsi_transport_iscsi rfcomm can_raw can_bcm af_802154 appletalk caif_socket can caif ipt_ULOG x25 rose af_key pppoe pppox ipx phonet irda llc2 ppp_generic slhc p8023 psnap p8022 llc crc_ccitt atm bluetooth +netrom ax25 nfc rfkill rds af_rxrpc coretemp hwmon kvm_intel kvm crc32c_intel snd_hda_codec_realtek ghash_clmulni_intel microcode pcspkr snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep usb_debug snd_seq snd_seq_device snd_pcm e1000e snd_page_alloc snd_timer ptp snd pps_core soundcore xfs libcrc32c CPU: 2 PID: 8095 Comm: trinity-child2 Not tainted 3.10.0-rc7+ #37 task: ffff8801f52c2520 ti: ffff8801e6430000 task.ti: ffff8801e6430000 RIP: 0010:[] [] skb_panic+0x63/0x65 RSP: 0018:ffff8801e6431de8 EFLAGS: 00010282 RAX: 0000000000000086 RBX: ffff8802353d3cc0 RCX: 0000000000000006 RDX: 0000000000003b90 RSI: ffff8801f52c2ca0 RDI: ffff8801f52c2520 RBP: ffff8801e6431e08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff88022ea0c800 R13: ffff88022ea0cdf8 R14: ffff8802353ecb40 R15: ffffffff81cc7800 FS: 00007f5720a10740(0000) GS:ffff880244c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000005862000 CR3: 000000022843c000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 Stack: ffff88013deb6dec 000000000000002c 00000000000000c0 ffffffff81a3f6e4 ffff8801e6431e18 ffffffff8159a9aa ffff8801e6431e90 ffffffff816765f6 ffffffff810b756b 0000000700000002 ffff8801e6431e40 0000fea9292aa8c0 Call Trace: [] skb_push+0x3a/0x40 [] ip6_push_pending_frames+0x1f6/0x4d0 [] ? mark_held_locks+0xbb/0x140 [] udp_v6_push_pending_frames+0x2b9/0x3d0 [] ? udplite_getfrag+0x20/0x20 [] udp_lib_setsockopt+0x1aa/0x1f0 [] ? fget_light+0x387/0x4f0 [] udpv6_setsockopt+0x34/0x40 [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xd0 [] tracesys+0xdd/0xe2 Code: 00 00 48 89 44 24 10 8b 87 d8 00 00 00 48 89 44 24 08 48 8b 87 e8 00 00 00 48 c7 c7 c0 04 aa 81 48 89 04 24 31 c0 e8 e1 7e ff ff <0f> 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 RIP [] skb_panic+0x63/0x65 RSP This patch adds a check if the pending data is of address family AF_INET and directly calls udp_push_ending_frames from udp_v6_push_pending_frames if that is the case. This bug was found by Dave Jones with trinity. (Also move the initialization of fl6 below the AF_INET check, even if not strictly necessary.) Signed-off-by: Hannes Frederic Sowa Cc: Dave Jones Cc: YOSHIFUJI Hideaki Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/udp.h | 1 + net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 065f379c6503..ad99eedc6168 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -181,6 +181,7 @@ extern int udp_get_port(struct sock *sk, unsigned short snum, extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); +extern int udp_push_pending_frames(struct sock *sk); extern void udp_flush_pending_frames(struct sock *sk); extern int udp_rcv(struct sk_buff *skb); extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03c..93b731d53221 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -799,7 +799,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk) +int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -818,6 +818,7 @@ static int udp_push_pending_frames(struct sock *sk) up->pending = 0; return err; } +EXPORT_SYMBOL(udp_push_pending_frames); int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa6..e7b28f9bb02b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -955,11 +955,16 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + fl6 = &inet->cork.fl.u.ip6; + /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) goto out; From 7852c5bf367a01828edd02974c0a185b1be7b577 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 2 Jul 2013 08:04:05 +0200 Subject: [PATCH 0209/1368] ipv6: ip6_append_data_mtu did not care about pmtudisc and frag_size [ Upstream commit 75a493e60ac4bbe2e977e7129d6d8cbb0dd236be ] If the socket had an IPV6_MTU value set, ip6_append_data_mtu lost track of this when appending the second frame on a corked socket. This results in the following splat: [37598.993962] ------------[ cut here ]------------ [37598.994008] kernel BUG at net/core/skbuff.c:2064! [37598.994008] invalid opcode: 0000 [#1] SMP [37598.994008] Modules linked in: tcp_lp uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_core videodev media vfat fat usb_storage fuse ebtable_nat xt_CHECKSUM bridge stp llc ipt_MASQUERADE nf_conntrack_netbios_ns nf_conntrack_broadcast ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat +nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i cxgb3 mdio libcxgbi ib_iser rdma_cm ib_addr iw_cm ib_cm ib_sa ib_mad ib_core iscsi_tcp libiscsi_tcp libiscsi +scsi_transport_iscsi rfcomm bnep iTCO_wdt iTCO_vendor_support snd_hda_codec_conexant arc4 iwldvm mac80211 snd_hda_intel acpi_cpufreq mperf coretemp snd_hda_codec microcode cdc_wdm cdc_acm [37598.994008] snd_hwdep cdc_ether snd_seq snd_seq_device usbnet mii joydev btusb snd_pcm bluetooth i2c_i801 e1000e lpc_ich mfd_core ptp iwlwifi pps_core snd_page_alloc mei cfg80211 snd_timer thinkpad_acpi snd tpm_tis soundcore rfkill tpm tpm_bios vhost_net tun macvtap macvlan kvm_intel kvm uinput binfmt_misc +dm_crypt i915 i2c_algo_bit drm_kms_helper drm i2c_core wmi video [37598.994008] CPU 0 [37598.994008] Pid: 27320, comm: t2 Not tainted 3.9.6-200.fc18.x86_64 #1 LENOVO 27744PG/27744PG [37598.994008] RIP: 0010:[] [] skb_copy_and_csum_bits+0x325/0x330 [37598.994008] RSP: 0018:ffff88003670da18 EFLAGS: 00010202 [37598.994008] RAX: ffff88018105c018 RBX: 0000000000000004 RCX: 00000000000006c0 [37598.994008] RDX: ffff88018105a6c0 RSI: ffff88018105a000 RDI: ffff8801e1b0aa00 [37598.994008] RBP: ffff88003670da78 R08: 0000000000000000 R09: ffff88018105c040 [37598.994008] R10: ffff8801e1b0aa00 R11: 0000000000000000 R12: 000000000000fff8 [37598.994008] R13: 00000000000004fc R14: 00000000ffff0504 R15: 0000000000000000 [37598.994008] FS: 00007f28eea59740(0000) GS:ffff88023bc00000(0000) knlGS:0000000000000000 [37598.994008] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [37598.994008] CR2: 0000003d935789e0 CR3: 00000000365cb000 CR4: 00000000000407f0 [37598.994008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [37598.994008] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [37598.994008] Process t2 (pid: 27320, threadinfo ffff88003670c000, task ffff88022c162ee0) [37598.994008] Stack: [37598.994008] ffff88022e098a00 ffff88020f973fc0 0000000000000008 00000000000004c8 [37598.994008] ffff88020f973fc0 00000000000004c4 ffff88003670da78 ffff8801e1b0a200 [37598.994008] 0000000000000018 00000000000004c8 ffff88020f973fc0 00000000000004c4 [37598.994008] Call Trace: [37598.994008] [] ip6_append_data+0xccf/0xfe0 [37598.994008] [] ? ip_copy_metadata+0x1a0/0x1a0 [37598.994008] [] ? _raw_spin_lock_bh+0x16/0x40 [37598.994008] [] udpv6_sendmsg+0x1ed/0xc10 [37598.994008] [] ? sock_has_perm+0x75/0x90 [37598.994008] [] inet_sendmsg+0x63/0xb0 [37598.994008] [] ? selinux_socket_sendmsg+0x23/0x30 [37598.994008] [] sock_sendmsg+0xb0/0xe0 [37598.994008] [] ? __switch_to+0x181/0x4a0 [37598.994008] [] sys_sendto+0x12d/0x180 [37598.994008] [] ? __audit_syscall_entry+0x94/0xf0 [37598.994008] [] ? syscall_trace_enter+0x231/0x240 [37598.994008] [] tracesys+0xdd/0xe2 [37598.994008] Code: fe 07 00 00 48 c7 c7 04 28 a6 81 89 45 a0 4c 89 4d b8 44 89 5d a8 e8 1b ac b1 ff 44 8b 5d a8 4c 8b 4d b8 8b 45 a0 e9 cf fe ff ff <0f> 0b 66 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 48 89 e5 48 [37598.994008] RIP [] skb_copy_and_csum_bits+0x325/0x330 [37598.994008] RSP [37599.007323] ---[ end trace d69f6a17f8ac8eee ]--- While there, also check if path mtu discovery is activated for this socket. The logic was adapted from ip6_append_data when first writing on the corked socket. This bug was introduced with commit 0c1833797a5a6ec23ea9261d979aa18078720b74 ("ipv6: fix incorrect ipsec fragment"). v2: a) Replace IPV6_PMTU_DISC_DO with IPV6_PMTUDISC_PROBE. b) Don't pass ipv6_pinfo to ip6_append_data_mtu (suggestion by Gao feng, thanks!). c) Change mtu to unsigned int, else we get a warning about non-matching types because of the min()-macro type-check. Acked-by: Gao feng Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d5d20cde8d92..6e3ddf806ec2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1098,11 +1098,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } -static void ip6_append_data_mtu(int *mtu, +static void ip6_append_data_mtu(unsigned int *mtu, int *maxfraglen, unsigned int fragheaderlen, struct sk_buff *skb, - struct rt6_info *rt) + struct rt6_info *rt, + bool pmtuprobe) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { @@ -1114,7 +1115,9 @@ static void ip6_append_data_mtu(int *mtu, * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = dst_mtu(rt->dst.path); + *mtu = min(*mtu, pmtuprobe ? + rt->dst.dev->mtu : + dst_mtu(rt->dst.path)); } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1131,11 +1134,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, mtu; int exthdrlen; int dst_exthdrlen; int hh_len; - int mtu; int copy; int err; int offset = 0; @@ -1292,7 +1294,9 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* update mtu and maxfraglen if necessary */ if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, - fragheaderlen, skb, rt); + fragheaderlen, skb, rt, + np->pmtudisc == + IPV6_PMTUDISC_PROBE); skb_prev = skb; From c3a5491228f90fd82e67b7ccd2fcbaf984d44c07 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 3 Jul 2013 20:45:04 +0200 Subject: [PATCH 0210/1368] ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available [ Upstream commit 3630d40067a21d4dfbadc6002bb469ce26ac5d52 ] After the removal of rt->n we do not create a neighbour entry at route insertion time (rt6_bind_neighbour is gone). As long as no neighbour is created because of "useful traffic" we skip this routing entry because rt6_check_neigh cannot pick up a valid neighbour (neigh == NULL) and thus returns false. This change was introduced by commit 887c95cc1da53f66a5890fdeab13414613010097 ("ipv6: Complete neighbour entry removal from dst_entry.") To quote RFC4191: "If the host has no information about the router's reachability, then the host assumes the router is reachable." and also: "A host MUST NOT probe a router's reachability in the absence of useful traffic that the host would have sent to the router if it were reachable." So, just assume the router is reachable and let's rt6_probe do the rest. We don't need to create a neighbour on route insertion time. If we don't compile with CONFIG_IPV6_ROUTER_PREF (RFC4191 support) a neighbour is only valid if its nud_state is NUD_VALID. I did not find any references that we should probe the router on route insertion time via the other RFCs. So skip this route in that case. v2: a) use IS_ENABLED instead of #ifdefs (thanks to Sergei Shtylyov) Reported-by: Pierre Emeriaud Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad0aa6b0b86a..7f1332fc4346 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -547,6 +547,8 @@ static inline bool rt6_check_neigh(struct rt6_info *rt) ret = true; #endif read_unlock(&neigh->lock); + } else if (IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + ret = true; } rcu_read_unlock_bh(); From 1c6d3d1d21e522c9722cd6bdfd57afb171786028 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 4 Jul 2013 23:48:46 +0100 Subject: [PATCH 0211/1368] sfc: Fix memory leak when discarding scattered packets [ Upstream commit 734d4e159b283a4ae4d007b7e7a91d84398ccb92 ] Commit 2768935a4660 ('sfc: reuse pages to avoid DMA mapping/unmapping costs') did not fully take account of DMA scattering which was introduced immediately before. If a received packet is invalid and must be discarded, we only drop a reference to the first buffer's page, but we need to drop a reference for each buffer the packet used. I think this bug was missed partly because efx_recycle_rx_buffers() was not renamed and so no longer does what its name says. It does not change the state of buffers, but only prepares the underlying pages for recycling. Rename it accordingly. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/rx.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index a7dfe36cabf4..5173eaac5bca 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -282,9 +282,9 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, } /* Recycle the pages that are used by buffers that have just been received. */ -static void efx_recycle_rx_buffers(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - unsigned int n_frags) +static void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) { struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); @@ -294,6 +294,20 @@ static void efx_recycle_rx_buffers(struct efx_channel *channel, } while (--n_frags); } +static void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + efx_recycle_rx_pages(channel, rx_buf, n_frags); + + do { + efx_free_rx_buffer(rx_buf); + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--n_frags); +} + /** * efx_fast_push_rx_descriptors - push new RX descriptors quickly * @rx_queue: RX descriptor queue @@ -533,8 +547,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, */ if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { efx_rx_flush_packet(channel); - put_page(rx_buf->page); - efx_recycle_rx_buffers(channel, rx_buf, n_frags); + efx_discard_rx_packet(channel, rx_buf, n_frags); return; } @@ -570,9 +583,9 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); } - /* All fragments have been DMA-synced, so recycle buffers and pages. */ + /* All fragments have been DMA-synced, so recycle pages. */ rx_buf = efx_rx_buffer(rx_queue, index); - efx_recycle_rx_buffers(channel, rx_buf, n_frags); + efx_recycle_rx_pages(channel, rx_buf, n_frags); /* Pipeline receives so that we give time for packet headers to be * prefetched into cache. From d1a2a1a6efee375e9ac00ec0a718503fd05b4302 Mon Sep 17 00:00:00 2001 From: Jongsung Kim Date: Tue, 9 Jul 2013 17:36:00 +0900 Subject: [PATCH 0212/1368] net/cadence/macb: fix bug/typo in extracting gem_irq_read_clear bit [ Upstream commit 01276ed2424eb78c95461545410923d5da154d31 ] Signed-off-by: Jongsung Kim Acked-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index c89aa41dd448..b4e0dc832c69 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1070,7 +1070,7 @@ static void macb_configure_dma(struct macb *bp) static void macb_configure_caps(struct macb *bp) { if (macb_is_gem(bp)) { - if (GEM_BF(IRQCOR, gem_readl(bp, DCFG1)) == 0) + if (GEM_BFEXT(IRQCOR, gem_readl(bp, DCFG1)) == 0) bp->caps |= MACB_CAPS_ISR_CLEAR_ON_WRITE; } } From c23b1ece6112ed0f227fdc881db33c6427b65222 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 9 Jul 2013 13:19:18 +0300 Subject: [PATCH 0213/1368] virtio: support unlocked queue poll [ Upstream commit cc229884d3f77ec3b1240e467e0236c3e0647c0c ] This adds a way to check ring empty state after enable_cb outside any locks. Will be used by virtio_net. Note: there's room for more optimization: caller is likely to have a memory barrier already, which means we might be able to get rid of a barrier here. Deferring this optimization until we do some benchmarking. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++---------- include/linux/virtio.h | 4 +++ 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5217baf5528c..37d58f84dc50 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -606,6 +606,55 @@ void virtqueue_disable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); +/** + * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns current queue state + * in an opaque unsigned value. This value should be later tested by + * virtqueue_poll, to detect a possible race between the driver checking for + * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 last_used_idx; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx; + END_USE(vq); + return last_used_idx; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); + +/** + * virtqueue_poll - query pending used buffers + * @vq: the struct virtqueue we're talking about. + * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). + * + * Returns "true" if there are pending used buffers in the queue. + * + * This does not need to be serialized. + */ +bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + virtio_mb(vq->weak_barriers); + return (u16)last_used_idx != vq->vring.used->idx; +} +EXPORT_SYMBOL_GPL(virtqueue_poll); + /** * virtqueue_enable_cb - restart callbacks after disable_cb. * @vq: the struct virtqueue we're talking about. @@ -619,25 +668,8 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb); */ bool virtqueue_enable_cb(struct virtqueue *_vq) { - struct vring_virtqueue *vq = to_vvq(_vq); - - START_USE(vq); - - /* We optimistically turn back on interrupts, then check if there was - * more to do. */ - /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to - * either clear the flags bit or point the event index at the next - * entry. Always do both to keep code simple. */ - vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(vq->weak_barriers); - if (unlikely(more_used(vq))) { - END_USE(vq); - return false; - } - - END_USE(vq); - return true; + unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); + return !virtqueue_poll(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 9ff8645b7e0b..72398eea6e86 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -70,6 +70,10 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); + +bool virtqueue_poll(struct virtqueue *vq, unsigned); + bool virtqueue_enable_cb_delayed(struct virtqueue *vq); void *virtqueue_detach_unused_buf(struct virtqueue *vq); From 2b0e8a4ff83fd85afe5d0b84a2e1c6faa5fa56b8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 9 Jul 2013 08:13:04 +0300 Subject: [PATCH 0214/1368] virtio_net: fix race in RX VQ processing [ Upstream commit cbdadbbf0c790f79350a8f36029208944c5487d0 ] virtio net called virtqueue_enable_cq on RX path after napi_complete, so with NAPI_STATE_SCHED clear - outside the implicit napi lock. This violates the requirement to synchronize virtqueue_enable_cq wrt virtqueue_add_buf. In particular, used event can move backwards, causing us to lose interrupts. In a debug build, this can trigger panic within START_USE. Jason Wang reports that he can trigger the races artificially, by adding udelay() in virtqueue_enable_cb() after virtio_mb(). However, we must call napi_complete to clear NAPI_STATE_SCHED before polling the virtqueue for used buffers, otherwise napi_schedule_prep in a callback will fail, causing us to lose RX events. To fix, call virtqueue_enable_cb_prepare with NAPI_STATE_SCHED set (under napi lock), later call virtqueue_poll with NAPI_STATE_SCHED clear (outside the lock). Reported-by: Jason Wang Tested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c9e00387d999..42d670a468f8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -602,7 +602,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget) container_of(napi, struct receive_queue, napi); struct virtnet_info *vi = rq->vq->vdev->priv; void *buf; - unsigned int len, received = 0; + unsigned int r, len, received = 0; again: while (received < budget && @@ -619,8 +619,9 @@ static int virtnet_poll(struct napi_struct *napi, int budget) /* Out of packets? */ if (received < budget) { + r = virtqueue_enable_cb_prepare(rq->vq); napi_complete(napi); - if (unlikely(!virtqueue_enable_cb(rq->vq)) && + if (unlikely(virtqueue_poll(rq->vq, r)) && napi_schedule_prep(napi)) { virtqueue_disable_cb(rq->vq); __napi_schedule(napi); From f5ce1d2513b74d4603769ed99636dc52144f02c4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 25 Jun 2013 17:29:46 +0300 Subject: [PATCH 0215/1368] vhost-net: fix use-after-free in vhost_net_flush [ Upstream commit c38e39c378f46f00ce922dd40a91043a9925c28d ] vhost_net_ubuf_put_and_wait has a confusing name: it will actually also free it's argument. Thus since commit 1280c27f8e29acf4af2da914e80ec27c3dbd5c01 "vhost-net: flush outstanding DMAs on memory change" vhost_net_flush tries to use the argument after passing it to vhost_net_ubuf_put_and_wait, this results in use after free. To fix, don't free the argument in vhost_net_ubuf_put_and_wait, add an new API for callers that want to free ubufs. Acked-by: Asias He Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f80d3dd41d8c..8ca5ac71b845 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -150,6 +150,11 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) { kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal); wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); +} + +static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) +{ + vhost_net_ubuf_put_and_wait(ubufs); kfree(ubufs); } @@ -948,7 +953,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) mutex_unlock(&vq->mutex); if (oldubufs) { - vhost_net_ubuf_put_and_wait(oldubufs); + vhost_net_ubuf_put_wait_and_free(oldubufs); mutex_lock(&vq->mutex); vhost_zerocopy_signal_used(n, vq); mutex_unlock(&vq->mutex); @@ -966,7 +971,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) rcu_assign_pointer(vq->private_data, oldsock); vhost_net_enable_vq(n, vq); if (ubufs) - vhost_net_ubuf_put_and_wait(ubufs); + vhost_net_ubuf_put_wait_and_free(ubufs); err_ubufs: fput(sock->file); err_vq: From c51a7a308e2ff38ae5ad3005a7ce8c333779f8fd Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 1 Jul 2013 16:49:22 -0500 Subject: [PATCH 0216/1368] sunvnet: vnet_port_remove must call unregister_netdev [ Upstream commit aabb9875d02559ab9b928cd6f259a5cc4c21a589 ] The missing call to unregister_netdev() leaves the interface active after the driver is unloaded by rmmod. Signed-off-by: Dave Kleikamp Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sun/sunvnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 1df0ff3839e8..3df56840a3b9 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -1239,6 +1239,8 @@ static int vnet_port_remove(struct vio_dev *vdev) dev_set_drvdata(&vdev->dev, NULL); kfree(port); + + unregister_netdev(vp->dev); } return 0; } From 6683151aeea6c7aade12dde7ec2008fc380f0728 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 10 Jul 2013 12:04:02 +0800 Subject: [PATCH 0217/1368] ifb: fix rcu_sched self-detected stalls [ Upstream commit 440d57bc5ff55ec1efb3efc9cbe9420b4bbdfefa ] According to the commit 16b0dc29c1af9df341428f4c49ada4f626258082 (dummy: fix rcu_sched self-detected stalls) Eric Dumazet fix the problem in dummy, but the ifb will occur the same problem like the dummy modules. Trying to "modprobe ifb numifbs=30000" triggers : INFO: rcu_sched self-detected stall on CPU After this splat, RTNL is locked and reboot is needed. We must call cond_resched() to avoid this, even holding RTNL. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ifb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index dc9f6a45515d..a11f7a45cb5f 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -292,8 +292,10 @@ static int __init ifb_init_module(void) rtnl_lock(); err = __rtnl_link_register(&ifb_link_ops); - for (i = 0; i < numifbs && !err; i++) + for (i = 0; i < numifbs && !err; i++) { err = ifb_init_one(i); + cond_resched(); + } if (err) __rtnl_link_unregister(&ifb_link_ops); rtnl_unlock(); From d09ec76aebab878e0dc583484152e20a2172a7b4 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 10 Jul 2013 13:43:27 +0800 Subject: [PATCH 0218/1368] tuntap: correctly linearize skb when zerocopy is used [ Upstream commit 3dd5c3308e8b671e8e8882ba972f51cefbe9fd0d ] Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to linearize parts of the skb to let the rest of iov to be fit in the frags, we need count copylen into linear when calling tun_alloc_skb() instead of partly counting it into data_len. Since this breaks zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should be zero at beginning. This cause nr_frags to be increased wrongly without setting the correct frags. This bug were introduced from 0690899b4d4501b3505be069b9a687e68ccbe15b (tun: experimental zero copy tx support) Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9c61f8734a40..c3cb60b1d1a2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1044,7 +1044,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; - size_t len = total_len, align = NET_SKB_PAD; + size_t len = total_len, align = NET_SKB_PAD, linear; struct virtio_net_hdr gso = { 0 }; int offset = 0; int copylen; @@ -1108,10 +1108,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, copylen = gso.hdr_len; if (!copylen) copylen = GOODCOPY_LEN; - } else + linear = copylen; + } else { copylen = len; + linear = gso.hdr_len; + } - skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock); + skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) tun->dev->stats.rx_dropped++; From bd31fdd2f48f4e42c6a19d24fa85e59eb2ccf583 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 10 Jul 2013 13:43:28 +0800 Subject: [PATCH 0219/1368] macvtap: correctly linearize skb when zerocopy is used [ Upstream commit 61d46bf979d5cd7c164709a80ad5676a35494aae ] Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to linearize parts of the skb to let the rest of iov to be fit in the frags, we need count copylen into linear when calling macvtap_alloc_skb() instead of partly counting it into data_len. Since this breaks zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should be zero at beginning. This cause nr_frags to be increased wrongly without setting the correct frags. This bug were introduced from b92946e2919134ebe2a4083e4302236295ea2a73 (macvtap: zerocopy: validate vectors before building skb). Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index b6dd6a75919a..502d9486395f 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -647,6 +647,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, int vnet_hdr_len = 0; int copylen = 0; bool zerocopy = false; + size_t linear; if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = q->vnet_hdr_sz; @@ -701,11 +702,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, copylen = vnet_hdr.hdr_len; if (!copylen) copylen = GOODCOPY_LEN; - } else + linear = copylen; + } else { copylen = len; + linear = vnet_hdr.hdr_len; + } skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, - vnet_hdr.hdr_len, noblock, &err); + linear, noblock, &err); if (!skb) goto err; From a025e28ad81b611a908bf234579b2fc58efcb371 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 10 Jul 2013 23:00:57 +0200 Subject: [PATCH 0220/1368] ipv6: in case of link failure remove route directly instead of letting it expire [ Upstream commit 1eb4f758286884e7566627164bca4c4a16952a83 ] We could end up expiring a route which is part of an ecmp route set. Doing so would invalidate the rt->rt6i_nsiblings calculations and could provoke the following panic: [ 80.144667] ------------[ cut here ]------------ [ 80.145172] kernel BUG at net/ipv6/ip6_fib.c:733! [ 80.145172] invalid opcode: 0000 [#1] SMP [ 80.145172] Modules linked in: 8021q nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables +snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer virtio_balloon snd soundcore i2c_piix4 i2c_core virtio_net virtio_blk [ 80.145172] CPU: 1 PID: 786 Comm: ping6 Not tainted 3.10.0+ #118 [ 80.145172] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 80.145172] task: ffff880117fa0000 ti: ffff880118770000 task.ti: ffff880118770000 [ 80.145172] RIP: 0010:[] [] fib6_add+0x75d/0x830 [ 80.145172] RSP: 0018:ffff880118771798 EFLAGS: 00010202 [ 80.145172] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011350e480 [ 80.145172] RDX: ffff88011350e238 RSI: 0000000000000004 RDI: ffff88011350f738 [ 80.145172] RBP: ffff880118771848 R08: ffff880117903280 R09: 0000000000000001 [ 80.145172] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011350f680 [ 80.145172] R13: ffff880117903280 R14: ffff880118771890 R15: ffff88011350ef90 [ 80.145172] FS: 00007f02b5127740(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 [ 80.145172] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 80.145172] CR2: 00007f981322a000 CR3: 00000001181b1000 CR4: 00000000000006e0 [ 80.145172] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 80.145172] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 80.145172] Stack: [ 80.145172] 0000000000000001 ffff880100000000 ffff880100000000 ffff880117903280 [ 80.145172] 0000000000000000 ffff880119a4cf00 0000000000000400 00000000000007fa [ 80.145172] 0000000000000000 0000000000000000 0000000000000000 ffff88011350f680 [ 80.145172] Call Trace: [ 80.145172] [] ? rt6_bind_peer+0x4b/0x90 [ 80.145172] [] __ip6_ins_rt+0x45/0x70 [ 80.145172] [] ip6_ins_rt+0x35/0x40 [ 80.145172] [] ip6_pol_route.isra.44+0x3a4/0x4b0 [ 80.145172] [] ip6_pol_route_output+0x2a/0x30 [ 80.145172] [] fib6_rule_action+0xd7/0x210 [ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 [ 80.145172] [] fib_rules_lookup+0xc6/0x140 [ 80.145172] [] fib6_rule_lookup+0x44/0x80 [ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 [ 80.145172] [] ip6_route_output+0x73/0xb0 [ 80.145172] [] ip6_dst_lookup_tail+0x2c3/0x2e0 [ 80.145172] [] ? list_del+0x11/0x40 [ 80.145172] [] ? remove_wait_queue+0x3c/0x50 [ 80.145172] [] ip6_dst_lookup_flow+0x3d/0xa0 [ 80.145172] [] rawv6_sendmsg+0x267/0xc20 [ 80.145172] [] inet_sendmsg+0x63/0xb0 [ 80.145172] [] ? selinux_socket_sendmsg+0x23/0x30 [ 80.145172] [] sock_sendmsg+0xa6/0xd0 [ 80.145172] [] SYSC_sendto+0x128/0x180 [ 80.145172] [] ? update_curr+0xec/0x170 [ 80.145172] [] ? kvm_clock_get_cycles+0x9/0x10 [ 80.145172] [] ? __getnstimeofday+0x3e/0xd0 [ 80.145172] [] SyS_sendto+0xe/0x10 [ 80.145172] [] system_call_fastpath+0x16/0x1b [ 80.145172] Code: fe ff ff 41 f6 45 2a 06 0f 85 ca fe ff ff 49 8b 7e 08 4c 89 ee e8 94 ef ff ff e9 b9 fe ff ff 48 8b 82 28 05 00 00 e9 01 ff ff ff <0f> 0b 49 8b 54 24 30 0d 00 00 40 00 89 83 14 01 00 00 48 89 53 [ 80.145172] RIP [] fib6_add+0x75d/0x830 [ 80.145172] RSP [ 80.387413] ---[ end trace 02f20b7a8b81ed95 ]--- [ 80.390154] Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Hannes Frederic Sowa Cc: Nicolas Dichtel Cc: YOSHIFUJI Hideaki Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7f1332fc4346..262d6d8c7e89 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1076,10 +1076,13 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags & RTF_CACHE) - rt6_update_expires(rt, 0); - else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + if (rt->rt6i_flags & RTF_CACHE) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; + } } } From 53effe1697676189395fc9f8a02c749e19cc9daa Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 11 Jul 2013 13:16:54 -0400 Subject: [PATCH 0221/1368] 9p: fix off by one causing access violations and memory corruption [ Upstream commit 110ecd69a9feea82a152bbf9b12aba57e6396883 ] p9_release_pages() would attempt to dereference one value past the end of pages[]. This would cause the following crashes: [ 6293.171817] BUG: unable to handle kernel paging request at ffff8807c96f3000 [ 6293.174146] IP: [] p9_release_pages+0x3b/0x60 [ 6293.176447] PGD 79c5067 PUD 82c1e3067 PMD 82c197067 PTE 80000007c96f3060 [ 6293.180060] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 6293.180060] Modules linked in: [ 6293.180060] CPU: 62 PID: 174043 Comm: modprobe Tainted: G W 3.10.0-next-20130710-sasha #3954 [ 6293.180060] task: ffff8807b803b000 ti: ffff880787dde000 task.ti: ffff880787dde000 [ 6293.180060] RIP: 0010:[] [] p9_release_pages+0x3b/0x60 [ 6293.214316] RSP: 0000:ffff880787ddfc28 EFLAGS: 00010202 [ 6293.214316] RAX: 0000000000000001 RBX: ffff8807c96f2ff8 RCX: 0000000000000000 [ 6293.222017] RDX: ffff8807b803b000 RSI: 0000000000000001 RDI: ffffea001c7e3d40 [ 6293.222017] RBP: ffff880787ddfc48 R08: 0000000000000000 R09: 0000000000000000 [ 6293.222017] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 [ 6293.222017] R13: 0000000000000001 R14: ffff8807cc50c070 R15: ffff8807cc50c070 [ 6293.222017] FS: 00007f572641d700(0000) GS:ffff8807f3600000(0000) knlGS:0000000000000000 [ 6293.256784] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 6293.256784] CR2: ffff8807c96f3000 CR3: 00000007c8e81000 CR4: 00000000000006e0 [ 6293.256784] Stack: [ 6293.256784] ffff880787ddfcc8 ffff880787ddfcc8 0000000000000000 ffff880787ddfcc8 [ 6293.256784] ffff880787ddfd48 ffffffff84128be8 ffff880700000002 0000000000000001 [ 6293.256784] ffff8807b803b000 ffff880787ddfce0 0000100000000000 0000000000000000 [ 6293.256784] Call Trace: [ 6293.256784] [] p9_virtio_zc_request+0x598/0x630 [ 6293.256784] [] ? wake_up_bit+0x40/0x40 [ 6293.256784] [] p9_client_zc_rpc+0x111/0x3a0 [ 6293.256784] [] ? sched_clock_cpu+0x108/0x120 [ 6293.256784] [] p9_client_read+0xe1/0x2c0 [ 6293.256784] [] v9fs_file_read+0x90/0xc0 [ 6293.256784] [] vfs_read+0xc3/0x130 [ 6293.256784] [] ? trace_hardirqs_on+0xd/0x10 [ 6293.256784] [] SyS_read+0x62/0xa0 [ 6293.256784] [] tracesys+0xdd/0xe2 [ 6293.256784] Code: 66 90 48 89 fb 41 89 f5 48 8b 3f 48 85 ff 74 29 85 f6 74 25 45 31 e4 66 0f 1f 84 00 00 00 00 00 e8 eb 14 12 fd 41 ff c4 49 63 c4 <48> 8b 3c c3 48 85 ff 74 05 45 39 e5 75 e7 48 83 c4 08 5b 41 5c [ 6293.256784] RIP [] p9_release_pages+0x3b/0x60 [ 6293.256784] RSP [ 6293.256784] CR2: ffff8807c96f3000 [ 6293.256784] ---[ end trace 50822ee72cd360fc ]--- Signed-off-by: Sasha Levin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index de8df957867d..2ee3879161b1 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -24,11 +24,11 @@ */ void p9_release_pages(struct page **pages, int nr_pages) { - int i = 0; - while (pages[i] && nr_pages--) { - put_page(pages[i]); - i++; - } + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); } EXPORT_SYMBOL(p9_release_pages); From 61b6f1280da836ca67e6e4cfaefd65e989f6bfbc Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 11 Jul 2013 15:53:21 +0200 Subject: [PATCH 0222/1368] alx: fix lockdep annotation [ Upstream commit a8798a5c77c9981e88caef1373a3310bf8aed219 ] Move spin_lock_init to be called before the spinlocks are used, preventing a lockdep splat. Signed-off-by: Maarten Lankhorst Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/alx/main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 418de8b13165..d30085c2b454 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1303,6 +1303,8 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) SET_NETDEV_DEV(netdev, &pdev->dev); alx = netdev_priv(netdev); + spin_lock_init(&alx->hw.mdio_lock); + spin_lock_init(&alx->irq_lock); alx->dev = netdev; alx->hw.pdev = pdev; alx->msg_enable = NETIF_MSG_LINK | NETIF_MSG_HW | NETIF_MSG_IFUP | @@ -1385,9 +1387,6 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&alx->link_check_wk, alx_link_check); INIT_WORK(&alx->reset_wk, alx_reset); - spin_lock_init(&alx->hw.mdio_lock); - spin_lock_init(&alx->irq_lock); - netif_carrier_off(netdev); err = register_netdev(netdev); From b4f1489ed58cb6337c663d6eabd8be86c84bf9f1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 11 Jul 2013 12:43:42 +0200 Subject: [PATCH 0223/1368] ipv6: fix route selection if kernel is not compiled with CONFIG_IPV6_ROUTER_PREF [ Upstream commit afc154e978de1eb11c555bc8bcec1552f75ebc43 ] This is a follow-up patch to 3630d40067a21d4dfbadc6002bb469ce26ac5d52 ("ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available"). Since the removal of rt->n in rt6_info we can end up with a dst == NULL in rt6_check_neigh. In case the kernel is not compiled with CONFIG_IPV6_ROUTER_PREF we should also select a route with unkown NUD state but we must not avoid doing round robin selection on routes with the same target. So introduce and pass down a boolean ``do_rr'' to indicate when we should update rt->rr_ptr. As soon as no route is valid we do backtracking and do a lookup on a higher level in the fib trie. v2: a) Improved rt6_check_neigh logic (no need to create neighbour there) and documented return values. v3: a) Introduce enum rt6_nud_state to get rid of the magic numbers (thanks to David Miller). b) Update and shorten commit message a bit to actualy reflect the source. Reported-by: Pierre Emeriaud Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 69 ++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 262d6d8c7e89..bacce6c08644 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -65,6 +65,12 @@ #include #endif +enum rt6_nud_state { + RT6_NUD_FAIL_HARD = -2, + RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_SUCCEED = 1 +}; + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); @@ -527,28 +533,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline bool rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh; - bool ret = false; + enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - return true; + return RT6_NUD_SUCCEED; rcu_read_lock_bh(); neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) - ret = true; + ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) - ret = true; + ret = RT6_NUD_SUCCEED; #endif read_unlock(&neigh->lock); - } else if (IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { - ret = true; + } else { + ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? + RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; } rcu_read_unlock_bh(); @@ -562,43 +569,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif, m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) - return -1; + return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif - if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) - return -1; + if (strict & RT6_LOOKUP_F_REACHABLE) { + int n = rt6_check_neigh(rt); + if (n < 0) + return n; + } return m; } static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match) + int *mpri, struct rt6_info *match, + bool *do_rr) { int m; + bool match_do_rr = false; if (rt6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); - if (m < 0) + if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + match_do_rr = true; + m = 0; /* lowest valid score */ + } else if (m < 0) { goto out; - - if (m > *mpri) { - if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(match); - *mpri = m; - match = rt; - } else if (strict & RT6_LOOKUP_F_REACHABLE) { - rt6_probe(rt); } + if (strict & RT6_LOOKUP_F_REACHABLE) + rt6_probe(rt); + + if (m > *mpri) { + *do_rr = match_do_rr; + *mpri = m; + match = rt; + } out: return match; } static struct rt6_info *find_rr_leaf(struct fib6_node *fn, struct rt6_info *rr_head, - u32 metric, int oif, int strict) + u32 metric, int oif, int strict, + bool *do_rr) { struct rt6_info *rt, *match; int mpri = -1; @@ -606,10 +622,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; } @@ -618,15 +634,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; struct net *net; + bool do_rr = false; rt0 = fn->rr_ptr; if (!rt0) fn->rr_ptr = rt0 = fn->leaf; - match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); + match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, + &do_rr); - if (!match && - (strict & RT6_LOOKUP_F_REACHABLE)) { + if (do_rr) { struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ From 629b1d3db71d24e52478b94c71202034eaefed80 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Thu, 11 Jul 2013 19:04:02 +0800 Subject: [PATCH 0224/1368] dummy: fix oops when loading the dummy failed [ Upstream commit 2c8a01894a12665d8059fad8f0a293c98a264121 ] We rename the dummy in modprobe.conf like this: install dummy0 /sbin/modprobe -o dummy0 --ignore-install dummy install dummy1 /sbin/modprobe -o dummy1 --ignore-install dummy We got oops when we run the command: modprobe dummy0 modprobe dummy1 ------------[ cut here ]------------ [ 3302.187584] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 3302.195411] IP: [] __rtnl_link_unregister+0x9a/0xd0 [ 3302.201844] PGD 85c94a067 PUD 8517bd067 PMD 0 [ 3302.206305] Oops: 0002 [#1] SMP [ 3302.299737] task: ffff88105ccea300 ti: ffff880eba4a0000 task.ti: ffff880eba4a0000 [ 3302.307186] RIP: 0010:[] [] __rtnl_link_unregister+0x9a/0xd0 [ 3302.316044] RSP: 0018:ffff880eba4a1dd8 EFLAGS: 00010246 [ 3302.321332] RAX: 0000000000000000 RBX: ffffffff81a9d738 RCX: 0000000000000002 [ 3302.328436] RDX: 0000000000000000 RSI: ffffffffa04d602c RDI: ffff880eba4a1dd8 [ 3302.335541] RBP: ffff880eba4a1e18 R08: dead000000200200 R09: dead000000100100 [ 3302.342644] R10: 0000000000000080 R11: 0000000000000003 R12: ffffffff81a9d788 [ 3302.349748] R13: ffffffffa04d7020 R14: ffffffff81a9d670 R15: ffff880eba4a1dd8 [ 3302.364910] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3302.370630] CR2: 0000000000000008 CR3: 000000085e15e000 CR4: 00000000000427e0 [ 3302.377734] DR0: 0000000000000003 DR1: 00000000000000b0 DR2: 0000000000000001 [ 3302.384838] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 3302.391940] Stack: [ 3302.393944] ffff880eba4a1dd8 ffff880eba4a1dd8 ffff880eba4a1e18 ffffffffa04d70c0 [ 3302.401350] 00000000ffffffef ffffffffa01a8000 0000000000000000 ffffffff816111c8 [ 3302.408758] ffff880eba4a1e48 ffffffffa01a80be ffff880eba4a1e48 ffffffffa04d70c0 [ 3302.416164] Call Trace: [ 3302.418605] [] ? 0xffffffffa01a7fff [ 3302.423727] [] dummy_init_module+0xbe/0x1000 [dummy0] [ 3302.430405] [] ? 0xffffffffa01a7fff [ 3302.435535] [] do_one_initcall+0x152/0x1b0 [ 3302.441263] [] do_init_module+0x7b/0x200 [ 3302.446824] [] load_module+0x4e2/0x530 [ 3302.452215] [] ? ddebug_dyndbg_boot_param_cb+0x60/0x60 [ 3302.458979] [] SyS_init_module+0xd1/0x130 [ 3302.464627] [] system_call_fastpath+0x16/0x1b [ 3302.490090] RIP [] __rtnl_link_unregister+0x9a/0xd0 [ 3302.496607] RSP [ 3302.500084] CR2: 0000000000000008 [ 3302.503466] ---[ end trace 8342d49cd49f78ed ]--- The reason is that when loading dummy, if __rtnl_link_register() return failed, the init_module should return and avoid take the wrong path. Signed-off-by: Tan Xiaojun Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dummy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 42aa54af6842..b710c6b2d659 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -185,6 +185,8 @@ static int __init dummy_init_module(void) rtnl_lock(); err = __rtnl_link_register(&dummy_link_ops); + if (err < 0) + goto out; for (i = 0; i < numdummies && !err; i++) { err = dummy_init_one(); @@ -192,6 +194,8 @@ static int __init dummy_init_module(void) } if (err < 0) __rtnl_link_unregister(&dummy_link_ops); + +out: rtnl_unlock(); return err; From f84ddbc58cfd8551757efe3c61242c4d6e9918ce Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Thu, 11 Jul 2013 19:04:06 +0800 Subject: [PATCH 0225/1368] ifb: fix oops when loading the ifb failed [ Upstream commit f2966cd5691058b8674a20766525bedeaea9cbcf ] If __rtnl_link_register() return faild when loading the ifb, it will take the wrong path and get oops, so fix it just like dummy. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ifb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index a11f7a45cb5f..a3bed28197d2 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -291,6 +291,8 @@ static int __init ifb_init_module(void) rtnl_lock(); err = __rtnl_link_register(&ifb_link_ops); + if (err < 0) + goto out; for (i = 0; i < numifbs && !err; i++) { err = ifb_init_one(i); @@ -298,6 +300,8 @@ static int __init ifb_init_module(void) } if (err) __rtnl_link_unregister(&ifb_link_ops); + +out: rtnl_unlock(); return err; From 6afbcb59964ec9dc3dd63ed96a9e2cda31f8a072 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 11 Jul 2013 13:12:22 -0700 Subject: [PATCH 0226/1368] gre: Fix MTU sizing check for gretap tunnels [ Upstream commit 8c91e162e058bb91b7766f26f4d5823a21941026 ] This change fixes an MTU sizing issue seen with gretap tunnels when non-gso packets are sent from the interface. In my case I was able to reproduce the issue by simply sending a ping of 1421 bytes with the gretap interface created on a device with a standard 1500 mtu. This fix is based on the fact that the tunnel mtu is already adjusted by dev->hard_header_len so it would make sense that any packets being compared against that mtu should also be adjusted by hard_header_len and the tunnel header instead of just the tunnel header. Signed-off-by: Alexander Duyck Reported-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d05bd02886ed..cbfc37f5f05a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -490,7 +490,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rtable *rt, __be16 df) { struct ip_tunnel *tunnel = netdev_priv(dev); - int pkt_size = skb->len - tunnel->hlen; + int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; int mtu; if (df) From d6245ef7c307dfe1ae238a64980cf2469ecc5f68 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 12 Jul 2013 23:46:33 +0200 Subject: [PATCH 0227/1368] ipv6: only static routes qualify for equal cost multipathing [ Upstream commit 307f2fb95e9b96b3577916e73d92e104f8f26494 ] Static routes in this case are non-expiring routes which did not get configured by autoconf or by icmpv6 redirects. To make sure we actually get an ecmp route while searching for the first one in this fib6_node's leafs, also make sure it matches the ecmp route assumptions. v2: a) Removed RTF_EXPIRE check in dst.from chain. The check of RTF_ADDRCONF already ensures that this route, even if added again without RTF_EXPIRES (in case of a RA announcement with infinite timeout), does not cause the rt6i_nsiblings logic to go wrong if a later RA updates the expiration time later. v3: a) Allow RTF_EXPIRES routes to enter the ecmp route set. We have to do so, because an pmtu event could update the RTF_EXPIRES flag and we would not count this route, if another route joins this set. We now filter only for RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC, which are flags that don't get changed after rt6_info construction. Cc: Nicolas Dichtel Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 192dd1a0e188..5fc9c7a68d8d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -632,6 +632,12 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, return ln; } +static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + /* * Insert routing information in a node. */ @@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; + bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); ins = &fn->leaf; @@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * To avoid long list, we only had siblings if the * route have a gateway. */ - if (rt->rt6i_flags & RTF_GATEWAY && - !(rt->rt6i_flags & RTF_EXPIRES) && - !(iter->rt6i_flags & RTF_EXPIRES)) + if (rt_can_ecmp && + rt6_qualify_for_ecmp(iter)) rt->rt6i_nsiblings++; } @@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* Find the first route that have the same metric */ sibling = fn->leaf; while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric) { + if (sibling->rt6i_metric == rt->rt6i_metric && + rt6_qualify_for_ecmp(sibling)) { list_add_tail(&rt->rt6i_siblings, &sibling->rt6i_siblings); break; From dc419b2d5cac92acac3228a46df12720cac358f5 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 12 Jul 2013 10:58:48 -0400 Subject: [PATCH 0228/1368] atl1e: fix dma mapping warnings [ Upstream commit 352900b583b2852152a1e05ea0e8b579292e731e ] Recently had this backtrace reported: WARNING: at lib/dma-debug.c:937 check_unmap+0x47d/0x930() Hardware name: System Product Name ATL1E 0000:02:00.0: DMA-API: device driver failed to check map error[device address=0x00000000cbfd1000] [size=90 bytes] [mapped as single] Modules linked in: xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables snd_hda_codec_hdmi snd_hda_codec_realtek iTCO_wdt iTCO_vendor_support snd_hda_intel acpi_cpufreq mperf coretemp btrfs zlib_deflate snd_hda_codec snd_hwdep microcode raid6_pq libcrc32c snd_seq usblp serio_raw xor snd_seq_device joydev snd_pcm snd_page_alloc snd_timer snd lpc_ich i2c_i801 soundcore mfd_core atl1e asus_atk0110 ata_generic pata_acpi radeon i2c_algo_bit drm_kms_helper ttm drm i2c_core pata_marvell uinput Pid: 314, comm: systemd-journal Not tainted 3.9.0-0.rc6.git2.3.fc19.x86_64 #1 Call Trace: [] warn_slowpath_common+0x66/0x80 [] warn_slowpath_fmt+0x4c/0x50 [] check_unmap+0x47d/0x930 [] ? sched_clock_cpu+0xa8/0x100 [] debug_dma_unmap_page+0x5f/0x70 [] ? unmap_single+0x20/0x30 [] atl1e_intr+0x3a1/0x5b0 [atl1e] [] ? trace_hardirqs_off+0xd/0x10 [] handle_irq_event_percpu+0x56/0x390 [] handle_irq_event+0x3d/0x60 [] handle_fasteoi_irq+0x5a/0x100 [] handle_irq+0xbf/0x150 [] ? file_sb_list_del+0x3f/0x50 [] ? irq_enter+0x50/0xa0 [] do_IRQ+0x4d/0xc0 [] ? file_sb_list_del+0x3f/0x50 [] common_interrupt+0x72/0x72 [] ? lock_release+0xc2/0x310 [] lg_local_unlock_cpu+0x24/0x50 [] file_sb_list_del+0x3f/0x50 [] fput+0x2d/0xc0 [] filp_close+0x61/0x90 [] __close_fd+0x8d/0x150 [] sys_close+0x20/0x50 [] system_call_fastpath+0x16/0x1b The usual straighforward failure to check for dma_mapping_error after a map operation is completed. This patch should fix it, the reporter wandered off after filing this bz: https://bugzilla.redhat.com/show_bug.cgi?id=954170 and I don't have hardware to test, but the fix is pretty straightforward, so I figured I'd post it for review. Signed-off-by: Neil Horman CC: Jay Cliburn CC: Chris Snook CC: "David S. Miller" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/atheros/atl1e/atl1e_main.c | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 0688bb82b442..8116cb84cdb8 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1665,8 +1665,8 @@ static int atl1e_tso_csum(struct atl1e_adapter *adapter, return 0; } -static void atl1e_tx_map(struct atl1e_adapter *adapter, - struct sk_buff *skb, struct atl1e_tpd_desc *tpd) +static int atl1e_tx_map(struct atl1e_adapter *adapter, + struct sk_buff *skb, struct atl1e_tpd_desc *tpd) { struct atl1e_tpd_desc *use_tpd = NULL; struct atl1e_tx_buffer *tx_buffer = NULL; @@ -1677,6 +1677,7 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, u16 nr_frags; u16 f; int segment; + int ring_start = adapter->tx_ring.next_to_use; nr_frags = skb_shinfo(skb)->nr_frags; segment = (tpd->word3 >> TPD_SEGMENT_EN_SHIFT) & TPD_SEGMENT_EN_MASK; @@ -1689,6 +1690,9 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, tx_buffer->length = map_len; tx_buffer->dma = pci_map_single(adapter->pdev, skb->data, hdr_len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) + return -ENOSPC; + ATL1E_SET_PCIMAP_TYPE(tx_buffer, ATL1E_TX_PCIMAP_SINGLE); mapped_len += map_len; use_tpd->buffer_addr = cpu_to_le64(tx_buffer->dma); @@ -1715,6 +1719,13 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, tx_buffer->dma = pci_map_single(adapter->pdev, skb->data + mapped_len, map_len, PCI_DMA_TODEVICE); + + if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { + /* Reset the tx rings next pointer */ + adapter->tx_ring.next_to_use = ring_start; + return -ENOSPC; + } + ATL1E_SET_PCIMAP_TYPE(tx_buffer, ATL1E_TX_PCIMAP_SINGLE); mapped_len += map_len; use_tpd->buffer_addr = cpu_to_le64(tx_buffer->dma); @@ -1750,6 +1761,13 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, (i * MAX_TX_BUF_LEN), tx_buffer->length, DMA_TO_DEVICE); + + if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { + /* Reset the ring next to use pointer */ + adapter->tx_ring.next_to_use = ring_start; + return -ENOSPC; + } + ATL1E_SET_PCIMAP_TYPE(tx_buffer, ATL1E_TX_PCIMAP_PAGE); use_tpd->buffer_addr = cpu_to_le64(tx_buffer->dma); use_tpd->word2 = (use_tpd->word2 & (~TPD_BUFLEN_MASK)) | @@ -1767,6 +1785,7 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, /* The last buffer info contain the skb address, so it will be free after unmap */ tx_buffer->skb = skb; + return 0; } static void atl1e_tx_queue(struct atl1e_adapter *adapter, u16 count, @@ -1834,10 +1853,13 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } - atl1e_tx_map(adapter, skb, tpd); + if (atl1e_tx_map(adapter, skb, tpd)) + goto out; + atl1e_tx_queue(adapter, tpd_req, tpd); netdev->trans_start = jiffies; /* NETIF_F_LLTX driver :( */ +out: spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_OK; } From da7e35cee6bf8aae938baa597863691bfffc26eb Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 16 Jul 2013 10:49:41 -0400 Subject: [PATCH 0229/1368] atl1e: unmap partially mapped skb on dma error and free skb [ Upstream commit 584ec4355355ffac43571b02a314d43eb2f7fcbf ] Ben Hutchings pointed out that my recent update to atl1e in commit 352900b583b2852152a1e05ea0e8b579292e731e ("atl1e: fix dma mapping warnings") was missing a bit of code. Specifically it reset the hardware tx ring to its origional state when we hit a dma error, but didn't unmap any exiting mappings from the operation. This patch fixes that up. It also remembers to free the skb in the event that an error occurs, so we don't leak. Untested, as I don't have hardware. I think its pretty straightforward, but please review closely. Signed-off-by: Neil Horman CC: Ben Hutchings CC: Jay Cliburn CC: Chris Snook CC: "David S. Miller" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/atheros/atl1e/atl1e_main.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 8116cb84cdb8..c23bb02e3ed0 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1678,6 +1678,7 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, u16 f; int segment; int ring_start = adapter->tx_ring.next_to_use; + int ring_end; nr_frags = skb_shinfo(skb)->nr_frags; segment = (tpd->word3 >> TPD_SEGMENT_EN_SHIFT) & TPD_SEGMENT_EN_MASK; @@ -1721,6 +1722,15 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, map_len, PCI_DMA_TODEVICE); if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { + /* We need to unwind the mappings we've done */ + ring_end = adapter->tx_ring.next_to_use; + adapter->tx_ring.next_to_use = ring_start; + while (adapter->tx_ring.next_to_use != ring_end) { + tpd = atl1e_get_tpd(adapter); + tx_buffer = atl1e_get_tx_buffer(adapter, tpd); + pci_unmap_single(adapter->pdev, tx_buffer->dma, + tx_buffer->length, PCI_DMA_TODEVICE); + } /* Reset the tx rings next pointer */ adapter->tx_ring.next_to_use = ring_start; return -ENOSPC; @@ -1763,6 +1773,16 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, DMA_TO_DEVICE); if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { + /* We need to unwind the mappings we've done */ + ring_end = adapter->tx_ring.next_to_use; + adapter->tx_ring.next_to_use = ring_start; + while (adapter->tx_ring.next_to_use != ring_end) { + tpd = atl1e_get_tpd(adapter); + tx_buffer = atl1e_get_tx_buffer(adapter, tpd); + dma_unmap_page(&adapter->pdev->dev, tx_buffer->dma, + tx_buffer->length, DMA_TO_DEVICE); + } + /* Reset the ring next to use pointer */ adapter->tx_ring.next_to_use = ring_start; return -ENOSPC; @@ -1853,8 +1873,10 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } - if (atl1e_tx_map(adapter, skb, tpd)) + if (atl1e_tx_map(adapter, skb, tpd)) { + dev_kfree_skb_any(skb); goto out; + } atl1e_tx_queue(adapter, tpd_req, tpd); From b3923f8211bc60ff3b4567fafe0814beb6bc5a50 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jul 2013 20:03:19 -0700 Subject: [PATCH 0230/1368] ipv4: set transport header earlier [ Upstream commit 21d1196a35f5686c4323e42a62fdb4b23b0ab4a3 ] commit 45f00f99d6e ("ipv4: tcp: clean up tcp_v4_early_demux()") added a performance regression for non GRO traffic, basically disabling IP early demux. IPv6 stack resets transport header in ip6_rcv() before calling IP early demux in ip6_rcv_finish(), while IPv4 does this only in ip_local_deliver_finish(), _after_ IP early demux. GRO traffic happened to enable IP early demux because transport header is also set in inet_gro_receive() Instead of reverting the faulty commit, we can make IPv4/IPv6 behave the same : transport_header should be set in ip_rcv() instead of ip_local_deliver_finish() ip_local_deliver_finish() can also use skb_network_header_len() which is faster than ip_hdrlen() Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3da817b89e9b..15e3e683adec 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -190,10 +190,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); - __skb_pull(skb, ip_hdrlen(skb)); - - /* Point into the IP datagram, just past the header. */ - skb_reset_transport_header(skb); + __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); { @@ -437,6 +434,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + skb->transport_header = skb->network_header + iph->ihl*4; + /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); From e0ca176c17745ac3e0c5a9969aba3c983e6c69c5 Mon Sep 17 00:00:00 2001 From: Sarveshwar Bandi Date: Tue, 16 Jul 2013 12:44:02 +0530 Subject: [PATCH 0231/1368] be2net: Fix to avoid hardware workaround when not needed [ Upstream commit 52fe29e4bb614367c108b717c6d7fe5953eb7af3 ] Hardware workaround requesting hardware to skip vlan insertion is necessary only when umc or qnq is enabled. Enabling this workaround in other scenarios could cause controller to stall. Signed-off-by: Sarveshwar Bandi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a0b4be51f0d1..6e4342695fa7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -782,16 +782,22 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, if (vlan_tx_tag_present(skb)) vlan_tag = be_get_tx_vlan_tag(adapter, skb); - else if (qnq_async_evt_rcvd(adapter) && adapter->pvid) - vlan_tag = adapter->pvid; + + if (qnq_async_evt_rcvd(adapter) && adapter->pvid) { + if (!vlan_tag) + vlan_tag = adapter->pvid; + /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to + * skip VLAN insertion + */ + if (skip_hw_vlan) + *skip_hw_vlan = true; + } if (vlan_tag) { skb = __vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); if (unlikely(!skb)) return skb; skb->vlan_tci = 0; - if (skip_hw_vlan) - *skip_hw_vlan = true; } /* Insert the outer VLAN, if any */ From 98bec4a114dfd772390a07735606109447481872 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 16 Jul 2013 23:01:20 -0700 Subject: [PATCH 0232/1368] hyperv: Fix the NETIF_F_SG flag setting in netvsc [ Upstream commit f45708209dc445bac0844f6ce86e315a2ffe8a29 ] SG mode is not currently supported by netvsc, so remove this flag for now. Otherwise, it will be unconditionally enabled by commit ec5f0615642 "Kill link between CSUM and SG features" Previously, the SG feature is disabled because CSUM is not set here. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 4dccead586be..23a0fff0df52 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -431,8 +431,8 @@ static int netvsc_probe(struct hv_device *dev, net->netdev_ops = &device_ops; /* TODO: Add GSO and Checksum offload */ - net->hw_features = NETIF_F_SG; - net->features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_TX; + net->hw_features = 0; + net->features = NETIF_F_HW_VLAN_CTAG_TX; SET_ETHTOOL_OPS(net, ðtool_ops); SET_NETDEV_DEV(net, &dev->device); From c1d220fb027abb92452bf3a59fed7308647b39be Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 16 Jul 2013 08:52:30 +0200 Subject: [PATCH 0233/1368] pkt_sched: sch_qfq: remove a source of high packet delay/jitter [ Upstream commit 87f40dd6ce7042caca0b3b557e8923127f51f902 ] QFQ+ inherits from QFQ a design choice that may cause a high packet delay/jitter and a severe short-term unfairness. As QFQ, QFQ+ uses a special quantity, the system virtual time, to track the service provided by the ideal system it approximates. When a packet is dequeued, this quantity must be incremented by the size of the packet, divided by the sum of the weights of the aggregates waiting to be served. Tracking this sum correctly is a non-trivial task, because, to preserve tight service guarantees, the decrement of this sum must be delayed in a special way [1]: this sum can be decremented only after that its value would decrease also in the ideal system approximated by QFQ+. For efficiency, QFQ+ keeps track only of the 'instantaneous' weight sum, increased and decreased immediately as the weight of an aggregate changes, and as an aggregate is created or destroyed (which, in its turn, happens as a consequence of some class being created/destroyed/changed). However, to avoid the problems caused to service guarantees by these immediate decreases, QFQ+ increments the system virtual time using the maximum value allowed for the weight sum, 2^10, in place of the dynamic, instantaneous value. The instantaneous value of the weight sum is used only to check whether a request of weight increase or a class creation can be satisfied. Unfortunately, the problems caused by this choice are worse than the temporary degradation of the service guarantees that may occur, when a class is changed or destroyed, if the instantaneous value of the weight sum was used to update the system virtual time. In fact, the fraction of the link bandwidth guaranteed by QFQ+ to each aggregate is equal to the ratio between the weight of the aggregate and the sum of the weights of the competing aggregates. The packet delay guaranteed to the aggregate is instead inversely proportional to the guaranteed bandwidth. By using the maximum possible value, and not the actual value of the weight sum, QFQ+ provides each aggregate with the worst possible service guarantees, and not with service guarantees related to the actual set of competing aggregates. To see the consequences of this fact, consider the following simple example. Suppose that only the following aggregates are backlogged, i.e., that only the classes in the following aggregates have packets to transmit: one aggregate with weight 10, say A, and ten aggregates with weight 1, say B1, B2, ..., B10. In particular, suppose that these aggregates are always backlogged. Given the weight distribution, the smoothest and fairest service order would be: A B1 A B2 A B3 A B4 A B5 A B6 A B7 A B8 A B9 A B10 A B1 A B2 ... QFQ+ would provide exactly this optimal service if it used the actual value for the weight sum instead of the maximum possible value, i.e., 11 instead of 2^10. In contrast, since QFQ+ uses the latter value, it serves aggregates as follows (easy to prove and to reproduce experimentally): A B1 B2 B3 B4 B5 B6 B7 B8 B9 B10 A A A A A A A A A A B1 B2 ... B10 A A ... By replacing 10 with N in the above example, and by increasing N, one can increase at will the maximum packet delay and the jitter experienced by the classes in aggregate A. This patch addresses this issue by just using the above 'instantaneous' value of the weight sum, instead of the maximum possible value, when updating the system virtual time. After the instantaneous weight sum is decreased, QFQ+ may deviate from the ideal service for a time interval in the order of the time to serve one maximum-size packet for each backlogged class. The worst-case extent of the deviation exhibited by QFQ+ during this time interval [1] is basically the same as of the deviation described above (but, without this patch, QFQ+ suffers from such a deviation all the time). Finally, this patch modifies the comment to the function qfq_slot_insert, to make it coherent with the fact that the weight sum used by QFQ+ can now be lower than the maximum possible value. [1] P. Valente, "Extending WF2Q+ to support a dynamic traffic mix", Proceedings of AAA-IDEA'05, June 2005. Signed-off-by: Paolo Valente Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_qfq.c | 85 +++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d51852bba01c..57922524f0c7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -113,7 +113,6 @@ #define FRAC_BITS 30 /* fixed point arithmetic */ #define ONE_FP (1UL << FRAC_BITS) -#define IWSUM (ONE_FP/QFQ_MAX_WSUM) #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ @@ -189,6 +188,7 @@ struct qfq_sched { struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ u32 num_active_agg; /* Num. of active aggregates */ u32 wsum; /* weight sum */ + u32 iwsum; /* inverse weight sum */ unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ @@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, q->wsum += (int) agg->class_weight * (new_num_classes - agg->num_classes); + q->iwsum = ONE_FP / q->wsum; agg->num_classes = new_num_classes; } @@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { if (!hlist_unhashed(&agg->nonfull_next)) hlist_del_init(&agg->nonfull_next); + q->wsum -= agg->class_weight; + if (q->wsum != 0) + q->iwsum = ONE_FP / q->wsum; + if (q->in_serv_agg == agg) q->in_serv_agg = qfq_choose_next_agg(q); kfree(agg); @@ -827,38 +832,60 @@ static void qfq_make_eligible(struct qfq_sched *q) } } - /* - * The index of the slot in which the aggregate is to be inserted must - * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' - * because the start time of the group may be moved backward by one - * slot after the aggregate has been inserted, and this would cause - * non-empty slots to be right-shifted by one position. + * The index of the slot in which the input aggregate agg is to be + * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2' + * and not a '-1' because the start time of the group may be moved + * backward by one slot after the aggregate has been inserted, and + * this would cause non-empty slots to be right-shifted by one + * position. * - * If the weight and lmax (max_pkt_size) of the classes do not change, - * then QFQ+ does meet the above contraint according to the current - * values of its parameters. In fact, if the weight and lmax of the - * classes do not change, then, from the theory, QFQ+ guarantees that - * the slot index is never higher than - * 2 + QFQ_MAX_AGG_CLASSES * ((1<budget -= len; - q->V += (u64)len * IWSUM; + q->V += (u64)len * q->iwsum; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, (unsigned long long) q->V); From 05464d21faec5e37e5b0fca58a5201408d6aab31 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 18 Jul 2013 10:55:15 +0800 Subject: [PATCH 0234/1368] tuntap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS [ Upstream commit 885291761dba2bfe04df4c0f7bb75e4c920ab82e ] We try to linearize part of the skb when the number of iov is greater than MAX_SKB_FRAGS. This is not enough since each single vector may occupy more than one pages, so zerocopy_sg_fromiovec() may still fail and may break the guest network. Solve this problem by calculate the pages needed for iov before trying to do zerocopy and switch to use copy instead of zerocopy if it needs more than MAX_SKB_FRAGS. This is done through introducing a new helper to count the pages for iov, and call uarg->callback() manually when switching from zerocopy to copy to notify vhost. We can do further optimization on top. The bug were introduced from commit 0690899b4d4501b3505be069b9a687e68ccbe15b (tun: experimental zero copy tx support) Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 62 +++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index c3cb60b1d1a2..2491eb28a8d2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1037,6 +1037,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, return 0; } +static unsigned long iov_pages(const struct iovec *iv, int offset, + unsigned long nr_segs) +{ + unsigned long seg, base; + int pages = 0, len, size; + + while (nr_segs && (offset >= iv->iov_len)) { + offset -= iv->iov_len; + ++iv; + --nr_segs; + } + + for (seg = 0; seg < nr_segs; seg++) { + base = (unsigned long)iv[seg].iov_base + offset; + len = iv[seg].iov_len - offset; + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; + pages += size; + offset = 0; + } + + return pages; +} + /* Get packet from user space buffer */ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, void *msg_control, const struct iovec *iv, @@ -1084,32 +1107,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EINVAL; } - if (msg_control) - zerocopy = true; - - if (zerocopy) { - /* Userspace may produce vectors with count greater than - * MAX_SKB_FRAGS, so we need to linearize parts of the skb - * to let the rest of data to be fit in the frags. - */ - if (count > MAX_SKB_FRAGS) { - copylen = iov_length(iv, count - MAX_SKB_FRAGS); - if (copylen < offset) - copylen = 0; - else - copylen -= offset; - } else - copylen = 0; - /* There are 256 bytes to be copied in skb, so there is enough - * room for skb expand head in case it is used. + if (msg_control) { + /* There are 256 bytes to be copied in skb, so there is + * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - if (copylen < gso.hdr_len) - copylen = gso.hdr_len; - if (!copylen) - copylen = GOODCOPY_LEN; + copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN; linear = copylen; - } else { + if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS) + zerocopy = true; + } + + if (!zerocopy) { copylen = len; linear = gso.hdr_len; } @@ -1123,8 +1132,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (zerocopy) err = zerocopy_sg_from_iovec(skb, iv, offset, count); - else + else { err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len); + if (!err && msg_control) { + struct ubuf_info *uarg = msg_control; + uarg->callback(uarg, false); + } + } if (err) { tun->dev->stats.rx_dropped++; From 7d9e6dd88cdea6c3d62a795ef69e6cc47b6dca2d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 18 Jul 2013 10:55:16 +0800 Subject: [PATCH 0235/1368] macvtap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS [ Upstream commit ece793fcfc417b3925844be88a6a6dc82ae8f7c6 ] We try to linearize part of the skb when the number of iov is greater than MAX_SKB_FRAGS. This is not enough since each single vector may occupy more than one pages, so zerocopy_sg_fromiovec() may still fail and may break the guest network. Solve this problem by calculate the pages needed for iov before trying to do zerocopy and switch to use copy instead of zerocopy if it needs more than MAX_SKB_FRAGS. This is done through introducing a new helper to count the pages for iov, and call uarg->callback() manually when switching from zerocopy to copy to notify vhost. We can do further optimization on top. This bug were introduced from b92946e2919134ebe2a4083e4302236295ea2a73 (macvtap: zerocopy: validate vectors before building skb). Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 62 ++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 502d9486395f..523d6b2426a8 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -633,6 +633,28 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, return 0; } +static unsigned long iov_pages(const struct iovec *iv, int offset, + unsigned long nr_segs) +{ + unsigned long seg, base; + int pages = 0, len, size; + + while (nr_segs && (offset >= iv->iov_len)) { + offset -= iv->iov_len; + ++iv; + --nr_segs; + } + + for (seg = 0; seg < nr_segs; seg++) { + base = (unsigned long)iv[seg].iov_base + offset; + len = iv[seg].iov_len - offset; + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; + pages += size; + offset = 0; + } + + return pages; +} /* Get packet from user space buffer */ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, @@ -679,31 +701,15 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (unlikely(count > UIO_MAXIOV)) goto err; - if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) - zerocopy = true; - - if (zerocopy) { - /* Userspace may produce vectors with count greater than - * MAX_SKB_FRAGS, so we need to linearize parts of the skb - * to let the rest of data to be fit in the frags. - */ - if (count > MAX_SKB_FRAGS) { - copylen = iov_length(iv, count - MAX_SKB_FRAGS); - if (copylen < vnet_hdr_len) - copylen = 0; - else - copylen -= vnet_hdr_len; - } - /* There are 256 bytes to be copied in skb, so there is enough - * room for skb expand head in case it is used. - * The rest buffer is mapped from userspace. - */ - if (copylen < vnet_hdr.hdr_len) - copylen = vnet_hdr.hdr_len; - if (!copylen) - copylen = GOODCOPY_LEN; + if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { + copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN; linear = copylen; - } else { + if (iov_pages(iv, vnet_hdr_len + copylen, count) + <= MAX_SKB_FRAGS) + zerocopy = true; + } + + if (!zerocopy) { copylen = len; linear = vnet_hdr.hdr_len; } @@ -715,9 +721,15 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (zerocopy) err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); - else + else { err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len); + if (!err && m && m->msg_control) { + struct ubuf_info *uarg = m->msg_control; + uarg->callback(uarg, false); + } + } + if (err) goto err_kfree; From 37b25f3f99d52710de567c5ff880824bb576121c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 07:19:26 -0700 Subject: [PATCH 0236/1368] vlan: mask vlan prio bits [ Upstream commit d4b812dea4a236f729526facf97df1a9d18e191c ] In commit 48cc32d38a52d0b68f91a171a8d00531edc6a46e ("vlan: don't deliver frames for unknown vlans to protocols") Florian made sure we set pkt_type to PACKET_OTHERHOST if the vlan id is set and we could find a vlan device for this particular id. But we also have a problem if prio bits are set. Steinar reported an issue on a router receiving IPv6 frames with a vlan tag of 4000 (id 0, prio 2), and tunneled into a sit device, because skb->vlan_tci is set. Forwarded frame is completely corrupted : We can see (8100:4000) being inserted in the middle of IPv6 source address : 16:48:00.780413 IP6 2001:16d8:8100:4000:ee1c:0:9d9:bc87 > 9f94:4d95:2001:67c:29f4::: ICMP6, unknown icmp6 type (0), length 64 0x0000: 0000 0029 8000 c7c3 7103 0001 a0ae e651 0x0010: 0000 0000 ccce 0b00 0000 0000 1011 1213 0x0020: 1415 1617 1819 1a1b 1c1d 1e1f 2021 2223 0x0030: 2425 2627 2829 2a2b 2c2d 2e2f 3031 3233 It seems we are not really ready to properly cope with this right now. We can probably do better in future kernels : vlan_get_ingress_priority() should be a netdev property instead of a per vlan_dev one. For stable kernels, lets clear vlan_tci to fix the bugs. Reported-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/if_vlan.h | 3 +-- net/8021q/vlan_core.c | 2 +- net/core/dev.c | 11 +++++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 637fa71de0c7..0b3498800ba0 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -79,9 +79,8 @@ static inline int is_vlan_dev(struct net_device *dev) } #define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) -#define vlan_tx_nonzero_tag_present(__skb) \ - (vlan_tx_tag_present(__skb) && ((__skb)->vlan_tci & VLAN_VID_MASK)) #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) +#define vlan_tx_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 8a15eaadc4bd..4a78c4de9f20 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; + u16 vlan_id = vlan_tx_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/core/dev.c b/net/core/dev.c index faebb398fb46..7ddbb31b10d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3513,8 +3513,15 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) } } - if (vlan_tx_nonzero_tag_present(skb)) - skb->pkt_type = PACKET_OTHERHOST; + if (unlikely(vlan_tx_tag_present(skb))) { + if (vlan_tx_tag_get_id(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* Note: we might in the future use prio bits + * and set skb->priority like in vlan_do_receive() + * For the time being, just ignore Priority Code Point + */ + skb->vlan_tci = 0; + } /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; From 5110890cbfa0c783cb727d6a2fbf36ce8560f9a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 09:35:10 -0700 Subject: [PATCH 0237/1368] vlan: fix a race in egress prio management [ Upstream commit 3e3aac497513c669e1c62c71e1d552ea85c1d974 ] egress_priority_map[] hash table updates are protected by rtnl, and we never remove elements until device is dismantled. We have to make sure that before inserting an new element in hash table, all its fields are committed to memory or else another cpu could find corrupt values and crash. Signed-off-by: Eric Dumazet Cc: Patrick McHardy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3a8c8fd63c88..1cd3d2a406f5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; + smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ + mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; while (mp) { if (mp->priority == skb->priority) { @@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, np->next = mp; np->priority = skb_prio; np->vlan_qos = vlan_qos; + /* Before inserting this element in hash table, make sure all its fields + * are committed to memory. + * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() + */ + smp_wmb(); vlan->egress_priority_map[skb_prio & 0xF] = np; if (vlan_qos) vlan->nr_egress_mappings++; From cf6a37e7da73702e865a02f03664fc0e2c4a2b3f Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 13 Jun 2013 01:29:24 +0200 Subject: [PATCH 0238/1368] MIPS: Oceton: Fix build error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 39205750efa6d335fac4f9bcd32b49c7e71c12b7 upstream. If CONFIG_CAVIUM_OCTEON_LOCK_L2_TLB, CONFIG_CAVIUM_OCTEON_LOCK_L2_EXCEPTION, CONFIG_CAVIUM_OCTEON_LOCK_L2_LOW_LEVEL_INTERRUPT and CONFIG_CAVIUM_OCTEON_LOCK_L2_INTERRUPT are all undefined: arch/mips/cavium-octeon/setup.c: In function ‘prom_init’: arch/mips/cavium-octeon/setup.c:715:12: error: unused variable ‘ebase’ [-Werror=unused-variable] Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 1e1e18c5a534..2a75ff249e71 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -7,6 +7,7 @@ * Copyright (C) 2008, 2009 Wind River Systems * written by Ralf Baechle */ +#include #include #include #include @@ -712,7 +713,7 @@ void __init prom_init(void) if (cvmx_read_csr(CVMX_L2D_FUS3) & (3ull << 34)) { pr_info("Skipping L2 locking due to reduced L2 cache size\n"); } else { - uint32_t ebase = read_c0_ebase() & 0x3ffff000; + uint32_t __maybe_unused ebase = read_c0_ebase() & 0x3ffff000; #ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_TLB /* TLB refill */ cvmx_l2c_lock_mem_region(ebase, 0x100); From df18f5f93ce9ed27bd9a779196614afd7a0aa017 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 13 Jun 2013 02:45:53 +0200 Subject: [PATCH 0239/1368] RAPIDIO: IDT_GEN2: Fix build error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 27f62b9f294b7e2019c94c385abda43a0af6bb8b upstream. CC drivers/rapidio/switches/idt_gen2.o drivers/rapidio/switches/idt_gen2.c: In function ‘idtg2_show_errlog’: drivers/rapidio/switches/idt_gen2.c:379:30: error: ‘PAGE_SIZE’ undeclared (first use in this function) drivers/rapidio/switches/idt_gen2.c:379:30: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Ralf Baechle Acked-by: Alexandre Bounine Signed-off-by: Greg Kroah-Hartman --- drivers/rapidio/switches/idt_gen2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index 809b7a3336ba..5d3b0f014d35 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -15,6 +15,8 @@ #include #include #include + +#include #include "../rio.h" #define LOCAL_RTE_CONF_DESTID_SEL 0x010070 From 05ac7b3a7db7823aecde55df6a5f8d6bb45693f4 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Wed, 17 Jul 2013 14:53:53 +0200 Subject: [PATCH 0240/1368] fuse: readdirplus: fix dentry leak commit 53ce9a3364de0723b27d861de93bfc882f7db050 upstream. In case d_lookup() returns a dentry with d_inode == NULL, the dentry is not returned with dput(). This results in triggering a BUG() in shrink_dcache_for_umount_subtree(): BUG: Dentry ...{i=0,n=...} still in use (1) [unmount of fuse fuse] [SzM: need to d_drop() as well] Reported-by: Justin Clift Signed-off-by: Niels de Vos Signed-off-by: Miklos Szeredi Tested-by: Brian Foster Tested-by: Niels de Vos Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f3f783dc4f75..5532bf416f9b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1229,9 +1229,15 @@ static int fuse_direntplus_link(struct file *file, name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); - if (dentry && dentry->d_inode) { + if (dentry) { inode = dentry->d_inode; - if (get_node_id(inode) == o->nodeid) { + if (!inode) { + d_drop(dentry); + } else if (get_node_id(inode) != o->nodeid) { + err = d_invalidate(dentry); + if (err) + goto out; + } else { struct fuse_inode *fi; fi = get_fuse_inode(inode); spin_lock(&fc->lock); @@ -1244,9 +1250,6 @@ static int fuse_direntplus_link(struct file *file, */ goto found; } - err = d_invalidate(dentry); - if (err) - goto out; dput(dentry); dentry = NULL; } From dc2a6c2d3e9a2529b3ffc70c165cbb10c85768f5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 17 Jul 2013 14:53:53 +0200 Subject: [PATCH 0241/1368] fuse: readdirplus: fix instantiate commit 2914941e3178d84a216fc4eb85292dfef3b6d628 upstream. Fuse does instantiation slightly differently from NFS/CIFS which use d_materialise_unique(). Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5532bf416f9b..a71df0206f6f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1264,10 +1264,19 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); - err = PTR_ERR(alias); - if (IS_ERR(alias)) - goto out; + if (S_ISDIR(inode->i_mode)) { + mutex_lock(&fc->inst_mutex); + alias = fuse_d_add_directory(dentry, inode); + mutex_unlock(&fc->inst_mutex); + err = PTR_ERR(alias); + if (IS_ERR(alias)) { + iput(inode); + goto out; + } + } else { + alias = d_splice_alias(inode, dentry); + } + if (alias) { dput(dentry); dentry = alias; From 223828d8a6ef53f7671351fe5d08f18dca714830 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 17 Jul 2013 14:53:53 +0200 Subject: [PATCH 0242/1368] fuse: readdirplus: sanity checks commit a28ef45cbb1e7fadd5159deb17b02de15c6e4aaf upstream. Add sanity checks before adding or updating an entry with data received from readdirplus. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index a71df0206f6f..5b1274699b08 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1225,6 +1225,12 @@ static int fuse_direntplus_link(struct file *file, if (name.name[1] == '.' && name.len == 2) return 0; } + + if (invalid_nodeid(o->nodeid)) + return -EIO; + if (!fuse_valid_type(o->attr.mode)) + return -EIO; + fc = get_fuse_conn(dir); name.hash = full_name_hash(name.name, name.len); @@ -1233,10 +1239,14 @@ static int fuse_direntplus_link(struct file *file, inode = dentry->d_inode; if (!inode) { d_drop(dentry); - } else if (get_node_id(inode) != o->nodeid) { + } else if (get_node_id(inode) != o->nodeid || + ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { err = d_invalidate(dentry); if (err) goto out; + } else if (is_bad_inode(inode)) { + err = -EIO; + goto out; } else { struct fuse_inode *fi; fi = get_fuse_inode(inode); From c0d8f455600820eb05208b57c50d4e90a58c4d96 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:04:21 -0700 Subject: [PATCH 0243/1368] bcache: Fix a dumb race commit 6aa8f1a6ca41c49721d2de4e048d3da8d06411f9 upstream. In the far-too-complicated closure code - closures can have destructors, for probably dubious reasons; they get run after the closure is no longer waiting on anything but before dropping the parent ref, intended just for freeing whatever memory the closure is embedded in. Trouble is, when remaining goes to 0 and we've got nothing more to run - we also have to unlock the closure, setting remaining to -1. If there's a destructor, that unlock isn't doing anything - nobody could be trying to lock it if we're about to free it - but if the unlock _is needed... that check for a destructor was racy. Argh. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/closure.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index bd05a9a8c7cf..9aba2017f0d1 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -66,16 +66,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) } else { struct closure *parent = cl->parent; struct closure_waitlist *wait = closure_waitlist(cl); + closure_fn *destructor = cl->fn; closure_debug_destroy(cl); + smp_mb(); atomic_set(&cl->remaining, -1); if (wait) closure_wake_up(wait); - if (cl->fn) - cl->fn(cl); + if (destructor) + destructor(cl); if (parent) closure_put(parent); From 3fcbc17636c83da9d85e2604de4af56c215e6e3c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:44:40 -0700 Subject: [PATCH 0244/1368] bcache: Advertise that flushes are supported commit 54d12f2b4fd0f218590d1490b41a18d0e2328a9a upstream. Whoops - bcache's flush/FUA was mostly correct, but flushes get filtered out unless we say we support them... Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 8 +++++++- drivers/md/bcache/super.c | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e5ff12e52d5b..2f36743ce708 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -489,6 +489,12 @@ static void bch_insert_data_loop(struct closure *cl) bch_queue_gc(op->c); } + /* + * Journal writes are marked REQ_FLUSH; if the original write was a + * flush, it'll wait on the journal write. + */ + bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA); + do { unsigned i; struct bkey *k; @@ -716,7 +722,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d) s->task = current; s->orig_bio = bio; s->write = (bio->bi_rw & REQ_WRITE) != 0; - s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0; + s->op.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0; s->recoverable = 1; s->start_time = jiffies; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f88e2b653a3f..a7d9828b01d1 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -781,6 +781,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size) set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); + blk_queue_flush(q, REQ_FLUSH|REQ_FUA); + return 0; } From 63a53870bb23c2d991f378ae9137d775978d5ae9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:03:25 -0700 Subject: [PATCH 0245/1368] bcache: Shutdown fix commit 5caa52afc5abd1396e4af720469abb5843a71eb8 upstream. Stopping a cache set is supposed to make it stop attached backing devices, but somewhere along the way that code got lost. Fixing this mainly has the effect of fixing our reboot notifier. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a7d9828b01d1..88113fe57fed 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1305,18 +1305,22 @@ static void cache_set_flush(struct closure *cl) static void __cache_set_unregister(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); - struct cached_dev *dc, *t; + struct cached_dev *dc; size_t i; mutex_lock(&bch_register_lock); - if (test_bit(CACHE_SET_UNREGISTERING, &c->flags)) - list_for_each_entry_safe(dc, t, &c->cached_devs, list) - bch_cached_dev_detach(dc); - for (i = 0; i < c->nr_uuids; i++) - if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i])) - bcache_device_stop(c->devices[i]); + if (c->devices[i]) { + if (!UUID_FLASH_ONLY(&c->uuids[i]) && + test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { + dc = container_of(c->devices[i], + struct cached_dev, disk); + bch_cached_dev_detach(dc); + } else { + bcache_device_stop(c->devices[i]); + } + } mutex_unlock(&bch_register_lock); From fe1b710530e60c8fbb07a73ad27e9f65305ce06a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:25:02 -0700 Subject: [PATCH 0246/1368] bcache: Fix a sysfs splat on shutdown commit c9502ea4424b31728703d113fc6b30bfead14633 upstream. If we stopped a bcache device when we were already detaching (or something like that), bcache_device_unlink() would try to remove a symlink from sysfs that was already gone because the bcache dev kobject had already been removed from sysfs. So keep track of whether we've removed stuff from sysfs. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/super.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d3e15b42a4ab..c42b14b2304c 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -437,6 +437,7 @@ struct bcache_device { /* If nonzero, we're detaching/unregistering from cache set */ atomic_t detaching; + int flush_done; atomic_long_t sectors_dirty; unsigned long sectors_dirty_gc; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 88113fe57fed..b4713cea1913 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -704,7 +704,8 @@ static void bcache_device_detach(struct bcache_device *d) atomic_set(&d->detaching, 0); } - bcache_device_unlink(d); + if (!d->flush_done) + bcache_device_unlink(d); d->c->devices[d->id] = NULL; closure_put(&d->c->caching); @@ -1016,6 +1017,14 @@ static void cached_dev_flush(struct closure *cl) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); struct bcache_device *d = &dc->disk; + mutex_lock(&bch_register_lock); + d->flush_done = 1; + + if (d->c) + bcache_device_unlink(d); + + mutex_unlock(&bch_register_lock); + bch_cache_accounting_destroy(&dc->accounting); kobject_del(&d->kobj); From 99e56bf5abce5131746397a2ce551fd92ca8f342 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 19:43:21 -0700 Subject: [PATCH 0247/1368] bcache: Fix GC_SECTORS_USED() calculation commit 29ebf465b9050f241c4433a796a32e6c896a9dcd upstream. Part of the job of garbage collection is to add up however many sectors of live data it finds in each bucket, but that doesn't work very well if it doesn't reset GC_SECTORS_USED() when it starts. Whoops. This wouldn't have broken anything horribly, but allocation tries to preferentially reclaim buckets that are mostly empty and that's not gonna work with an incorrect GC_SECTORS_USED() value. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7a5658f04e62..7b687a6f3dec 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1419,8 +1419,10 @@ static void btree_gc_start(struct cache_set *c) for_each_cache(ca, c, i) for_each_bucket(b, ca) { b->gc_gen = b->gen; - if (!atomic_read(&b->pin)) + if (!atomic_read(&b->pin)) { SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + SET_GC_SECTORS_USED(b, 0); + } } for (d = c->devices; From 5e20e8b3713b8a5941891db2626c9eecd3c0888c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 22:42:14 -0700 Subject: [PATCH 0248/1368] bcache: Journal replay fix commit faa5673617656ee58369a3cfe4a312cfcdc59c81 upstream. The journal replay code starts by finding something that looks like a valid journal entry, then it does a binary search over the unchecked region of the journal for the journal entries with the highest sequence numbers. Trouble is, the logic was wrong - journal_read_bucket() returns true if it found journal entries we need, but if the range of journal entries we're looking for loops around the end of the journal - in that case journal_read_bucket() could return true when it hadn't found the highest sequence number we'd seen yet, and in that case the binary search did the wrong thing. Whoops. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/journal.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8c8dfdcd9d4c..8a54d3b4f517 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -182,9 +182,14 @@ int bch_journal_read(struct cache_set *c, struct list_head *list, pr_debug("starting binary search, l %u r %u", l, r); while (l + 1 < r) { - m = (l + r) >> 1; + seq = list_entry(list->prev, struct journal_replay, + list)->j.seq; - if (read_bucket(m)) + m = (l + r) >> 1; + read_bucket(m); + + if (seq != list_entry(list->prev, struct journal_replay, + list)->j.seq) l = m; else r = m; From f46ef77da1b61d05dc5f7cbb111df53d92b887a2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 19 Jul 2013 12:28:25 +0200 Subject: [PATCH 0249/1368] EDAC: Fix lockdep splat commit 88d84ac97378c2f1d5fec9af1e8b7d9a662d6b00 upstream. Fix the following: BUG: key ffff88043bdd0330 not in .data! ------------[ cut here ]------------ WARNING: at kernel/lockdep.c:2987 lockdep_init_map+0x565/0x5a0() DEBUG_LOCKS_WARN_ON(1) Modules linked in: glue_helper sb_edac(+) edac_core snd acpi_cpufreq lrw gf128mul ablk_helper iTCO_wdt evdev i2c_i801 dcdbas button cryptd pcspkr iTCO_vendor_support usb_common lpc_ich mfd_core soundcore mperf processor microcode CPU: 2 PID: 599 Comm: modprobe Not tainted 3.10.0 #1 Hardware name: Dell Inc. Precision T3600/0PTTT9, BIOS A08 01/24/2013 0000000000000009 ffff880439a1d920 ffffffff8160a9a9 ffff880439a1d958 ffffffff8103d9e0 ffff88043af4a510 ffffffff81a16e11 0000000000000000 ffff88043bdd0330 0000000000000000 ffff880439a1d9b8 ffffffff8103dacc Call Trace: dump_stack warn_slowpath_common warn_slowpath_fmt lockdep_init_map ? trace_hardirqs_on_caller ? trace_hardirqs_on debug_mutex_init __mutex_init bus_register edac_create_sysfs_mci_device edac_mc_add_mc sbridge_probe pci_device_probe driver_probe_device __driver_attach ? driver_probe_device bus_for_each_dev driver_attach bus_add_driver driver_register __pci_register_driver ? 0xffffffffa0010fff sbridge_init ? 0xffffffffa0010fff do_one_initcall load_module ? unset_module_init_ro_nx SyS_init_module tracesys ---[ end trace d24a70b0d3ddf733 ]--- EDAC MC0: Giving out device to 'sbridge_edac.c' 'Sandy Bridge Socket#0': DEV 0000:3f:0e.0 EDAC sbridge: Driver loaded. What happens is that bus_register needs a statically allocated lock_key because the last is handed in to lockdep. However, struct mem_ctl_info embeds struct bus_type (the whole struct, not a pointer to it) and the whole thing gets dynamically allocated. Fix this by using a statically allocated struct bus_type for the MC bus. Signed-off-by: Borislav Petkov Acked-by: Mauro Carvalho Chehab Cc: Markus Trippelsdorf Signed-off-by: Tony Luck Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_mc.c | 9 +++++++++ drivers/edac/edac_mc_sysfs.c | 28 +++++++++++++++------------- drivers/edac/i5100_edac.c | 2 +- include/linux/edac.h | 7 ++++++- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 27e86d938262..89e109022d78 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -48,6 +48,8 @@ static LIST_HEAD(mc_devices); */ static void const *edac_mc_owner; +static struct bus_type mc_bus[EDAC_MAX_MCS]; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -723,6 +725,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) int ret = -EINVAL; edac_dbg(0, "\n"); + if (mci->mc_idx >= EDAC_MAX_MCS) { + pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx); + return -ENODEV; + } + #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -762,6 +769,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) /* set load time so that error rate can be tracked */ mci->start_time = jiffies; + mci->bus = &mc_bus[mci->mc_idx]; + if (edac_create_sysfs_mci_device(mci)) { edac_mc_printk(mci, KERN_WARNING, "failed to create sysfs device\n"); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 67610a6ebf87..c4d700a577d2 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -370,7 +370,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, return -ENODEV; csrow->dev.type = &csrow_attr_type; - csrow->dev.bus = &mci->bus; + csrow->dev.bus = mci->bus; device_initialize(&csrow->dev); csrow->dev.parent = &mci->dev; csrow->mci = mci; @@ -605,7 +605,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, dimm->mci = mci; dimm->dev.type = &dimm_attr_type; - dimm->dev.bus = &mci->bus; + dimm->dev.bus = mci->bus; device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; @@ -975,11 +975,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. */ - mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus.name) + mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!mci->bus->name) return -ENOMEM; - edac_dbg(0, "creating bus %s\n", mci->bus.name); - err = bus_register(&mci->bus); + + edac_dbg(0, "creating bus %s\n", mci->bus->name); + + err = bus_register(mci->bus); if (err < 0) return err; @@ -988,7 +990,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) device_initialize(&mci->dev); mci->dev.parent = mci_pdev; - mci->dev.bus = &mci->bus; + mci->dev.bus = mci->bus; dev_set_name(&mci->dev, "mc%d", mci->mc_idx); dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); @@ -997,8 +999,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1064,8 +1066,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) } fail2: device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1098,8 +1100,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci) { edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); } static void mc_attr_release(struct device *dev) diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 1b635178cc44..157b934e8ce3 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -974,7 +974,7 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci) if (!i5100_debugfs) return -ENODEV; - priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs); if (!priv->debugfs) return -ENOMEM; diff --git a/include/linux/edac.h b/include/linux/edac.h index 0b763276f619..5c6d7fbaf89e 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -622,7 +622,7 @@ struct edac_raw_error_desc { */ struct mem_ctl_info { struct device dev; - struct bus_type bus; + struct bus_type *bus; struct list_head link; /* for global list of mem_ctl_info structs */ @@ -742,4 +742,9 @@ struct mem_ctl_info { #endif }; +/* + * Maximum number of memory controllers in the coherent fabric. + */ +#define EDAC_MAX_MCS 16 + #endif From 72ffadbae16297ac86e31c759450e0e395209903 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Sat, 29 Jun 2013 03:53:02 +0530 Subject: [PATCH 0250/1368] SCSI: mpt3sas: Infinite loops can occur if MPI2_IOCSTATUS_CONFIG_INVALID_PAGE is not returned commit 14be49ac965ebd3f8561d57e01ddb22f93f9b454 upstream. Infinite loop can occur if IOCStatus is not equal to MPI2_IOCSTATUS_CONFIG_INVALID_PAGE value in the while loops in functions _scsih_search_responding_sas_devices, _scsih_search_responding_raid_devices and _scsih_search_responding_expanders So, Instead of checking for MPI2_IOCSTATUS_CONFIG_INVALID_PAGE value, in this patch code is modified to check for IOCStatus not equals to MPI2_IOCSTATUS_SUCCESS to break the while loop. Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index dcbf7c880cb2..1fb5c6bcb786 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -6392,7 +6392,7 @@ _scsih_search_responding_sas_devices(struct MPT3SAS_ADAPTER *ioc) handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; handle = le16_to_cpu(sas_device_pg0.DevHandle); device_info = le32_to_cpu(sas_device_pg0.DeviceInfo); @@ -6494,7 +6494,7 @@ _scsih_search_responding_raid_devices(struct MPT3SAS_ADAPTER *ioc) &volume_pg1, MPI2_RAID_VOLUME_PGAD_FORM_GET_NEXT_HANDLE, handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; handle = le16_to_cpu(volume_pg1.DevHandle); @@ -6518,7 +6518,7 @@ _scsih_search_responding_raid_devices(struct MPT3SAS_ADAPTER *ioc) phys_disk_num))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; phys_disk_num = pd_pg0.PhysDiskNum; handle = le16_to_cpu(pd_pg0.DevHandle); @@ -6597,7 +6597,7 @@ _scsih_search_responding_expanders(struct MPT3SAS_ADAPTER *ioc) ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; handle = le16_to_cpu(expander_pg0.DevHandle); @@ -6742,8 +6742,6 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc) MPI2_SAS_EXPAND_PGAD_FORM_GET_NEXT_HNDL, handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) - break; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { pr_info(MPT3SAS_FMT "\tbreak from expander scan: " \ "ioc_status(0x%04x), loginfo(0x%08x)\n", @@ -6787,8 +6785,6 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc) phys_disk_num))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) - break; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { pr_info(MPT3SAS_FMT "\tbreak from phys disk scan: "\ "ioc_status(0x%04x), loginfo(0x%08x)\n", @@ -6854,8 +6850,6 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc) &volume_pg1, MPI2_RAID_VOLUME_PGAD_FORM_GET_NEXT_HANDLE, handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) - break; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { pr_info(MPT3SAS_FMT "\tbreak from volume scan: " \ "ioc_status(0x%04x), loginfo(0x%08x)\n", @@ -6914,8 +6908,6 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc) handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) - break; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { pr_info(MPT3SAS_FMT "\tbreak from end device scan:"\ " ioc_status(0x%04x), loginfo(0x%08x)\n", From e90c1dc2c6298f7d369804aa7873d1c4c434f9d3 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Sat, 29 Jun 2013 03:52:03 +0530 Subject: [PATCH 0251/1368] SCSI: mpt3sas: fix for kernel panic when driver loads with HBA conected to non LUN 0 configured expander commit b65cfedf4560af65305bd7b3b9f26c02c6fb3660 upstream. With some enclosures when LUN 0 is not created but LUN 1 or LUN X is created then SCSI scan procedure calls target_alloc, slave_alloc call back functions for LUN 0 and slave_destory() for same LUN 0. In these kind of cases within slave_destroy, pointer to scsi_target in _sas_device structure is set to NULL, following which when slave_alloc for LUN 1 is called then starget would not be set properly for this LUN. So, scsi_target pointer pointing to NULL value would lead to a crash later in the discovery procedure. To solve this issue set the sas_device's scsi_target pointer to scsi_device's scsi_target if it is NULL earlier in slave_alloc callback function. Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 1fb5c6bcb786..f8c4b8564251 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1273,6 +1273,7 @@ _scsih_slave_alloc(struct scsi_device *sdev) struct MPT3SAS_DEVICE *sas_device_priv_data; struct scsi_target *starget; struct _raid_device *raid_device; + struct _sas_device *sas_device; unsigned long flags; sas_device_priv_data = kzalloc(sizeof(struct scsi_device), GFP_KERNEL); @@ -1301,6 +1302,19 @@ _scsih_slave_alloc(struct scsi_device *sdev) spin_unlock_irqrestore(&ioc->raid_device_lock, flags); } + if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) { + spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device = mpt3sas_scsih_sas_device_find_by_sas_address(ioc, + sas_target_priv_data->sas_address); + if (sas_device && (sas_device->starget == NULL)) { + sdev_printk(KERN_INFO, sdev, + "%s : sas_device->starget set to starget @ %d\n", + __func__, __LINE__); + sas_device->starget = starget; + } + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + } + return 0; } From 4ac697c53bf856ef1c8d32f8fd903987e0494311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 21 Nov 2012 09:54:48 +0100 Subject: [PATCH 0252/1368] SCSI: megaraid_sas: fix memory leak if SGL has zero length entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7a6a731bd00ca90d0e250867c3b9c05b5ff0fa49 upstream. commit 98cb7e44 ([SCSI] megaraid_sas: Sanity check user supplied length before passing it to dma_alloc_coherent()) introduced a memory leak. Memory allocated for entries following zero length SGL entries will not be freed. Reference: http://bugs.debian.org/688198 Signed-off-by: Bjørn Mork Acked-by: Adam Radford Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3a9ddae86f1f..89178b833b52 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -4852,10 +4852,12 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, sense, sense_handle); } - for (i = 0; i < ioc->sge_count && kbuff_arr[i]; i++) { - dma_free_coherent(&instance->pdev->dev, - kern_sge32[i].length, - kbuff_arr[i], kern_sge32[i].phys_addr); + for (i = 0; i < ioc->sge_count; i++) { + if (kbuff_arr[i]) + dma_free_coherent(&instance->pdev->dev, + kern_sge32[i].length, + kbuff_arr[i], + kern_sge32[i].phys_addr); } megasas_return_cmd(instance, cmd); From 3c19c4f9b4eeabe75eddbec1a0e8599c940e8ec8 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Tue, 11 Jun 2013 08:49:50 +0000 Subject: [PATCH 0253/1368] lib/Kconfig.debug: Restrict FRAME_POINTER for MIPS commit 25c87eae1725ed77a8b44d782a86abdc279b4ede upstream. FAULT_INJECTION_STACKTRACE_FILTER selects FRAME_POINTER but that symbol is not available for MIPS. Fixes the following problem on a randconfig: warning: (LOCKDEP && FAULT_INJECTION_STACKTRACE_FILTER && LATENCYTOP && KMEMCHECK) selects FRAME_POINTER which has unmet direct dependencies (DEBUG_KERNEL && (CRIS || M68K || FRV || UML || AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || ARCH_WANT_FRAME_POINTERS) Signed-off-by: Markos Chandras Acked-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5441/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 566cf2bc08ea..74fdc5cf4adc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1272,7 +1272,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND help Provide stacktrace filter for fault-injection capabilities From d547d18d1920026dd08d9c19e8da403cbea62319 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 29 Jun 2013 15:33:35 +0200 Subject: [PATCH 0254/1368] usb: serial: option: blacklist ONDA MT689DC QMI interface commit 3d1a69e726406ab662ab88fa30a3a05ed404334d upstream. Prevent the option driver from binding itself to the QMI/WWAN interface, making it unusable by the proper driver. Signed-off-by: enrico Mioso Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5dd857de05b0..1fc01efca909 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -817,7 +817,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0017, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0018, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0020, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0021, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, From 6fd55cb457cbcc24d8283ae87c9345c72645c93f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 28 Jun 2013 17:15:25 +0200 Subject: [PATCH 0255/1368] usb: option: add TP-LINK MA260 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 94190301ffa059c2d127b3a67ec5d161d5c62681 upstream. Signed-off-by: Bjørn Mork Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1fc01efca909..62c87efe703b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1333,6 +1333,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) }, { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x02, 0x01) }, /* D-Link DWM-156 (variant) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x00, 0x00) }, /* D-Link DWM-156 (variant) */ From 177f82a9d23d9ba1a7e8179cca296fd17ebfd9d0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 10 Jul 2013 12:25:02 -0500 Subject: [PATCH 0256/1368] usb: serial: option: add Olivetti Olicard 200 commit 4cf76df06ecc852633ed927d91e01c83c33bc331 upstream. Speaks AT on interfaces 5 (command & PPP) and 3 (secondary), other interface protocols are unknown. Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 62c87efe703b..d68f89eda805 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -341,6 +341,7 @@ static void option_instat_callback(struct urb *urb); #define OLIVETTI_VENDOR_ID 0x0b3c #define OLIVETTI_PRODUCT_OLICARD100 0xc000 #define OLIVETTI_PRODUCT_OLICARD145 0xc003 +#define OLIVETTI_PRODUCT_OLICARD200 0xc005 /* Celot products */ #define CELOT_VENDOR_ID 0x211f @@ -1257,6 +1258,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ From 5ac2d863c312ec42bb6778988577ada67f09ba85 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 13 Jul 2013 18:54:14 +0200 Subject: [PATCH 0257/1368] usb: serial: option.c: remove ONDA MT825UP product ID fromdriver commit 878c69aae986ae97084458c0183a8c0a059865b1 upstream. Some (very few) early devices like mine, where not exposting a proper CDC descriptor. This was fixed with an immediate firmware update from the vendor, and pre-installed on newer devices. So actual devices can be driven by cdc_acm.c + cdc_ether.c. Signed-off-by: Enrico Mioso Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d68f89eda805..b2eba3edf34d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -347,12 +347,6 @@ static void option_instat_callback(struct urb *urb); #define CELOT_VENDOR_ID 0x211f #define CELOT_PRODUCT_CT680M 0x6801 -/* ONDA Communication vendor id */ -#define ONDA_VENDOR_ID 0x1ee8 - -/* ONDA MT825UP HSDPA 14.2 modem */ -#define ONDA_MT825UP 0x000b - /* Samsung products */ #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 @@ -1260,7 +1254,6 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ - { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) }, From 490217ce2c9ab2318d9a4613547943d98706034b Mon Sep 17 00:00:00 2001 From: Daniil Bolsun Date: Fri, 19 Jul 2013 10:21:23 +0300 Subject: [PATCH 0258/1368] USB: option: append Petatel NP10T device to GSM modems list commit c38e83b6cc2adf80e3f091fd92cfbeacc9748347 upstream. This patch was tested on 3.10.1 kernel. Same models of Petatel NP10T modems have different device IDs. Unfortunately they have no additional revision information on a board which may treat them as different devices. Currently I've seen only two NP10T devices with various IDs. Possibly Petatel NP10T list will be appended upon devices with new IDs will appear. Signed-off-by: Daniil Bolsun Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b2eba3edf34d..884c1ea1d3a4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -439,7 +439,8 @@ static void option_instat_callback(struct urb *urb); /* Hyundai Petatel Inc. products */ #define PETATEL_VENDOR_ID 0x1ff4 -#define PETATEL_PRODUCT_NP10T 0x600e +#define PETATEL_PRODUCT_NP10T_600A 0x600a +#define PETATEL_PRODUCT_NP10T_600E 0x600e /* TP-LINK Incorporated products */ #define TPLINK_VENDOR_ID 0x2357 @@ -1325,7 +1326,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, - { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) }, + { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) }, + { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ From 9cca0118ba973e04573cb3221c246422e917ebe6 Mon Sep 17 00:00:00 2001 From: "Alexandr \\\\\\\"Sky\\\\\\\" Ivanov" Date: Tue, 23 Jul 2013 17:46:40 +0400 Subject: [PATCH 0259/1368] USB: option: add D-Link DWM-152/C1 and DWM-156/C1 commit ca24763588844b14f019ffc45c7df6d9e8f932c5 upstream. Adding support for D-Link DWM-152/C1 and DWM-156/C1 devices. DWM-152/C1: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=07d1 ProdID=3e01 Rev= 0.00 S: Product=USB Configuration S: SerialNumber=1234567890ABCDEF C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms DWM-156/C1: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=07d1 ProdID=3e02 Rev= 0.00 S: Product=DataCard Device S: SerialNumber=1234567890ABCDEF C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Alexandr Ivanov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 884c1ea1d3a4..254e2d0498e9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1339,6 +1339,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From bb2177cd04ac81f2922cdd614f79b586556f92ad Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Thu, 25 Jul 2013 02:01:39 +0200 Subject: [PATCH 0260/1368] usb: serial: option: Add ONYX 3G device support commit 63b5df963f52ccbab6fabedf05b7ac6b465789a4 upstream. This patch adds support for the ONYX 3G device (version 1) from ALFA NETWORK. Signed-off-by: Enrico Mioso Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 254e2d0498e9..1cf6f125f5f0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -778,6 +778,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6280) }, /* BP3-USB & BP3-EXT HSDPA */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6008) }, From ee2f769fb4a5a317b1558d0bbcccb89b8c2697a6 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 24 Jul 2013 13:23:51 +0900 Subject: [PATCH 0261/1368] ARM: S3C24XX: Add missing clkdev entries for s3c2440 UART commit d817468c4b2892b9468e2a0c92116e38a3a61370 upstream. This patch restores serial port operation which has been broken since commit 60e93575476f ("serial: samsung: enable clock before clearing pending interrupts during init") That commit only uncovered the real issue which was missing clkdev entries for the "uart" clocks on S3C2440. It went unnoticed so far because return value of clk API calls were not being checked at all in the samsung serial port driver. This patch should be backported to at least 3.10 stable kernel, since the serial port has not been working on s3c2440 since 3.10-rc5. Signed-off-by: Sylwester Nawrocki Cc: Chander Kashyap [on S3C2440 SoC based Mini2440 board] Tested-by: Sylwester Nawrocki Reviewed-by: Tomasz Figa Tested-by: Juergen Beisert Signed-off-by: Kukjin Kim Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-s3c24xx/clock-s3c2410.c | 161 +++++++++++++-------- arch/arm/mach-s3c24xx/clock-s3c2440.c | 3 + arch/arm/plat-samsung/include/plat/clock.h | 5 + 3 files changed, 106 insertions(+), 63 deletions(-) diff --git a/arch/arm/mach-s3c24xx/clock-s3c2410.c b/arch/arm/mach-s3c24xx/clock-s3c2410.c index 34fffdf6fc1d..564553694b54 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2410.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2410.c @@ -119,66 +119,101 @@ static struct clk init_clocks_off[] = { } }; -static struct clk init_clocks[] = { - { - .name = "lcd", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_LCDC, - }, { - .name = "gpio", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_GPIO, - }, { - .name = "usb-host", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_USBH, - }, { - .name = "usb-device", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_USBD, - }, { - .name = "timers", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_PWMT, - }, { - .name = "uart", - .devname = "s3c2410-uart.0", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART0, - }, { - .name = "uart", - .devname = "s3c2410-uart.1", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART1, - }, { - .name = "uart", - .devname = "s3c2410-uart.2", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART2, - }, { - .name = "rtc", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_RTC, - }, { - .name = "watchdog", - .parent = &clk_p, - .ctrlbit = 0, - }, { - .name = "usb-bus-host", - .parent = &clk_usb_bus, - }, { - .name = "usb-bus-gadget", - .parent = &clk_usb_bus, - }, +static struct clk clk_lcd = { + .name = "lcd", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_LCDC, +}; + +static struct clk clk_gpio = { + .name = "gpio", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_GPIO, +}; + +static struct clk clk_usb_host = { + .name = "usb-host", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_USBH, +}; + +static struct clk clk_usb_device = { + .name = "usb-device", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_USBD, +}; + +static struct clk clk_timers = { + .name = "timers", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_PWMT, +}; + +struct clk s3c24xx_clk_uart0 = { + .name = "uart", + .devname = "s3c2410-uart.0", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART0, +}; + +struct clk s3c24xx_clk_uart1 = { + .name = "uart", + .devname = "s3c2410-uart.1", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART1, +}; + +struct clk s3c24xx_clk_uart2 = { + .name = "uart", + .devname = "s3c2410-uart.2", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART2, +}; + +static struct clk clk_rtc = { + .name = "rtc", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_RTC, +}; + +static struct clk clk_watchdog = { + .name = "watchdog", + .parent = &clk_p, + .ctrlbit = 0, +}; + +static struct clk clk_usb_bus_host = { + .name = "usb-bus-host", + .parent = &clk_usb_bus, +}; + +static struct clk clk_usb_bus_gadget = { + .name = "usb-bus-gadget", + .parent = &clk_usb_bus, +}; + +static struct clk *init_clocks[] = { + &clk_lcd, + &clk_gpio, + &clk_usb_host, + &clk_usb_device, + &clk_timers, + &s3c24xx_clk_uart0, + &s3c24xx_clk_uart1, + &s3c24xx_clk_uart2, + &clk_rtc, + &clk_watchdog, + &clk_usb_bus_host, + &clk_usb_bus_gadget, }; /* s3c2410_baseclk_add() @@ -195,7 +230,6 @@ int __init s3c2410_baseclk_add(void) { unsigned long clkslow = __raw_readl(S3C2410_CLKSLOW); unsigned long clkcon = __raw_readl(S3C2410_CLKCON); - struct clk *clkp; struct clk *xtal; int ret; int ptr; @@ -207,8 +241,9 @@ int __init s3c2410_baseclk_add(void) /* register clocks from clock array */ - clkp = init_clocks; - for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++, clkp++) { + for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++) { + struct clk *clkp = init_clocks[ptr]; + /* ensure that we note the clock state */ clkp->usage = clkcon & clkp->ctrlbit ? 1 : 0; diff --git a/arch/arm/mach-s3c24xx/clock-s3c2440.c b/arch/arm/mach-s3c24xx/clock-s3c2440.c index 1069b5680826..aaf006d1d6dc 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2440.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2440.c @@ -166,6 +166,9 @@ static struct clk_lookup s3c2440_clk_lookup[] = { CLKDEV_INIT(NULL, "clk_uart_baud1", &s3c24xx_uclk), CLKDEV_INIT(NULL, "clk_uart_baud2", &clk_p), CLKDEV_INIT(NULL, "clk_uart_baud3", &s3c2440_clk_fclk_n), + CLKDEV_INIT("s3c2440-uart.0", "uart", &s3c24xx_clk_uart0), + CLKDEV_INIT("s3c2440-uart.1", "uart", &s3c24xx_clk_uart1), + CLKDEV_INIT("s3c2440-uart.2", "uart", &s3c24xx_clk_uart2), CLKDEV_INIT("s3c2440-camif", "camera", &s3c2440_clk_cam_upll), }; diff --git a/arch/arm/plat-samsung/include/plat/clock.h b/arch/arm/plat-samsung/include/plat/clock.h index a62753dc15ba..df45d6edc98d 100644 --- a/arch/arm/plat-samsung/include/plat/clock.h +++ b/arch/arm/plat-samsung/include/plat/clock.h @@ -83,6 +83,11 @@ extern struct clk clk_ext; extern struct clksrc_clk clk_epllref; extern struct clksrc_clk clk_esysclk; +/* S3C24XX UART clocks */ +extern struct clk s3c24xx_clk_uart0; +extern struct clk s3c24xx_clk_uart1; +extern struct clk s3c24xx_clk_uart2; + /* S3C64XX specific clocks */ extern struct clk clk_h2; extern struct clk clk_27m; From 1b1eeeb02ec556c6a31a8383f86f1a7bf5419111 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 27 Jun 2013 22:42:36 -0400 Subject: [PATCH 0262/1368] ARM: footbridge: fix overlapping PCI mappings commit 6287e7319870ec949fb809e4eb4154c2b05b221f upstream. Commit 8ef6e6201b26cb9fde79c1baa08145af6aca2815 (ARM: footbridge: use fixed PCI i/o mapping) broke booting on my netwinder. Before that, everything boots fine. Since then, it crashes on boot. With earlyprintk, I see it BUG-ing like so: kernel BUG at lib/ioremap.c:27! Internal error: Oops - BUG: 0 [#1] ARM ... [] (ioremap_page_range+0x128/0x154) from [] (dc21285_setup+0xd0/0x114) [] (dc21285_setup+0xd0/0x114) from [] (pci_common_init+0xa0/0x298) [] (pci_common_init+0xa0/0x298) from [] (netwinder_pci_init+0xc/0x18) [] (netwinder_pci_init+0xc/0x18) from [] (do_one_initcall+0xb4/0x180) ... Russell points out it's because of overlapping PCI mappings that was added with the aforementioned commit. Rob thought the code would re-use the static mapping, but that turns out to not be the case and instead hits the BUG further down. After deleting this hunk as suggested by Russel, the system boots up fine again and all my PCI devices work (IDE, ethernet, the DC21285). Signed-off-by: Mike Frysinger Acked-by: Rob Herring Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-footbridge/dc21285.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index a7cd2cf5e08d..3490a24f969e 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -276,8 +276,6 @@ int __init dc21285_setup(int nr, struct pci_sys_data *sys) sys->mem_offset = DC21285_PCI_MEM; - pci_ioremap_io(0, DC21285_PCI_IO); - pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset); pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); From 29ab402957ba9f329b901c69cb4179fafbfd0cf4 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Mon, 1 Jul 2013 23:56:25 -0300 Subject: [PATCH 0263/1368] usb: serial: cp210x: Add USB ID for Netgear Switches embedded serial adapter commit 90625070c4253377025878c4e82feed8b35c7116 upstream. This adds NetGear Managed Switch M4100 series, M5300 series, M7100 series USB ID (0846:0110) to the cp210x driver. Without this, the serial adapter is not recognized in Linux. Description was obtained from an Netgear Eng. Signed-off-by: Luiz Angelo Daros de Luca Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 2c659553c07c..de63ed19a806 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -53,6 +53,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ + { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ From 35976473f0c19ec3e3b88b0d2ba312def35536a7 Mon Sep 17 00:00:00 2001 From: Sami Rahman Date: Mon, 8 Jul 2013 14:28:55 -0400 Subject: [PATCH 0264/1368] USB: cp210x: add MMB and PI ZigBee USB Device Support commit 7681156982026ebf7eafd7301eb0374d7648d068 upstream. Added support for MMB Networks and Planet Innovation Ingeni ZigBee USB devices using customized Silicon Labs' CP210x.c USB to UART bridge drivers with PIDs: 88A4, 88A5. Signed-off-by: Sami Rahman Tested-by: Sami Rahman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index de63ed19a806..dcaa724b4be9 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -119,6 +119,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ From 95405bdbff81ce7f491145b2ea0d4091c71c2af7 Mon Sep 17 00:00:00 2001 From: Barry Grussling Date: Fri, 19 Jul 2013 14:46:12 -0700 Subject: [PATCH 0265/1368] usb: cp210x support SEL C662 Vendor/Device commit b579fa52f6be0b4157ca9cc5e94d44a2c89a7e95 upstream. This patch adds support for the Schweitzer Engineering Laboratories C662 USB cable based off the CP210x driver. Signed-off-by: Barry Grussling Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index dcaa724b4be9..c90d960e091b 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -151,6 +151,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ From c07ae685f761d7b6850f502a34964005428c181a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 00:09:19 -0400 Subject: [PATCH 0266/1368] ext4: fix error handling in ext4_ext_truncate() commit 8acd5e9b1217e58a57124d9e225afa12efeae20d upstream. Previously ext4_ext_truncate() was ignoring potential error returns from ext4_es_remove_extent() and ext4_ext_remove_space(). This can lead to the on-diks extent tree and the extent status tree cache getting out of sync, which is particuarlly bad, and can lead to file system corruption and potential data loss. Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e49da585739f..fddf3d957004 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4386,9 +4386,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); +retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); + if (err == ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (err) { + ext4_std_error(inode->i_sb, err); + return; + } err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + ext4_std_error(inode->i_sb, err); } static void ext4_falloc_update_inode(struct inode *inode, From 7b4fc5f531f64ce52e5f421c9bc285ccf30ccac8 Mon Sep 17 00:00:00 2001 From: Liu ShuoX Date: Thu, 11 Jul 2013 16:03:45 +0800 Subject: [PATCH 0267/1368] PM / Sleep: avoid 'autosleep' in shutdown progress commit e5248a111bf4048a9f3fab1a9c94c4630a10592a upstream. Prevent automatic system suspend from happening during system shutdown by making try_to_suspend() check system_state and return immediately if it is not SYSTEM_RUNNING. This prevents the following breakage from happening (scenario from Zhang Yanmin): Kernel starts shutdown and calls all device driver's shutdown callback. When a driver's shutdown is called, the last wakelock is released and suspend-to-ram starts. However, as some driver's shut down callbacks already shut down devices and disabled runtime pm, the suspend-to-ram calls driver's suspend callback without noticing that device is already off and causes crash. [rjw: Changelog] Signed-off-by: Liu ShuoX Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/autosleep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index c6422ffeda9a..9012ecf7b814 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -32,7 +32,8 @@ static void try_to_suspend(struct work_struct *work) mutex_lock(&autosleep_lock); - if (!pm_save_wakeup_count(initial_count)) { + if (!pm_save_wakeup_count(initial_count) || + system_state != SYSTEM_RUNNING) { mutex_unlock(&autosleep_lock); goto out; } From b534f8f9f56d5e800d4529f54e38be960fd064a2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 18:00:59 +0200 Subject: [PATCH 0268/1368] media: saa7134: Fix unlocked snd_pcm_stop() call commit e6355ad7b1c6f70e2f48ae159f5658b441ccff95 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/saa7134/saa7134-alsa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/pci/saa7134/saa7134-alsa.c b/drivers/media/pci/saa7134/saa7134-alsa.c index 10460fd3ce39..dbcdfbf8aed0 100644 --- a/drivers/media/pci/saa7134/saa7134-alsa.c +++ b/drivers/media/pci/saa7134/saa7134-alsa.c @@ -172,7 +172,9 @@ static void saa7134_irq_alsa_done(struct saa7134_dev *dev, dprintk("irq: overrun [full=%d/%d] - Blocks in %d\n",dev->dmasound.read_count, dev->dmasound.bufsize, dev->dmasound.blocks); spin_unlock(&dev->slock); + snd_pcm_stream_lock(dev->dmasound.substream); snd_pcm_stop(dev->dmasound.substream,SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(dev->dmasound.substream); return; } From 61eeaa1b66e47443f27f4cf0809940fbe08142f8 Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Wed, 5 Jun 2013 21:26:23 -0300 Subject: [PATCH 0269/1368] media: dmxdev: remove dvb_ringbuffer_flush() on writer side commit 414abbd2cd4c2618895f02ed3a76ec6647281436 upstream. In dvb_ringbuffer lock-less synchronizationof reader and writer threads is done with separateread and write pointers. Sincedvb_ringbuffer_flush() modifies the read pointer, this function must not be called from the writer thread. This patch removes the dvb_ringbuffer_flush() calls in the dmxdev ringbuffer write functions, this fixes Oopses "Unable to handle kernel paging request" I could observe for the call chaindvb_demux_read ->dvb_dmxdev_buffer_read -> dvb_ringbuffer_read_user -> __copy_to_user (the reader side of the ringbuffer). The flush calls at the write side are not necessary anyway since ringbuffer_flush is also called in dvb_dmxdev_buffer_read() when an error condition is set in the ringbuffer. This patch should also be applied to stable kernels. Signed-off-by: Soeren Moch Reviewed-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-core/dmxdev.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index a1a3a5159d71..0b4616b87195 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -377,10 +377,8 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, ret = dvb_dmxdev_buffer_write(&dmxdevfilter->buffer, buffer2, buffer2_len); } - if (ret < 0) { - dvb_ringbuffer_flush(&dmxdevfilter->buffer); + if (ret < 0) dmxdevfilter->buffer.error = ret; - } if (dmxdevfilter->params.sec.flags & DMX_ONESHOT) dmxdevfilter->state = DMXDEV_STATE_DONE; spin_unlock(&dmxdevfilter->dev->lock); @@ -416,10 +414,8 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, ret = dvb_dmxdev_buffer_write(buffer, buffer1, buffer1_len); if (ret == buffer1_len) ret = dvb_dmxdev_buffer_write(buffer, buffer2, buffer2_len); - if (ret < 0) { - dvb_ringbuffer_flush(buffer); + if (ret < 0) buffer->error = ret; - } spin_unlock(&dmxdevfilter->dev->lock); wake_up(&buffer->queue); return 0; From c4d98e2535e0269fcd513cbb79e937ef3ccc3d01 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Wed, 10 Jul 2013 13:19:50 -0400 Subject: [PATCH 0270/1368] lockd: protect nlm_blocked access in nlmsvc_retry_blocked commit 1c327d962fc420aea046c16215a552710bde8231 upstream. In nlmsvc_retry_blocked, the check that the list is non-empty and acquiring the pointer of the first entry is unprotected by any lock. This allows a rare race condition when there is only one entry on the list. A function such as nlmsvc_grant_callback() can be called, which will temporarily remove the entry from the list. Between the list_empty() and list_entry(),the list may become empty, causing an invalid pointer to be used as an nlm_block, leading to a possible crash. This patch adds the nlm_block_lock around these calls to prevent concurrent use of the nlm_blocked list. This was a regression introduced by f904be9cc77f361d37d71468b13ff3d1a1823dea "lockd: Mostly remove BKL from the server". Signed-off-by: David Jeffery Cc: Bryan Schumaker Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svclock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e703318c41df..8ebd3f551e0c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -939,6 +939,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; + spin_lock(&nlm_blocked_lock); while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); @@ -948,6 +949,7 @@ nlmsvc_retry_blocked(void) timeout = block->b_when - jiffies; break; } + spin_unlock(&nlm_blocked_lock); dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); @@ -957,7 +959,9 @@ nlmsvc_retry_blocked(void) retry_deferred_block(block); } else nlmsvc_grant_blocked(block); + spin_lock(&nlm_blocked_lock); } + spin_unlock(&nlm_blocked_lock); return timeout; } From 2d06fa0f825ec1d7943a3bcceecbd85aa82ef27d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 5 Jul 2013 12:09:18 +0200 Subject: [PATCH 0271/1368] hrtimers: Move SMP function call to thread context commit 5ec2481b7b47a4005bb446d176e5d0257400c77d upstream. smp_call_function_* must not be called from softirq context. But clock_was_set() which calls on_each_cpu() is called from softirq context to implement a delayed clock_was_set() for the timer interrupt handler. Though that almost never gets invoked. A recent change in the resume code uses the softirq based delayed clock_was_set to support Xens resume mechanism. linux-next contains a new warning which warns if smp_call_function_* is called from softirq context which gets triggered by that Xen change. Fix this by moving the delayed clock_was_set() call to a work context. Reported-and-tested-by: Artem Savkov Reported-by: Sasha Levin Cc: David Vrabel Cc: Ingo Molnar Cc: H. Peter Anvin , Cc: Konrad Wilk Cc: John Stultz Cc: xen-devel@lists.xen.org Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/hrtimer.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index fd4b13b131f8..2288fbdada16 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -721,17 +721,20 @@ static int hrtimer_switch_to_hres(void) return 1; } +static void clock_was_set_work(struct work_struct *work) +{ + clock_was_set(); +} + +static DECLARE_WORK(hrtimer_work, clock_was_set_work); + /* - * Called from timekeeping code to reprogramm the hrtimer interrupt - * device. If called from the timer interrupt context we defer it to - * softirq context. + * Called from timekeeping and resume code to reprogramm the hrtimer + * interrupt device on all cpus. */ void clock_was_set_delayed(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - cpu_base->clock_was_set = 1; - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + schedule_work(&hrtimer_work); } #else @@ -780,8 +783,10 @@ void hrtimers_resume(void) WARN_ONCE(!irqs_disabled(), KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + /* Retrigger on the local CPU */ retrigger_next_event(NULL); - timerfd_clock_was_set(); + /* And schedule a retrigger for all others */ + clock_was_set_delayed(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1432,13 +1437,6 @@ void hrtimer_peek_ahead_timers(void) static void run_hrtimer_softirq(struct softirq_action *h) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - if (cpu_base->clock_was_set) { - cpu_base->clock_was_set = 0; - clock_was_set(); - } - hrtimer_peek_ahead_timers(); } From 0d9230a39086b5f183fda1091eb078a67be01f05 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Jul 2013 08:02:25 +0200 Subject: [PATCH 0272/1368] ALSA: hda - Remove NO_PRESENCE bit override for Dell 1420n Laptop commit f3e351eef3a7fd1e36a3e18d4f2f069b00deb23c upstream. The quirk for Dell laptops with STAC9228 overrides the pin default config of NID 0x0f to the value with AC_DEFCFG_MISC_NO_PRESENCE bit on. I'm not quite sure why this was done so, but can guess that this was introduced for avoiding this to be muted by another headphone plug. Now, after transition to the generic parser, this workaround rather causes a problem (notably as unexpected speaker mutes) because the pin is seen as if it's always plugged in. Since the generic parser can handle multiple headphone plugging gracefully, we can get rid of this override now. Reported-and-tested-by: Eric Shattow Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 1d9d6427e0bf..486df8a867c8 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -3227,7 +3227,7 @@ static const struct hda_fixup stac927x_fixups[] = { /* configure the analog microphone on some laptops */ { 0x0c, 0x90a79130 }, /* correct the front output jack as a hp out */ - { 0x0f, 0x0227011f }, + { 0x0f, 0x0221101f }, /* correct the front input jack as a mic */ { 0x0e, 0x02a79130 }, {} From 5a0ecab6b7f55d7b2162e95c396ce778f2f2154c Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Fri, 19 Jul 2013 18:26:53 +0200 Subject: [PATCH 0273/1368] ALSA: usb-audio: 6fire: return correct XRUN indication commit be2f93a4c4981b3646b6f98f477154411b8516cb upstream. Return SNDRV_PCM_POS_XRUN (snd_pcm_uframes_t) instead of SNDRV_PCM_STATE_XRUN (snd_pcm_state_t) from the pointer function of 6fire, as expected by snd_pcm_update_hw_ptr0(). Caught by sparse. Signed-off-by: Eldad Zack Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/6fire/pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c index 8221ff2f209f..074aaf7a36db 100644 --- a/sound/usb/6fire/pcm.c +++ b/sound/usb/6fire/pcm.c @@ -543,7 +543,7 @@ static snd_pcm_uframes_t usb6fire_pcm_pointer( snd_pcm_uframes_t ret; if (rt->panic || !sub) - return SNDRV_PCM_STATE_XRUN; + return SNDRV_PCM_POS_XRUN; spin_lock_irqsave(&sub->lock, flags); ret = sub->dma_off; From 9c563a54c85bcd531cc671d9575d4dff64852124 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Jul 2013 07:58:02 +0200 Subject: [PATCH 0274/1368] ALSA: hda - Fix EAPD GPIO control for Sigmatel codecs commit 1ea9a69d1a36a5b62bf281ba8bb304fcac656dad upstream. The EAPD GPIO is dynamically turned on/off for some machines with Sigmatel codecs, but this didn't work as expected, and it resulted in spontaneous lost of speaker outputs per HP plugging or power-saving. This patch fixes the bug by simply including spec->eapd_mask into spec->gpio_mask and spec->gpio_data bits. Reported-and-tested-by: Eric Shattow Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 486df8a867c8..e849e1e0d7d9 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -417,9 +417,11 @@ static void stac_update_outputs(struct hda_codec *codec) val &= ~spec->eapd_mask; else val |= spec->eapd_mask; - if (spec->gpio_data != val) + if (spec->gpio_data != val) { + spec->gpio_data = val; stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, val); + } } } @@ -3608,20 +3610,18 @@ static int stac_parse_auto_config(struct hda_codec *codec) static int stac_init(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; - unsigned int gpio; int i; /* override some hints */ stac_store_hints(codec); /* set up GPIO */ - gpio = spec->gpio_data; /* turn on EAPD statically when spec->eapd_switch isn't set. * otherwise, unsol event will turn it on/off dynamically */ if (!spec->eapd_switch) - gpio |= spec->eapd_mask; - stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, gpio); + spec->gpio_data |= spec->eapd_mask; + stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data); snd_hda_gen_init(codec); @@ -3921,6 +3921,7 @@ static void stac_setup_gpio(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; + spec->gpio_mask |= spec->eapd_mask; if (spec->gpio_led) { if (!spec->vref_mute_led_nid) { spec->gpio_mask |= spec->gpio_led; From 6f7bb6bafb3153e9b9383f3f44a0f23fc00318d9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 28 Jul 2013 16:30:49 -0700 Subject: [PATCH 0275/1368] Linux 3.10.4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b5485529b6bf..b4df9b20c49f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Unicycling Gorilla From 78077c226f0ab20fcd0a85a8f0de42b96973382b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 3 Jul 2013 15:03:31 -0700 Subject: [PATCH 0276/1368] mm: fix the TLB range flushed when __tlb_remove_page() runs out of slots commit e6c495a96ce02574e765d5140039a64c8d4e8c9e upstream. zap_pte_range loops from @addr to @end. In the middle, if it runs out of batching slots, TLB entries needs to be flushed for @start to @interim, NOT @interim to @end. Since ARC port doesn't use page free batching I can't test it myself but this seems like the right thing to do. Observed this when working on a fix for the issue at thread: http://www.spinics.net/lists/linux-arch/msg21736.html Signed-off-by: Vineet Gupta Cc: Mel Gorman Cc: Hugh Dickins Cc: Rik van Riel Cc: David Rientjes Cc: Peter Zijlstra Acked-by: Catalin Marinas Cc: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 61a262b08e53..5e5080005bc4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1101,6 +1101,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, spinlock_t *ptl; pte_t *start_pte; pte_t *pte; + unsigned long range_start = addr; again: init_rss_vec(rss); @@ -1206,12 +1207,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, force_flush = 0; #ifdef HAVE_GENERIC_MMU_GATHER - tlb->start = addr; - tlb->end = end; + tlb->start = range_start; + tlb->end = addr; #endif tlb_flush_mmu(tlb); - if (addr != end) + if (addr != end) { + range_start = addr; goto again; + } } return addr; From fff98879381243f59b6e9f630d55312aa34a89c9 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 26 Jun 2013 02:31:42 -0700 Subject: [PATCH 0277/1368] iser-target: Fix isert_put_reject payload buffer post commit 3df8f68aaf7ebe3d136a22262b41b350b0a1858b upstream. This patch adds the missing isert_put_reject() logic to post a outgoing payload buffer to hold the 48 bytes of original PDU header request payload for the rejected cmd. It also fixes ISTATE_SEND_REJECT handling in isert_response_completion() -> isert_do_control_comp() code, and drops incorrect iscsi_cmd_t->reject_comp usage. Signed-off-by: Nicholas Bellinger Cc: Or Gerlitz Cc: Mike Christie Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 26 +++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 41712f096515..6559febd255e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1222,6 +1222,9 @@ isert_put_cmd(struct isert_cmd *isert_cmd) * associated cmd->se_cmd needs to be released. */ if (cmd->se_cmd.se_tfo != NULL) { + pr_debug("Calling transport_generic_free_cmd from" + " isert_put_cmd for 0x%02x\n", + cmd->iscsi_opcode); transport_generic_free_cmd(&cmd->se_cmd, 0); break; } @@ -1318,8 +1321,8 @@ isert_do_control_comp(struct work_struct *work) atomic_dec(&isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; - complete(&cmd->reject_comp); isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + break; case ISTATE_SEND_LOGOUTRSP: pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); /* @@ -1345,7 +1348,8 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || - cmd->i_state == ISTATE_SEND_LOGOUTRSP) { + cmd->i_state == ISTATE_SEND_LOGOUTRSP || + cmd->i_state == ISTATE_SEND_REJECT) { isert_unmap_tx_desc(tx_desc, ib_dev); INIT_WORK(&isert_cmd->comp_work, isert_do_control_comp); @@ -1637,11 +1641,25 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) struct isert_cmd, iscsi_cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1]; + struct iscsi_reject *hdr = + (struct iscsi_reject *)&isert_cmd->tx_desc.iscsi_header; isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - iscsit_build_reject(cmd, conn, (struct iscsi_reject *) - &isert_cmd->tx_desc.iscsi_header); + iscsit_build_reject(cmd, conn, hdr); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); + + hton24(hdr->dlength, ISCSI_HDR_LEN); + isert_cmd->sense_buf_dma = ib_dma_map_single(ib_dev, + (void *)cmd->buf_ptr, ISCSI_HDR_LEN, + DMA_TO_DEVICE); + isert_cmd->sense_buf_len = ISCSI_HDR_LEN; + tx_dsg->addr = isert_cmd->sense_buf_dma; + tx_dsg->length = ISCSI_HDR_LEN; + tx_dsg->lkey = isert_conn->conn_mr->lkey; + isert_cmd->tx_desc.num_sge = 2; + isert_init_send_wr(isert_cmd, send_wr); pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); From a5d56a2217664c7bc06cb7deeac9a2a155ba4345 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 3 Jul 2013 11:35:11 -0400 Subject: [PATCH 0278/1368] iscsi-target: Fix tfc_tpg_nacl_auth_cit configfs length overflow commit 0fbfc46fb0b2f543a8b539e94c6c293ebc0b05a6 upstream. This patch fixes a potential buffer overflow while processing iscsi_node_auth input for configfs attributes within NodeACL tfc_tpg_nacl_auth_cit context. Signed-off-by: Joern Engel Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 8d8b3ff68490..421344da8b51 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -474,7 +474,7 @@ static ssize_t __iscsi_##prefix##_store_##name( \ if (!capable(CAP_SYS_ADMIN)) \ return -EPERM; \ \ - snprintf(auth->name, PAGE_SIZE, "%s", page); \ + snprintf(auth->name, sizeof(auth->name), "%s", page); \ if (!strncmp("NULL", auth->name, 4)) \ auth->naf_flags &= ~flags; \ else \ From d9e507c05ca19ad2ec166577edd8b47e17c8961e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 3 Jul 2013 03:05:37 -0700 Subject: [PATCH 0279/1368] iser-target: Fix session reset bug with RDMA_CM_EVENT_DISCONNECTED commit b2cb96494d83b894a43ba8b9023eead8ff50684b upstream. This patch addresses a bug where RDMA_CM_EVENT_DISCONNECTED may occur before the connection shutdown has been completed by rx/tx threads, that causes isert_free_conn() to wait indefinately on ->conn_wait. This patch allows isert_disconnect_work code to invoke rdma_disconnect when isert_disconnect_work() process context is started by client session reset before isert_free_conn() code has been reached. It also adds isert_conn->conn_mutex protection for ->state within isert_disconnect_work(), isert_cq_comp_err() and isert_free_conn() code, along with isert_check_state() for wait_event usage. (v2: Add explicit iscsit_cause_connection_reinstatement call during isert_disconnect_work() to force conn reset) Cc: Or Gerlitz Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 74 ++++++++++++++++++++---- drivers/infiniband/ulp/isert/ib_isert.h | 1 + drivers/target/iscsi/iscsi_target_erl0.c | 1 + include/target/iscsi/iscsi_transport.h | 4 ++ 4 files changed, 68 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6559febd255e..6b20a9d09cbb 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -388,6 +388,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) init_waitqueue_head(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); + mutex_init(&isert_conn->conn_mutex); cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; @@ -540,15 +541,32 @@ isert_disconnect_work(struct work_struct *work) struct isert_conn, conn_logout_work); pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - + mutex_lock(&isert_conn->conn_mutex); isert_conn->state = ISER_CONN_DOWN; if (isert_conn->post_recv_buf_count == 0 && atomic_read(&isert_conn->post_send_buf_count) == 0) { pr_debug("Calling wake_up(&isert_conn->conn_wait);\n"); - wake_up(&isert_conn->conn_wait); + mutex_unlock(&isert_conn->conn_mutex); + goto wake_up; } + if (!isert_conn->conn_cm_id) { + mutex_unlock(&isert_conn->conn_mutex); + isert_put_conn(isert_conn); + return; + } + if (!isert_conn->logout_posted) { + pr_debug("Calling rdma_disconnect for !logout_posted from" + " isert_disconnect_work\n"); + rdma_disconnect(isert_conn->conn_cm_id); + mutex_unlock(&isert_conn->conn_mutex); + iscsit_cause_connection_reinstatement(isert_conn->conn, 0); + goto wake_up; + } + mutex_unlock(&isert_conn->conn_mutex); +wake_up: + wake_up(&isert_conn->conn_wait); isert_put_conn(isert_conn); } @@ -1423,7 +1441,11 @@ isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) pr_debug("isert_cq_comp_err >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); pr_debug("Calling wake_up from isert_cq_comp_err\n"); - isert_conn->state = ISER_CONN_TERMINATING; + mutex_lock(&isert_conn->conn_mutex); + if (isert_conn->state != ISER_CONN_DOWN) + isert_conn->state = ISER_CONN_TERMINATING; + mutex_unlock(&isert_conn->conn_mutex); + wake_up(&isert_conn->conn_wait_comp_err); } } @@ -2193,6 +2215,17 @@ isert_free_np(struct iscsi_np *np) kfree(isert_np); } +static int isert_check_state(struct isert_conn *isert_conn, int state) +{ + int ret; + + mutex_lock(&isert_conn->conn_mutex); + ret = (isert_conn->state == state); + mutex_unlock(&isert_conn->conn_mutex); + + return ret; +} + static void isert_free_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; @@ -2202,26 +2235,43 @@ static void isert_free_conn(struct iscsi_conn *conn) * Decrement post_send_buf_count for special case when called * from isert_do_control_comp() -> iscsit_logout_post_handler() */ + mutex_lock(&isert_conn->conn_mutex); if (isert_conn->logout_posted) atomic_dec(&isert_conn->post_send_buf_count); - if (isert_conn->conn_cm_id) + if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { + pr_debug("Calling rdma_disconnect from isert_free_conn\n"); rdma_disconnect(isert_conn->conn_cm_id); + } /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. */ - if (isert_conn->state > ISER_CONN_INIT) { + if (isert_conn->state == ISER_CONN_UP) { pr_debug("isert_free_conn: Before wait_event comp_err %d\n", isert_conn->state); - wait_event(isert_conn->conn_wait_comp_err, - isert_conn->state == ISER_CONN_TERMINATING); - pr_debug("isert_free_conn: After wait_event #1 >>>>>>>>>>>>\n"); - } + mutex_unlock(&isert_conn->conn_mutex); - pr_debug("isert_free_conn: wait_event conn_wait %d\n", isert_conn->state); - wait_event(isert_conn->conn_wait, isert_conn->state == ISER_CONN_DOWN); - pr_debug("isert_free_conn: After wait_event #2 >>>>>>>>>>>>>>>>>>>>\n"); + wait_event(isert_conn->conn_wait_comp_err, + (isert_check_state(isert_conn, ISER_CONN_TERMINATING))); + + wait_event(isert_conn->conn_wait, + (isert_check_state(isert_conn, ISER_CONN_DOWN))); + + isert_put_conn(isert_conn); + return; + } + if (isert_conn->state == ISER_CONN_INIT) { + mutex_unlock(&isert_conn->conn_mutex); + isert_put_conn(isert_conn); + return; + } + pr_debug("isert_free_conn: wait_event conn_wait %d\n", + isert_conn->state); + mutex_unlock(&isert_conn->conn_mutex); + + wait_event(isert_conn->conn_wait, + (isert_check_state(isert_conn, ISER_CONN_DOWN))); isert_put_conn(isert_conn); } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index b104f4c2cd38..5795c82a2306 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -102,6 +102,7 @@ struct isert_conn { struct ib_qp *conn_qp; struct isert_device *conn_device; struct work_struct conn_logout_work; + struct mutex conn_mutex; wait_queue_head_t conn_wait; wait_queue_head_t conn_wait_comp_err; struct kref conn_kref; diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index dcb199da06b9..7f1116635d17 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -909,6 +909,7 @@ void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep) wait_for_completion(&conn->conn_wait_comp); complete(&conn->conn_post_wait_comp); } +EXPORT_SYMBOL(iscsit_cause_connection_reinstatement); void iscsit_fall_back_to_erl0(struct iscsi_session *sess) { diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 23a87d0cd72c..6db632eb3821 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -66,6 +66,10 @@ extern int iscsit_logout_post_handler(struct iscsi_cmd *, struct iscsi_conn *); * From iscsi_target_device.c */ extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *); +/* + * From iscsi_target_erl0.c + */ +extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); /* * From iscsi_target_erl1.c */ From c6ccbb9b6e24e35d71fd8706dfc5a8eab07f6937 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 3 Jul 2013 03:11:48 -0700 Subject: [PATCH 0280/1368] iscsi-target: Fix ISCSI_OP_SCSI_TMFUNC handling for iser commit 186a9647019587b3784694894c4d136fd00cfd7b upstream. This patch adds target_get_sess_cmd reference counting for iscsit_handle_task_mgt_cmd(), and adds a target_put_sess_cmd() for the failure case. It also fixes a bug where ISCSI_OP_SCSI_TMFUNC type commands where leaking iscsi_cmd->i_conn_node and eventually triggering an OOPs during struct isert_conn shutdown. Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 16 ++++++++-------- drivers/target/iscsi/iscsi_target.c | 12 ++++++++++-- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6b20a9d09cbb..bfc21798bd5e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1202,14 +1202,12 @@ isert_put_cmd(struct isert_cmd *isert_cmd) { struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct isert_conn *isert_conn = isert_cmd->conn; - struct iscsi_conn *conn; + struct iscsi_conn *conn = isert_conn->conn; pr_debug("Entering isert_put_cmd: %p\n", isert_cmd); switch (cmd->iscsi_opcode) { case ISCSI_OP_SCSI_CMD: - conn = isert_conn->conn; - spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) list_del(&cmd->i_conn_node); @@ -1219,16 +1217,18 @@ isert_put_cmd(struct isert_cmd *isert_cmd) iscsit_stop_dataout_timer(cmd); isert_unmap_cmd(isert_cmd, isert_conn); - /* - * Fall-through - */ + transport_generic_free_cmd(&cmd->se_cmd, 0); + break; case ISCSI_OP_SCSI_TMFUNC: + spin_lock_bh(&conn->cmd_lock); + if (!list_empty(&cmd->i_conn_node)) + list_del(&cmd->i_conn_node); + spin_unlock_bh(&conn->cmd_lock); + transport_generic_free_cmd(&cmd->se_cmd, 0); break; case ISCSI_OP_REJECT: case ISCSI_OP_NOOP_OUT: - conn = isert_conn->conn; - spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) list_del(&cmd->i_conn_node); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index d7705e5824fb..f1db6ee71006 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1757,8 +1757,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct se_tmr_req *se_tmr; struct iscsi_tmr_req *tmr_req; struct iscsi_tm *hdr; - int out_of_order_cmdsn = 0; - int ret; + int out_of_order_cmdsn = 0, ret; + bool sess_ref = false; u8 function; hdr = (struct iscsi_tm *) buf; @@ -1814,6 +1814,9 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, MSG_SIMPLE_TAG, cmd->sense_buffer + 2); + target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + sess_ref = true; + switch (function) { case ISCSI_TM_FUNC_ABORT_TASK: tcm_function = TMR_ABORT_TASK; @@ -1956,6 +1959,11 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * For connection recovery, this is also the default action for * TMR TASK_REASSIGN. */ + if (sess_ref) { + pr_debug("Handle TMR, using sess_ref=true check\n"); + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + } + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } From e1d2c40df6b0caed73187254e7be9af87cc98850 Mon Sep 17 00:00:00 2001 From: Ren Bigcren Date: Tue, 2 Jul 2013 13:34:30 +0200 Subject: [PATCH 0281/1368] USB: storage: Add MicroVault Flash Drive to unusual_devs commit e7a6121f4929c17215f0cdca3726f4bf3e4e9529 upstream. The device report an error capacity when read_capacity_16(). Using read_capacity_10() can get the correct capacity. Signed-off-by: Ren Bigcren Cc: Matthew Dharm Signed-off-by: Oskar Andero Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 1799335288bd..c015f2c16729 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -665,6 +665,13 @@ UNUSUAL_DEV( 0x054c, 0x016a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_INQUIRY ), +/* Submitted by Ren Bigcren */ +UNUSUAL_DEV( 0x054c, 0x02a5, 0x0100, 0x0100, + "Sony Corp.", + "MicroVault Flash Drive", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_READ_CAPACITY_16 ), + /* floppy reports multiple luns */ UNUSUAL_DEV( 0x055d, 0x2020, 0x0000, 0x0210, "SAMSUNG", From 49a845673d47cddd320ba9bd0d746c071d5ad39a Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Fri, 28 Jun 2013 08:53:34 +0200 Subject: [PATCH 0282/1368] ALSA: hda - Yet another Dell headset mic quirk commit 6c29d68a82ec68db18241b818d03e7864c052be9 upstream. This quirk is needed for the headset mic to work on this Dell machine. BugLink: https://bugs.launchpad.net/bugs/1195597 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 403010c9e82e..8e437841214c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3498,6 +3498,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0606, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), SND_PCI_QUIRK(0x103c, 0x18e6, "HP", ALC269_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x1973, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), From 6d2c6593cf6592707c68e95f43982adbe573c6ac Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Fri, 28 Jun 2013 11:09:56 +0200 Subject: [PATCH 0283/1368] ALSA: hda - Guess what, it's two more Dell headset mic quirks commit cd6fb6793a33e2b02af6e05a8d3f735f7c88a943 upstream. Add two more machines that need quirks for headset mics to work. Tested-by: Shawn Wang BugLink: https://bugs.launchpad.net/bugs/1195636 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8e437841214c..051c03d5337d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3495,6 +3495,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f8, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x05f9, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x05fb, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0606, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), From f8c974f6fbdef40d37f957292f3238552a8d3598 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 22 Jul 2013 21:32:09 +0200 Subject: [PATCH 0284/1368] firewire: fix libdc1394/FlyCap2 iso event regression commit 0699a73af3811b66b1ab5650575acee5eea841ab upstream. Commit 18d627113b83 (firewire: prevent dropping of completed iso packet header data) was intended to be an obvious bug fix, but libdc1394 and FlyCap2 depend on the old behaviour by ignoring all returned information and thus not noticing that not all packets have been received yet. The result was that the video frame buffers would be saved before they contained the correct data. Reintroduce the old behaviour for old clients. Tested-by: Stepan Salenikovich Tested-by: Josep Bosch Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/core-cdev.c | 3 +++ drivers/firewire/ohci.c | 10 ++++++++-- include/linux/firewire.h | 1 + include/uapi/linux/firewire-cdev.h | 4 ++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 7ef316fdc4d9..ac1b43a04285 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -54,6 +54,7 @@ #define FW_CDEV_KERNEL_VERSION 5 #define FW_CDEV_VERSION_EVENT_REQUEST2 4 #define FW_CDEV_VERSION_ALLOCATE_REGION_END 4 +#define FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW 5 struct client { u32 version; @@ -1005,6 +1006,8 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg) a->channel, a->speed, a->header_size, cb, client); if (IS_ERR(context)) return PTR_ERR(context); + if (client->version < FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW) + context->drop_overflow_headers = true; /* We only support one context at this time. */ spin_lock_irq(&client->lock); diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 9e1db6490b9a..afb701ec90ca 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2749,8 +2749,11 @@ static void copy_iso_headers(struct iso_context *ctx, const u32 *dma_hdr) { u32 *ctx_hdr; - if (ctx->header_length + ctx->base.header_size > PAGE_SIZE) + if (ctx->header_length + ctx->base.header_size > PAGE_SIZE) { + if (ctx->base.drop_overflow_headers) + return; flush_iso_completions(ctx); + } ctx_hdr = ctx->header + ctx->header_length; ctx->last_timestamp = (u16)le32_to_cpu((__force __le32)dma_hdr[0]); @@ -2910,8 +2913,11 @@ static int handle_it_packet(struct context *context, sync_it_packet_for_cpu(context, d); - if (ctx->header_length + 4 > PAGE_SIZE) + if (ctx->header_length + 4 > PAGE_SIZE) { + if (ctx->base.drop_overflow_headers) + return 1; flush_iso_completions(ctx); + } ctx_hdr = ctx->header + ctx->header_length; ctx->last_timestamp = le16_to_cpu(last->res_count); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 191501afd7fb..217e4b42b7c8 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -434,6 +434,7 @@ struct fw_iso_context { int type; int channel; int speed; + bool drop_overflow_headers; size_t header_size; union { fw_iso_callback_t sc; diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h index d50036953497..1db453e4b550 100644 --- a/include/uapi/linux/firewire-cdev.h +++ b/include/uapi/linux/firewire-cdev.h @@ -215,8 +215,8 @@ struct fw_cdev_event_request2 { * with the %FW_CDEV_ISO_INTERRUPT bit set, when explicitly requested with * %FW_CDEV_IOC_FLUSH_ISO, or when there have been so many completed packets * without the interrupt bit set that the kernel's internal buffer for @header - * is about to overflow. (In the last case, kernels with ABI version < 5 drop - * header data up to the next interrupt packet.) + * is about to overflow. (In the last case, ABI versions < 5 drop header data + * up to the next interrupt packet.) * * Isochronous transmit events (context type %FW_CDEV_ISO_CONTEXT_TRANSMIT): * From ed44e4d734910894c6d822d5dfbbb800fa2238ba Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Jul 2013 20:01:02 -0300 Subject: [PATCH 0285/1368] ASoC: sglt5000: Fix the default value of CHIP_SSS_CTRL commit 016fcab8ff46fca29375d484226ec91932aa4a07 upstream. According to the sgtl5000 reference manual, the default value of CHIP_SSS_CTRL is 0x10. Reported-by: Oskar Schirmer Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/sgtl5000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 92bbfec9b107..ea479388fb5c 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -37,7 +37,7 @@ static const u16 sgtl5000_regs[SGTL5000_MAX_REG_OFFSET] = { [SGTL5000_CHIP_CLK_CTRL] = 0x0008, [SGTL5000_CHIP_I2S_CTRL] = 0x0010, - [SGTL5000_CHIP_SSS_CTRL] = 0x0008, + [SGTL5000_CHIP_SSS_CTRL] = 0x0010, [SGTL5000_CHIP_DAC_VOL] = 0x3c3c, [SGTL5000_CHIP_PAD_STRENGTH] = 0x015f, [SGTL5000_CHIP_ANA_HP_CTRL] = 0x1818, From f7fce059bfbbf82f4889d367086402e81a79496b Mon Sep 17 00:00:00 2001 From: Chih-Chung Chang Date: Mon, 15 Jul 2013 09:38:46 -0700 Subject: [PATCH 0286/1368] ASoC: max98088 - fix element type of the register cache. commit cb6f66a2d278e57a6c9d8fb59bd9ebd8ab3965c2 upstream. The registers of max98088 are 8 bits, not 16 bits. This bug causes the contents of registers to be overwritten with bad values when the codec is suspended and then resumed. Signed-off-by: Chih-Chung Chang Signed-off-by: Dylan Reid Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/max98088.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c index 3eeada57e87d..566a367c94fa 100644 --- a/sound/soc/codecs/max98088.c +++ b/sound/soc/codecs/max98088.c @@ -1612,7 +1612,7 @@ static int max98088_dai2_digital_mute(struct snd_soc_dai *codec_dai, int mute) static void max98088_sync_cache(struct snd_soc_codec *codec) { - u16 *reg_cache = codec->reg_cache; + u8 *reg_cache = codec->reg_cache; int i; if (!codec->cache_sync) From bceb4eb2756a33f87dfb0b86bd01f2af4b4b81ac Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Sun, 21 Jul 2013 10:34:09 +0800 Subject: [PATCH 0287/1368] ASoC: tegra: correct playback_dma_data setup commit 647ab784c507763bfda79155f125b6edd1244806 upstream. The errors were caused by copy/paste mistake in below commit since v3.10: 3489d50 ASoC: tegra: Use common DAI DMA data struct It also corrects slave_id initialization in tegra20_ac97 driver. Signed-off-by: Richard Zhao Acked-by: Stephen Warren Acked-by: Lucas Stach Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/tegra/tegra20_ac97.c | 6 +++--- sound/soc/tegra/tegra20_spdif.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/tegra/tegra20_ac97.c b/sound/soc/tegra/tegra20_ac97.c index 2f70ea7f6618..05676c022a16 100644 --- a/sound/soc/tegra/tegra20_ac97.c +++ b/sound/soc/tegra/tegra20_ac97.c @@ -399,9 +399,9 @@ static int tegra20_ac97_platform_probe(struct platform_device *pdev) ac97->capture_dma_data.slave_id = of_dma[1]; ac97->playback_dma_data.addr = mem->start + TEGRA20_AC97_FIFO_TX1; - ac97->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - ac97->capture_dma_data.maxburst = 4; - ac97->capture_dma_data.slave_id = of_dma[0]; + ac97->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + ac97->playback_dma_data.maxburst = 4; + ac97->playback_dma_data.slave_id = of_dma[1]; ret = snd_soc_register_component(&pdev->dev, &tegra20_ac97_component, &tegra20_ac97_dai, 1); diff --git a/sound/soc/tegra/tegra20_spdif.c b/sound/soc/tegra/tegra20_spdif.c index 5eaa12cdc6eb..551b3c93ce93 100644 --- a/sound/soc/tegra/tegra20_spdif.c +++ b/sound/soc/tegra/tegra20_spdif.c @@ -323,8 +323,8 @@ static int tegra20_spdif_platform_probe(struct platform_device *pdev) } spdif->playback_dma_data.addr = mem->start + TEGRA20_SPDIF_DATA_OUT; - spdif->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - spdif->capture_dma_data.maxburst = 4; + spdif->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + spdif->playback_dma_data.maxburst = 4; spdif->playback_dma_data.slave_id = dmareq->start; pm_runtime_enable(&pdev->dev); From e1e97450a25d46c665a634f5073f9555236b7b0b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 14 Jun 2013 12:34:50 +0800 Subject: [PATCH 0288/1368] ASoC: wm8962: Remove remaining direct register cache accesses commit 2e7ee15ced914e109a1a5b6dfcd463d846a13bd5 upstream. Also fix return values for headphone switch updates. Signed-off-by: Nicolin Chen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8962.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index e9710280e5e1..730dd0c0f0ab 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -1600,7 +1600,6 @@ static int wm8962_put_hp_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); - u16 *reg_cache = codec->reg_cache; int ret; /* Apply the update (if any) */ @@ -1609,16 +1608,19 @@ static int wm8962_put_hp_sw(struct snd_kcontrol *kcontrol, return 0; /* If the left PGA is enabled hit that VU bit... */ - if (snd_soc_read(codec, WM8962_PWR_MGMT_2) & WM8962_HPOUTL_PGA_ENA) - return snd_soc_write(codec, WM8962_HPOUTL_VOLUME, - reg_cache[WM8962_HPOUTL_VOLUME]); + ret = snd_soc_read(codec, WM8962_PWR_MGMT_2); + if (ret & WM8962_HPOUTL_PGA_ENA) { + snd_soc_write(codec, WM8962_HPOUTL_VOLUME, + snd_soc_read(codec, WM8962_HPOUTL_VOLUME)); + return 1; + } /* ...otherwise the right. The VU is stereo. */ - if (snd_soc_read(codec, WM8962_PWR_MGMT_2) & WM8962_HPOUTR_PGA_ENA) - return snd_soc_write(codec, WM8962_HPOUTR_VOLUME, - reg_cache[WM8962_HPOUTR_VOLUME]); + if (ret & WM8962_HPOUTR_PGA_ENA) + snd_soc_write(codec, WM8962_HPOUTR_VOLUME, + snd_soc_read(codec, WM8962_HPOUTR_VOLUME)); - return 0; + return 1; } /* The VU bits for the speakers are in a different register to the mute @@ -3374,7 +3376,6 @@ static int wm8962_probe(struct snd_soc_codec *codec) int ret; struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); struct wm8962_pdata *pdata = dev_get_platdata(codec->dev); - u16 *reg_cache = codec->reg_cache; int i, trigger, irq_pol; bool dmicclk, dmicdat; @@ -3432,8 +3433,9 @@ static int wm8962_probe(struct snd_soc_codec *codec) /* Put the speakers into mono mode? */ if (pdata->spk_mono) - reg_cache[WM8962_CLASS_D_CONTROL_2] - |= WM8962_SPK_MONO; + snd_soc_update_bits(codec, WM8962_CLASS_D_CONTROL_2, + WM8962_SPK_MONO_MASK, WM8962_SPK_MONO); + /* Micbias setup, detection enable and detection * threasholds. */ From 7ca674af75d75aa6755746327f1fd3222e54b338 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 15 May 2013 09:39:17 +0100 Subject: [PATCH 0289/1368] ARM: 7722/1: zImage: Convert 32bits memory size and address from ATAG to 64bits DTB commit faefd550c45d8d314e8f260f21565320355c947f upstream. When CONFIG_ARM_APPENDED_DTB is selected, if the bootloader provides an ATAG_MEM it replaces the memory size and the memory address in the memory node of the device tree. In the case of a system which can handle more than 4GB, the memory node cell size is 4: each data (memory size and memory address) are 64 bits and then use 2 cells. The current code in atags_to_fdt.c made the assumption of a cell size of 2 (one cell for the memory size and one cell for the memory address), this leads to an improper write of the data and ends with a boot hang. This patch writes the memory size and the memory address on the memory node in the device tree depending of the size of the memory node (32 bits or 64 bits). It has been tested in the 2 cases: - with a dtb using skeleton.dtsi - and with a dtb using skeleton64.dtsi Signed-off-by: Gregory CLEMENT Acked-by: Nicolas Pitre Signed-off-by: Russell King Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/compressed/atags_to_fdt.c | 44 +++++++++++++++++++++---- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index aabc02a68482..d1153c8a765a 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -53,6 +53,17 @@ static const void *getprop(const void *fdt, const char *node_path, return fdt_getprop(fdt, offset, property, len); } +static uint32_t get_cell_size(const void *fdt) +{ + int len; + uint32_t cell_size = 1; + const uint32_t *size_len = getprop(fdt, "/", "#size-cells", &len); + + if (size_len) + cell_size = fdt32_to_cpu(*size_len); + return cell_size; +} + static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline) { char cmdline[COMMAND_LINE_SIZE]; @@ -95,9 +106,11 @@ static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline) int atags_to_fdt(void *atag_list, void *fdt, int total_space) { struct tag *atag = atag_list; - uint32_t mem_reg_property[2 * NR_BANKS]; + /* In the case of 64 bits memory size, need to reserve 2 cells for + * address and size for each bank */ + uint32_t mem_reg_property[2 * 2 * NR_BANKS]; int memcount = 0; - int ret; + int ret, memsize; /* make sure we've got an aligned pointer */ if ((u32)atag_list & 0x3) @@ -137,8 +150,25 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) continue; if (!atag->u.mem.size) continue; - mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start); - mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size); + memsize = get_cell_size(fdt); + + if (memsize == 2) { + /* if memsize is 2, that means that + * each data needs 2 cells of 32 bits, + * so the data are 64 bits */ + uint64_t *mem_reg_prop64 = + (uint64_t *)mem_reg_property; + mem_reg_prop64[memcount++] = + cpu_to_fdt64(atag->u.mem.start); + mem_reg_prop64[memcount++] = + cpu_to_fdt64(atag->u.mem.size); + } else { + mem_reg_property[memcount++] = + cpu_to_fdt32(atag->u.mem.start); + mem_reg_property[memcount++] = + cpu_to_fdt32(atag->u.mem.size); + } + } else if (atag->hdr.tag == ATAG_INITRD2) { uint32_t initrd_start, initrd_size; initrd_start = atag->u.initrd.start; @@ -150,8 +180,10 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) } } - if (memcount) - setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount); + if (memcount) { + setprop(fdt, "/memory", "reg", mem_reg_property, + 4 * memcount * memsize); + } return fdt_pack(fdt); } From b01c4c2c2e5b7ecf14e10efabf65183d9565ff2f Mon Sep 17 00:00:00 2001 From: Jeff Skirvin Date: Thu, 11 Jul 2013 17:18:58 -0700 Subject: [PATCH 0290/1368] SCSI: isci: Fix a race condition in the SSP task management path commit 96f15f29038e58e1b0a96483e2b369ff446becf1 upstream. This commit fixes a race condition in the isci driver abort task and SSP device task management path. The race is caused when an I/O termination in the SCU hardware is necessary because of an SSP target timeout condition, and the check of the I/O end state races against the HW-termination-driven end state. The failure of the race meant that no TMF was sent to the device to clean-up the pending I/O. Signed-off-by: Jeff Skirvin Reviewed-by: Lukasz Dorau Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/isci/task.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 9bb020ac089c..0d30ca849e8f 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -491,6 +491,7 @@ int isci_task_abort_task(struct sas_task *task) struct isci_tmf tmf; int ret = TMF_RESP_FUNC_FAILED; unsigned long flags; + int target_done_already = 0; /* Get the isci_request reference from the task. Note that * this check does not depend on the pending request list @@ -505,9 +506,11 @@ int isci_task_abort_task(struct sas_task *task) /* If task is already done, the request isn't valid */ if (!(task->task_state_flags & SAS_TASK_STATE_DONE) && (task->task_state_flags & SAS_TASK_AT_INITIATOR) && - old_request) + old_request) { idev = isci_get_device(task->dev->lldd_dev); - + target_done_already = test_bit(IREQ_COMPLETE_IN_TARGET, + &old_request->flags); + } spin_unlock(&task->task_state_lock); spin_unlock_irqrestore(&ihost->scic_lock, flags); @@ -561,7 +564,7 @@ int isci_task_abort_task(struct sas_task *task) if (task->task_proto == SAS_PROTOCOL_SMP || sas_protocol_ata(task->task_proto) || - test_bit(IREQ_COMPLETE_IN_TARGET, &old_request->flags) || + target_done_already || test_bit(IDEV_GONE, &idev->flags)) { spin_unlock_irqrestore(&ihost->scic_lock, flags); From 8cf7b0b08a0bccbf3a72487b3b85ac233e83947f Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Fri, 2 Nov 2012 09:38:34 -0400 Subject: [PATCH 0291/1368] SCSI: sd: fix crash when UA received on DIF enabled device commit 085b513f97d8d799d28491239be4b451bcd8c2c5 upstream. sd_prep_fn will allocate a larger CDB for the command via mempool_alloc for devices using DIF type 2 protection. This CDB was being freed in sd_done, which results in a kernel crash if the command is retried due to a UNIT ATTENTION. This change moves the code to free the larger CDB into sd_unprep_fn instead, which is invoked after the request is complete. It is no longer necessary to call scsi_print_command separately for this case as the ->cmnd will no longer be NULL in the normal code path. Also removed conditional test for DIF type 2 when freeing the larger CDB because the protection_type could have been changed via sysfs while the command was executing. Signed-off-by: Ewan D. Milne Acked-by: Martin K. Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 1b1125e67f1e..610417ec45af 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -828,10 +828,17 @@ static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) static void sd_unprep_fn(struct request_queue *q, struct request *rq) { + struct scsi_cmnd *SCpnt = rq->special; + if (rq->cmd_flags & REQ_DISCARD) { free_page((unsigned long)rq->buffer); rq->buffer = NULL; } + if (SCpnt->cmnd != rq->cmd) { + mempool_free(SCpnt->cmnd, sd_cdb_pool); + SCpnt->cmnd = NULL; + SCpnt->cmd_len = 0; + } } /** @@ -1710,21 +1717,6 @@ static int sd_done(struct scsi_cmnd *SCpnt) if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt)) sd_dif_complete(SCpnt, good_bytes); - if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type) - == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) { - - /* We have to print a failed command here as the - * extended CDB gets freed before scsi_io_completion() - * is called. - */ - if (result) - scsi_print_command(SCpnt); - - mempool_free(SCpnt->cmnd, sd_cdb_pool); - SCpnt->cmnd = NULL; - SCpnt->cmd_len = 0; - } - return good_bytes; } From 25ef812609dbc1a3db74f0f5912c94afdfd65d31 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Fri, 12 Jul 2013 14:47:51 -0400 Subject: [PATCH 0292/1368] SCSI: qla2xxx: Properly set the tagging for commands. commit c3ccb1d7cf4c4549151876dd37c0944a682fd9e1 upstream. This fixes a regression where Xyratex controllers and disks were lost by the driver: https://bugzilla.kernel.org/show_bug.cgi?id=59601 Reported-by: Jack Hill Signed-off-by: Saurav Kashyap Signed-off-by: Giridhar Malavali Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_iocb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 15e4080b347c..51cd27a50309 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -419,6 +419,8 @@ qla2x00_start_scsi(srb_t *sp) __constant_cpu_to_le16(CF_SIMPLE_TAG); break; } + } else { + cmd_pkt->control_flags = __constant_cpu_to_le16(CF_SIMPLE_TAG); } /* Load SCSI command packet. */ @@ -1308,11 +1310,11 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, fcp_cmnd->task_attribute = TSK_ORDERED; break; default: - fcp_cmnd->task_attribute = 0; + fcp_cmnd->task_attribute = TSK_SIMPLE; break; } } else { - fcp_cmnd->task_attribute = 0; + fcp_cmnd->task_attribute = TSK_SIMPLE; } cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */ @@ -1527,7 +1529,12 @@ qla24xx_start_scsi(srb_t *sp) case ORDERED_QUEUE_TAG: cmd_pkt->task = TSK_ORDERED; break; + default: + cmd_pkt->task = TSK_SIMPLE; + break; } + } else { + cmd_pkt->task = TSK_SIMPLE; } /* Load SCSI command packet. */ From a33eb1f5e1ae58418d9853c794a40d340605d752 Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Wed, 10 Jul 2013 17:34:34 -0700 Subject: [PATCH 0293/1368] tracing: Fix error handling to ensure instances can always be removed commit 609e85a70bcd0eedf4ec60639dbcfb1ab011e054 upstream. Remove debugfs directories for tracing instances during creation if an error occurs causing the trace_array for that instance to not be added to ftrace_trace_arrays. If the directory continues to exist after the error, it cannot be removed because the respective trace_array is not in ftrace_trace_arrays. Link: http://lkml.kernel.org/r/1373502874-1706-2-git-send-email-azl@google.com Signed-off-by: Alexander Z Lam Cc: Vaibhav Nagarnaik Cc: David Sharp Cc: Alexander Z Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0b936d806659..9109d60e2f29 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5926,8 +5926,10 @@ static int new_instance_create(const char *name) goto out_free_tr; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { + debugfs_remove_recursive(tr->dir); goto out_free_tr; + } init_tracer_debugfs(tr, tr->dir); From e45ccd09b53e4cd60941891ad6992579ebc78024 Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Thu, 18 Jul 2013 11:18:44 -0700 Subject: [PATCH 0294/1368] tracing: Miscellaneous fixes for trace_array ref counting commit f77d09a384676bde6445413949d9d2c508ff3e62 upstream. Some error paths did not handle ref counting properly, and some trace files need ref counting. Link: http://lkml.kernel.org/r/1374171524-11948-1-git-send-email-azl@google.com Signed-off-by: Alexander Z Lam Cc: Vaibhav Nagarnaik Cc: David Sharp Cc: Alexander Z Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 24 ++++++++++++++++++------ kernel/trace/trace_events.c | 21 +++++++++++++++++++-- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9109d60e2f29..97e750110b5a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2956,7 +2956,6 @@ static int tracing_release(struct inode *inode, struct file *file) iter = m->private; tr = iter->tr; - trace_array_put(tr); mutex_lock(&trace_types_lock); @@ -2971,6 +2970,9 @@ static int tracing_release(struct inode *inode, struct file *file) if (!iter->snapshot) /* reenable tracing if it was previously enabled */ tracing_start_tr(tr); + + __trace_array_put(tr); + mutex_unlock(&trace_types_lock); mutex_destroy(&iter->mutex); @@ -3395,6 +3397,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static int tracing_trace_options_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; + int ret; if (tracing_disabled) return -ENODEV; @@ -3402,7 +3405,11 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file) if (trace_array_get(tr) < 0) return -ENODEV; - return single_open(file, tracing_trace_options_show, inode->i_private); + ret = single_open(file, tracing_trace_options_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } static const struct file_operations tracing_iter_fops = { @@ -3906,6 +3913,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { ret = -ENOMEM; + __trace_array_put(tr); goto out; } @@ -4652,21 +4660,24 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) ret = PTR_ERR(iter); } else { /* Writes still need the seq_file to hold the private data */ + ret = -ENOMEM; m = kzalloc(sizeof(*m), GFP_KERNEL); if (!m) - return -ENOMEM; + goto out; iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { kfree(m); - return -ENOMEM; + goto out; } + ret = 0; + iter->tr = tr; iter->trace_buffer = &tc->tr->max_buffer; iter->cpu_file = tc->cpu; m->private = iter; file->private_data = m; } - +out: if (ret < 0) trace_array_put(tr); @@ -5276,9 +5287,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf, } static const struct file_operations tracing_stats_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tc, .read = tracing_stats_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tc, }; #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6dfd48b5d1c0..69532630a2d6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1221,6 +1221,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); +static int ftrace_event_release(struct inode *inode, struct file *file); static const struct seq_operations show_event_seq_ops = { .start = t_start, @@ -1248,7 +1249,7 @@ static const struct file_operations ftrace_set_event_fops = { .read = seq_read, .write = ftrace_event_write, .llseek = seq_lseek, - .release = seq_release, + .release = ftrace_event_release, }; static const struct file_operations ftrace_enable_fops = { @@ -1326,6 +1327,15 @@ ftrace_event_open(struct inode *inode, struct file *file, return ret; } +static int ftrace_event_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return seq_release(inode, file); +} + static int ftrace_event_avail_open(struct inode *inode, struct file *file) { @@ -1339,12 +1349,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_event_seq_ops; struct trace_array *tr = inode->i_private; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_events(tr); - return ftrace_event_open(inode, file, seq_ops); + ret = ftrace_event_open(inode, file, seq_ops); + if (ret < 0) + trace_array_put(tr); + return ret; } static struct event_subsystem * From e55679914313bbfc003dcdf8ff74a16dec2fe24d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 19 Jul 2013 17:36:44 +0200 Subject: [PATCH 0295/1368] tracing: Kill the unbalanced tr->ref++ in tracing_buffers_open() commit e70e78e3c83b536730e31231dd9b979768d8df3c upstream. tracing_buffers_open() does trace_array_get() and then it wrongly inrcements tr->ref again under trace_types_lock. This means that every caller leaks trace_array: # cd /sys/kernel/debug/tracing/ # mkdir instances/X # true < instances/X/per_cpu/cpu0/trace_pipe_raw # rmdir instances/X rmdir: failed to remove `instances/X': Device or resource busy Link: http://lkml.kernel.org/r/20130719153644.GA18899@redhat.com Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Cc: Frederic Weisbecker Cc: Masami Hiramatsu Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 97e750110b5a..073b9e041d32 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4907,8 +4907,6 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_lock(&trace_types_lock); - tr->ref++; - info->iter.tr = tr; info->iter.cpu_file = tc->cpu; info->iter.trace = tr->current_trace; From c62771c88704f0814d0fead60eadc0397a5e832f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 23 Jul 2013 22:21:59 -0400 Subject: [PATCH 0296/1368] tracing: Remove locking trace_types_lock from tracing_reset_all_online_cpus() commit 09d8091c024ec88d1541d93eb8ddb2bd5cf10c39 upstream. Commit a82274151af "tracing: Protect ftrace_trace_arrays list in trace_events.c" added taking the trace_types_lock mutex in trace_events.c as there were several locations that needed it for protection. Unfortunately, it also encapsulated a call to tracing_reset_all_online_cpus() which also takes the trace_types_lock, causing a deadlock. This happens when a module has tracepoints and has been traced. When the module is removed, the trace events module notifier will grab the trace_types_lock, do a bunch of clean ups, and also clears the buffer by calling tracing_reset_all_online_cpus. This doesn't happen often which explains why it wasn't caught right away. Commit a82274151af was marked for stable, which means this must be sent to stable too. Link: http://lkml.kernel.org/r/51EEC646.7070306@broadcom.com Reported-by: Arend van Spril Tested-by: Arend van Spriel Cc: Alexander Z Lam Cc: Vaibhav Nagarnaik Cc: David Sharp Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 073b9e041d32..f7bc3ce6eaf6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1163,18 +1163,17 @@ void tracing_reset_current(int cpu) tracing_reset(&global_trace.trace_buffer, cpu); } +/* Must have trace_types_lock held */ void tracing_reset_all_online_cpus(void) { struct trace_array *tr; - mutex_lock(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); #endif } - mutex_unlock(&trace_types_lock); } #define SAVED_CMDLINES 128 From a875d82eda58712d81caf78ebb515fb072187138 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Mon, 1 Jul 2013 10:59:12 +0530 Subject: [PATCH 0297/1368] usb: host: xhci: Enable XHCI_SPURIOUS_SUCCESS for all controllers with xhci 1.0 commit 07f3cb7c28bf3f4dd80bfb136cf45810c46ac474 upstream. Xhci controllers with hci_version > 0.96 gives spurious success events on short packet completion. During webcam capture the "ERROR Transfer event TRB DMA ptr not part of current TD" was observed. The same application works fine with synopsis controllers hci_version 0.96. The same issue is seen with Intel Pantherpoint xhci controller. So enabling this quirk in xhci_gen_setup if controller verion is greater than 0.96. For xhci-pci move the quirk to much generic place xhci_gen_setup. Note from Sarah: The xHCI 1.0 spec changed how hardware handles short packets. The HW will notify SW of the TRB where the short packet occurred, and it will also give a successful status for the last TRB in a TD (the one with the IOC flag set). On the second successful status, that warning will be triggered in the driver. Software is now supposed to not assume the TD is not completed until it gets that last successful status. That means we have a slight race condition, although it should have little practical impact. This patch papers over that issue. It's on my long-term to-do list to fix this race condition, but it is a much more involved patch that will probably be too big for stable. This patch is needed for stable to avoid serious log spam. This patch should be backported to kernels as old as 3.0, that contain the commit ad808333d8201d53075a11bc8dd83b81f3d68f0b "Intel xhci: Ignore spurious successful event." The patch will have to be modified for kernels older than 3.2, since that kernel added the xhci_gen_setup function for xhci platform devices. The correct conflict resolution for kernels older than 3.2 is to set XHCI_SPURIOUS_SUCCESS in xhci_pci_quirks for all xHCI 1.0 hosts. Signed-off-by: George Cherian Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 1 - drivers/usb/host/xhci.c | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index cc24e39b97d5..f00cb203faea 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -93,7 +93,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { - xhci->quirks |= XHCI_SPURIOUS_SUCCESS; xhci->quirks |= XHCI_EP_LIMIT_QUIRK; xhci->limit_active_eps = 64; xhci->quirks |= XHCI_SW_BW_CHECKING; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index d8f640b12dd9..0f7be59d5319 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4697,6 +4697,13 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) get_quirks(dev, xhci); + /* In xhci controllers which follow xhci 1.0 spec gives a spurious + * success event after a short transfer. This quirk will ignore such + * spurious event. + */ + if (xhci->hci_version > 0x96) + xhci->quirks |= XHCI_SPURIOUS_SUCCESS; + /* Make sure the HC is halted. */ retval = xhci_halt(xhci); if (retval) From 10a1c13241f682cdc866f2aa8a0e5c5373470ecb Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sun, 21 Jul 2013 15:36:19 +0200 Subject: [PATCH 0298/1368] xhci: fix null pointer dereference on ring_doorbell_for_active_rings commit d66eaf9f89502971fddcb0de550b01fa6f409d83 upstream. in some cases where device is attched to xhci port and do not responding, for example ath9k_htc with stalled firmware, kernel will crash on ring_doorbell_for_active_rings. This patch check if pointer exist before it is used. This patch should be backported to kernels as old as 2.6.35, that contain the commit e9df17eb1408cfafa3d1844bfc7f22c7237b31b8 "USB: xhci: Correct assumptions about number of rings per endpoint" Signed-off-by: Oleksij Rempel Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1969c001b3f9..cc3bfc5d590d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -434,7 +434,7 @@ static void ring_doorbell_for_active_rings(struct xhci_hcd *xhci, /* A ring has pending URBs if its TD list is not empty */ if (!(ep->ep_state & EP_HAS_STREAMS)) { - if (!(list_empty(&ep->ring->td_list))) + if (ep->ring && !(list_empty(&ep->ring->td_list))) xhci_ring_ep_doorbell(xhci, slot_id, ep_index, 0); return; } From 5dbb5d4f24bf6862a888637b7506d1e04c2d2276 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 24 Jul 2013 10:27:13 -0700 Subject: [PATCH 0299/1368] xhci: Avoid NULL pointer deref when host dies. commit 203a86613fb3bf2767335659513fa98563a3eb71 upstream. When the host controller fails to respond to an Enable Slot command, and the host fails to respond to the register write to abort the command ring, the xHCI driver will assume the host is dead, and call usb_hc_died(). The USB device's slot_id is still set to zero, and the pointer stored at xhci->devs[0] will always be NULL. The call to xhci_check_args in xhci_free_dev should have caught the NULL virt_dev pointer. However, xhci_free_dev is designed to free the xhci_virt_device structures, even if the host is dead, so that we don't leak kernel memory. xhci_free_dev checks the return value from the generic xhci_check_args function. If the return value is -ENODEV, it carries on trying to free the virtual device. The issue is that xhci_check_args looks at the host controller state before it looks at the xhci_virt_device pointer. It will return -ENIVAL because the host is dead, and xhci_free_dev will ignore the return value, and happily dereference the NULL xhci_virt_device pointer. The fix is to make sure that xhci_check_args checks the xhci_virt_device pointer before it checks the host state. See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1203453 for further details. This patch doesn't solve the underlying issue, but will ensure we don't see any more NULL pointer dereferences because of the issue. This patch should be backported to kernels as old as 3.1, that contain the commit 7bd89b4017f46a9b92853940fd9771319acb578a "xhci: Don't submit commands or URBs to halted hosts." Signed-off-by: Sarah Sharp Reported-by: Vincent Thiele Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0f7be59d5319..9a550b6ad01d 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1171,9 +1171,6 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev, } xhci = hcd_to_xhci(hcd); - if (xhci->xhc_state & XHCI_STATE_HALTED) - return -ENODEV; - if (check_virt_dev) { if (!udev->slot_id || !xhci->devs[udev->slot_id]) { printk(KERN_DEBUG "xHCI %s called with unaddressed " @@ -1189,6 +1186,9 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev, } } + if (xhci->xhc_state & XHCI_STATE_HALTED) + return -ENODEV; + return 1; } From c65b5f6ade7209fc5eda9f0779de4ee254a6bb78 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 9 Jul 2013 17:03:50 +0300 Subject: [PATCH 0300/1368] USB: EHCI: Fix resume signalling on remote wakeup commit 47a64a13d54f6c669b00542848d5550be3d3310e upstream. Set the ehci->resuming flag for the port we receive a remote wakeup on so that resume signalling can be completed. Without this, the root hub timer will not fire again to check if the resume was completed and there will be a never-ending wait on on the port. This effect is only observed if the HUB IRQ IN does not come after we have initiated the port resume. Signed-off-by: Roger Quadros Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 9ab4a4d9768a..ca6289b4b7ad 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -858,6 +858,7 @@ static int ehci_hub_control ( ehci->reset_done[wIndex] = jiffies + msecs_to_jiffies(20); usb_hcd_start_port_resume(&hcd->self, wIndex); + set_bit(wIndex, &ehci->resuming_ports); /* check the port again */ mod_timer(&ehci_to_hcd(ehci)->rh_timer, ehci->reset_done[wIndex]); From d3fe9f67d94aaba648a3df948750658059340cde Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 1 Jul 2013 14:03:33 +0200 Subject: [PATCH 0301/1368] USB: mos7840: fix memory leak in open commit 5f8a2e68b679b41cc8e9b642f2f5aa45dd678641 upstream. Allocated urbs and buffers were never freed on errors in open. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 7e998081e1cd..62b86a685082 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -914,20 +914,20 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) status = mos7840_get_reg_sync(port, mos7840_port->SpRegOffset, &Data); if (status < 0) { dev_dbg(&port->dev, "Reading Spreg failed\n"); - return -1; + goto err; } Data |= 0x80; status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Spreg failed\n"); - return -1; + goto err; } Data &= ~0x80; status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Spreg failed\n"); - return -1; + goto err; } /* End of block to be checked */ @@ -936,7 +936,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) &Data); if (status < 0) { dev_dbg(&port->dev, "Reading Controlreg failed\n"); - return -1; + goto err; } Data |= 0x08; /* Driver done bit */ Data |= 0x20; /* rx_disable */ @@ -944,7 +944,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) mos7840_port->ControlRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Controlreg failed\n"); - return -1; + goto err; } /* do register settings here */ /* Set all regs to the device default values. */ @@ -955,21 +955,21 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) status = mos7840_set_uart_reg(port, INTERRUPT_ENABLE_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "disabling interrupts failed\n"); - return -1; + goto err; } /* Set FIFO_CONTROL_REGISTER to the default value */ Data = 0x00; status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER failed\n"); - return -1; + goto err; } Data = 0xcf; status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER failed\n"); - return -1; + goto err; } Data = 0x03; @@ -1114,6 +1114,15 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) /* mos7840_change_port_settings(mos7840_port,old_termios); */ return 0; +err: + for (j = 0; j < NUM_URBS; ++j) { + urb = mos7840_port->write_urb_pool[j]; + if (!urb) + continue; + kfree(urb->transfer_buffer); + usb_free_urb(urb); + } + return status; } /***************************************************************************** From a53338faf612c0f3c0c4031b6844963540979ac4 Mon Sep 17 00:00:00 2001 From: Ruchika Kharwar Date: Thu, 4 Jul 2013 00:59:34 -0500 Subject: [PATCH 0302/1368] usb: dwc3: fix the error returned with usb3_phy failure commit 315955d707b50c8aad20a32ec0dd4c9fe243cabe upstream. When there is an error with the usb3_phy probe or absence, the error returned is erroneously for usb2_phy. Signed-off-by: Ruchika Kharwar Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index c35d49d39b76..358375e0b291 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -450,7 +450,7 @@ static int dwc3_probe(struct platform_device *pdev) } if (IS_ERR(dwc->usb3_phy)) { - ret = PTR_ERR(dwc->usb2_phy); + ret = PTR_ERR(dwc->usb3_phy); /* * if -ENXIO is returned, it means PHY layer wasn't From 41557736ac4acc112a1884177f62716c57852ddc Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Thu, 27 Jun 2013 01:08:11 +0800 Subject: [PATCH 0303/1368] usb: dwc3: fix wrong bit mask in dwc3_event_type commit 1974d494dea05ea227cb42f5e918828801e237aa upstream. Per dwc3 2.50a spec, the is_devspec bit is used to distinguish the Device Endpoint-Specific Event or Device-Specific Event (DEVT). If the bit is 1, the event is represented Device-Specific Event, then use [7:1] bits as Device Specific Event to marked the type. It has 7 bits, and we can see the reserved8_31 variable name which means from 8 to 31 bits marked reserved, actually there are 24 bits not 25 bits between that. And 1 + 7 + 24 = 32, the event size is 4 byes. So in dwc3_event_type, the bit mask should be: is_devspec [0] 1 bit type [7:1] 7 bits reserved8_31 [31:8] 24 bits This patch should be backported to kernels as old as 3.2, that contain the commit 72246da40f3719af3bfd104a2365b32537c27d83 "usb: Introduce DesignWare USB3 DRD Driver". Signed-off-by: Huang Rui Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b69d322e3cab..27dad993b007 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -759,8 +759,8 @@ struct dwc3 { struct dwc3_event_type { u32 is_devspec:1; - u32 type:6; - u32 reserved8_31:25; + u32 type:7; + u32 reserved8_31:24; } __packed; #define DWC3_DEPEVT_XFERCOMPLETE 0x01 From 003dda8a8dedc8a2e90f33fd0745b9529afdf161 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 15 Jul 2013 12:36:35 +0300 Subject: [PATCH 0304/1368] usb: dwc3: gadget: don't prevent gadget from being probed if we fail commit cdcedd6981194e511cc206887db661d016069d68 upstream. In case we fail our ->udc_start() callback, we should be ready to accept another modprobe following the failed one. We had forgotten to clear dwc->gadget_driver back to NULL and, because of that, we were preventing gadget driver modprobe from being retried. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b5e5b35df49c..f77083fedc68 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1584,6 +1584,7 @@ static int dwc3_gadget_start(struct usb_gadget *g, __dwc3_gadget_ep_disable(dwc->eps[0]); err0: + dwc->gadget_driver = NULL; spin_unlock_irqrestore(&dwc->lock, flags); return ret; From 592bafa7e0741a4f211e61d5043f16cf59d0023c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Jun 2013 12:24:26 +0200 Subject: [PATCH 0305/1368] USB: ti_usb_3410_5052: fix dynamic-id matching commit 1fad56424f5ad3ce4973505a357212b2e2282b3f upstream. The driver failed to take the dynamic ids into account when determining the device type and therefore all devices were detected as 2-port devices when using the dynamic-id interface. Match on the usb-serial-driver field instead of doing redundant id-table searches. Reported-by: Anders Hammarquist Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index e581c2549a57..01f79f11e5ad 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -371,7 +371,7 @@ static int ti_startup(struct usb_serial *serial) usb_set_serial_data(serial, tdev); /* determine device type */ - if (usb_match_id(serial->interface, ti_id_table_3410)) + if (serial->type == &ti_1port_device) tdev->td_is_3410 = 1; dev_dbg(&dev->dev, "%s - device type is %s\n", __func__, tdev->td_is_3410 ? "3410" : "5052"); From 909dccf772418ade21930910ab71327552049982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3hann=20B=2E=20Gu=C3=B0mundsson?= Date: Thu, 4 Jul 2013 21:47:52 +0000 Subject: [PATCH 0306/1368] USB: misc: Add Manhattan Hi-Speed USB DVI Converter to sisusbvga MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 58fc90db8261b571c026bb8bf23aad48a7233118 upstream. Signed-off-by: Jóhann B. Guðmundsson Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/sisusbvga/sisusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index c21386ec5d35..de98906f786d 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -3247,6 +3247,7 @@ static const struct usb_device_id sisusb_table[] = { { USB_DEVICE(0x0711, 0x0903) }, { USB_DEVICE(0x0711, 0x0918) }, { USB_DEVICE(0x0711, 0x0920) }, + { USB_DEVICE(0x0711, 0x0950) }, { USB_DEVICE(0x182d, 0x021c) }, { USB_DEVICE(0x182d, 0x0269) }, { } From 889ba7bc28e5db5da40f648c91678dcc2d2837fd Mon Sep 17 00:00:00 2001 From: William Gulland Date: Thu, 27 Jun 2013 16:10:20 -0700 Subject: [PATCH 0307/1368] usb: Clear both buffers when clearing a control transfer TT buffer. commit 2c7b871b9102c497ba8f972aa5d38532f05b654d upstream. Control transfers have both IN and OUT (or SETUP) packets, so when clearing TT buffers for a control transfer it's necessary to send two HUB_CLEAR_TT_BUFFER requests to the hub. Signed-off-by: William Gulland Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index feef9351463d..868ad745a500 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -668,6 +668,15 @@ static void hub_irq(struct urb *urb) static inline int hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt) { + /* Need to clear both directions for control ep */ + if (((devinfo >> 11) & USB_ENDPOINT_XFERTYPE_MASK) == + USB_ENDPOINT_XFER_CONTROL) { + int status = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), + HUB_CLEAR_TT_BUFFER, USB_RT_PORT, + devinfo ^ 0x8000, tt, NULL, 0, 1000); + if (status) + return status; + } return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); From 5deba4cf6db86cc902526d8b08fc5d56e9d43994 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 11 Jul 2013 14:58:04 -0400 Subject: [PATCH 0308/1368] USB: global suspend and remote wakeup don't mix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e583d9db9960cf40e0bc8afee4946baa9d71596e upstream. The hub driver was recently changed to use "global" suspend for system suspend transitions on non-SuperSpeed buses. This means that we don't suspend devices individually by setting the suspend feature on the upstream hub port; instead devices all go into suspend automatically when the root hub stops transmitting packets. The idea was to save time and to avoid certain kinds of wakeup races. Now it turns out that many hubs are buggy; they don't relay wakeup requests from a downstream port to their upstream port if the downstream port's suspend feature is not set (depending on the speed of the downstream port, whether or not the hub is enabled for remote wakeup, and possibly other factors). We can't have hubs dropping wakeup requests. Therefore this patch goes partway back to the old policy: It sets the suspend feature for a port if the device attached to that port or any of its descendants is enabled for wakeup. People will still be able to benefit from the time savings if they don't care about wakeup and leave it disabled on all their devices. In order to accomplish this, the patch adds a new field to the usb_hub structure: wakeup_enabled_descendants is a count of how many devices below a suspended hub are enabled for remote wakeup. A corresponding new subroutine determines the number of wakeup-enabled devices at or below an arbitrary suspended USB device. This should be applied to the 3.10 stable kernel. Signed-off-by: Alan Stern Reported-and-tested-by: Toralf Förster Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 39 +++++++++++++++++++++++++++++++-------- drivers/usb/core/hub.h | 3 +++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 868ad745a500..b93fc88c4823 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2855,6 +2855,15 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev) USB_CTRL_SET_TIMEOUT); } +/* Count of wakeup-enabled devices at or below udev */ +static unsigned wakeup_enabled_descendants(struct usb_device *udev) +{ + struct usb_hub *hub = usb_hub_to_struct_hub(udev); + + return udev->do_remote_wakeup + + (hub ? hub->wakeup_enabled_descendants : 0); +} + /* * usb_port_suspend - suspend a usb device's upstream port * @udev: device that's no longer in active use, not a root hub @@ -2895,8 +2904,8 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev) * Linux (2.6) currently has NO mechanisms to initiate that: no khubd * timer, no SRP, no requests through sysfs. * - * If Runtime PM isn't enabled or used, non-SuperSpeed devices really get - * suspended only when their bus goes into global suspend (i.e., the root + * If Runtime PM isn't enabled or used, non-SuperSpeed devices may not get + * suspended until their bus goes into global suspend (i.e., the root * hub is suspended). Nevertheless, we change @udev->state to * USB_STATE_SUSPENDED as this is the device's "logical" state. The actual * upstream port setting is stored in @udev->port_is_suspended. @@ -2967,15 +2976,21 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) /* see 7.1.7.6 */ if (hub_is_superspeed(hub->hdev)) status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3); - else if (PMSG_IS_AUTO(msg)) - status = set_port_feature(hub->hdev, port1, - USB_PORT_FEAT_SUSPEND); + /* * For system suspend, we do not need to enable the suspend feature * on individual USB-2 ports. The devices will automatically go * into suspend a few ms after the root hub stops sending packets. * The USB 2.0 spec calls this "global suspend". + * + * However, many USB hubs have a bug: They don't relay wakeup requests + * from a downstream port if the port's suspend feature isn't on. + * Therefore we will turn on the suspend feature if udev or any of its + * descendants is enabled for remote wakeup. */ + else if (PMSG_IS_AUTO(msg) || wakeup_enabled_descendants(udev) > 0) + status = set_port_feature(hub->hdev, port1, + USB_PORT_FEAT_SUSPEND); else { really_suspend = false; status = 0; @@ -3010,15 +3025,16 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) if (!PMSG_IS_AUTO(msg)) status = 0; } else { - /* device has up to 10 msec to fully suspend */ dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n", (PMSG_IS_AUTO(msg) ? "auto-" : ""), udev->do_remote_wakeup); - usb_set_device_state(udev, USB_STATE_SUSPENDED); if (really_suspend) { udev->port_is_suspended = 1; + + /* device has up to 10 msec to fully suspend */ msleep(10); } + usb_set_device_state(udev, USB_STATE_SUSPENDED); } /* @@ -3300,7 +3316,11 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg) unsigned port1; int status; - /* Warn if children aren't already suspended */ + /* + * Warn if children aren't already suspended. + * Also, add up the number of wakeup-enabled descendants. + */ + hub->wakeup_enabled_descendants = 0; for (port1 = 1; port1 <= hdev->maxchild; port1++) { struct usb_device *udev; @@ -3310,6 +3330,9 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg) if (PMSG_IS_AUTO(msg)) return -EBUSY; } + if (udev) + hub->wakeup_enabled_descendants += + wakeup_enabled_descendants(udev); } if (hdev->do_remote_wakeup && hub->quirk_check_port_auto_suspend) { diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 80ab9ee07017..f608b39beaf0 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -59,6 +59,9 @@ struct usb_hub { struct usb_tt tt; /* Transaction Translator */ unsigned mA_per_port; /* current for each child */ +#ifdef CONFIG_PM + unsigned wakeup_enabled_descendants; +#endif unsigned limited_power:1; unsigned quiescing:1; From 2578ae7c4bbb8528caa3ea835c7451868b4f8607 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 5 Jul 2013 16:49:34 +0100 Subject: [PATCH 0309/1368] staging: comedi: fix a race between do_cmd_ioctl() and read/write commit 4b18f08be01a7b3c7b6df497137b6e3cb28adaa3 upstream. `do_cmd_ioctl()` is called with the comedi device's mutex locked to process the `COMEDI_CMD` ioctl to set up comedi's asynchronous command handling on a comedi subdevice. `comedi_read()` and `comedi_write()` are the `read` and `write` handlers for the comedi device, but do not lock the mutex (for performance reasons, as some things can hold the mutex for quite a long time). There is a race condition if `comedi_read()` or `comedi_write()` is running at the same time and for the same file object and comedi subdevice as `do_cmd_ioctl()`. `do_cmd_ioctl()` sets the subdevice's `busy` pointer to the file object way before it sets the `SRF_RUNNING` flag in the subdevice's `runflags` member. `comedi_read() and `comedi_write()` check the subdevice's `busy` pointer is pointing to the current file object, then if the `SRF_RUNNING` flag is not set, will call `do_become_nonbusy()` to shut down the asyncronous command. Bad things can happen if the asynchronous command is being shutdown and set up at the same time. To prevent the race, don't set the `busy` pointer until after the `SRF_RUNNING` flag has been set. Also, make sure the mutex is held in `comedi_read()` and `comedi_write()` while calling `do_become_nonbusy()` in order to avoid moving the race condition to a point within that function. Change some error handling `goto cleanup` statements in `do_cmd_ioctl()` to simple `return -ERRFOO` statements as a result of changing when the `busy` pointer is set. Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 924c54c9c31f..fe8914310dd4 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -1401,22 +1401,19 @@ static int do_cmd_ioctl(struct comedi_device *dev, DPRINTK("subdevice busy\n"); return -EBUSY; } - s->busy = file; /* make sure channel/gain list isn't too long */ if (cmd.chanlist_len > s->len_chanlist) { DPRINTK("channel/gain list too long %u > %d\n", cmd.chanlist_len, s->len_chanlist); - ret = -EINVAL; - goto cleanup; + return -EINVAL; } /* make sure channel/gain list isn't too short */ if (cmd.chanlist_len < 1) { DPRINTK("channel/gain list too short %u < 1\n", cmd.chanlist_len); - ret = -EINVAL; - goto cleanup; + return -EINVAL; } async->cmd = cmd; @@ -1426,8 +1423,7 @@ static int do_cmd_ioctl(struct comedi_device *dev, kmalloc(async->cmd.chanlist_len * sizeof(int), GFP_KERNEL); if (!async->cmd.chanlist) { DPRINTK("allocation failed\n"); - ret = -ENOMEM; - goto cleanup; + return -ENOMEM; } if (copy_from_user(async->cmd.chanlist, user_chanlist, @@ -1479,6 +1475,9 @@ static int do_cmd_ioctl(struct comedi_device *dev, comedi_set_subdevice_runflags(s, ~0, SRF_USER | SRF_RUNNING); + /* set s->busy _after_ setting SRF_RUNNING flag to avoid race with + * comedi_read() or comedi_write() */ + s->busy = file; ret = s->do_cmd(dev, s); if (ret == 0) return 0; @@ -2041,11 +2040,13 @@ static ssize_t comedi_write(struct file *file, const char __user *buf, if (!comedi_is_subdevice_running(s)) { if (count == 0) { + mutex_lock(&dev->mutex); if (comedi_is_subdevice_in_error(s)) retval = -EPIPE; else retval = 0; do_become_nonbusy(dev, s); + mutex_unlock(&dev->mutex); } break; } @@ -2144,11 +2145,13 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, if (n == 0) { if (!comedi_is_subdevice_running(s)) { + mutex_lock(&dev->mutex); do_become_nonbusy(dev, s); if (comedi_is_subdevice_in_error(s)) retval = -EPIPE; else retval = 0; + mutex_unlock(&dev->mutex); break; } if (file->f_flags & O_NONBLOCK) { @@ -2186,9 +2189,11 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, buf += n; break; /* makes device work like a pipe */ } - if (comedi_is_subdevice_idle(s) && - async->buf_read_count - async->buf_write_count == 0) { - do_become_nonbusy(dev, s); + if (comedi_is_subdevice_idle(s)) { + mutex_lock(&dev->mutex); + if (async->buf_read_count - async->buf_write_count == 0) + do_become_nonbusy(dev, s); + mutex_unlock(&dev->mutex); } set_current_state(TASK_RUNNING); remove_wait_queue(&async->wait_head, &wait); From 867ea7111553d42d7a1545d1c69998a1a53b3166 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 8 Jul 2013 13:36:19 +0100 Subject: [PATCH 0310/1368] staging: comedi: COMEDI_CANCEL ioctl should wake up read/write commit 69acbaac303e8cb948801a9ddd0ac24e86cc4a1b upstream. Comedi devices can do blocking read() or write() (or poll()) if an asynchronous command has been set up, blocking for data (for read()) or buffer space (for write()). Various events associated with the asynchronous command will wake up the blocked reader or writer (or poller). It is also possible to force the asynchronous command to terminate by issuing a `COMEDI_CANCEL` ioctl. That shuts down the asynchronous command, but does not currently wake up the blocked reader or writer (or poller). If the blocked task could be woken up, it would see that the command is no longer active and return. The caller of the `COMEDI_CANCEL` ioctl could attempt to wake up the blocked task by sending a signal, but that's a nasty workaround. Change `do_cancel_ioctl()` to wake up the wait queue after it returns from `do_cancel()`. `do_cancel()` can propagate an error return value from the low-level comedi driver's cancel routine, but it always shuts the command down regardless, so `do_cancel_ioctl()` can wake up he wait queue regardless of the return value from `do_cancel()`. Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index fe8914310dd4..0ae406a47507 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -1692,6 +1692,7 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg, void *file) { struct comedi_subdevice *s; + int ret; if (arg >= dev->n_subdevices) return -EINVAL; @@ -1708,7 +1709,11 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg, if (s->busy != file) return -EBUSY; - return do_cancel(dev, s); + ret = do_cancel(dev, s); + if (comedi_get_subdevice_runflags(s) & SRF_USER) + wake_up_interruptible(&s->async->wait_head); + + return ret; } /* From dbc31d4e7958e7539180b9f2d7c8446e4a0429d4 Mon Sep 17 00:00:00 2001 From: Karlis Ogsts Date: Mon, 22 Jul 2013 13:51:42 -0700 Subject: [PATCH 0311/1368] staging: android: logger: Correct write offset reset on error commit 72bb99cfe9c57d2044445fb34bbc95b4c0bae6f2 upstream. In the situation that a writer fails to copy data from userspace it will reset the write offset to the value it had before it went to sleep. This discarding any messages written while aquiring the mutex. Therefore the reset offset needs to be retrieved after acquiring the mutex. Cc: Android Kernel Team Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/logger.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index 9bd874789ce5..34519ea14b54 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -469,7 +469,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t ppos) { struct logger_log *log = file_get_log(iocb->ki_filp); - size_t orig = log->w_off; + size_t orig; struct logger_entry header; struct timespec now; ssize_t ret = 0; @@ -490,6 +490,8 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, mutex_lock(&log->mutex); + orig = log->w_off; + /* * Fix up any readers, pulling them forward to the first readable * entry after (what will be) the new write offset. We do this now From cb631ac7738411616e461ae43af9b6d4cb9d29d4 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Thu, 18 Jul 2013 08:48:42 -0700 Subject: [PATCH 0312/1368] cpufreq / intel_pstate: Change to scale off of max P-state commit 2134ed4d614349b2b4e8d7bb593baa9179b8dd1e upstream. Change to using max P-state instead of max turbo P-state. This change resolves two issues. On a quiet system intel_pstate can fail to respond to a load change. On CPU SKUs that have a limited number of P-states and no turbo range intel_pstate fails to select the highest available P-state. This change is suitable for stable v3.9+ References: https://bugzilla.kernel.org/show_bug.cgi?id=59481 Reported-and-tested-by: Arjan van de Ven Reported-and-tested-by: dsmythies@telus.net Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 07f2840ad805..6d6a0b48eb75 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -103,10 +103,10 @@ struct pstate_adjust_policy { static struct pstate_adjust_policy default_policy = { .sample_rate_ms = 10, .deadband = 0, - .setpoint = 109, - .p_gain_pct = 17, + .setpoint = 97, + .p_gain_pct = 20, .d_gain_pct = 0, - .i_gain_pct = 4, + .i_gain_pct = 0, }; struct perf_limits { @@ -468,12 +468,12 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) { int32_t busy_scaled; - int32_t core_busy, turbo_pstate, current_pstate; + int32_t core_busy, max_pstate, current_pstate; core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy); - turbo_pstate = int_tofp(cpu->pstate.turbo_pstate); + max_pstate = int_tofp(cpu->pstate.max_pstate); current_pstate = int_tofp(cpu->pstate.current_pstate); - busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate)); + busy_scaled = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); return fp_toint(busy_scaled); } From db60e49ae570b342d63f0bbda4d35b85a392da25 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 4 Jul 2013 16:14:23 +0200 Subject: [PATCH 0313/1368] Btrfs: fix wrong write offset when replacing a device commit 115930cb2d444a684975cf2325759cb48ebf80cc upstream. Miao Xie reported the following issue: The filesystem was corrupted after we did a device replace. Steps to reproduce: # mkfs.btrfs -f -m single -d raid10 .. # mount # btrfs replace start -rfB 1 # umount # btrfsck The reason for the issue is that we changed the write offset by mistake, introduced by commit 625f1c8dc. We read the data from the source device at first, and then write the data into the corresponding place of the new device. In order to implement the "-r" option, the source location is remapped using btrfs_map_block(). The read takes place on the mapped location, and the write needs to take place on the unmapped location. Currently the write is using the mapped location, and this commit changes it back by undoing the change to the write address that the aforementioned commit added by mistake. Reported-by: Miao Xie Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/scrub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 79bd479317cb..eb84c2db1aca 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2501,7 +2501,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ret = scrub_extent(sctx, extent_logical, extent_len, extent_physical, extent_dev, flags, generation, extent_mirror_num, - extent_physical); + extent_logical - logical + physical); if (ret) goto out; From 14e86565899d123a994d974560624b617d855ca8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 15 Jul 2013 12:41:42 -0400 Subject: [PATCH 0314/1368] Btrfs: fix lock leak when resuming snapshot deletion commit fec386ac1428f9c0e672df952cbca5cebd4e4e2f upstream. We aren't setting path->locks[level] when we resume a snapshot deletion which means we won't unlock the buffer when we free the path. This causes deadlocks if we happen to re-allocate the block before we've evicted the extent buffer from cache. Thanks, Reported-by: Alex Lyakas Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index df472ab1b5ac..3915155ba274 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7355,6 +7355,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, while (1) { btrfs_tree_lock(path->nodes[level]); btrfs_set_lock_blocking(path->nodes[level]); + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, path->nodes[level]->start, @@ -7370,6 +7371,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, break; btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; WARN_ON(wc->refs[level] != 1); level--; } From 0183a8ee8c5621a84b0e4ba0b627af73f563e37c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 17 Jul 2013 19:30:20 -0400 Subject: [PATCH 0315/1368] Btrfs: re-add root to dead root list if we stop dropping it commit d29a9f629e009c9b90e5859bce581070fd6247fc upstream. If we stop dropping a root for whatever reason we need to add it back to the dead root list so that we will re-start the dropping next transaction commit. The other case this happens is if we recover a drop because we will add a root without adding it to the fs radix tree, so we can leak it's root and commit root extent buffer, adding this to the dead root list makes this cleanup happen. Thanks, Reported-by: Alex Lyakas Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3915155ba274..0b272d068337 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7298,6 +7298,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int err = 0; int ret; int level; + bool root_dropped = false; path = btrfs_alloc_path(); if (!path) { @@ -7473,12 +7474,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } + root_dropped = true; out_end_trans: btrfs_end_transaction_throttle(trans, tree_root); out_free: kfree(wc); btrfs_free_path(path); out: + /* + * So if we need to stop dropping the snapshot for whatever reason we + * need to make sure to add it back to the dead root list so that we + * keep trying to do the work later. This also cleans up roots if we + * don't have it in the radix (like when we recover after a power fail + * or unmount) so we don't leak memory. + */ + if (root_dropped == false) + btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); return err; From f4b5b99f18cd5984120ab2334aeadc5384f260cf Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 17 Jul 2013 08:09:37 +0100 Subject: [PATCH 0316/1368] xen-netfront: pull on receive skb may need to happen earlier commit 093b9c71b6e450e375f4646ba86faed0195ec7df upstream. Due to commit 3683243b ("xen-netfront: use __pskb_pull_tail to ensure linear area is big enough on RX") xennet_fill_frags() may end up filling MAX_SKB_FRAGS + 1 fragments in a receive skb, and only reduce the fragment count subsequently via __pskb_pull_tail(). That's a result of xennet_get_responses() allowing a maximum of one more slot to be consumed (and intermediately transformed into a fragment) if the head slot has a size less than or equal to RX_COPY_THRESHOLD. Hence we need to adjust xennet_fill_frags() to pull earlier if we reached the maximum fragment count - due to the described behavior of xennet_get_responses() this guarantees that at least the first fragment will get completely consumed, and hence the fragment count reduced. In order to not needlessly call __pskb_pull_tail() twice, make the original call conditional upon the pull target not having been reached yet, and defer the newly added one as much as possible (an alternative would have been to always call the function right before the call to xennet_fill_frags(), but that would imply more frequent cases of needing to call it twice). Signed-off-by: Jan Beulich Acked-by: Wei Liu Cc: Ian Campbell Acked-by: Ian Campbell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 1db101415069..0c01b8e33fe1 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -276,8 +276,7 @@ static void xennet_alloc_rx_buffers(struct net_device *dev) break; } - __skb_fill_page_desc(skb, 0, page, 0, 0); - skb_shinfo(skb)->nr_frags = 1; + skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); __skb_queue_tail(&np->rx_batch, skb); } @@ -822,7 +821,6 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np, struct sk_buff_head *list) { struct skb_shared_info *shinfo = skb_shinfo(skb); - int nr_frags = shinfo->nr_frags; RING_IDX cons = np->rx.rsp_cons; struct sk_buff *nskb; @@ -831,19 +829,21 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np, RING_GET_RESPONSE(&np->rx, ++cons); skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; - __skb_fill_page_desc(skb, nr_frags, - skb_frag_page(nfrag), - rx->offset, rx->status); + if (shinfo->nr_frags == MAX_SKB_FRAGS) { + unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; - skb->data_len += rx->status; + BUG_ON(pull_to <= skb_headlen(skb)); + __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); + } + BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS); + + skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag), + rx->offset, rx->status, PAGE_SIZE); skb_shinfo(nskb)->nr_frags = 0; kfree_skb(nskb); - - nr_frags++; } - shinfo->nr_frags = nr_frags; return cons; } @@ -929,7 +929,8 @@ static int handle_incoming_queue(struct net_device *dev, while ((skb = __skb_dequeue(rxq)) != NULL) { int pull_to = NETFRONT_SKB_CB(skb)->pull_to; - __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); + if (pull_to > skb_headlen(skb)) + __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); /* Ethernet work: Delayed to here as it peeks the header. */ skb->protocol = eth_type_trans(skb, dev); @@ -1015,16 +1016,10 @@ static int xennet_poll(struct napi_struct *napi, int budget) skb_shinfo(skb)->frags[0].page_offset = rx->offset; skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status); skb->data_len = rx->status; + skb->len += rx->status; i = xennet_fill_frags(np, skb, &tmpq); - /* - * Truesize is the actual allocation size, even if the - * allocation is only partially used. - */ - skb->truesize += PAGE_SIZE * skb_shinfo(skb)->nr_frags; - skb->len += skb->data_len; - if (rx->flags & XEN_NETRXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (rx->flags & XEN_NETRXF_data_validated) From 07221c8c2109ae56d9f83cd0644498069ed7cfed Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 Jan 2013 11:33:52 -0500 Subject: [PATCH 0317/1368] xen/blkback: Check device permissions before allowing OP_DISCARD commit 604c499cbbcc3d5fe5fb8d53306aa0fae1990109 upstream. We need to make sure that the device is not RO or that the request is not past the number of sectors we want to issue the DISCARD operation for. This fixes CVE-2013-2140. Acked-by: Jan Beulich Acked-by: Ian Campbell [v1: Made it pr_warn instead of pr_debug] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkback/blkback.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index dd5b2fed97e9..d81dfca755d1 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -647,7 +647,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif, int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; unsigned long secure; + struct phys_req preq; + preq.sector_number = req->u.discard.sector_number; + preq.nr_sects = req->u.discard.nr_sectors; + + err = xen_vbd_translate(&preq, blkif, WRITE); + if (err) { + pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n", + preq.sector_number, + preq.sector_number + preq.nr_sects, blkif->vbd.pdevice); + goto fail_response; + } blkif->st_ds_req++; xen_blkif_get(blkif); @@ -658,7 +669,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif, err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, GFP_KERNEL, secure); - +fail_response: if (err == -EOPNOTSUPP) { pr_debug(DRV_PFX "discard op failed, not supported\n"); status = BLKIF_RSP_EOPNOTSUPP; From 6d7d28443969512be95873d2fb155f4be9261ca5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 12 Jul 2013 16:48:12 -0700 Subject: [PATCH 0318/1368] x86, suspend: Handle CPUs which fail to #GP on RDMSR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5ff560fd48d5b3d82fa0c3aff625c9da1a301911 upstream. There are CPUs which have errata causing RDMSR of a nonexistent MSR to not fault. We would then try to WRMSR to restore the value of that MSR, causing a crash. Specifically, some Pentium M variants would have this problem trying to save and restore the non-existent EFER, causing a crash on resume. Work around this by making sure we can write back the result at suspend time. Huge thanks to Christian Sünkenberg for finding the offending erratum that finally deciphered the mystery. Reported-and-tested-by: Johan Heinrich Debugged-by: Christian Sünkenberg Acked-by: Rafael J. Wysocki Link: http://lkml.kernel.org/r/51DDC972.3010005@student.kit.edu Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/sleep.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index b44577bc9744..ec94e11807dc 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -48,9 +48,20 @@ int acpi_suspend_lowlevel(void) #ifndef CONFIG_64BIT native_store_gdt((struct desc_ptr *)&header->pmode_gdt); + /* + * We have to check that we can write back the value, and not + * just read it. At least on 90 nm Pentium M (Family 6, Model + * 13), reading an invalid MSR is not guaranteed to trap, see + * Erratum X4 in "Intel Pentium M Processor on 90 nm Process + * with 2-MB L2 Cache and Intel® Processor A100 and A110 on 90 + * nm process with 512-KB L2 Cache Specification Update". + */ if (!rdmsr_safe(MSR_EFER, &header->pmode_efer_low, - &header->pmode_efer_high)) + &header->pmode_efer_high) && + !wrmsr_safe(MSR_EFER, + header->pmode_efer_low, + header->pmode_efer_high)) header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER); #endif /* !CONFIG_64BIT */ @@ -61,7 +72,10 @@ int acpi_suspend_lowlevel(void) } if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, &header->pmode_misc_en_low, - &header->pmode_misc_en_high)) + &header->pmode_misc_en_high) && + !wrmsr_safe(MSR_IA32_MISC_ENABLE, + header->pmode_misc_en_low, + header->pmode_misc_en_high)) header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE); header->realmode_flags = acpi_realmode_flags; From 943741e352f8b0c524f542b93e8b62978dc0a9b0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 15 Jul 2013 11:50:45 -0700 Subject: [PATCH 0319/1368] x86: make sure IDT is page aligned based on 4df05f361937ee86e5a8c9ead8aeb6a19ea9b7d7 upstream. Since the IDT is referenced from a fixmap, make sure it is page aligned. This avoids the risk of the IDT ever being moved in the bss and having the mapping be offset, resulting in calling incorrect handlers. In the current upstream kernel this is not a manifested bug, but heavily patched kernels (such as those using the PaX patch series) did encounter this bug. Signed-off-by: Kees Cook Reported-by: PaX Team Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Yinghai Lu Cc: Seiji Aguchi Cc: Fenghua Yu Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 321d65ebaffe..a8368608ab41 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -513,7 +513,7 @@ ENTRY(phys_base) #include "../../x86/xen/xen-head.S" .section .bss, "aw", @nobits - .align L1_CACHE_BYTES + .align PAGE_SIZE ENTRY(idt_table) .skip IDT_ENTRIES * 16 From 1d83ab8548ff19bad94dcae62ba13ea476974e7f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 17 Jul 2013 14:55:31 +1000 Subject: [PATCH 0320/1368] md: Remove recent change which allows devices to skip recovery. commit 5024c298311f3b97c85cb034f9edaa333fdb9338 upstream. commit 7ceb17e87bde79d285a8b988cfed9eaeebe60b86 md: Allow devices to be re-added to a read-only array. allowed a bit more than just that. It also allows devices to be added to a read-write array and to end up skipping recovery. This patch removes the offending piece of code pending a rewrite for a subsequent release. More specifically: If the array has a bitmap, then the device will still need a bitmap based resync ('saved_raid_disk' is set under different conditions is a bitmap is present). If the array doesn't have a bitmap, then this is correct as long as nothing has been written to the array since the metadata was checked by ->validate_super. However there is no locking to ensure that there was no write. Bug was introduced in 3.10 and causes data corruption so patch is suitable for 3.10-stable. Reported-by: Joe Lawrence Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9b82377a833b..51f0345a4ba4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7697,20 +7697,6 @@ static int remove_and_add_spares(struct mddev *mddev, continue; rdev->recovery_offset = 0; - if (rdev->saved_raid_disk >= 0 && mddev->in_sync) { - spin_lock_irq(&mddev->write_lock); - if (mddev->in_sync) - /* OK, this device, which is in_sync, - * will definitely be noticed before - * the next write, so recovery isn't - * needed. - */ - rdev->recovery_offset = mddev->recovery_cp; - spin_unlock_irq(&mddev->write_lock); - } - if (mddev->ro && rdev->recovery_offset != MaxSector) - /* not safe to add this disk now */ - continue; if (mddev->pers-> hot_add_disk(mddev, rdev) == 0) { if (sysfs_link_rdev(mddev, rdev)) From 8afb90da9f16abc5e577318544419bfcf3565391 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 17 Jul 2013 15:19:29 +1000 Subject: [PATCH 0321/1368] md/raid1: fix bio handling problems in process_checks() commit 30bc9b53878a9921b02e3b5bc4283ac1c6de102a upstream. Recent change to use bio_copy_data() in raid1 when repairing an array is faulty. The underlying may have changed the bio in various ways using bio_advance and these need to be undone not just for the 'sbio' which is being copied to, but also the 'pbio' (primary) which is being copied from. So perform the reset on all bios that were read from and do it early. This also ensure that the sbio->bi_io_vec[j].bv_len passed to memcmp is correct. This fixes a crash during a 'check' of a RAID1 array. The crash was introduced in 3.10 so this is suitable for 3.10-stable. Reported-by: Joe Lawrence Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 53 ++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6e17f8181c4b..6f4824426e86 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1848,6 +1848,36 @@ static int process_checks(struct r1bio *r1_bio) int i; int vcnt; + /* Fix variable parts of all bios */ + vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9); + for (i = 0; i < conf->raid_disks * 2; i++) { + int j; + int size; + struct bio *b = r1_bio->bios[i]; + if (b->bi_end_io != end_sync_read) + continue; + /* fixup the bio for reuse */ + bio_reset(b); + b->bi_vcnt = vcnt; + b->bi_size = r1_bio->sectors << 9; + b->bi_sector = r1_bio->sector + + conf->mirrors[i].rdev->data_offset; + b->bi_bdev = conf->mirrors[i].rdev->bdev; + b->bi_end_io = end_sync_read; + b->bi_private = r1_bio; + + size = b->bi_size; + for (j = 0; j < vcnt ; j++) { + struct bio_vec *bi; + bi = &b->bi_io_vec[j]; + bi->bv_offset = 0; + if (size > PAGE_SIZE) + bi->bv_len = PAGE_SIZE; + else + bi->bv_len = size; + size -= PAGE_SIZE; + } + } for (primary = 0; primary < conf->raid_disks * 2; primary++) if (r1_bio->bios[primary]->bi_end_io == end_sync_read && test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { @@ -1856,12 +1886,10 @@ static int process_checks(struct r1bio *r1_bio) break; } r1_bio->read_disk = primary; - vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9); for (i = 0; i < conf->raid_disks * 2; i++) { int j; struct bio *pbio = r1_bio->bios[primary]; struct bio *sbio = r1_bio->bios[i]; - int size; if (sbio->bi_end_io != end_sync_read) continue; @@ -1887,27 +1915,6 @@ static int process_checks(struct r1bio *r1_bio) rdev_dec_pending(conf->mirrors[i].rdev, mddev); continue; } - /* fixup the bio for reuse */ - bio_reset(sbio); - sbio->bi_vcnt = vcnt; - sbio->bi_size = r1_bio->sectors << 9; - sbio->bi_sector = r1_bio->sector + - conf->mirrors[i].rdev->data_offset; - sbio->bi_bdev = conf->mirrors[i].rdev->bdev; - sbio->bi_end_io = end_sync_read; - sbio->bi_private = r1_bio; - - size = sbio->bi_size; - for (j = 0; j < vcnt ; j++) { - struct bio_vec *bi; - bi = &sbio->bi_io_vec[j]; - bi->bv_offset = 0; - if (size > PAGE_SIZE) - bi->bv_len = PAGE_SIZE; - else - bi->bv_len = size; - size -= PAGE_SIZE; - } bio_copy_data(sbio, pbio); } From c1dadcc1086bbbac1b5eeb4d5e157c76f2d8dfba Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Jul 2013 12:57:21 +1000 Subject: [PATCH 0322/1368] md/raid5: fix interaction of 'replace' and 'recovery'. commit f94c0b6658c7edea8bc19d13be321e3860a3fa54 upstream. If a device in a RAID4/5/6 is being replaced while another is being recovered, then the writes to the replacement device currently don't happen, resulting in corruption when the replacement completes and the new drive takes over. This is because the replacement writes are only triggered when 's.replacing' is set and not when the similar 's.sync' is set (which is the case during resync and recovery - it means all devices need to be read). So schedule those writes when s.replacing is set as well. In this case we cannot use "STRIPE_INSYNC" to record that the replacement has happened as that is needed for recording that any parity calculation is complete. So introduce STRIPE_REPLACED to record if the replacement has happened. For safety we should also check that STRIPE_COMPUTE_RUN is not set. This has a similar effect to the "s.locked == 0" test. The latter ensure that now IO has been flagged but not started. The former checks if any parity calculation has been flagged by not started. We must wait for both of these to complete before triggering the 'replace'. Add a similar test to the subsequent check for "are we finished yet". This possibly isn't needed (is subsumed in the STRIPE_INSYNC test), but it makes it more obvious that the REPLACE will happen before we think we are finished. Finally if a NeedReplace device is not UPTODATE then that is an error. We really must trigger a warning. This bug was introduced in commit 9a3e1101b827a59ac9036a672f5fa8d5279d0fe2 (md/raid5: detect and handle replacements during recovery.) which introduced replacement for raid5. That was in 3.3-rc3, so any stable kernel since then would benefit from this fix. Reported-by: qindehua <13691222965@163.com> Tested-by: qindehua Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 15 ++++++++++----- drivers/md/raid5.h | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 05e4a105b9c7..a35b846af4f8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3462,6 +3462,7 @@ static void handle_stripe(struct stripe_head *sh) test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { set_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_INSYNC, &sh->state); + clear_bit(STRIPE_REPLACED, &sh->state); } spin_unlock(&sh->stripe_lock); } @@ -3607,19 +3608,23 @@ static void handle_stripe(struct stripe_head *sh) handle_parity_checks5(conf, sh, &s, disks); } - if (s.replacing && s.locked == 0 - && !test_bit(STRIPE_INSYNC, &sh->state)) { + if ((s.replacing || s.syncing) && s.locked == 0 + && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) + && !test_bit(STRIPE_REPLACED, &sh->state)) { /* Write out to replacement devices where possible */ for (i = 0; i < conf->raid_disks; i++) - if (test_bit(R5_UPTODATE, &sh->dev[i].flags) && - test_bit(R5_NeedReplace, &sh->dev[i].flags)) { + if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) { + WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags)); set_bit(R5_WantReplace, &sh->dev[i].flags); set_bit(R5_LOCKED, &sh->dev[i].flags); s.locked++; } - set_bit(STRIPE_INSYNC, &sh->state); + if (s.replacing) + set_bit(STRIPE_INSYNC, &sh->state); + set_bit(STRIPE_REPLACED, &sh->state); } if ((s.syncing || s.replacing) && s.locked == 0 && + !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS, 1); clear_bit(STRIPE_SYNCING, &sh->state); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index b0b663b119a8..70c49329ca9a 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -306,6 +306,7 @@ enum { STRIPE_SYNC_REQUESTED, STRIPE_SYNCING, STRIPE_INSYNC, + STRIPE_REPLACED, STRIPE_PREREAD_ACTIVE, STRIPE_DELAYED, STRIPE_DEGRADED, From 5a4de615144fd87589783402cab3ecbc29865fc8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 24 Jul 2013 15:37:42 +1000 Subject: [PATCH 0323/1368] md/raid10: remove use-after-free bug. commit 0eb25bb027a100f5a9df8991f2f628e7d851bc1e upstream. We always need to be careful when calling generic_make_request, as it can start a chain of events which might free something that we are using. Here is one place I wasn't careful enough. If the wbio2 is not in use, then it might get freed at the first generic_make_request call. So perform all necessary tests first. This bug was introduced in 3.3-rc3 (24afd80d99) and can cause an oops, so fix is suitable for any -stable since then. Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d61eb7ea0d81..081bb3345353 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2268,12 +2268,18 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) d = r10_bio->devs[1].devnum; wbio = r10_bio->devs[1].bio; wbio2 = r10_bio->devs[1].repl_bio; + /* Need to test wbio2->bi_end_io before we call + * generic_make_request as if the former is NULL, + * the latter is free to free wbio2. + */ + if (wbio2 && !wbio2->bi_end_io) + wbio2 = NULL; if (wbio->bi_end_io) { atomic_inc(&conf->mirrors[d].rdev->nr_pending); md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); generic_make_request(wbio); } - if (wbio2 && wbio2->bi_end_io) { + if (wbio2) { atomic_inc(&conf->mirrors[d].replacement->nr_pending); md_sync_acct(conf->mirrors[d].replacement->bdev, bio_sectors(wbio2)); From b1cd27dc900443c2f40abe30d7c97c153b5e3e7d Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Thu, 11 Jul 2013 21:15:57 -0400 Subject: [PATCH 0324/1368] ata: Fix DVD not dectected at some platform with Wellsburg PCH commit eac27f04a71e1f39f196f7e520d16dcefc955d77 upstream. There is a patch b55f84e2d527182e7c611d466cd0bb6ddce201de "ata_piix: Fix DVD not dectected at some Haswell platforms" to fix an issue of DVD not recognized on Haswell Desktop platform with Lynx Point. Recently, it is also found the same issue at some platformas with Wellsburg PCH. So deliver a similar patch to fix it by disables 32bit PIO in IDE mode. Signed-off-by: Youquan Song Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ata_piix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 8eae65905750..b92913a528b6 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -330,7 +330,7 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller IDE (Wellsburg) */ { 0x8086, 0x8d00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, /* SATA Controller IDE (Wellsburg) */ - { 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + { 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb }, /* SATA Controller IDE (Wellsburg) */ { 0x8086, 0x8d60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, /* SATA Controller IDE (Wellsburg) */ From 2794edb180b726d9d8aedcc6c7034e2d877489fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Jul 2013 16:53:36 -0400 Subject: [PATCH 0325/1368] libata: make it clear that sata_inic162x is experimental commit bb9696192826a7d9279caf872e95b41bc26c7eff upstream. sata_inic162x never reached a state where it's reliable enough for production use and data corruption is a relatively common occurrence. Make the driver generate warning about the issues and mark the Kconfig option as experimental. If the situation doesn't improve, we'd be better off making it depend on CONFIG_BROKEN. Let's wait for several cycles and see if the kernel message draws any attention. Signed-off-by: Tejun Heo Reported-by: Martin Braure de Calignon Reported-by: Ben Hutchings Reported-by: risc4all@yahoo.com Signed-off-by: Greg Kroah-Hartman --- drivers/ata/Kconfig | 2 +- drivers/ata/sata_inic162x.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index a5a3ebcbdd2c..78eabff2fe46 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -107,7 +107,7 @@ config SATA_FSL If unsure, say N. config SATA_INIC162X - tristate "Initio 162x SATA support" + tristate "Initio 162x SATA support (Very Experimental)" depends on PCI help This option enables support for Initio 162x Serial ATA. diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index 1e6827c89429..74456fa8483f 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -6,6 +6,18 @@ * * This file is released under GPL v2. * + * **** WARNING **** + * + * This driver never worked properly and unfortunately data corruption is + * relatively common. There isn't anyone working on the driver and there's + * no support from the vendor. Do not use this driver in any production + * environment. + * + * http://thread.gmane.org/gmane.linux.debian.devel.bugs.rc/378525/focus=54491 + * https://bugzilla.kernel.org/show_bug.cgi?id=60565 + * + * ***************** + * * This controller is eccentric and easily locks up if something isn't * right. Documentation is available at initio's website but it only * documents registers (not programming model). @@ -807,6 +819,8 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ata_print_version_once(&pdev->dev, DRV_VERSION); + dev_alert(&pdev->dev, "inic162x support is broken with common data corruption issues and will be disabled by default, contact linux-ide@vger.kernel.org if in production use\n"); + /* alloc host */ host = ata_host_alloc_pinfo(&pdev->dev, ppi, NR_PORTS); hpriv = devm_kzalloc(&pdev->dev, sizeof(*hpriv), GFP_KERNEL); From a4208aa227113be2e0ea81613c4f15d3bc2fc768 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jul 2013 09:39:03 +0300 Subject: [PATCH 0326/1368] svcrdma: underflow issue in decode_write_list() commit b2781e1021525649c0b33fffd005ef219da33926 upstream. My static checker marks everything from ntohl() as untrusted and it complains we could have an underflow problem doing: return (u32 *)&ary->wc_array[nchunks]; Also on 32 bit systems the upper bound check could overflow. Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 8d2edddf48cf..65b146297f5a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -98,6 +98,7 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, */ static u32 *decode_write_list(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = @@ -113,9 +114,12 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; @@ -129,6 +133,7 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) static u32 *decode_reply_array(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = (struct rpcrdma_write_array *)va; @@ -143,9 +148,12 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; From 85acabeb3c8170634eb69bee29ad58eec6d05482 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Wed, 10 Jul 2013 06:26:13 +0000 Subject: [PATCH 0327/1368] crypto: caam - Fixed the memory out of bound overwrite issue commit 9c23b7d3d6bda41e2a27375df705485523a96dc8 upstream. When kernel is compiled with CONFIG_SLUB_DEBUG=y and CRYPTO_MANAGER_DISABLE_TESTS=n, during kernel bootup, the kernel reports error given below. The root cause is that in function hash_digest_key(), for allocating descriptor, insufficient memory was being allocated. The required number of descriptor words apart from input and output pointers are 8 (instead of 6). ============================================================================= BUG dma-kmalloc-32 (Not tainted): Redzone overwritten ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: 0xdec5dec0-0xdec5dec3. First byte 0x0 instead of 0xcc INFO: Allocated in ahash_setkey+0x60/0x594 age=7 cpu=1 pid=1257 __kmalloc+0x154/0x1b4 ahash_setkey+0x60/0x594 test_hash+0x260/0x5a0 alg_test_hash+0x48/0xb0 alg_test+0x84/0x228 cryptomgr_test+0x4c/0x54 kthread+0x98/0x9c ret_from_kernel_thread+0x64/0x6c INFO: Slab 0xc0bd0ba0 objects=19 used=2 fp=0xdec5d0d0 flags=0x0081 INFO: Object 0xdec5dea0 @offset=3744 fp=0x5c200014 Bytes b4 dec5de90: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ Object dec5dea0: b0 80 00 0a 84 41 00 0d f0 40 00 00 00 67 3f c0 .....A...@...g?. Object dec5deb0: 00 00 00 50 2c 14 00 50 f8 40 00 00 1e c5 d0 00 ...P,..P.@...... Redzone dec5dec0: 00 00 00 14 .... Padding dec5df68: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ Call Trace: [dec65b60] [c00071b4] show_stack+0x4c/0x168 (unreliable) [dec65ba0] [c00d4ec8] check_bytes_and_report+0xe4/0x11c [dec65bd0] [c00d507c] check_object+0x17c/0x23c [dec65bf0] [c0550a00] free_debug_processing+0xf4/0x294 [dec65c20] [c0550bdc] __slab_free+0x3c/0x294 [dec65c80] [c03f0744] ahash_setkey+0x4e0/0x594 [dec65cd0] [c01ef138] test_hash+0x260/0x5a0 [dec65e50] [c01ef4c0] alg_test_hash+0x48/0xb0 [dec65e70] [c01eecc4] alg_test+0x84/0x228 [dec65ee0] [c01ec640] cryptomgr_test+0x4c/0x54 [dec65ef0] [c005adc0] kthread+0x98/0x9c [dec65f40] [c000e1ac] ret_from_kernel_thread+0x64/0x6c FIX dma-kmalloc-32: Restoring 0xdec5dec0-0xdec5dec3=0xcc Change-Id: I0c7a1048053e811025d1c3b487940f87345c8f5d Signed-off-by: Vakul Garg Reviewed-by: Geanta Neag Horia Ioan-B05471 Reviewed-by: Fleming Andrew-AFLEMING Tested-by: Fleming Andrew-AFLEMING Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamhash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 5996521a1caf..84573b4d6f92 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -429,7 +429,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, dma_addr_t src_dma, dst_dma; int ret = 0; - desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); + desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); if (!desc) { dev_err(jrdev, "unable to allocate key input memory\n"); return -ENOMEM; From 2b29b0d2f36f54034b7c038367f6b8e06bdc17ce Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 15 Jul 2013 14:04:50 +1000 Subject: [PATCH 0328/1368] powerpc/modules: Module CRC relocation fix causes perf issues commit 0e0ed6406e61434d3f38fb58aa8464ec4722b77e upstream. Module CRCs are implemented as absolute symbols that get resolved by a linker script. We build an intermediate .o that contains an unresolved symbol for each CRC. genksysms parses this .o, calculates the CRCs and writes a linker script that "resolves" the symbols to the calculated CRC. Unfortunately the ppc64 relocatable kernel sees these CRCs as symbols that need relocating and relocates them at boot. Commit d4703aef (module: handle ppc64 relocating kcrctabs when CONFIG_RELOCATABLE=y) added a hook to reverse the bogus relocations. Part of this patch created a symbol at 0x0: # head -2 /proc/kallsyms 0000000000000000 T reloc_start c000000000000000 T .__start This reloc_start symbol is causing lots of confusion to perf. It thinks reloc_start is a massive function that stretches from 0x0 to 0xc000000000000000 and we get various cryptic errors out of perf, including: problem incrementing symbol count, skipping event This patch removes the reloc_start linker script label and instead defines it as PHYSICAL_START. We also need to wrap it with CONFIG_PPC64 because the ppc32 kernel can set a non zero PHYSICAL_START at compile time and we wouldn't want to subtract it from the CRCs in that case. Signed-off-by: Anton Blanchard Acked-by: Rusty Russell Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/module.h | 5 ++--- arch/powerpc/kernel/vmlinux.lds.S | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index c1df590ec444..49fa55bfbac4 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -82,10 +82,9 @@ struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, struct exception_table_entry *finish); -#ifdef CONFIG_MODVERSIONS +#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64) #define ARCH_RELOCATES_KCRCTAB - -extern const unsigned long reloc_start[]; +#define reloc_start PHYSICAL_START #endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MODULE_H */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 654e479802f2..f096e72262f4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - . = 0; - reloc_start = .; - . = KERNELBASE; /* From ce4eb5e3eda7c0b8f2eeb07d83c5857c7b373d8c Mon Sep 17 00:00:00 2001 From: Harshula Jayasuriya Date: Tue, 23 Jul 2013 14:21:35 +1000 Subject: [PATCH 0329/1368] nfsd: nfsd_open: when dentry_open returns an error do not propagate as struct file commit e4daf1ffbe6cc3b12aab4d604e627829e93e9914 upstream. The following call chain: ------------------------------------------------------------ nfs4_get_vfs_file - nfsd_open - dentry_open - do_dentry_open - __get_file_write_access - get_write_access - return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; ------------------------------------------------------------ can result in the following state: ------------------------------------------------------------ struct nfs4_file { ... fi_fds = {0xffff880c1fa65c80, 0xffffffffffffffe6, 0x0}, fi_access = {{ counter = 0x1 }, { counter = 0x0 }}, ... ------------------------------------------------------------ 1) First time around, in nfs4_get_vfs_file() fp->fi_fds[O_WRONLY] is NULL, hence nfsd_open() is called where we get status set to an error and fp->fi_fds[O_WRONLY] to -ETXTBSY. Thus we do not reach nfs4_file_get_access() and fi_access[O_WRONLY] is not incremented. 2) Second time around, in nfs4_get_vfs_file() fp->fi_fds[O_WRONLY] is NOT NULL (-ETXTBSY), so nfsd_open() is NOT called, but nfs4_file_get_access() IS called and fi_access[O_WRONLY] is incremented. Thus we leave a landmine in the form of the nfs4_file data structure in an incorrect state. 3) Eventually, when __nfs4_file_put_access() is called it finds fi_access[O_WRONLY] being non-zero, it decrements it and calls nfs4_file_put_fd() which tries to fput -ETXTBSY. ------------------------------------------------------------ ... [exception RIP: fput+0x9] RIP: ffffffff81177fa9 RSP: ffff88062e365c90 RFLAGS: 00010282 RAX: ffff880c2b3d99cc RBX: ffff880c2b3d9978 RCX: 0000000000000002 RDX: dead000000100101 RSI: 0000000000000001 RDI: ffffffffffffffe6 RBP: ffff88062e365c90 R8: ffff88041fe797d8 R9: ffff88062e365d58 R10: 0000000000000008 R11: 0000000000000000 R12: 0000000000000001 R13: 0000000000000007 R14: 0000000000000000 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #9 [ffff88062e365c98] __nfs4_file_put_access at ffffffffa0562334 [nfsd] #10 [ffff88062e365cc8] nfs4_file_put_access at ffffffffa05623ab [nfsd] #11 [ffff88062e365ce8] free_generic_stateid at ffffffffa056634d [nfsd] #12 [ffff88062e365d18] release_open_stateid at ffffffffa0566e4b [nfsd] #13 [ffff88062e365d38] nfsd4_close at ffffffffa0567401 [nfsd] #14 [ffff88062e365d88] nfsd4_proc_compound at ffffffffa0557f28 [nfsd] #15 [ffff88062e365dd8] nfsd_dispatch at ffffffffa054543e [nfsd] #16 [ffff88062e365e18] svc_process_common at ffffffffa04ba5a4 [sunrpc] #17 [ffff88062e365e98] svc_process at ffffffffa04babe0 [sunrpc] #18 [ffff88062e365eb8] nfsd at ffffffffa0545b62 [nfsd] #19 [ffff88062e365ee8] kthread at ffffffff81090886 #20 [ffff88062e365f48] kernel_thread at ffffffff8100c14a ------------------------------------------------------------ Signed-off-by: Harshula Jayasuriya Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 84ce601d8063..baf149a85263 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -802,9 +802,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, flags = O_WRONLY|O_LARGEFILE; } *filp = dentry_open(&path, flags, current_cred()); - if (IS_ERR(*filp)) + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); - else { + *filp = NULL; + } else { host_err = ima_file_check(*filp, may_flags); if (may_flags & NFSD_MAY_64BIT_COOKIE) From 0ac7e44316a38c618afb851e05e0b71d8c83fef7 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 11 Jul 2013 12:03:31 -0700 Subject: [PATCH 0330/1368] Tools: hv: KVP: Fix a bug in IPV6 subnet enumeration commit ed4bb9744b41d39ba35080c2390e201575121dc7 upstream. Each subnet string needs to be separated with a semicolon. Fix this bug. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- tools/hv/hv_kvp_daemon.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 5a1f6489d185..274e17867a3a 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1016,9 +1016,10 @@ kvp_get_ip_info(int family, char *if_name, int op, if (sn_offset == 0) strcpy(sn_str, cidr_mask); - else + else { + strcat((char *)ip_buffer->sub_net, ";"); strcat(sn_str, cidr_mask); - strcat((char *)ip_buffer->sub_net, ";"); + } sn_offset += strlen(sn_str) + 1; } From 7b7295e5a040abf574110c14de67e6b463a1d921 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 14 Jul 2013 22:38:11 -0700 Subject: [PATCH 0331/1368] Drivers: hv: balloon: Fix a bug in the hot-add code commit ed07ec93e83ec471d365ce084e43ad90fd205903 upstream. As we hot-add 128 MB chunks of memory, we wait to ensure that the memory is onlined before attempting to hot-add the next chunk. If the udev rule for memory hot-add is not executed within the allowed time, we would rollback the state and abort further hot-add. Since the hot-add has succeeded and the only failure is that the memory is not onlined within the allowed time, we should not be rolling back the state. Fix this bug. Signed-off-by: K. Y. Srinivasan Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 4c605c70ebf9..61b7351df1d4 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -562,7 +562,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, struct hv_hotadd_state *has) { int ret = 0; - int i, nid, t; + int i, nid; unsigned long start_pfn; unsigned long processed_pfn; unsigned long total_pfn = pfn_count; @@ -607,14 +607,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, /* * Wait for the memory block to be onlined. + * Since the hot add has succeeded, it is ok to + * proceed even if the pages in the hot added region + * have not been "onlined" within the allowed time. */ - t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); - if (t == 0) { - pr_info("hot_add memory timedout\n"); - has->ha_end_pfn -= HA_CHUNK; - has->covered_end_pfn -= processed_pfn; - break; - } + wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); } From e39992ecaa7230126f96285a19493a9883294fdf Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 14 Jul 2013 22:38:12 -0700 Subject: [PATCH 0332/1368] Drivers: hv: balloon: Do not post pressure status if interrupted commit c5e2254f8d63a6654149aa32ac5f2b7dd66a976d upstream. When we are posting pressure status, we may get interrupted and handle the un-balloon operation. In this case just don't post the status as we know the pressure status is stale. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 61b7351df1d4..deb5c25305af 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -975,6 +975,14 @@ static void post_status(struct hv_dynmem_device *dm) dm->num_pages_ballooned + compute_balloon_floor(); + /* + * If our transaction ID is no longer current, just don't + * send the status. This can happen if we were interrupted + * after we picked our transaction ID. + */ + if (status.hdr.trans_id != atomic_read(&trans_id)) + return; + vmbus_sendpacket(dm->dev->channel, &status, sizeof(struct dm_status), (unsigned long)NULL, From 8acd5b1eaaf5b06ca149e697ed2a017a748bcd57 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 4 Jul 2013 13:11:03 +0200 Subject: [PATCH 0333/1368] regmap: cache: bail in regmap_async_complete() for bus-less maps commit f2e055e7c9c6084bfbaa68701e52562acf96419e upstream. Commit f8bd822cb ("regmap: cache: Factor out block sync") made regcache_rbtree_sync() call regmap_async_complete(), which in turn does not check for map->bus before dereferencing it. This causes a NULL pointer dereference on bus-less maps. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index a941dcfe7590..d0c81d1f409c 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1717,7 +1717,7 @@ int regmap_async_complete(struct regmap *map) int ret; /* Nothing to do with no async support */ - if (!map->bus->async_write) + if (!map->bus || !map->bus->async_write) return 0; trace_regmap_async_complete_start(map->dev); From 9d80c9e6f3345b343b9a095e8591f7c8a164b2c0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Jul 2013 02:01:53 +0200 Subject: [PATCH 0334/1368] ACPI / scan: Always call acpi_bus_scan() for bus check notifications commit 8832f7e43fa7f0f19bd54e13766a825dd1ed4d6f upstream. An ACPI_NOTIFY_BUS_CHECK notification means that we should scan the entire namespace starting from the given handle even if the device represented by that handle is present (other devices below it may just have appeared). For this reason, modify acpi_scan_bus_device_check() to always run acpi_bus_scan() if the notification being handled is of type ACPI_NOTIFY_BUS_CHECK. Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/scan.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 14807e53ccae..966bcba529ec 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -237,10 +237,12 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source) mutex_lock(&acpi_scan_lock); - acpi_bus_get_device(handle, &device); - if (device) { - dev_warn(&device->dev, "Attempt to re-insert\n"); - goto out; + if (ost_source != ACPI_NOTIFY_BUS_CHECK) { + acpi_bus_get_device(handle, &device); + if (device) { + dev_warn(&device->dev, "Attempt to re-insert\n"); + goto out; + } } acpi_evaluate_hotplug_ost(handle, ost_source, ACPI_OST_SC_INSERT_IN_PROGRESS, NULL); From 85471df478e22dd96ca4756789abc795683addc9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 12 Jul 2013 13:45:59 +0200 Subject: [PATCH 0335/1368] ACPI / scan: Do not try to attach scan handlers to devices having them commit 3a391a39593b48341f0908511590a6c0e55cc069 upstream. In acpi_bus_device_attach(), if there is an ACPI device object for the given handle and that device object has a scan handler attached to it already, there's nothing more to do for that handle. Moreover, if acpi_scan_attach_handler() is called then, it may execute the .attach() callback of the ACPI scan handler already attached to the device object and that may lead to interesting breakage. For this reason, make acpi_bus_device_attach() return success immediately when the handle's device object has a scan handler attached to it. Reported-by: Toshi Kani Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/scan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 966bcba529ec..af658b2ff279 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1892,6 +1892,9 @@ static acpi_status acpi_bus_device_attach(acpi_handle handle, u32 lvl_not_used, if (acpi_bus_get_device(handle, &device)) return AE_CTRL_DEPTH; + if (device->handler) + return AE_OK; + ret = acpi_scan_attach_handler(device); if (ret) return ret > 0 ? AE_OK : AE_CTRL_DEPTH; From bf59e1292e96b3dd4a2c58bbe8ac21e24e71dc42 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 10 Jul 2013 10:47:13 -0600 Subject: [PATCH 0336/1368] ACPI / memhotplug: Fix a stale pointer in error path commit d19f503e22316a84c39bc19445e0e4fdd49b3532 upstream. device->driver_data needs to be cleared when releasing its data, mem_device, in an error path of acpi_memory_device_add(). The function evaluates the _CRS of memory device objects, and fails when it gets an unexpected resource or cannot allocate memory. A kernel crash or data corruption may occur when the kernel accesses the stale pointer. Signed-off-by: Toshi Kani Reviewed-by: Yasuaki Ishimatsu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_memhotplug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 5e6301e94920..2cf0244d0ee9 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -283,6 +283,7 @@ static int acpi_memory_device_add(struct acpi_device *device, /* Get the range from the _CRS */ result = acpi_memory_get_device_resources(mem_device); if (result) { + device->driver_data = NULL; kfree(mem_device); return result; } From f03519471a796be1c6d2aa9871928139828550c5 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 16 Jul 2013 10:07:21 +0800 Subject: [PATCH 0337/1368] ACPI / video: ignore BIOS initial backlight value for Fujitsu E753 commit 9657a565a476d517451c10b0bcc106e300785aff upstream. The BIOS of FUjitsu E753 reports an incorrect initial backlight value for WIN8 compatible OS, causing backlight to be dark during startup. This change causes the incorrect initial value from BIOS to be ignored. References: https://bugzilla.kernel.org/show_bug.cgi?id=60161 Reported-and-tested-by: Jan Hinnerk Stosch Signed-off-by: Lan Tianyu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 440eadf2d32c..0e4b96b62c75 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -448,6 +448,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13 - 2000 Notebook PC"), }, }, + { + .callback = video_ignore_initial_backlight, + .ident = "Fujitsu E753", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E753"), + }, + }, { .callback = video_ignore_initial_backlight, .ident = "HP Pavilion dm4", From 565ff95cdc72f76d53d4e98d5e84254368b9a041 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 10 Jul 2013 23:41:15 +0100 Subject: [PATCH 0338/1368] dm mpath: fix ioctl deadlock when no paths commit 6c182cd88d179cbbd06f4f8a8a19b6977940753f upstream. When multipath needs to retry an ioctl the reference to the current live table needs to be dropped. Otherwise a deadlock occurs when all paths are down: - dm_blk_ioctl takes a reference to the current table and spins in multipath_ioctl(). - A new table is being loaded, but upon resume the process hangs in dm_table_destroy() waiting for references to drop to zero. With this patch the reference to the old table is dropped prior to retry, thereby avoiding the deadlock. Signed-off-by: Hannes Reinecke Cc: Mike Snitzer Signed-off-by: Alasdair G Kergon Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-mpath.c | 8 ++------ drivers/md/dm.c | 9 ++++++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index bdf26f5bd326..5adede17ddf6 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1561,7 +1561,6 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long flags; int r; -again: bdev = NULL; mode = 0; r = 0; @@ -1579,7 +1578,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, } if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) - r = -EAGAIN; + r = -ENOTCONN; else if (!bdev) r = -EIO; @@ -1591,11 +1590,8 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) r = scsi_verify_blk_ioctl(NULL, cmd); - if (r == -EAGAIN && !fatal_signal_pending(current)) { + if (r == -ENOTCONN && !fatal_signal_pending(current)) queue_work(kmultipathd, &m->process_queued_ios); - msleep(10); - goto again; - } return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d5370a94b2c1..33f20103d8d5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -386,10 +386,12 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct mapped_device *md = bdev->bd_disk->private_data; - struct dm_table *map = dm_get_live_table(md); + struct dm_table *map; struct dm_target *tgt; int r = -ENOTTY; +retry: + map = dm_get_live_table(md); if (!map || !dm_table_get_size(map)) goto out; @@ -410,6 +412,11 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, out: dm_table_put(map); + if (r == -ENOTCONN) { + msleep(10); + goto retry; + } + return r; } From ba0e6dd5e64bbf820669f7451ac03f4f11cd64f7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 10 Jul 2013 23:41:15 +0100 Subject: [PATCH 0339/1368] dm ioctl: set noio flag to avoid __vmalloc deadlock commit 1c0e883e86ece31880fac2f84b260545d66a39e0 upstream. Set noio flag while calling __vmalloc() because it doesn't fully respect gfp flags to avoid a possible deadlock (see commit 502624bdad3dba45dfaacaf36b7d83e39e74b2d2). This should be backported to stable kernels 3.8 and newer. The kernel 3.8 doesn't have memalloc_noio_save(), so we should set and restore process flag PF_MEMALLOC instead. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index aa04f0224642..81a79b739e97 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1644,7 +1644,10 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern } if (!dmi) { + unsigned noio_flag; + noio_flag = memalloc_noio_save(); dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + memalloc_noio_restore(noio_flag); if (dmi) *param_flags |= DM_PARAMS_VMALLOC; } From dabd2582219f0b1b1724a42e7be4045fa3686598 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 10 Jul 2013 23:41:16 +0100 Subject: [PATCH 0340/1368] dm verity: fix inability to use a few specific devices sizes commit b1bf2de07271932326af847a3c6a01fdfd29d4be upstream. Fix a boundary condition that caused failure for certain device sizes. The problem is reported at http://code.google.com/p/cryptsetup/issues/detail?id=160 For certain device sizes the number of hashes at a specific level was calculated incorrectly. It happens for example for a device with data and metadata block size 4096 that has 16385 blocks and algorithm sha256. The user can test if he is affected by this bug by running the "veritysetup verify" command and also by activating the dm-verity kernel driver and reading the whole block device. If it passes without an error, then the user is not affected. The condition for the bug is: Split the total number of data blocks (data_block_bits) into bit strings, each string has hash_per_block_bits bits. hash_per_block_bits is rounddown(log2(metadata_block_size/hash_digest_size)). Equivalently, you can say that you convert data_blocks_bits to 2^hash_per_block_bits base. If there some zero bit string below the most significant bit string and at least one bit below this zero bit string is set, then the bug happens. The same bug exists in the userspace veritysetup tool, so you must use fixed veritysetup too if you want to use devices that are affected by this boundary condition. Signed-off-by: Mikulas Patocka Cc: Milan Broz Signed-off-by: Alasdair G Kergon Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index b948fd864d45..0d2e812d424b 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -831,9 +831,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) for (i = v->levels - 1; i >= 0; i--) { sector_t s; v->hash_level_block[i] = hash_position; - s = verity_position_at_level(v, v->data_blocks, i); - s = (s >> v->hash_per_block_bits) + - !!(s & ((1 << v->hash_per_block_bits) - 1)); + s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1) + >> ((i + 1) * v->hash_per_block_bits); if (hash_position + s < hash_position) { ti->error = "Hash device offset overflow"; r = -E2BIG; From 1b4eafabcef5a74ccbc6edd201353cf23046a5a5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 8 Jul 2013 18:16:56 -0400 Subject: [PATCH 0341/1368] drm/radeon/hdmi: make sure we have an afmt block assigned commit c2b4cacfe9816c1fe378c785ce8a678cf0635ec6 upstream. Prevents a segfault if an afmt block is not assigned to the encoder such as in the LVDS or eDP case. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=66714 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen_hdmi.c | 6 ++++++ drivers/gpu/drm/radeon/r600_hdmi.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index b9c6f7675e59..b0d3fb341417 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -177,6 +177,9 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode uint32_t offset; ssize_t err; + if (!dig || !dig->afmt) + return; + /* Silent, r600_hdmi_enable will raise WARN for us */ if (!dig->afmt->enabled) return; @@ -280,6 +283,9 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + if (!dig || !dig->afmt) + return; + /* Silent, r600_hdmi_enable will raise WARN for us */ if (enable && dig->afmt->enabled) return; diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index e73b2a73494a..f48240bb8c56 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -266,6 +266,9 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod uint32_t offset; ssize_t err; + if (!dig || !dig->afmt) + return; + /* Silent, r600_hdmi_enable will raise WARN for us */ if (!dig->afmt->enabled) return; @@ -448,6 +451,9 @@ void r600_hdmi_enable(struct drm_encoder *encoder, bool enable) struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; u32 hdmi = HDMI0_ERROR_ACK; + if (!dig || !dig->afmt) + return; + /* Silent, r600_hdmi_enable will raise WARN for us */ if (enable && dig->afmt->enabled) return; From c35486752501196044bbc4a1c61c43f39b17c2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 12 Jul 2013 10:05:47 +0200 Subject: [PATCH 0342/1368] drm/radeon: fix UVD fence emit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c9a6ca4abd5f1978ef15b3ece3474f4372ae5fe7 upstream. Currently doesn't matter cause we allocate the fence in the lower 265MB anyway. Reported-by: Frank Huang Signed-off-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/r600.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 6948eb88c2b7..b60004e08207 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2986,7 +2986,7 @@ void r600_uvd_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) { struct radeon_ring *ring = &rdev->ring[fence->ring]; - uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; + uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); radeon_ring_write(ring, fence->seq); From f92c99de9db693786ffc0f44503eece2bc6a237a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Jul 2013 15:46:09 -0400 Subject: [PATCH 0343/1368] drm/radeon: allow selection of alignment in the sub-allocator commit 6c4f978b357bc779c703fda1f200e9179623d3e9 upstream. There are cases where we need more than 4k alignment. No functional change with this commit. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_gart.c | 1 + drivers/gpu/drm/radeon/radeon_object.h | 2 +- drivers/gpu/drm/radeon/radeon_ring.c | 1 + drivers/gpu/drm/radeon/radeon_sa.c | 7 ++++--- 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 142ce6cc69f5..f4dcfdd5a28b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -408,6 +408,7 @@ struct radeon_sa_manager { uint64_t gpu_addr; void *cpu_ptr; uint32_t domain; + uint32_t align; }; struct radeon_sa_bo; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 43ec4a401f07..5ce190b8bd1f 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -467,6 +467,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) size *= 2; r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, RADEON_GPU_PAGE_ALIGN(size), + RADEON_GPU_PAGE_SIZE, RADEON_GEM_DOMAIN_VRAM); if (r) { dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index e2cb80a96b51..294382394608 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -158,7 +158,7 @@ static inline void * radeon_sa_bo_cpu_addr(struct radeon_sa_bo *sa_bo) extern int radeon_sa_bo_manager_init(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, - unsigned size, u32 domain); + unsigned size, u32 align, u32 domain); extern void radeon_sa_bo_manager_fini(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager); extern int radeon_sa_bo_manager_start(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 82434018cbe8..83f6295a0e5e 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -224,6 +224,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev) } r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo, RADEON_IB_POOL_SIZE*64*1024, + RADEON_GPU_PAGE_SIZE, RADEON_GEM_DOMAIN_GTT); if (r) { return r; diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 0abe5a9431bb..f0bac68254b7 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -49,7 +49,7 @@ static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager); int radeon_sa_bo_manager_init(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, - unsigned size, u32 domain) + unsigned size, u32 align, u32 domain) { int i, r; @@ -57,13 +57,14 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev, sa_manager->bo = NULL; sa_manager->size = size; sa_manager->domain = domain; + sa_manager->align = align; sa_manager->hole = &sa_manager->olist; INIT_LIST_HEAD(&sa_manager->olist); for (i = 0; i < RADEON_NUM_RINGS; ++i) { INIT_LIST_HEAD(&sa_manager->flist[i]); } - r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true, + r = radeon_bo_create(rdev, size, align, true, domain, NULL, &sa_manager->bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r); @@ -317,7 +318,7 @@ int radeon_sa_bo_new(struct radeon_device *rdev, unsigned tries[RADEON_NUM_RINGS]; int i, r; - BUG_ON(align > RADEON_GPU_PAGE_SIZE); + BUG_ON(align > sa_manager->align); BUG_ON(size > sa_manager->size); *sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL); From f57cb1a2d0e5d88484616285a91560a6f5555e68 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Jul 2013 11:13:53 -0400 Subject: [PATCH 0344/1368] drm/radeon: fix endian issues with DP handling (v3) commit 34be8c9af7b8728465963740fc11136ae90dfc36 upstream. The atom interpreter expects data in LE format, so swap the message buffer as apprioriate. v2: properly handle non-dw aligned byte counts. v3: properly handle remainder Signed-off-by: Alex Deucher Cc: Dong He Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_dp.c | 43 +++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 064023bed480..32501f6ec991 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -44,6 +44,41 @@ static char *pre_emph_names[] = { }; /***** radeon AUX functions *****/ + +/* Atom needs data in little endian format + * so swap as appropriate when copying data to + * or from atom. Note that atom operates on + * dw units. + */ +static void radeon_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) +{ +#ifdef __BIG_ENDIAN + u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ + u32 *dst32, *src32; + int i; + + memcpy(src_tmp, src, num_bytes); + src32 = (u32 *)src_tmp; + dst32 = (u32 *)dst_tmp; + if (to_le) { + for (i = 0; i < ((num_bytes + 3) / 4); i++) + dst32[i] = cpu_to_le32(src32[i]); + memcpy(dst, dst_tmp, num_bytes); + } else { + u8 dws = num_bytes & ~3; + for (i = 0; i < ((num_bytes + 3) / 4); i++) + dst32[i] = le32_to_cpu(src32[i]); + memcpy(dst, dst_tmp, dws); + if (num_bytes % 4) { + for (i = 0; i < (num_bytes % 4); i++) + dst[dws+i] = dst_tmp[dws+i]; + } + } +#else + memcpy(dst, src, num_bytes); +#endif +} + union aux_channel_transaction { PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1; PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2; @@ -65,10 +100,10 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1); - memcpy(base, send, send_bytes); + radeon_copy_swap(base, send, send_bytes, true); - args.v1.lpAuxRequest = 0 + 4; - args.v1.lpDataOut = 16 + 4; + args.v1.lpAuxRequest = cpu_to_le16((u16)(0 + 4)); + args.v1.lpDataOut = cpu_to_le16((u16)(16 + 4)); args.v1.ucDataOutLen = 0; args.v1.ucChannelID = chan->rec.i2c_id; args.v1.ucDelay = delay / 10; @@ -102,7 +137,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, recv_bytes = recv_size; if (recv && recv_size) - memcpy(recv, base + 16, recv_bytes); + radeon_copy_swap(recv, base + 16, recv_bytes, false); return recv_bytes; } From d19ccc405c87c8ca17ea3e1091d6a6eb2d9ac674 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Fri, 19 Jul 2013 21:08:48 +0200 Subject: [PATCH 0345/1368] drm/radeon: Another card with wrong primary dac adj commit f7929f34fa0e0bb6736a2484fdc07d77a1653081 upstream. Hello, got another card with "too bright" problem: Sapphire Radeon VE 7000 DDR (VGA+S-Video) lspci -vnn: 01:00.0 VGA compatible controller [0300]: Advanced Micro Devices [AMD] nee ATI RV100 QY [Radeon 7000/VE] [1002:5159] (prog-if 00 [VGA controller]) Subsystem: PC Partner Limited Sapphire Radeon VE 7000 DDR [174b:7c28] The patch below fixes the problem for this card. But I don't like the blacklist, couldn't some heuristic be used instead? The interesting thing is that the manufacturer is the same as the other card needing the same quirk. I wonder how many different types are broken this way. The "wrong" ps2_pdac_adj value that comes from BIOS on this card is 0x300. ==================== drm/radeon: Add primary dac adj quirk for Sapphire Radeon VE 7000 DDR Values from BIOS are wrong, causing too bright colors. Use default values instead. Signed-off-by: Ondrej Zary Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_combios.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 78edadc9e86b..8528b81cd64f 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -971,10 +971,14 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct } /* quirks */ - /* Radeon 9100 (R200) */ - if ((dev->pdev->device == 0x514D) && + /* Radeon 7000 (RV100) */ + if (((dev->pdev->device == 0x5159) && (dev->pdev->subsystem_vendor == 0x174B) && - (dev->pdev->subsystem_device == 0x7149)) { + (dev->pdev->subsystem_device == 0x7c28)) || + /* Radeon 9100 (R200) */ + ((dev->pdev->device == 0x514D) && + (dev->pdev->subsystem_vendor == 0x174B) && + (dev->pdev->subsystem_device == 0x7149))) { /* vbios value is bad, use the default */ found = 0; } From 405a7ccac881355809e07a4f00e0e8528ee7cec5 Mon Sep 17 00:00:00 2001 From: Mark Kettenis Date: Sun, 21 Jul 2013 16:44:09 -0400 Subject: [PATCH 0346/1368] drm/radeon: fix combios tables on older cards commit cef1d00cd56f600121ad121875655ad410a001b8 upstream. Noticed that my old Radeon 7500 hung after printing drm: GPU not posted. posting now... when it wasn't selected as the primary card the BIOS. Some digging revealed that it was hanging in combios_parse_mmio_table() while parsing the ASIC INIT 3 table. Looking at the BIOS ROM for the card, it becomes obvious that there is no ASIC INIT 3 table in the BIOS. The code is just processing random garbage. No surprise it hangs! Why do I say that there is no ASIC INIT 3 table is the BIOS? This table is found through the MISC INFO table. The MISC INFO table can be found at offset 0x5e in the COMBIOS header. But the header is smaller than that. The COMBIOS header starts at offset 0x126. The standard PCI Data Structure (the bit that starts with 'PCIR') lives at offset 0x180. That means that the COMBIOS header can not be larger than 0x5a bytes and therefore cannot contain a MISC INFO table. I looked at a dozen or so BIOS images, some my own, some downloaded from: It is fairly obvious that the size of the COMBIOS header can be found at offset 0x6 of the header. Not sure if it is a 16-bit number or just an 8-bit number, but that doesn't really matter since the tables seems to be always smaller than 256 bytes. So I think combios_get_table_offset() should check if the requested table is present. This can be done by checking the offset against the size of the header. See the diff below. The diff is against the WIP OpenBSD codebase that roughly corresponds to Linux 3.8.13 at this point. But I don't think this bit of the code changed much since then. For what it is worth: Signed-off-by: Mark Kettenis Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_combios.c | 145 +++++++----------------- 1 file changed, 41 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 8528b81cd64f..94303840f650 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -147,7 +147,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, enum radeon_combios_table_offset table) { struct radeon_device *rdev = dev->dev_private; - int rev; + int rev, size; uint16_t offset = 0, check_offset; if (!rdev->bios) @@ -156,174 +156,106 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, switch (table) { /* absolute offset tables */ case COMBIOS_ASIC_INIT_1_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0xc); - if (check_offset) - offset = check_offset; + check_offset = 0xc; break; case COMBIOS_BIOS_SUPPORT_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x14); - if (check_offset) - offset = check_offset; + check_offset = 0x14; break; case COMBIOS_DAC_PROGRAMMING_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2a); - if (check_offset) - offset = check_offset; + check_offset = 0x2a; break; case COMBIOS_MAX_COLOR_DEPTH_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2c); - if (check_offset) - offset = check_offset; + check_offset = 0x2c; break; case COMBIOS_CRTC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2e); - if (check_offset) - offset = check_offset; + check_offset = 0x2e; break; case COMBIOS_PLL_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x30); - if (check_offset) - offset = check_offset; + check_offset = 0x30; break; case COMBIOS_TV_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x32); - if (check_offset) - offset = check_offset; + check_offset = 0x32; break; case COMBIOS_DFP_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x34); - if (check_offset) - offset = check_offset; + check_offset = 0x34; break; case COMBIOS_HW_CONFIG_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x36); - if (check_offset) - offset = check_offset; + check_offset = 0x36; break; case COMBIOS_MULTIMEDIA_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x38); - if (check_offset) - offset = check_offset; + check_offset = 0x38; break; case COMBIOS_TV_STD_PATCH_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x3e); - if (check_offset) - offset = check_offset; + check_offset = 0x3e; break; case COMBIOS_LCD_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x40); - if (check_offset) - offset = check_offset; + check_offset = 0x40; break; case COMBIOS_MOBILE_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x42); - if (check_offset) - offset = check_offset; + check_offset = 0x42; break; case COMBIOS_PLL_INIT_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x46); - if (check_offset) - offset = check_offset; + check_offset = 0x46; break; case COMBIOS_MEM_CONFIG_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x48); - if (check_offset) - offset = check_offset; + check_offset = 0x48; break; case COMBIOS_SAVE_MASK_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4a); - if (check_offset) - offset = check_offset; + check_offset = 0x4a; break; case COMBIOS_HARDCODED_EDID_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4c); - if (check_offset) - offset = check_offset; + check_offset = 0x4c; break; case COMBIOS_ASIC_INIT_2_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4e); - if (check_offset) - offset = check_offset; + check_offset = 0x4e; break; case COMBIOS_CONNECTOR_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x50); - if (check_offset) - offset = check_offset; + check_offset = 0x50; break; case COMBIOS_DYN_CLK_1_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x52); - if (check_offset) - offset = check_offset; + check_offset = 0x52; break; case COMBIOS_RESERVED_MEM_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x54); - if (check_offset) - offset = check_offset; + check_offset = 0x54; break; case COMBIOS_EXT_TMDS_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x58); - if (check_offset) - offset = check_offset; + check_offset = 0x58; break; case COMBIOS_MEM_CLK_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5a); - if (check_offset) - offset = check_offset; + check_offset = 0x5a; break; case COMBIOS_EXT_DAC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5c); - if (check_offset) - offset = check_offset; + check_offset = 0x5c; break; case COMBIOS_MISC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5e); - if (check_offset) - offset = check_offset; + check_offset = 0x5e; break; case COMBIOS_CRT_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x60); - if (check_offset) - offset = check_offset; + check_offset = 0x60; break; case COMBIOS_INTEGRATED_SYSTEM_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x62); - if (check_offset) - offset = check_offset; + check_offset = 0x62; break; case COMBIOS_COMPONENT_VIDEO_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x64); - if (check_offset) - offset = check_offset; + check_offset = 0x64; break; case COMBIOS_FAN_SPEED_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x66); - if (check_offset) - offset = check_offset; + check_offset = 0x66; break; case COMBIOS_OVERDRIVE_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x68); - if (check_offset) - offset = check_offset; + check_offset = 0x68; break; case COMBIOS_OEM_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6a); - if (check_offset) - offset = check_offset; + check_offset = 0x6a; break; case COMBIOS_DYN_CLK_2_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6c); - if (check_offset) - offset = check_offset; + check_offset = 0x6c; break; case COMBIOS_POWER_CONNECTOR_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6e); - if (check_offset) - offset = check_offset; + check_offset = 0x6e; break; case COMBIOS_I2C_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x70); - if (check_offset) - offset = check_offset; + check_offset = 0x70; break; /* relative offset tables */ case COMBIOS_ASIC_INIT_3_TABLE: /* offset from misc info */ @@ -439,11 +371,16 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, } break; default: + check_offset = 0; break; } - return offset; + size = RBIOS8(rdev->bios_header_start + 0x6); + /* check absolute offset tables */ + if (table < COMBIOS_ASIC_INIT_3_TABLE && check_offset && check_offset < size) + offset = RBIOS16(rdev->bios_header_start + check_offset); + return offset; } bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) From d54b22e585edd4401786e7c02cd85bc97fff5ffb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Jul 2013 17:44:43 -0400 Subject: [PATCH 0347/1368] drm/radeon: improve dac adjust heuristics for legacy pdac commit 03ed8cf9b28d886c64c7e705c7bb1a365fd8fb95 upstream. Hopefully avoid more quirks in the future due to bogus vbios dac data. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_combios.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 94303840f650..68ce36056019 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -902,8 +902,10 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct dac = RBIOS8(dac_info + 0x3) & 0xf; p_dac->ps2_pdac_adj = (bg << 8) | (dac); } - /* if the values are all zeros, use the table */ - if (p_dac->ps2_pdac_adj) + /* if the values are zeros, use the table */ + if ((dac == 0) || (bg == 0)) + found = 0; + else found = 1; } From 5ac4dd137c34860870e0c0438180c2836053eadf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 5 Jul 2013 23:39:50 +0200 Subject: [PATCH 0348/1368] drm/i915: fix up ring cleanup for the i830/i845 CS tlb w/a commit aaf8a5167291b65e9116cb8736d862965b57c13a upstream. It's not a good idea to also run the pipe_control cleanup. This regression has been introduced whith the original cs tlb w/a in commit b45305fce5bb1abec263fcff9d81ebecd6306ede Author: Daniel Vetter Date: Mon Dec 17 16:21:27 2012 +0100 drm/i915: Implement workaround for broken CS tlb on i830/845 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64610 Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_ringbuffer.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1d5d613eb6be..1424f2042838 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -490,9 +490,6 @@ cleanup_pipe_control(struct intel_ring_buffer *ring) struct pipe_control *pc = ring->private; struct drm_i915_gem_object *obj; - if (!ring->private) - return; - obj = pc->obj; kunmap(sg_page(obj->pages->sgl)); @@ -500,7 +497,6 @@ cleanup_pipe_control(struct intel_ring_buffer *ring) drm_gem_object_unreference(&obj->base); kfree(pc); - ring->private = NULL; } static int init_render_ring(struct intel_ring_buffer *ring) @@ -571,7 +567,10 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring) if (HAS_BROKEN_CS_TLB(dev)) drm_gem_object_unreference(to_gem_object(ring->private)); - cleanup_pipe_control(ring); + if (INTEL_INFO(dev)->gen >= 5) + cleanup_pipe_control(ring); + + ring->private = NULL; } static void From c33535efc0e1a637a7dbad918e1a0bf5a692dcdd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Jul 2013 09:22:39 +0100 Subject: [PATCH 0349/1368] drm/i915: Fix write-read race with multiple rings commit 02978ff57a5bdfbf703d2bc5a4d933a53ede3144 upstream. Daniel noticed a problem where is we wrote to an object with ring A in the middle of a very long running batch, then executed a quick batch on ring B before a batch that reads from the same object, its obj->ring would now point to ring B, but its last_write_seqno would be still relative to ring A. This would allow for the user to read from the object before the GPU had completed the write, as set_domain would only check that ring B had passed the last_write_seqno. To fix this simply (and inelegantly), we bump the last_write_seqno when switching rings so that the last_write_seqno is always relative to the current obj->ring. This fixes igt/tests/gem_write_read_ring_switch. Signed-off-by: Chris Wilson Cc: Daniel Vetter [danvet: Add note about the newly created igt which exercises this bug.] Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 34118b0c02d1..890f5c11880b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1881,6 +1881,10 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, u32 seqno = intel_ring_get_seqno(ring); BUG_ON(ring == NULL); + if (obj->ring != ring && obj->last_write_seqno) { + /* Keep the seqno relative to the current ring */ + obj->last_write_seqno = seqno; + } obj->ring = ring; /* Add a reference if we're newly entering the active list. */ From 8cdbac3c4f987bd260009d3a7b8c3c8c37b5acdb Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 9 Jul 2013 16:00:31 -0700 Subject: [PATCH 0350/1368] Partially revert "drm/i915: unconditionally use mt forcewake on hsw/ivb" commit c11e5f35ab490bd30591563816fbc83526521777 upstream. This patch partially reverts commit 36ec8f877481449bdfa072e6adf2060869e2b970 for IvyBridge CPUs. The original commit results in repeated 'Timed out waiting for forcewake old ack to clear' messages on a Supermicro C7H61 board (BIOS version 2.00 and 2.00b) with i7-3770K CPU. It ultimately results in a hangup if the system is highly loaded. Reverting the commit for IvyBridge CPUs fixes the issue. Issue a warning if the CPU is IvyBridge and mt forcewake is disabled, since this condition can result in secondary issues. v2: Only revert patch for Ivybridge CPUs Issue info message if mt forcewake is disabled on Ivybridge Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=60541 Cc: Jesse Barnes Cc: Daniel Vetter Cc: Mika Kuoppala Signed-off-by: Guenter Roeck Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66139 Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index aa01128ff192..0392c288324b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4510,9 +4510,38 @@ void intel_gt_init(struct drm_device *dev) if (IS_VALLEYVIEW(dev)) { dev_priv->gt.force_wake_get = vlv_force_wake_get; dev_priv->gt.force_wake_put = vlv_force_wake_put; - } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { + } else if (IS_HASWELL(dev)) { dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get; dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put; + } else if (IS_IVYBRIDGE(dev)) { + u32 ecobus; + + /* IVB configs may use multi-threaded forcewake */ + + /* A small trick here - if the bios hasn't configured + * MT forcewake, and if the device is in RC6, then + * force_wake_mt_get will not wake the device and the + * ECOBUS read will return zero. Which will be + * (correctly) interpreted by the test below as MT + * forcewake being disabled. + */ + mutex_lock(&dev->struct_mutex); + __gen6_gt_force_wake_mt_get(dev_priv); + ecobus = I915_READ_NOTRACE(ECOBUS); + __gen6_gt_force_wake_mt_put(dev_priv); + mutex_unlock(&dev->struct_mutex); + + if (ecobus & FORCEWAKE_MT_ENABLE) { + dev_priv->gt.force_wake_get = + __gen6_gt_force_wake_mt_get; + dev_priv->gt.force_wake_put = + __gen6_gt_force_wake_mt_put; + } else { + DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n"); + DRM_INFO("when using vblank-synced partial screen updates.\n"); + dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get; + dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put; + } } else if (IS_GEN6(dev)) { dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get; dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put; From 1177b868c8b99fa8e4d88ef8b9542741b0d60055 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 10 Jul 2013 13:36:23 +0100 Subject: [PATCH 0351/1368] drm/i915: Fix incoherence with fence updates on Sandybridge+ commit d18b9619034230b6f945e215276425636ca401fe upstream. This hopefully fixes the root cause behind the workaround added in commit 25ff1195f8a0b3724541ae7bbe331b4296de9c06 Author: Chris Wilson Date: Thu Apr 4 21:31:03 2013 +0100 drm/i915: Workaround incoherence between fences and LLC across multiple CPUs Thanks to further investigation by Jon Bloomfield, he realised that the 64-bit register might be broken up by the hardware into two 32-bit writes (a problem we have encountered elsewhere). This non-atomicity would then cause an issue where a second thread would see an intermediate register state (new high dword, old low dword), and this register would randomly be used in preference to its own thread register. This would cause the second thread to read from and write into a fairly random tiled location. Breaking the operation into 3 explicit 32-bit updates (first disable the fence, poke the upper bits, then poke the lower bits and enable) ensures that, given proper serialisation between the 32-bit register write and the memory transfer, that the fence value is always consistent. Armed with this knowledge, we can explain how the previous workaround work. The key to the corruption is that a second thread sees an erroneous fence register that conflicts and overrides its own. By serialising the fence update across all CPUs, we have a small window where no GTT access is occurring and so hide the potential corruption. This also leads to the conclusion that the earlier workaround was incomplete. v2: Be overly paranoid about the order in which fence updates become visible to the GPU to make really sure that we turn the fence off before doing the update, and then only switch the fence on afterwards. Signed-off-by: Jon Bloomfield Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Carsten Emde Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 890f5c11880b..8dc8186b6c05 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2538,7 +2538,6 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg, drm_i915_private_t *dev_priv = dev->dev_private; int fence_reg; int fence_pitch_shift; - uint64_t val; if (INTEL_INFO(dev)->gen >= 6) { fence_reg = FENCE_REG_SANDYBRIDGE_0; @@ -2548,8 +2547,23 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg, fence_pitch_shift = I965_FENCE_PITCH_SHIFT; } + fence_reg += reg * 8; + + /* To w/a incoherency with non-atomic 64-bit register updates, + * we split the 64-bit update into two 32-bit writes. In order + * for a partial fence not to be evaluated between writes, we + * precede the update with write to turn off the fence register, + * and only enable the fence as the last step. + * + * For extra levels of paranoia, we make sure each step lands + * before applying the next step. + */ + I915_WRITE(fence_reg, 0); + POSTING_READ(fence_reg); + if (obj) { u32 size = obj->gtt_space->size; + uint64_t val; val = (uint64_t)((obj->gtt_offset + size - 4096) & 0xfffff000) << 32; @@ -2558,12 +2572,16 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg, if (obj->tiling_mode == I915_TILING_Y) val |= 1 << I965_FENCE_TILING_Y_SHIFT; val |= I965_FENCE_REG_VALID; - } else - val = 0; - fence_reg += reg * 8; - I915_WRITE64(fence_reg, val); - POSTING_READ(fence_reg); + I915_WRITE(fence_reg + 4, val >> 32); + POSTING_READ(fence_reg + 4); + + I915_WRITE(fence_reg + 0, val); + POSTING_READ(fence_reg); + } else { + I915_WRITE(fence_reg + 4, 0); + POSTING_READ(fence_reg + 4); + } } static void i915_write_fence_reg(struct drm_device *dev, int reg, From f4332be72bc7a2095a21057e9db2c38bd44486b2 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 17 Jul 2013 10:22:58 +0400 Subject: [PATCH 0352/1368] drm/i915: fix long-standing SNB regression in power consumption after resume v2 commit 7dcd2677ea912573d9ed4bcd629b0023b2d11505 upstream. This patch fixes regression in power consumtion of sandy bridge gpu, which exists since v3.6 Sometimes after resuming from s2ram gpu starts thinking that it's extremely busy. After that it never reaches rc6 state. Bug exists since kernel v3.6: commit b4ae3f22d238617ca11610b29fde16cf8c0bc6e0 Author: Jesse Barnes Date: Thu Jun 14 11:04:48 2012 -0700 drm/i915: load boot context at driver init time For some reason RC6 is already enabled at the beginning of resuming process. Following initliaztion breaks some internal state and confuses RPS engine. This patch disables RC6 at the beginnig of resume and initialization. I've rearranged initialization sequence, because intel_disable_gt_powersave() needs initialized force_wake_get/put and some locks from the dev_priv. Note: The culprit in the initialization sequence seems to be the write to MBCTL added in the above mentioned commit. The first version of this patch just held a forcewake reference across the clock gating init functions, which seems to have been enought to gather quite a few positive test reports. But since that smelled a bit like ad-hoc duct-tape v2 now just disables rps/rc6 across the entire hw setup. [danvet: Add note about v1 vs. v2 of this patch and use standard layout for the commit citation. Also add the tested-bys from v1 and a cc: stable.] References https://bugs.freedesktop.org/show_bug.cgi?id=54089 References https://bugzilla.kernel.org/show_bug.cgi?id=58971 References https://patchwork.kernel.org/patch/2827634/ (patch v1) Signed-off-by: Konstantin Khlebnikov Tested-by: Alexander Kaltsas (v1) Tested-by: rocko (v1) Tested-by: JohnMB (v1) Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_dma.c | 16 ++++++++-------- drivers/gpu/drm/i915/intel_pm.c | 5 +++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 3b315ba85a3e..d1ee611d213d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1511,6 +1511,13 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev_priv->dev = dev; dev_priv->info = info; + spin_lock_init(&dev_priv->irq_lock); + spin_lock_init(&dev_priv->gpu_error.lock); + spin_lock_init(&dev_priv->rps.lock); + mutex_init(&dev_priv->dpio_lock); + mutex_init(&dev_priv->rps.hw_lock); + mutex_init(&dev_priv->modeset_restore_lock); + i915_dump_device_info(dev_priv); if (i915_get_bridge_dev(dev)) { @@ -1602,6 +1609,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) intel_irq_init(dev); intel_gt_init(dev); + intel_gt_reset(dev); /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev); @@ -1626,14 +1634,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_I945G(dev) && !IS_I945GM(dev)) pci_enable_msi(dev->pdev); - spin_lock_init(&dev_priv->irq_lock); - spin_lock_init(&dev_priv->gpu_error.lock); - spin_lock_init(&dev_priv->rps.lock); - mutex_init(&dev_priv->dpio_lock); - - mutex_init(&dev_priv->rps.hw_lock); - mutex_init(&dev_priv->modeset_restore_lock); - dev_priv->num_plane = 1; if (IS_VALLEYVIEW(dev)) dev_priv->num_plane = 2; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0392c288324b..60d57affc748 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4497,6 +4497,9 @@ void intel_gt_reset(struct drm_device *dev) if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) __gen6_gt_force_wake_mt_reset(dev_priv); } + + /* BIOS often leaves RC6 enabled, but disable it for hw init */ + intel_disable_gt_powersave(dev); } void intel_gt_init(struct drm_device *dev) @@ -4505,8 +4508,6 @@ void intel_gt_init(struct drm_device *dev) spin_lock_init(&dev_priv->gt_lock); - intel_gt_reset(dev); - if (IS_VALLEYVIEW(dev)) { dev_priv->gt.force_wake_get = vlv_force_wake_get; dev_priv->gt.force_wake_put = vlv_force_wake_put; From 6f973258b047d2913abe2bd1aad95f21cb47d5f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Jul 2013 12:14:40 +0100 Subject: [PATCH 0353/1368] drm/i915: Fix dereferencing invalid connectors in is_crtc_connector_off() commit 2e57f47d317dd035b18634b0c602272529368fcc upstream. In commit e3de42b68478a8c95dd27520e9adead2af9477a5 Author: Imre Deak Date: Fri May 3 19:44:07 2013 +0200 drm/i915: force full modeset if the connector is in DPMS OFF mode a new function was added that walked over the set of connectors to see if any of the currently associated CRTC was switched off. This function walked an array of connectors, rather than the array of pointers to connectors contained in the drm_mode_set - i.e. it was dereferencing far past the end of the first connector. This only becomes an issue if we attempt to use a clone mode (i.e. more than one connector per CRTC) such that set->num_connectors > 1. Reported-by: Timo Aaltonen Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65927 Signed-off-by: Chris Wilson Cc: Imre Deak Cc: Egbert Eich Cc: Jesse Barnes Cc: Daniel Vetter Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56746dcac40f..d7ff3afbf615 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8146,15 +8146,20 @@ static void intel_set_config_restore_state(struct drm_device *dev, } static bool -is_crtc_connector_off(struct drm_crtc *crtc, struct drm_connector *connectors, - int num_connectors) +is_crtc_connector_off(struct drm_mode_set *set) { int i; - for (i = 0; i < num_connectors; i++) - if (connectors[i].encoder && - connectors[i].encoder->crtc == crtc && - connectors[i].dpms != DRM_MODE_DPMS_ON) + if (set->num_connectors == 0) + return false; + + if (WARN_ON(set->connectors == NULL)) + return false; + + for (i = 0; i < set->num_connectors; i++) + if (set->connectors[i]->encoder && + set->connectors[i]->encoder->crtc == set->crtc && + set->connectors[i]->dpms != DRM_MODE_DPMS_ON) return true; return false; @@ -8167,10 +8172,8 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set, /* We should be able to check here if the fb has the same properties * and then just flip_or_move it */ - if (set->connectors != NULL && - is_crtc_connector_off(set->crtc, *set->connectors, - set->num_connectors)) { - config->mode_changed = true; + if (is_crtc_connector_off(set)) { + config->mode_changed = true; } else if (set->crtc->fb != set->fb) { /* If we have no fb then treat it as a full mode set */ if (set->crtc->fb == NULL) { From 19a280cac37e30243023a7f53651504a135ac960 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 17 Jul 2013 14:51:28 +0200 Subject: [PATCH 0354/1368] drm/i915: correctly restore fences with objects attached MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 94a335dba34ff47cad3d6d0c29b452d43a1be3c8 upstream. To avoid stalls we delay tiling changes and especially hold of committing the new fence state for as long as possible. Synchronization points are in the execbuf code and in our gtt fault handler. Unfortunately we've missed that tricky detail when adding proper fence restore code in commit 19b2dbde5732170a03bd82cc8bd442cf88d856f7 Author: Chris Wilson Date: Wed Jun 12 10:15:12 2013 +0100 drm/i915: Restore fences after resume and GPU resets The result was that we've restored fences for objects with no tiling, since the object<->fence link still existed after resume. Now that wouldn't have been too bad since any subsequent access would have fixed things up, but if we've changed from tiled to untiled real havoc happened: The tiling stride is stored -1 in the fence register, so a stride of 0 resulted in all 1s in the top 32bits, and so a completely bogus fence spanning everything from the start of the object to the top of the GTT. The tell-tale in the register dumps looks like: FENCE START 2: 0x0214d001 FENCE END 2: 0xfffff3ff Bit 11 isn't set since the hw doesn't store it, even when writing all 1s (at least on my snb here). To prevent such a gaffle in the future add a sanity check for fences with an untiled object attached in i915_gem_write_fence. v2: Fix the WARN, spotted by Chris. v3: Trying to reuse get_fences looked ugly and obfuscated the code. Instead reuse update_fence and to make it really dtrt also move the fence dirty state clearing into update_fence. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=60530 Cc: Chris Wilson Cc: Stéphane Marchesin Reviewed-by: Chris Wilson Tested-by: Matthew Garrett Tested-by: Björn Bidar Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8dc8186b6c05..894a285e4d84 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2138,7 +2138,17 @@ void i915_gem_restore_fences(struct drm_device *dev) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - i915_gem_write_fence(dev, i, reg->obj); + + /* + * Commit delayed tiling changes if we have an object still + * attached to the fence, otherwise just clear the fence. + */ + if (reg->obj) { + i915_gem_object_update_fence(reg->obj, reg, + reg->obj->tiling_mode); + } else { + i915_gem_write_fence(dev, i, NULL); + } } } @@ -2676,6 +2686,10 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) mb(); + WARN(obj && (!obj->stride || !obj->tiling_mode), + "bogus fence setup with stride: 0x%x, tiling mode: %i\n", + obj->stride, obj->tiling_mode); + switch (INTEL_INFO(dev)->gen) { case 7: case 6: @@ -2735,6 +2749,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, fence->obj = NULL; list_del_init(&fence->lru_list); } + obj->fence_dirty = false; } static int @@ -2864,7 +2879,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; i915_gem_object_update_fence(obj, reg, enable); - obj->fence_dirty = false; return 0; } From f12155987bdc8d175b41b2fcbd88e8788c1af92d Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Fri, 19 Jul 2013 15:02:01 -0700 Subject: [PATCH 0355/1368] drm/i915: quirk no PCH_PWM_ENABLE for Dell XPS13 backlight commit e85843bec6c2ea7c10ec61238396891cc2b753a9 upstream. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=47941 BugLink: https://bugs.launchpad.net/bugs/1163720 BugLink: https://bugs.launchpad.net/bugs/1162026 Some machines suffer from non-functional backlight controls if BLM_PCH_PWM_ENABLE is set, so provide a quirk to avoid doing so. Apply this quirk to Dell XPS 13 models. Tested-by: Eric Griffith Tested-by: Kent Baxley Signed-off-by: Kamal Mostafa Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/intel_panel.c | 3 ++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9669a0b8b440..d69f21cf1851 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -491,6 +491,7 @@ enum intel_sbi_destination { #define QUIRK_PIPEA_FORCE (1<<0) #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) +#define QUIRK_NO_PCH_PWM_ENABLE (1<<3) struct intel_fbdev; struct intel_fbc_work; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d7ff3afbf615..e1f4e6edaa46 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8917,6 +8917,17 @@ static void quirk_invert_brightness(struct drm_device *dev) DRM_INFO("applying inverted panel brightness quirk\n"); } +/* + * Some machines (Dell XPS13) suffer broken backlight controls if + * BLM_PCH_PWM_ENABLE is set. + */ +static void quirk_no_pcm_pwm_enable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + dev_priv->quirks |= QUIRK_NO_PCH_PWM_ENABLE; + DRM_INFO("applying no-PCH_PWM_ENABLE quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -8986,6 +8997,11 @@ static struct intel_quirk intel_quirks[] = { /* Acer Aspire 4736Z */ { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, + + /* Dell XPS13 HD Sandy Bridge */ + { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable }, + /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */ + { 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable }, }; static void intel_init_quirks(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index eb5e6e95f3c7..33cb87f7983e 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -354,7 +354,8 @@ void intel_panel_enable_backlight(struct drm_device *dev, POSTING_READ(reg); I915_WRITE(reg, tmp | BLM_PWM_ENABLE); - if (HAS_PCH_SPLIT(dev)) { + if (HAS_PCH_SPLIT(dev) && + !(dev_priv->quirks & QUIRK_NO_PCH_PWM_ENABLE)) { tmp = I915_READ(BLC_PWM_PCH_CTL1); tmp |= BLM_PCH_PWM_ENABLE; tmp &= ~BLM_PCH_OVERRIDE_ENABLE; From 8bc91b60f77d174f2485e10be8708c2fd7b85bfb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Jul 2013 20:36:51 +0100 Subject: [PATCH 0356/1368] drm/i915: Serialize almost all register access commit a7cd1b8fea2f341b626b255d9898a5ca5fabbf0a upstream. In theory, the different register blocks were meant to be only ever touched when holding either the struct_mutex, mode_config.lock or even a specific localised lock. This does not seem to be the case, and the hardware reacts extremely badly if we attempt to concurrently access two registers within the same cacheline. The HSD suggests that we only need to do this workaround for display range registers. However, upon review we need to serialize the multiple stages in our register write functions - if only for preemption protection. Irrespective of the hardware requirements, the current io functions are a little too loose with respect to the combination of pre- and post-condition testing that we do in conjunction with the actual io. As a result, we may be pre-empted and generate both false-postive and false-negative errors. Note well that this is a "90%" solution, there remains a few direct users of ioread/iowrite which will be fixed up in the next few patches. Since they are more invasive and that this simple change will prevent almost all lockups on Haswell, we kept this patch simple to facilitate backporting to stable. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=63914 Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a2e4953b8e8d..523a633b86f3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1247,21 +1247,21 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg) #define __i915_read(x, y) \ u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ + unsigned long irqflags; \ u##x val = 0; \ + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (IS_GEN5(dev_priv->dev)) \ ilk_dummy_write(dev_priv); \ if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ - unsigned long irqflags; \ - spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (dev_priv->forcewake_count == 0) \ dev_priv->gt.force_wake_get(dev_priv); \ val = read##y(dev_priv->regs + reg); \ if (dev_priv->forcewake_count == 0) \ dev_priv->gt.force_wake_put(dev_priv); \ - spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ } else { \ val = read##y(dev_priv->regs + reg); \ } \ + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ trace_i915_reg_rw(false, reg, val, sizeof(val)); \ return val; \ } @@ -1274,8 +1274,10 @@ __i915_read(64, q) #define __i915_write(x, y) \ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ + unsigned long irqflags; \ u32 __fifo_ret = 0; \ trace_i915_reg_rw(true, reg, val, sizeof(val)); \ + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ } \ @@ -1287,6 +1289,7 @@ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ gen6_gt_check_fifodbg(dev_priv); \ } \ hsw_unclaimed_reg_check(dev_priv, reg); \ + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ } __i915_write(8, b) __i915_write(16, w) From 9682e399d77f9bc0a5c9230bd1d69a3e75572ef1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 21 Jul 2013 13:16:24 +0200 Subject: [PATCH 0357/1368] drm/i915: fix up gt init sequence fallout commit 181d1b9e31c668259d3798c521672afb8edd355c upstream. The regression fix for gen6+ rps fallout commit 7dcd2677ea912573d9ed4bcd629b0023b2d11505 Author: Konstantin Khlebnikov Date: Wed Jul 17 10:22:58 2013 +0400 drm/i915: fix long-standing SNB regression in power consumption after resume unintentionally also changed the init sequence ordering between gt_init and gt_reset - we need to reset BIOS damage like leftover forcewake references before we run our own code. Otherwise we can get nasty dmesg noise like [drm:__gen6_gt_force_wake_mt_get] *ERROR* Timed out waiting for forcewake old ack to clear. again. Since _reset suggests that we first need to have stuff initialized (which isn't the case here) call it sanitze instead. While at it also block out the rps disable introduced by the above commit on ilk: We don't have any knowledge of ilk rps being broken in similar ways. And the disable functions uses the default hw state which is only read out when we're enabling rps. So essentially we've been writing random grabage into that register. Reported-by: Chris Wilson Cc: Chris Wilson Cc: Konstantin Khlebnikov Cc: Jesse Barnes Tested-by: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index d1ee611d213d..4d61e09bc212 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1608,8 +1608,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) intel_detect_pch(dev); intel_irq_init(dev); + intel_gt_sanitize(dev); intel_gt_init(dev); - intel_gt_reset(dev); /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 523a633b86f3..bc6cd3117ac3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -685,7 +685,7 @@ static int i915_drm_thaw(struct drm_device *dev) { int error = 0; - intel_gt_reset(dev); + intel_gt_sanitize(dev); if (drm_core_check_feature(dev, DRIVER_MODESET)) { mutex_lock(&dev->struct_mutex); @@ -711,7 +711,7 @@ int i915_resume(struct drm_device *dev) pci_set_master(dev->pdev); - intel_gt_reset(dev); + intel_gt_sanitize(dev); /* * Platforms with opregion should have sane BIOS, older ones (gen3 and diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d69f21cf1851..c76244159a1f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1477,7 +1477,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged); extern void intel_irq_init(struct drm_device *dev); extern void intel_hpd_init(struct drm_device *dev); extern void intel_gt_init(struct drm_device *dev); -extern void intel_gt_reset(struct drm_device *dev); +extern void intel_gt_sanitize(struct drm_device *dev); void i915_error_state_free(struct kref *error_ref); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 60d57affc748..81a9e1d7647f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4486,7 +4486,7 @@ static void vlv_force_wake_put(struct drm_i915_private *dev_priv) gen6_gt_check_fifodbg(dev_priv); } -void intel_gt_reset(struct drm_device *dev) +void intel_gt_sanitize(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -4499,7 +4499,8 @@ void intel_gt_reset(struct drm_device *dev) } /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_disable_gt_powersave(dev); + if (INTEL_INFO(dev)->gen >= 6) + intel_disable_gt_powersave(dev); } void intel_gt_init(struct drm_device *dev) From c9af307d38974264922d35c77bb71087d171f8f8 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 30 Jul 2013 16:27:57 -0700 Subject: [PATCH 0358/1368] drm/i915: fix missed hunk after GT access breakage commit e1b4d3036c07ff137955fb1c0197ab62534f46ec upstream. Upon some code refactoring, a hunk was missed. This was fixed for next, but missed the current trees, and hasn't yet been merged by Dave Airlie. It is fixed in: commit 907b28c56ea40629aa6595ddfa414ec2fc7da41c Author: Chris Wilson Date: Fri Jul 19 20:36:52 2013 +0100 drm/i915: Colocate all GT access routines in the same file It is introduced by: commit 181d1b9e31c668259d3798c521672afb8edd355c Author: Daniel Vetter Date: Sun Jul 21 13:16:24 2013 +0200 drm/i915: fix up gt init sequence fallout Reported-by: Dave Jones Signed-off-by: Ben Widawsky Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 4d61e09bc212..f9685900bad8 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1608,6 +1608,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) intel_detect_pch(dev); intel_irq_init(dev); + intel_pm_init(dev); intel_gt_sanitize(dev); intel_gt_init(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c76244159a1f..47d8b68c5004 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1475,6 +1475,7 @@ void i915_hangcheck_elapsed(unsigned long data); void i915_handle_error(struct drm_device *dev, bool wedged); extern void intel_irq_init(struct drm_device *dev); +extern void intel_pm_init(struct drm_device *dev); extern void intel_hpd_init(struct drm_device *dev); extern void intel_gt_init(struct drm_device *dev); extern void intel_gt_sanitize(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 81a9e1d7647f..2cfe9f6b0bf2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4548,6 +4548,12 @@ void intel_gt_init(struct drm_device *dev) dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get; dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put; } +} + +void intel_pm_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, intel_gen6_powersave_work); } From 6a5213cb7db96dc498b6c3cf6642ab6c8b2e08a3 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 23 Jul 2013 15:49:39 +0200 Subject: [PATCH 0359/1368] drm/nouveau: fix semaphore dmabuf obj commit 7a7da592cbb22a1d360638dbecc393470c5effe3 upstream. Fixes some dmabuf object errors on nv50 chipset and below. Signed-off-by: Maarten Lankhorst Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nv17_fence.c | 2 +- drivers/gpu/drm/nouveau/nv50_fence.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c index 8e47a9bae8c3..22aa9963ea6f 100644 --- a/drivers/gpu/drm/nouveau/nv17_fence.c +++ b/drivers/gpu/drm/nouveau/nv17_fence.c @@ -76,7 +76,7 @@ nv17_fence_context_new(struct nouveau_channel *chan) struct ttm_mem_reg *mem = &priv->bo->bo.mem; struct nouveau_object *object; u32 start = mem->start * PAGE_SIZE; - u32 limit = mem->start + mem->size - 1; + u32 limit = start + mem->size - 1; int ret = 0; fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL); diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c index f9701e567db8..0ee363840035 100644 --- a/drivers/gpu/drm/nouveau/nv50_fence.c +++ b/drivers/gpu/drm/nouveau/nv50_fence.c @@ -39,6 +39,8 @@ nv50_fence_context_new(struct nouveau_channel *chan) struct nv10_fence_chan *fctx; struct ttm_mem_reg *mem = &priv->bo->bo.mem; struct nouveau_object *object; + u32 start = mem->start * PAGE_SIZE; + u32 limit = start + mem->size - 1; int ret, i; fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL); @@ -51,26 +53,28 @@ nv50_fence_context_new(struct nouveau_channel *chan) fctx->base.sync = nv17_fence_sync; ret = nouveau_object_new(nv_object(chan->cli), chan->handle, - NvSema, 0x0002, + NvSema, 0x003d, &(struct nv_dma_class) { .flags = NV_DMA_TARGET_VRAM | NV_DMA_ACCESS_RDWR, - .start = mem->start * PAGE_SIZE, - .limit = mem->size - 1, + .start = start, + .limit = limit, }, sizeof(struct nv_dma_class), &object); /* dma objects for display sync channel semaphore blocks */ for (i = 0; !ret && i < dev->mode_config.num_crtc; i++) { struct nouveau_bo *bo = nv50_display_crtc_sema(dev, i); + u32 start = bo->bo.mem.start * PAGE_SIZE; + u32 limit = start + bo->bo.mem.size - 1; ret = nouveau_object_new(nv_object(chan->cli), chan->handle, NvEvoSema0 + i, 0x003d, &(struct nv_dma_class) { .flags = NV_DMA_TARGET_VRAM | NV_DMA_ACCESS_RDWR, - .start = bo->bo.offset, - .limit = bo->bo.offset + 0xfff, + .start = start, + .limit = limit, }, sizeof(struct nv_dma_class), &object); } From 8414d407af75ccbd398898fdb108189afc22ded4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 26 Jul 2013 13:26:05 -0400 Subject: [PATCH 0360/1368] drm/radeon: fix audio dto programming on DCE4+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7d61d835824f73dc4097b51f800382467c8049c5 upstream. We need to set the dto source before setting the dividers otherwise we may get stability problems with the dto leading to audio playback problems. Signed-off-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index b0d3fb341417..bb9ea3641312 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -157,9 +157,9 @@ static void evergreen_audio_set_dto(struct drm_encoder *encoder, u32 clock) * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ + WREG32(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL(radeon_crtc->crtc_id)); WREG32(DCCG_AUDIO_DTO0_PHASE, base_rate * 100); WREG32(DCCG_AUDIO_DTO0_MODULE, clock * 100); - WREG32(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL(radeon_crtc->crtc_id)); } From 6f8bbaf568c7f2c497558bfd04654c0b9841ad57 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Jul 2013 00:22:53 -0400 Subject: [PATCH 0361/1368] drm/radeon/atom: initialize more atom interpretor elements to 0 commit 42a21826dc54583cdb79cc8477732e911ac9c376 upstream. The ProcessAuxChannel table on some rv635 boards assumes the divmul members are initialized to 0 otherwise we get an invalid fb offset since it has a bad mask set when setting the fb base. While here initialize all the atom interpretor elements to 0. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=60639 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index fb441a790f3d..15da7ef344a4 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -1222,12 +1222,17 @@ int atom_execute_table(struct atom_context *ctx, int index, uint32_t * params) int r; mutex_lock(&ctx->mutex); + /* reset data block */ + ctx->data_block = 0; /* reset reg block */ ctx->reg_block = 0; /* reset fb window */ ctx->fb_base = 0; /* reset io mode */ ctx->io_mode = ATOM_IO_MM; + /* reset divmul */ + ctx->divmul[0] = 0; + ctx->divmul[1] = 0; r = atom_execute_table_locked(ctx, index, params); mutex_unlock(&ctx->mutex); return r; From 758057d66f667d17034b6080d06a2c6b5b498544 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 28 Jun 2013 09:12:53 -0500 Subject: [PATCH 0362/1368] rtlwifi: Initialize power-setting callback for USB devices commit bcfb879432094c267c35a7ff75d953d3a66c193a upstream. Commit a269913c5 entitled "rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue" has two bugs for USB drivers. Firstly, the work queue in question was not initialized. Secondly, the callback routine used by this queue is contained within the file used for PCI devices. As a result, it is not available for architectures without PCI hardware. Signed-off-by: Larry Finger Reported-by: Richard Genoud Tested-by: Richard Genoud Cc: Richard Genoud Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/pci.c | 13 ------------- drivers/net/wireless/rtlwifi/ps.c | 12 ++++++++++++ drivers/net/wireless/rtlwifi/ps.h | 1 + drivers/net/wireless/rtlwifi/usb.c | 2 ++ 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index c97e9d327331..e70b4ffaf97f 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1008,19 +1008,6 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) return; } -static void rtl_lps_change_work_callback(struct work_struct *work) -{ - struct rtl_works *rtlworks = - container_of(work, struct rtl_works, lps_change_work); - struct ieee80211_hw *hw = rtlworks->hw; - struct rtl_priv *rtlpriv = rtl_priv(hw); - - if (rtlpriv->enter_ps) - rtl_lps_enter(hw); - else - rtl_lps_leave(hw); -} - static void _rtl_pci_init_trx_var(struct ieee80211_hw *hw) { struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); diff --git a/drivers/net/wireless/rtlwifi/ps.c b/drivers/net/wireless/rtlwifi/ps.c index 884bceae38a9..71e917db8338 100644 --- a/drivers/net/wireless/rtlwifi/ps.c +++ b/drivers/net/wireless/rtlwifi/ps.c @@ -611,6 +611,18 @@ void rtl_swlps_rf_sleep(struct ieee80211_hw *hw) MSECS(sleep_intv * mac->vif->bss_conf.beacon_int - 40)); } +void rtl_lps_change_work_callback(struct work_struct *work) +{ + struct rtl_works *rtlworks = + container_of(work, struct rtl_works, lps_change_work); + struct ieee80211_hw *hw = rtlworks->hw; + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (rtlpriv->enter_ps) + rtl_lps_enter(hw); + else + rtl_lps_leave(hw); +} void rtl_swlps_wq_callback(void *data) { diff --git a/drivers/net/wireless/rtlwifi/ps.h b/drivers/net/wireless/rtlwifi/ps.h index 4d682b753f50..88bd76ea88f7 100644 --- a/drivers/net/wireless/rtlwifi/ps.h +++ b/drivers/net/wireless/rtlwifi/ps.h @@ -49,5 +49,6 @@ void rtl_swlps_rf_awake(struct ieee80211_hw *hw); void rtl_swlps_rf_sleep(struct ieee80211_hw *hw); void rtl_p2p_ps_cmd(struct ieee80211_hw *hw, u8 p2p_ps_state); void rtl_p2p_info(struct ieee80211_hw *hw, void *data, unsigned int len); +void rtl_lps_change_work_callback(struct work_struct *work); #endif diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c index a3532e077871..1feebdc92f41 100644 --- a/drivers/net/wireless/rtlwifi/usb.c +++ b/drivers/net/wireless/rtlwifi/usb.c @@ -1070,6 +1070,8 @@ int rtl_usb_probe(struct usb_interface *intf, spin_lock_init(&rtlpriv->locks.usb_lock); INIT_WORK(&rtlpriv->works.fill_h2c_cmd, rtl_fill_h2c_cmd_work_callback); + INIT_WORK(&rtlpriv->works.lps_change_work, + rtl_lps_change_work_callback); rtlpriv->usb_data_index = 0; init_completion(&rtlpriv->firmware_loading_complete); From 2ff7687bca58a839fd36e5a0b167465b4fa8bf51 Mon Sep 17 00:00:00 2001 From: "Rick Farina (Zero_Chaos)" Date: Mon, 29 Jul 2013 15:17:59 -0400 Subject: [PATCH 0363/1368] USB: serial: ftdi_sio: add more RT Systems ftdi devices commit fed1f1ed90bce42ea010e2904cbc04e7b8304940 upstream. RT Systems makes many usb serial cables based on the ftdi_sio driver for programming various amateur radios. This patch is a full listing of their current product offerings and should allow these cables to all be recognized. Signed-off-by: Rick Farina (Zero_Chaos) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 31 +++++++++++++++++++++++++--- drivers/usb/serial/ftdi_sio_ids.h | 34 ++++++++++++++++++++++++++----- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 7260ec660347..b65e657c641d 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -735,9 +735,34 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) }, - { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_SERIAL_VX7_PID) }, - { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_CT29B_PID) }, - { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_RTS01_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S03_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_59_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57A_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57B_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29A_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29B_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29F_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62B_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S01_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29C_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_81B_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_82B_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5D_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K4Y_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5G_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S05_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_60_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_61_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63B_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_64_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_65_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92D_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_W5R_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_A5R_PID) }, + { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_PW1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MAXSTREAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PHI_FISCO_PID) }, { USB_DEVICE(TML_VID, TML_USB_SERIAL_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 6dd79253205d..1b8af461b522 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -815,11 +815,35 @@ /* * RT Systems programming cables for various ham radios */ -#define RTSYSTEMS_VID 0x2100 /* Vendor ID */ -#define RTSYSTEMS_SERIAL_VX7_PID 0x9e52 /* Serial converter for VX-7 Radios using FT232RL */ -#define RTSYSTEMS_CT29B_PID 0x9e54 /* CT29B Radio Cable */ -#define RTSYSTEMS_RTS01_PID 0x9e57 /* USB-RTS01 Radio Cable */ - +#define RTSYSTEMS_VID 0x2100 /* Vendor ID */ +#define RTSYSTEMS_USB_S03_PID 0x9001 /* RTS-03 USB to Serial Adapter */ +#define RTSYSTEMS_USB_59_PID 0x9e50 /* USB-59 USB to 8 pin plug */ +#define RTSYSTEMS_USB_57A_PID 0x9e51 /* USB-57A USB to 4pin 3.5mm plug */ +#define RTSYSTEMS_USB_57B_PID 0x9e52 /* USB-57B USB to extended 4pin 3.5mm plug */ +#define RTSYSTEMS_USB_29A_PID 0x9e53 /* USB-29A USB to 3.5mm stereo plug */ +#define RTSYSTEMS_USB_29B_PID 0x9e54 /* USB-29B USB to 6 pin mini din */ +#define RTSYSTEMS_USB_29F_PID 0x9e55 /* USB-29F USB to 6 pin modular plug */ +#define RTSYSTEMS_USB_62B_PID 0x9e56 /* USB-62B USB to 8 pin mini din plug*/ +#define RTSYSTEMS_USB_S01_PID 0x9e57 /* USB-RTS01 USB to 3.5 mm stereo plug*/ +#define RTSYSTEMS_USB_63_PID 0x9e58 /* USB-63 USB to 9 pin female*/ +#define RTSYSTEMS_USB_29C_PID 0x9e59 /* USB-29C USB to 4 pin modular plug*/ +#define RTSYSTEMS_USB_81B_PID 0x9e5A /* USB-81 USB to 8 pin mini din plug*/ +#define RTSYSTEMS_USB_82B_PID 0x9e5B /* USB-82 USB to 2.5 mm stereo plug*/ +#define RTSYSTEMS_USB_K5D_PID 0x9e5C /* USB-K5D USB to 8 pin modular plug*/ +#define RTSYSTEMS_USB_K4Y_PID 0x9e5D /* USB-K4Y USB to 2.5/3.5 mm plugs*/ +#define RTSYSTEMS_USB_K5G_PID 0x9e5E /* USB-K5G USB to 8 pin modular plug*/ +#define RTSYSTEMS_USB_S05_PID 0x9e5F /* USB-RTS05 USB to 2.5 mm stereo plug*/ +#define RTSYSTEMS_USB_60_PID 0x9e60 /* USB-60 USB to 6 pin din*/ +#define RTSYSTEMS_USB_61_PID 0x9e61 /* USB-61 USB to 6 pin mini din*/ +#define RTSYSTEMS_USB_62_PID 0x9e62 /* USB-62 USB to 8 pin mini din*/ +#define RTSYSTEMS_USB_63B_PID 0x9e63 /* USB-63 USB to 9 pin female*/ +#define RTSYSTEMS_USB_64_PID 0x9e64 /* USB-64 USB to 9 pin male*/ +#define RTSYSTEMS_USB_65_PID 0x9e65 /* USB-65 USB to 9 pin female null modem*/ +#define RTSYSTEMS_USB_92_PID 0x9e66 /* USB-92 USB to 12 pin plug*/ +#define RTSYSTEMS_USB_92D_PID 0x9e67 /* USB-92D USB to 12 pin plug data*/ +#define RTSYSTEMS_USB_W5R_PID 0x9e68 /* USB-W5R USB to 8 pin modular plug*/ +#define RTSYSTEMS_USB_A5R_PID 0x9e69 /* USB-A5R USB to 8 pin modular plug*/ +#define RTSYSTEMS_USB_PW1_PID 0x9e6A /* USB-PW1 USB to 8 pin modular plug*/ /* * Physik Instrumente From 8fc2b1321b8fb0ed3d80de5303b8db99923bdecf Mon Sep 17 00:00:00 2001 From: Rong Wang Date: Sun, 28 Jul 2013 23:01:35 +0800 Subject: [PATCH 0364/1368] usb: gadget: udc-core: fix the typo of udc state attribute commit 1894870eb4240399fabc6f0cb8c6fff4e6edbe83 upstream. The name of udc state attribute file under sysfs is registered as "state", while usb_gadget_set_state take it as "status" when it's going to update. This patch fixes the typo. Signed-off-by: Rong Wang Signed-off-by: Barry Song Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index ffd8fa541101..5514822114a5 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -105,7 +105,7 @@ void usb_gadget_set_state(struct usb_gadget *gadget, enum usb_device_state state) { gadget->state = state; - sysfs_notify(&gadget->dev.kobj, NULL, "status"); + sysfs_notify(&gadget->dev.kobj, NULL, "state"); } EXPORT_SYMBOL_GPL(usb_gadget_set_state); From 0bd6f78c3016704d2b6978eab835dcd0759dca36 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 31 Jul 2013 13:53:28 -0700 Subject: [PATCH 0365/1368] mm: mempolicy: fix mbind_range() && vma_adjust() interaction commit 3964acd0dbec123aa0a621973a2a0580034b4788 upstream. vma_adjust() does vma_set_policy(vma, vma_policy(next)) and this is doubly wrong: 1. This leaks vma->vm_policy if it is not NULL and not equal to next->vm_policy. This can happen if vma_merge() expands "area", not prev (case 8). 2. This sets the wrong policy if vma_merge() joins prev and area, area is the vma the caller needs to update and it still has the old policy. Revert commit 1444f92c8498 ("mm: merging memory blocks resets mempolicy") which introduced these problems. Change mbind_range() to recheck mpol_equal() after vma_merge() to fix the problem that commit tried to address. Signed-off-by: Oleg Nesterov Acked-by: KOSAKI Motohiro Cc: Steven T Hampson Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Rik van Riel Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 6 +++++- mm/mmap.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 74310017296e..4baf12e534d1 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -732,7 +732,10 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (prev) { vma = prev; next = vma->vm_next; - continue; + if (mpol_equal(vma_policy(vma), new_pol)) + continue; + /* vma_merge() joined vma && vma->next, case 8 */ + goto replace; } if (vma->vm_start != vmstart) { err = split_vma(vma->vm_mm, vma, vmstart, 1); @@ -744,6 +747,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (err) goto out; } + replace: err = vma_replace_policy(vma, new_pol); if (err) goto out; diff --git a/mm/mmap.c b/mm/mmap.c index f681e1842fad..7dbe39745be9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -865,7 +865,7 @@ again: remove_next = 1 + (end > next->vm_end); if (next->anon_vma) anon_vma_merge(vma, next); mm->map_count--; - vma_set_policy(vma, vma_policy(next)); + mpol_put(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); /* * In mprotect's case 6 (see comments on vma_merge), From ba9a3c3ce30b8c399d5bfeef1d48a26170dc499e Mon Sep 17 00:00:00 2001 From: Gianluca Anzolin Date: Thu, 25 Jul 2013 07:26:16 +0200 Subject: [PATCH 0366/1368] tty_port: Fix refcounting leak in tty_port_tty_hangup() commit 1d9e689c934bd5ecb0f273c6c65e0655c5cfee5f upstream. The function tty_port_tty_hangup() could leak a reference to the tty_struct: struct tty_struct *tty = tty_port_tty_get(port); if (tty && (!check_clocal || !C_CLOCAL(tty))) { tty_hangup(tty); tty_kref_put(tty); } If tty != NULL and the second condition is false we never call tty_kref_put and the reference is leaked. Fix by always calling tty_kref_put() which accepts a NULL argument. The patch fixes a regression introduced by commit aa27a094. Acked-by: Gustavo Padovan Signed-off-by: Gianluca Anzolin Acked-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 121aeb9393e1..f597e88a705d 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -256,10 +256,9 @@ void tty_port_tty_hangup(struct tty_port *port, bool check_clocal) { struct tty_struct *tty = tty_port_tty_get(port); - if (tty && (!check_clocal || !C_CLOCAL(tty))) { + if (tty && (!check_clocal || !C_CLOCAL(tty))) tty_hangup(tty); - tty_kref_put(tty); - } + tty_kref_put(tty); } EXPORT_SYMBOL_GPL(tty_port_tty_hangup); From d0f1a6e5be8d8ada73891832af1bd3ecb3f52bb7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Jul 2013 03:13:55 +0400 Subject: [PATCH 0367/1368] livelock avoidance in sget() commit acfec9a5a892f98461f52ed5770de99a3e571ae2 upstream. Eric Sandeen has found a nasty livelock in sget() - take a mount(2) about to fail. The superblock is on ->fs_supers, ->s_umount is held exclusive, ->s_active is 1. Along comes two more processes, trying to mount the same thing; sget() in each is picking that superblock, bumping ->s_count and trying to grab ->s_umount. ->s_active is 3 now. Original mount(2) finally gets to deactivate_locked_super() on failure; ->s_active is 2, superblock is still ->fs_supers because shutdown will *not* happen until ->s_active hits 0. ->s_umount is dropped and now we have two processes chasing each other: s_active = 2, A acquired ->s_umount, B blocked A sees that the damn thing is stillborn, does deactivate_locked_super() s_active = 1, A drops ->s_umount, B gets it A restarts the search and finds the same superblock. And bumps it ->s_active. s_active = 2, B holds ->s_umount, A blocked on trying to get it ... and we are in the earlier situation with A and B switched places. The root cause, of course, is that ->s_active should not grow until we'd got MS_BORN. Then failing ->mount() will have deactivate_locked_super() shut the damn thing down. Fortunately, it's easy to do - the key point is that grab_super() is called only for superblocks currently on ->fs_supers, so it can bump ->s_count and grab ->s_umount first, then check MS_BORN and bump ->s_active; we must never increment ->s_count for superblocks past ->kill_sb(), but grab_super() is never called for those. The bug is pretty old; we would've caught it by now, if not for accidental exclusion between sget() for block filesystems; the things like cgroup or e.g. mtd-based filesystems don't have anything of that sort, so they get bitten. The right way to deal with that is obviously to fix sget()... Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/super.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/super.c b/fs/super.c index 7465d4364208..68307c029228 100644 --- a/fs/super.c +++ b/fs/super.c @@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super); * and want to turn it into a full-blown active reference. grab_super() * is called with sb_lock held and drops it. Returns 1 in case of * success, 0 if we had failed (superblock contents was already dead or - * dying when grab_super() had been called). + * dying when grab_super() had been called). Note that this is only + * called for superblocks not in rundown mode (== ones still on ->fs_supers + * of their type), so increment of ->s_count is OK here. */ static int grab_super(struct super_block *s) __releases(sb_lock) { - if (atomic_inc_not_zero(&s->s_active)) { - spin_unlock(&sb_lock); - return 1; - } - /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* wait for it to die */ down_write(&s->s_umount); + if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + put_super(s); + return 1; + } up_write(&s->s_umount); put_super(s); return 0; @@ -463,11 +463,6 @@ struct super_block *sget(struct file_system_type *type, destroy_super(s); s = NULL; } - down_write(&old->s_umount); - if (unlikely(!(old->s_flags & MS_BORN))) { - deactivate_locked_super(old); - goto retry; - } return old; } } @@ -660,10 +655,10 @@ struct super_block *get_active_super(struct block_device *bdev) if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { - if (grab_super(sb)) /* drops sb_lock */ - return sb; - else + if (!grab_super(sb)) goto restart; + up_write(&sb->s_umount); + return sb; } } spin_unlock(&sb_lock); From 3d63d1e0fe8aeed27f6ef38fa6eaf518dee1bbab Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 19 Jul 2013 15:51:58 +0100 Subject: [PATCH 0368/1368] xen/evtchn: avoid a deadlock when unbinding an event channel commit 179fbd5a45f0d4034cc6fd37b8d367a3b79663c4 upstream. Unbinding an event channel (either with the ioctl or when the evtchn device is closed) may deadlock because disable_irq() is called with port_user_lock held which is also locked by the interrupt handler. Think of the IOCTL_EVTCHN_UNBIND is being serviced, the routine has just taken the lock, and an interrupt happens. The evtchn_interrupt is invoked, tries to take the lock and spins forever. A quick glance at the code shows that the spinlock is a local IRQ variant. Unfortunately that does not help as "disable_irq() waits for the interrupt handler on all CPUs to stop running. If the irq occurs on another VCPU, it tries to take port_user_lock and can't because the unbind ioctl is holding it." (from David). Hence we cannot depend on the said spinlock to protect us. We could make it a system wide IRQ disable spinlock but there is a better way. We can piggyback on the fact that the existence of the spinlock is to make get_port_user() checks be up-to-date. And we can alter those checks to not depend on the spin lock (as it's protected by u->bind_mutex in the ioctl) and can remove the unnecessary locking (this is IOCTL_EVTCHN_UNBIND) path. In the interrupt handler we cannot use the mutex, but we do not need it. "The unbind disables the irq before making the port user stale, so when you clear it you are guaranteed that the interrupt handler that might use that port cannot be running." (from David). Hence this patch removes the spinlock usage on the teardown path and piggybacks on disable_irq happening before we muck with the get_port_user() data. This ensures that the interrupt handler will never run on stale data. Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk [v1: Expanded the commit description a bit] Signed-off-by: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- drivers/xen/evtchn.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 45c8efaa6b3e..34924fb9d02a 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -377,18 +377,12 @@ static long evtchn_ioctl(struct file *file, if (unbind.port >= NR_EVENT_CHANNELS) break; - spin_lock_irq(&port_user_lock); - rc = -ENOTCONN; - if (get_port_user(unbind.port) != u) { - spin_unlock_irq(&port_user_lock); + if (get_port_user(unbind.port) != u) break; - } disable_irq(irq_from_evtchn(unbind.port)); - spin_unlock_irq(&port_user_lock); - evtchn_unbind_from_user(u, unbind.port); rc = 0; @@ -488,26 +482,15 @@ static int evtchn_release(struct inode *inode, struct file *filp) int i; struct per_user_data *u = filp->private_data; - spin_lock_irq(&port_user_lock); - - free_page((unsigned long)u->ring); - for (i = 0; i < NR_EVENT_CHANNELS; i++) { if (get_port_user(i) != u) continue; disable_irq(irq_from_evtchn(i)); - } - - spin_unlock_irq(&port_user_lock); - - for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (get_port_user(i) != u) - continue; - evtchn_unbind_from_user(get_port_user(i), i); } + free_page((unsigned long)u->ring); kfree(u->name); kfree(u); From 6655d76ecb91fa618c4d0d0dd653e94078b3f050 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sun, 14 Jul 2013 14:03:27 +0300 Subject: [PATCH 0369/1368] radeon kms: do not flush uninitialized hotplug work commit a01c34f72e7cd2624570818f579b5ab464f93de2 upstream. Fix a warning from lockdep caused by calling flush_work() for uninitialized hotplug work. Initialize hotplug_work, audio_work and reset_work upon successful radeon_irq_kms_init() completion and thus perform hotplug flush_work only when rdev->irq.installed is true. [ 4.790019] [drm] Loading CEDAR Microcode [ 4.790943] r600_cp: Failed to load firmware "radeon/CEDAR_smc.bin" [ 4.791152] [drm:evergreen_startup] *ERROR* Failed to load firmware! [ 4.791330] radeon 0000:01:00.0: disabling GPU acceleration [ 4.792633] INFO: trying to register non-static key. [ 4.792792] the code is fine but needs lockdep annotation. [ 4.792953] turning off the locking correctness validator. [ 4.793114] CPU: 2 PID: 1 Comm: swapper/0 Not tainted 3.11.0-rc0-dbg-10676-gfe56456-dirty #1816 [ 4.793314] Hardware name: Acer Aspire 5741G /Aspire 5741G , BIOS V1.20 02/08/2011 [ 4.793507] ffffffff821fd810 ffff8801530b9a18 ffffffff8160434e 0000000000000002 [ 4.794155] ffff8801530b9ad8 ffffffff810b8404 ffff8801530b0798 ffff8801530b0000 [ 4.794789] ffff8801530b9b00 0000000000000046 00000000000004c0 ffffffff00000000 [ 4.795418] Call Trace: [ 4.795573] [] dump_stack+0x4e/0x82 [ 4.795731] [] __lock_acquire+0x1a64/0x1d30 [ 4.795893] [] ? dev_vprintk_emit+0x50/0x60 [ 4.796034] [] lock_acquire+0xa4/0x200 [ 4.796216] [] ? flush_work+0x5/0x280 [ 4.796375] [] flush_work+0x3d/0x280 [ 4.796520] [] ? flush_work+0x5/0x280 [ 4.796682] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 4.796862] [] ? delay_tsc+0x95/0xf0 [ 4.797024] [] radeon_irq_kms_fini+0x2b/0x70 [ 4.797186] [] evergreen_init+0x2a9/0x2e0 [ 4.797347] [] radeon_device_init+0x5ef/0x700 [ 4.797511] [] ? pci_find_capability+0x47/0x50 [ 4.797672] [] radeon_driver_load_kms+0x8d/0x150 [ 4.797843] [] drm_get_pci_dev+0x166/0x280 [ 4.798007] [] ? kfree+0xf5/0x2e0 [ 4.798168] [] ? radeon_pci_probe+0x98/0xd0 [ 4.798329] [] radeon_pci_probe+0xaa/0xd0 [ 4.798489] [] pci_device_probe+0x84/0xe0 [ 4.798644] [] driver_probe_device+0x76/0x240 [ 4.798805] [] __driver_attach+0x93/0xa0 [ 4.798948] [] ? __device_attach+0x40/0x40 [ 4.799126] [] bus_for_each_dev+0x6b/0xb0 [ 4.799272] [] driver_attach+0x1e/0x20 [ 4.799434] [] bus_add_driver+0x1f0/0x280 [ 4.799596] [] driver_register+0x74/0x150 [ 4.799758] [] __pci_register_driver+0x5d/0x60 [ 4.799936] [] ? ttm_init+0x67/0x67 [ 4.800081] [] drm_pci_init+0x115/0x130 [ 4.800243] [] ? ttm_init+0x67/0x67 [ 4.800405] [] radeon_init+0x9c/0xba [ 4.800586] [] do_one_initcall+0xfa/0x150 [ 4.800746] [] ? parse_args+0x120/0x330 [ 4.800909] [] kernel_init_freeable+0x111/0x191 [ 4.801052] [] ? do_early_param+0x88/0x88 [ 4.801233] [] ? rest_init+0x140/0x140 [ 4.801393] [] kernel_init+0xe/0x180 [ 4.801556] [] ret_from_fork+0x7c/0xb0 [ 4.801718] [] ? rest_init+0x140/0x140 Signed-off-by: Sergey Senozhatsky Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_irq_kms.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5a99d433fc35..1fe12ab5c5ea 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -241,9 +241,6 @@ int radeon_irq_kms_init(struct radeon_device *rdev) { int r = 0; - INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); - INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); - spin_lock_init(&rdev->irq.lock); r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { @@ -265,6 +262,10 @@ int radeon_irq_kms_init(struct radeon_device *rdev) rdev->irq.installed = false; return r; } + + INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); + INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); + DRM_INFO("radeon: irq initialized.\n"); return 0; } @@ -284,8 +285,8 @@ void radeon_irq_kms_fini(struct radeon_device *rdev) rdev->irq.installed = false; if (rdev->msi_enabled) pci_disable_msi(rdev->pdev); + flush_work(&rdev->hotplug_work); } - flush_work(&rdev->hotplug_work); } /** From 1aa58ccd029fc75c115ae35c3fcb4d43043c0725 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 30 Jul 2013 04:04:01 +0000 Subject: [PATCH 0370/1368] iscsi-target: Fix iscsit_add_reject* usage for iser commit ba159914086f06532079fc15141f46ffe7e04a41 upstream This patch changes iscsit_add_reject() + iscsit_add_reject_from_cmd() usage to not sleep on iscsi_cmd->reject_comp to address a free-after-use usage bug in v3.10 with iser-target code. It saves ->reject_reason for use within iscsit_build_reject() so the correct value for both transport cases. It also drops the legacy fail_conn parameter usage throughput iscsi-target code and adds two iscsit_add_reject_cmd() and iscsit_reject_cmd helper functions, along with various small cleanups. (v2: Re-enable target_put_sess_cmd() to be called from iscsit_add_reject_from_cmd() for rejects invoked after target_get_sess_cmd() has been called) Signed-off-by: Nicholas Bellinger Cc: Or Gerlitz Cc: Mike Christie Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 5 - drivers/target/iscsi/iscsi_target.c | 253 +++++++++-------------- drivers/target/iscsi/iscsi_target.h | 2 +- drivers/target/iscsi/iscsi_target_core.h | 4 +- drivers/target/iscsi/iscsi_target_erl0.c | 7 +- drivers/target/iscsi/iscsi_target_erl1.c | 20 +- drivers/target/iscsi/iscsi_target_util.c | 1 - include/target/iscsi/iscsi_transport.h | 2 - 8 files changed, 117 insertions(+), 177 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index bfc21798bd5e..4f6faf0915d5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -957,11 +957,6 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); - if (rc == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, (unsigned char *)hdr, cmd); - return 0; } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index f1db6ee71006..03ccb610d2bb 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -628,25 +628,18 @@ static void __exit iscsi_target_cleanup_module(void) } static int iscsit_add_reject( + struct iscsi_conn *conn, u8 reason, - int fail_conn, - unsigned char *buf, - struct iscsi_conn *conn) + unsigned char *buf) { struct iscsi_cmd *cmd; - struct iscsi_reject *hdr; - int ret; cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) return -1; cmd->iscsi_opcode = ISCSI_OP_REJECT; - if (fail_conn) - cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; - - hdr = (struct iscsi_reject *) cmd->pdu; - hdr->reason = reason; + cmd->reject_reason = reason; cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL); if (!cmd->buf_ptr) { @@ -662,23 +655,16 @@ static int iscsit_add_reject( cmd->i_state = ISTATE_SEND_REJECT; iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); - ret = wait_for_completion_interruptible(&cmd->reject_comp); - if (ret != 0) - return -1; - - return (!fail_conn) ? 0 : -1; + return -1; } -int iscsit_add_reject_from_cmd( +static int iscsit_add_reject_from_cmd( + struct iscsi_cmd *cmd, u8 reason, - int fail_conn, - int add_to_conn, - unsigned char *buf, - struct iscsi_cmd *cmd) + bool add_to_conn, + unsigned char *buf) { struct iscsi_conn *conn; - struct iscsi_reject *hdr; - int ret; if (!cmd->conn) { pr_err("cmd->conn is NULL for ITT: 0x%08x\n", @@ -688,11 +674,7 @@ int iscsit_add_reject_from_cmd( conn = cmd->conn; cmd->iscsi_opcode = ISCSI_OP_REJECT; - if (fail_conn) - cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; - - hdr = (struct iscsi_reject *) cmd->pdu; - hdr->reason = reason; + cmd->reject_reason = reason; cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL); if (!cmd->buf_ptr) { @@ -709,8 +691,6 @@ int iscsit_add_reject_from_cmd( cmd->i_state = ISTATE_SEND_REJECT; iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); - - ret = wait_for_completion_interruptible(&cmd->reject_comp); /* * Perform the kref_put now if se_cmd has already been setup by * scsit_setup_scsi_cmd() @@ -719,12 +699,19 @@ int iscsit_add_reject_from_cmd( pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n"); target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); } - if (ret != 0) - return -1; - - return (!fail_conn) ? 0 : -1; + return -1; +} + +static int iscsit_add_reject_cmd(struct iscsi_cmd *cmd, u8 reason, + unsigned char *buf) +{ + return iscsit_add_reject_from_cmd(cmd, reason, true, buf); +} + +int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8 reason, unsigned char *buf) +{ + return iscsit_add_reject_from_cmd(cmd, reason, false, buf); } -EXPORT_SYMBOL(iscsit_add_reject_from_cmd); /* * Map some portion of the allocated scatterlist to an iovec, suitable for @@ -844,8 +831,8 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, !(hdr->flags & ISCSI_FLAG_CMD_FINAL)) { pr_err("ISCSI_FLAG_CMD_WRITE & ISCSI_FLAG_CMD_FINAL" " not set. Bad iSCSI Initiator.\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } if (((hdr->flags & ISCSI_FLAG_CMD_READ) || @@ -865,8 +852,8 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE" " set when Expected Data Transfer Length is 0 for" " CDB: 0x%02x. Bad iSCSI Initiator.\n", hdr->cdb[0]); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } done: @@ -875,62 +862,62 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, pr_err("ISCSI_FLAG_CMD_READ and/or ISCSI_FLAG_CMD_WRITE" " MUST be set if Expected Data Transfer Length is not 0." " Bad iSCSI Initiator\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } if ((hdr->flags & ISCSI_FLAG_CMD_READ) && (hdr->flags & ISCSI_FLAG_CMD_WRITE)) { pr_err("Bidirectional operations not supported!\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } if (hdr->opcode & ISCSI_OP_IMMEDIATE) { pr_err("Illegally set Immediate Bit in iSCSI Initiator" " Scsi Command PDU.\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } if (payload_length && !conn->sess->sess_ops->ImmediateData) { pr_err("ImmediateData=No but DataSegmentLength=%u," " protocol error.\n", payload_length); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } - if ((be32_to_cpu(hdr->data_length )== payload_length) && + if ((be32_to_cpu(hdr->data_length) == payload_length) && (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))) { pr_err("Expected Data Transfer Length and Length of" " Immediate Data are the same, but ISCSI_FLAG_CMD_FINAL" " bit is not set protocol error\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if (payload_length > be32_to_cpu(hdr->data_length)) { pr_err("DataSegmentLength: %u is greater than" " EDTL: %u, protocol error.\n", payload_length, hdr->data_length); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { pr_err("DataSegmentLength: %u is greater than" " MaxXmitDataSegmentLength: %u, protocol error.\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if (payload_length > conn->sess->sess_ops->FirstBurstLength) { pr_err("DataSegmentLength: %u is greater than" " FirstBurstLength: %u, protocol error.\n", payload_length, conn->sess->sess_ops->FirstBurstLength); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE : @@ -985,9 +972,8 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, dr = iscsit_allocate_datain_req(); if (!dr) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); iscsit_attach_datain_req(cmd, dr); } @@ -1015,18 +1001,16 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, cmd->sense_reason = target_setup_cmd_from_cdb(&cmd->se_cmd, hdr->cdb); if (cmd->sense_reason) { if (cmd->sense_reason == TCM_OUT_OF_RESOURCES) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } goto attach_cmd; } if (iscsit_build_pdu_and_seq_lists(cmd, payload_length) < 0) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } attach_cmd: @@ -1075,10 +1059,6 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); return 0; - } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, (unsigned char *)hdr, cmd); } } @@ -1149,11 +1129,6 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, } else if (cmd->unsolicited_data) iscsit_set_unsoliticed_dataout(cmd); - if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, (unsigned char *)hdr, cmd); - } else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) { /* * Immediate Data failed DataCRC and ERL>=1, @@ -1190,9 +1165,8 @@ iscsit_handle_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * traditional iSCSI block I/O. */ if (iscsit_allocate_iovecs(cmd) < 0) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 0, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } immed_data = cmd->immediate_data; @@ -1283,8 +1257,8 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, if (!payload_length) { pr_err("DataOUT payload is ZERO, protocol error.\n"); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); } /* iSCSI write */ @@ -1301,8 +1275,8 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, pr_err("DataSegmentLength: %u is greater than" " MaxXmitDataSegmentLength: %u\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); } cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, @@ -1325,8 +1299,7 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, if (cmd->data_direction != DMA_TO_DEVICE) { pr_err("Command ITT: 0x%08x received DataOUT for a" " NON-WRITE command.\n", cmd->init_task_tag); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, buf); } se_cmd = &cmd->se_cmd; iscsit_mod_dataout_timer(cmd); @@ -1335,8 +1308,7 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, pr_err("DataOut Offset: %u, Length %u greater than" " iSCSI Command EDTL %u, protocol error.\n", hdr->offset, payload_length, cmd->se_cmd.data_length); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_INVALID, buf); } if (cmd->unsolicited_data) { @@ -1557,8 +1529,8 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (hdr->itt == RESERVED_ITT && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { pr_err("NOPOUT ITT is reserved, but Immediate Bit is" " not set, protocol error.\n"); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); } if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { @@ -1566,8 +1538,8 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, " greater than MaxXmitDataSegmentLength: %u, protocol" " error.\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); } pr_debug("Got NOPOUT Ping %s ITT: 0x%08x, TTT: 0x%08x," @@ -1584,9 +1556,9 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, */ if (hdr->ttt == cpu_to_be32(0xFFFFFFFF)) { if (!cmd) - return iscsit_add_reject( + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + (unsigned char *)hdr); cmd->iscsi_opcode = ISCSI_OP_NOOP_OUT; cmd->i_state = ISTATE_SEND_NOPIN; @@ -1706,9 +1678,7 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, goto ping_out; } if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return -1; return 0; } @@ -1782,8 +1752,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, pr_err("Task Management Request TASK_REASSIGN not" " issued as immediate command, bad iSCSI Initiator" "implementation\n"); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if ((function != ISCSI_TM_FUNC_ABORT_TASK) && be32_to_cpu(hdr->refcmdsn) != ISCSI_RESERVED_TAG) @@ -1795,9 +1765,9 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (!cmd->tmr_req) { pr_err("Unable to allocate memory for" " Task Management command!\n"); - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + buf); } /* @@ -1842,17 +1812,15 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, default: pr_err("Unknown iSCSI TMR Function:" " 0x%02x\n", function); - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } ret = core_tmr_alloc_req(&cmd->se_cmd, cmd->tmr_req, tcm_function, GFP_KERNEL); if (ret < 0) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 1, buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); cmd->tmr_req->se_tmr_req = cmd->se_cmd.se_tmr_req; } @@ -1911,9 +1879,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, break; if (iscsit_check_task_reassign_expdatasn(tmr_req, conn) < 0) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_INVALID, 1, 1, - buf, cmd); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); break; default: pr_err("Unknown TMR function: 0x%02x, protocol" @@ -1937,9 +1904,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) return 0; else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return -1; } iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn)); @@ -1989,8 +1954,7 @@ static int iscsit_handle_text_cmd( pr_err("Unable to accept text parameter length: %u" "greater than MaxXmitDataSegmentLength %u.\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, buf); } pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x," @@ -2092,8 +2056,8 @@ static int iscsit_handle_text_cmd( cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + return iscsit_add_reject(conn, + ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); cmd->iscsi_opcode = ISCSI_OP_TEXT; cmd->i_state = ISTATE_SEND_TEXTRSP; @@ -2113,9 +2077,7 @@ static int iscsit_handle_text_cmd( if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return -1; return 0; } @@ -2301,13 +2263,10 @@ iscsit_handle_logout_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, return ret; } else { cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); - if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) logout_remove = 0; - } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); - } + else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return -1; } return logout_remove; @@ -2331,8 +2290,8 @@ static int iscsit_handle_snack( if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Initiator sent SNACK request while in" " ErrorRecoveryLevel=0.\n"); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); } /* * SNACK_DATA and SNACK_R2T are both 0, so check which function to @@ -2356,13 +2315,13 @@ static int iscsit_handle_snack( case ISCSI_FLAG_SNACK_TYPE_RDATA: /* FIXME: Support R-Data SNACK */ pr_err("R-Data SNACK Not Supported.\n"); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); default: pr_err("Unknown SNACK type 0x%02x, protocol" " error.\n", hdr->flags & 0x0f); - return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buf, conn); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buf); } return 0; @@ -2434,14 +2393,14 @@ static int iscsit_handle_immediate_data( pr_err("Unable to recover from" " Immediate Data digest failure while" " in ERL=0.\n"); - iscsit_add_reject_from_cmd( + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, - 1, 0, (unsigned char *)hdr, cmd); + (unsigned char *)hdr); return IMMEDIATE_DATA_CANNOT_RECOVER; } else { - iscsit_add_reject_from_cmd( + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, - 0, 0, (unsigned char *)hdr, cmd); + (unsigned char *)hdr); return IMMEDIATE_DATA_ERL1_CRC_FAILURE; } } else { @@ -3541,6 +3500,7 @@ iscsit_build_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn, struct iscsi_reject *hdr) { hdr->opcode = ISCSI_OP_REJECT; + hdr->reason = cmd->reject_reason; hdr->flags |= ISCSI_FLAG_CMD_FINAL; hton24(hdr->dlength, ISCSI_HDR_LEN); hdr->ffffffff = cpu_to_be32(0xffffffff); @@ -3814,18 +3774,11 @@ iscsit_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) case ISTATE_SEND_STATUS_RECOVERY: case ISTATE_SEND_TEXTRSP: case ISTATE_SEND_TASKMGTRSP: + case ISTATE_SEND_REJECT: spin_lock_bh(&cmd->istate_lock); cmd->i_state = ISTATE_SENT_STATUS; spin_unlock_bh(&cmd->istate_lock); break; - case ISTATE_SEND_REJECT: - if (cmd->cmd_flags & ICF_REJECT_FAIL_CONN) { - cmd->cmd_flags &= ~ICF_REJECT_FAIL_CONN; - complete(&cmd->reject_comp); - goto err; - } - complete(&cmd->reject_comp); - break; default: pr_err("Unknown Opcode: 0x%02x ITT:" " 0x%08x, i_state: %d on CID: %hu\n", @@ -3930,8 +3883,7 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) case ISCSI_OP_SCSI_CMD: cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + goto reject; ret = iscsit_handle_scsi_cmd(conn, cmd, buf); break; @@ -3943,16 +3895,14 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) if (hdr->ttt == cpu_to_be32(0xFFFFFFFF)) { cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + goto reject; } ret = iscsit_handle_nop_out(conn, cmd, buf); break; case ISCSI_OP_SCSI_TMFUNC: cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + goto reject; ret = iscsit_handle_task_mgt_cmd(conn, cmd, buf); break; @@ -3962,8 +3912,7 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) case ISCSI_OP_LOGOUT: cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) - return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, buf, conn); + goto reject; ret = iscsit_handle_logout_cmd(conn, cmd, buf); if (ret > 0) @@ -3995,6 +3944,8 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) } return ret; +reject: + return iscsit_add_reject(conn, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } int iscsi_target_rx_thread(void *arg) @@ -4094,8 +4045,8 @@ int iscsi_target_rx_thread(void *arg) (!(opcode & ISCSI_OP_LOGOUT)))) { pr_err("Received illegal iSCSI Opcode: 0x%02x" " while in Discovery Session, rejecting.\n", opcode); - iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, - buffer, conn); + iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + buffer); goto transport_err; } diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index a0050b2f294e..2c437cb8ca00 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -15,7 +15,7 @@ extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, struct iscsi_portal_group *); extern int iscsit_del_np(struct iscsi_np *); -extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, struct iscsi_cmd *); +extern int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8, unsigned char *); extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); extern int iscsit_logout_closesession(struct iscsi_cmd *, struct iscsi_conn *); extern int iscsit_logout_closeconnection(struct iscsi_cmd *, struct iscsi_conn *); diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 60ec4b92be03..8907dcdc0db9 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -132,7 +132,6 @@ enum cmd_flags_table { ICF_CONTIG_MEMORY = 0x00000020, ICF_ATTACHED_TO_RQUEUE = 0x00000040, ICF_OOO_CMDSN = 0x00000080, - ICF_REJECT_FAIL_CONN = 0x00000100, }; /* struct iscsi_cmd->i_state */ @@ -366,6 +365,8 @@ struct iscsi_cmd { u8 maxcmdsn_inc; /* Immediate Unsolicited Dataout */ u8 unsolicited_data; + /* Reject reason code */ + u8 reject_reason; /* CID contained in logout PDU when opcode == ISCSI_INIT_LOGOUT_CMND */ u16 logout_cid; /* Command flags */ @@ -446,7 +447,6 @@ struct iscsi_cmd { struct list_head datain_list; /* R2T List */ struct list_head cmd_r2t_list; - struct completion reject_comp; /* Timer for DataOUT */ struct timer_list dataout_timer; /* Iovecs for SCSI data payload RX/TX w/ kernel level sockets */ diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 7f1116635d17..08bd87833321 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -746,13 +746,12 @@ int iscsit_check_post_dataout( if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Unable to recover from DataOUT CRC" " failure while ERL=0, closing session.\n"); - iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR, - 1, 0, buf, cmd); + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, + buf); return DATAOUT_CANNOT_RECOVER; } - iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR, - 0, 0, buf, cmd); + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, buf); return iscsit_dataout_post_crc_failed(cmd, buf); } } diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 40d9dbca987b..d00f1326f0c8 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -162,9 +162,8 @@ static int iscsit_handle_r2t_snack( " protocol error.\n", cmd->init_task_tag, begrun, (begrun + runlength), cmd->acked_data_sn); - return iscsit_add_reject_from_cmd( - ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); } if (runlength) { @@ -173,8 +172,8 @@ static int iscsit_handle_r2t_snack( " with BegRun: 0x%08x, RunLength: 0x%08x, exceeds" " current R2TSN: 0x%08x, protocol error.\n", cmd->init_task_tag, begrun, runlength, cmd->r2t_sn); - return iscsit_add_reject_from_cmd( - ISCSI_REASON_BOOKMARK_INVALID, 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, + ISCSI_REASON_BOOKMARK_INVALID, buf); } last_r2tsn = (begrun + runlength); } else @@ -433,8 +432,7 @@ static int iscsit_handle_recovery_datain( " protocol error.\n", cmd->init_task_tag, begrun, (begrun + runlength), cmd->acked_data_sn); - return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, buf); } /* @@ -445,14 +443,14 @@ static int iscsit_handle_recovery_datain( pr_err("Initiator requesting BegRun: 0x%08x, RunLength" ": 0x%08x greater than maximum DataSN: 0x%08x.\n", begrun, runlength, (cmd->data_sn - 1)); - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_INVALID, + buf); } dr = iscsit_allocate_datain_req(); if (!dr) - return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_NO_RESOURCES, - 1, 0, buf, cmd); + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, + buf); dr->data_sn = dr->begrun = begrun; dr->runlength = runlength; diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 08a3bacef0c5..6374a1a98d6b 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -178,7 +178,6 @@ struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) INIT_LIST_HEAD(&cmd->i_conn_node); INIT_LIST_HEAD(&cmd->datain_list); INIT_LIST_HEAD(&cmd->cmd_r2t_list); - init_completion(&cmd->reject_comp); spin_lock_init(&cmd->datain_lock); spin_lock_init(&cmd->dataout_timeout_lock); spin_lock_init(&cmd->istate_lock); diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 6db632eb3821..c8da4dd11d26 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -34,8 +34,6 @@ extern void iscsit_put_transport(struct iscsit_transport *); /* * From iscsi_target.c */ -extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, - struct iscsi_cmd *); extern int iscsit_setup_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *, unsigned char *); extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); From adb97c299904814edb0bb26ae894139ca46ae446 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 30 Jul 2013 04:04:02 +0000 Subject: [PATCH 0371/1368] iscsi-target: Fix iscsit_sequence_cmd reject handling for iser commit 561bf15892375597ee59d473a704a3e634c4f311 upstream This patch moves ISCSI_OP_REJECT failures into iscsit_sequence_cmd() in order to avoid external iscsit_reject_cmd() reject usage for all PDU types. It also updates PDU specific handlers for traditional iscsi-target code to not reset the session after posting a ISCSI_OP_REJECT during setup. (v2: Fix CMDSN_LOWER_THAN_EXP for ISCSI_OP_SCSI to call target_put_sess_cmd() after iscsit_sequence_cmd() failure) Signed-off-by: Nicholas Bellinger Cc: Or Gerlitz Cc: Mike Christie Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- drivers/target/iscsi/iscsi_target.c | 50 +++++++++++++++--------- drivers/target/iscsi/iscsi_target_erl1.c | 6 +-- drivers/target/iscsi/iscsi_target_util.c | 26 +++++++++--- drivers/target/iscsi/iscsi_target_util.h | 3 +- include/target/iscsi/iscsi_transport.h | 3 +- 6 files changed, 60 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 4f6faf0915d5..5849dc0726b9 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -952,7 +952,7 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, } sequence_cmd: - rc = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 03ccb610d2bb..012ff8bab0f8 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1052,11 +1052,11 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * be acknowledged. (See below) */ if (!cmd->immediate_data) { - cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); - if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { - if (!cmd->sense_reason) - return 0; - + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, + (unsigned char *)hdr, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return -1; + else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); return 0; } @@ -1083,6 +1083,9 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * iscsit_check_received_cmdsn() in iscsit_get_immediate_data() below. */ if (cmd->sense_reason) { + if (cmd->reject_reason) + return 0; + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); return 1; } @@ -1091,10 +1094,8 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * the backend memory allocation. */ cmd->sense_reason = transport_generic_new_cmd(&cmd->se_cmd); - if (cmd->sense_reason) { - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + if (cmd->sense_reason) return 1; - } return 0; } @@ -1104,6 +1105,7 @@ static int iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, bool dump_payload) { + struct iscsi_conn *conn = cmd->conn; int cmdsn_ret = 0, immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; /* * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes. @@ -1120,12 +1122,22 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, * DataCRC, check against ExpCmdSN/MaxCmdSN if * Immediate Bit is not set. */ - cmdsn_ret = iscsit_sequence_cmd(cmd->conn, cmd, hdr->cmdsn); + cmdsn_ret = iscsit_sequence_cmd(cmd->conn, cmd, + (unsigned char *)hdr, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { + return -1; + } else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + return 0; + } if (cmd->sense_reason) { - if (iscsit_dump_data_payload(cmd->conn, - cmd->first_burst_len, 1) < 0) - return -1; + int rc; + + rc = iscsit_dump_data_payload(cmd->conn, + cmd->first_burst_len, 1); + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + return rc; } else if (cmd->unsolicited_data) iscsit_set_unsoliticed_dataout(cmd); @@ -1159,7 +1171,7 @@ iscsit_handle_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, rc = iscsit_setup_scsi_cmd(conn, cmd, buf); if (rc < 0) - return rc; + return 0; /* * Allocation iovecs needed for struct socket operations for * traditional iSCSI block I/O. @@ -1500,7 +1512,7 @@ static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf) rc = iscsit_check_dataout_hdr(conn, buf, &cmd); if (rc < 0) - return rc; + return 0; else if (!cmd) return 0; @@ -1672,7 +1684,8 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, return 0; } - cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, + (unsigned char *)hdr, hdr->cmdsn); if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { ret = 0; goto ping_out; @@ -1898,7 +1911,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, spin_unlock_bh(&conn->cmd_lock); if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { - int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP) out_of_order_cmdsn = 1; else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) @@ -2075,7 +2088,8 @@ static int iscsit_handle_text_cmd( iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn)); if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { - cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, + (unsigned char *)hdr, hdr->cmdsn); if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return -1; @@ -2262,7 +2276,7 @@ iscsit_handle_logout_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (ret < 0) return ret; } else { - cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) logout_remove = 0; else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index d00f1326f0c8..586c268679a4 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -1088,7 +1088,7 @@ int iscsit_handle_ooo_cmdsn( ooo_cmdsn = iscsit_allocate_ooo_cmdsn(); if (!ooo_cmdsn) - return CMDSN_ERROR_CANNOT_RECOVER; + return -ENOMEM; ooo_cmdsn->cmd = cmd; ooo_cmdsn->batch_count = (batch) ? @@ -1099,10 +1099,10 @@ int iscsit_handle_ooo_cmdsn( if (iscsit_attach_ooo_cmdsn(sess, ooo_cmdsn) < 0) { kmem_cache_free(lio_ooo_cache, ooo_cmdsn); - return CMDSN_ERROR_CANNOT_RECOVER; + return -ENOMEM; } - return CMDSN_HIGHER_THAN_EXP; + return 0; } static int iscsit_set_dataout_timeout_values( diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 6374a1a98d6b..96e7fdbba9fc 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -283,13 +283,12 @@ static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cm * Commands may be received out of order if MC/S is in use. * Ensure they are executed in CmdSN order. */ -int iscsit_sequence_cmd( - struct iscsi_conn *conn, - struct iscsi_cmd *cmd, - __be32 cmdsn) +int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + unsigned char *buf, __be32 cmdsn) { - int ret; - int cmdsn_ret; + int ret, cmdsn_ret; + bool reject = false; + u8 reason = ISCSI_REASON_BOOKMARK_NO_RESOURCES; mutex_lock(&conn->sess->cmdsn_mutex); @@ -299,9 +298,19 @@ int iscsit_sequence_cmd( ret = iscsit_execute_cmd(cmd, 0); if ((ret >= 0) && !list_empty(&conn->sess->sess_ooo_cmdsn_list)) iscsit_execute_ooo_cmdsns(conn->sess); + else if (ret < 0) { + reject = true; + ret = CMDSN_ERROR_CANNOT_RECOVER; + } break; case CMDSN_HIGHER_THAN_EXP: ret = iscsit_handle_ooo_cmdsn(conn->sess, cmd, be32_to_cpu(cmdsn)); + if (ret < 0) { + reject = true; + ret = CMDSN_ERROR_CANNOT_RECOVER; + break; + } + ret = CMDSN_HIGHER_THAN_EXP; break; case CMDSN_LOWER_THAN_EXP: cmd->i_state = ISTATE_REMOVE; @@ -309,11 +318,16 @@ int iscsit_sequence_cmd( ret = cmdsn_ret; break; default: + reason = ISCSI_REASON_PROTOCOL_ERROR; + reject = true; ret = cmdsn_ret; break; } mutex_unlock(&conn->sess->cmdsn_mutex); + if (reject) + iscsit_reject_cmd(cmd, reason, buf); + return ret; } EXPORT_SYMBOL(iscsit_sequence_cmd); diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index a4422659d049..e4fc34a02f57 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -13,7 +13,8 @@ extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); extern struct iscsi_seq *iscsit_get_seq_holder_for_datain(struct iscsi_cmd *, u32); extern struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *); extern struct iscsi_r2t *iscsit_get_holder_for_r2tsn(struct iscsi_cmd *, u32); -int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, __be32 cmdsn); +extern int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + unsigned char * ,__be32 cmdsn); extern int iscsit_check_unsolicited_dataout(struct iscsi_cmd *, unsigned char *); extern struct iscsi_cmd *iscsit_find_cmd_from_itt(struct iscsi_conn *, itt_t); extern struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *, diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index c8da4dd11d26..c5aade523863 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -82,4 +82,5 @@ extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *); * From iscsi_target_util.c */ extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); -extern int iscsit_sequence_cmd(struct iscsi_conn *, struct iscsi_cmd *, __be32); +extern int iscsit_sequence_cmd(struct iscsi_conn *, struct iscsi_cmd *, + unsigned char *, __be32); From 2652eb4444fc2965a428a07123b877be1637711a Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 17 Apr 2013 02:23:16 +0000 Subject: [PATCH 0372/1368] perf tools: Revert regression in configuration of Python support commit a363a9da65d253fa7354ce5fd630f4f94df934cc upstream. Among other things, the following: commit 31160d7feab786c991780d7f0ce2755a469e0e5e Date: Tue Jan 8 16:22:36 2013 -0500 perf tools: Fix GNU make v3.80 compatibility issue attempts to aid the user by tapping into an existing error message, as described in the commit message: ... Also fix an issue where _get_attempt was called with only one argument. This prevented the error message from printing the name of the variable that can be used to fix the problem. or more precisely: -$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) +$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1))) However, The "missing" argument was in fact missing on purpose; it's absence is a signal that the error message should be skipped, because the failure would be due to the default value, not any user-supplied value. This can be seen in how `_ge_attempt' uses `gea_err' (in the config/utilities.mak file): _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) _gea_warn = $(warning The path '$(1)' is not executable.) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) That is, because the argument is no longer missing, the value `$(1)' (associated with `_gea_err') always evaluates to true, thus always triggering the error condition that is meant to be reserved for only the case when a user explicitly supplies an invalid value. Concretely, the result is a regression in the Makefile's configuration of python support; rather than gracefully disable support when the relevant executables cannot be found according to default values, the build process halts in error as though the user explicitly supplied the values. This new commit simply reverts the offending one-line change. Reported-by: Pekka Enberg Link: http://lkml.kernel.org/r/CAOJsxLHv17Ys3M7P5q25imkUxQW6LE_vABxh1N3Tt7Mv6Ho4iw@mail.gmail.com Signed-off-by: Michael Witten Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman --- tools/perf/config/utilities.mak | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 8ef3bd30a549..3e897198d1f7 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -173,7 +173,7 @@ _ge-abspath = $(if $(is-executable),$(1)) # Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default) # define get-executable-or-default -$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1))) +$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) endef _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) _gea_warn = $(warning The path '$(1)' is not executable.) From 0915f45b1275f392665df955e2e93ec459dac538 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Fri, 5 Jul 2013 18:53:29 +0800 Subject: [PATCH 0373/1368] drm/i915: Correct obj->mm_list link to dev_priv->dev_priv->mm.inactive_list commit 067556084a0e412013af6b0250a3143ae5afde6d upstream. obj->mm_list link to dev_priv->mm.inactive_list/active_list obj->global_list link to dev_priv->mm.unbound_list/bound_list This regression has been introduced in commit 93927ca52a55c23e0a6a305e7e9082e8411ac9fa Author: Daniel Vetter Date: Thu Jan 10 18:03:00 2013 +0100 drm/i915: Revert shrinker changes from "Track unbound pages" Cc: stable@vger.kernel.org Signed-off-by: Xiong Zhang [danvet: Add regression notice.] Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zhouping Liu --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 894a285e4d84..0a30088178b0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4493,7 +4493,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list) if (obj->pages_pin_count == 0) cnt += obj->base.size >> PAGE_SHIFT; - list_for_each_entry(obj, &dev_priv->mm.inactive_list, gtt_list) + list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) if (obj->pin_count == 0 && obj->pages_pin_count == 0) cnt += obj->base.size >> PAGE_SHIFT; From b185162885bc2346101a7b79c7737d2683b44782 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 13 Jun 2013 15:33:35 -0700 Subject: [PATCH 0374/1368] x86: Fix /proc/mtrr with base/size more than 44bits commit d5c78673b1b28467354c2c30c3d4f003666ff385 upstream. On one sytem that mtrr range is more then 44bits, in dmesg we have [ 0.000000] MTRR default type: write-back [ 0.000000] MTRR fixed ranges enabled: [ 0.000000] 00000-9FFFF write-back [ 0.000000] A0000-BFFFF uncachable [ 0.000000] C0000-DFFFF write-through [ 0.000000] E0000-FFFFF write-protect [ 0.000000] MTRR variable ranges enabled: [ 0.000000] 0 [000080000000-0000FFFFFFFF] mask 3FFF80000000 uncachable [ 0.000000] 1 [380000000000-38FFFFFFFFFF] mask 3F0000000000 uncachable [ 0.000000] 2 [000099000000-000099FFFFFF] mask 3FFFFF000000 write-through [ 0.000000] 3 [00009A000000-00009AFFFFFF] mask 3FFFFF000000 write-through [ 0.000000] 4 [381FFA000000-381FFBFFFFFF] mask 3FFFFE000000 write-through [ 0.000000] 5 [381FFC000000-381FFC0FFFFF] mask 3FFFFFF00000 write-through [ 0.000000] 6 [0000AD000000-0000ADFFFFFF] mask 3FFFFF000000 write-through [ 0.000000] 7 [0000BD000000-0000BDFFFFFF] mask 3FFFFF000000 write-through [ 0.000000] 8 disabled [ 0.000000] 9 disabled but /proc/mtrr report wrong: reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable reg01: base=0x80000000000 (8388608MB), size=1048576MB, count=1: uncachable reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through reg04: base=0x81ffa000000 (8519584MB), size= 32MB, count=1: write-through reg05: base=0x81ffc000000 (8519616MB), size= 1MB, count=1: write-through reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining so bit 44 and bit 45 get cut off. We have problems in arch/x86/kernel/cpu/mtrr/generic.c::generic_get_mtrr(). 1. for base, we miss cast base_lo to 64bit before shifting. Fix that by adding u64 casting. 2. for size, it only can handle 44 bits aka 32bits + page_shift Fix that with 64bit mask instead of 32bit mask_lo, then range could be more than 44bits. At the same time, we need to update size_or_mask for old cpus that does support cpuid 0x80000008 to get phys_addr. Need to set high 32bits to all 1s, otherwise will not get correct size for them. Also fix mtrr_add_page: it should check base and (base + size - 1) instead of base and size, as base and size could be small but base + size could bigger enough to be out of boundary. We can use boot_cpu_data.x86_phys_bits directly to avoid size_or_mask. So When are we going to have size more than 44bits? that is 16TiB. after patch we have right ouput: reg00: base=0x080000000 ( 2048MB), size= 2048MB, count=1: uncachable reg01: base=0x380000000000 (58720256MB), size=1048576MB, count=1: uncachable reg02: base=0x099000000 ( 2448MB), size= 16MB, count=1: write-through reg03: base=0x09a000000 ( 2464MB), size= 16MB, count=1: write-through reg04: base=0x381ffa000000 (58851232MB), size= 32MB, count=1: write-through reg05: base=0x381ffc000000 (58851264MB), size= 1MB, count=1: write-through reg06: base=0x0ad000000 ( 2768MB), size= 16MB, count=1: write-through reg07: base=0x0bd000000 ( 3024MB), size= 16MB, count=1: write-through reg08: base=0x09b000000 ( 2480MB), size= 16MB, count=1: write-combining -v2: simply checking in mtrr_add_page according to hpa. [ hpa: This probably wants to go into -stable only after having sat in mainline for a bit. It is not a regression. ] Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1371162815-29931-1-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mtrr/generic.c | 21 +++++++++++---------- arch/x86/kernel/cpu/mtrr/main.c | 16 +++++++++------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fa72a39e5d46..3982357de5b0 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -510,8 +510,9 @@ generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) static void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type) { - unsigned int mask_lo, mask_hi, base_lo, base_hi; - unsigned int tmp, hi; + u32 mask_lo, mask_hi, base_lo, base_hi; + unsigned int hi; + u64 tmp, mask; /* * get_mtrr doesn't need to update mtrr_state, also it could be called @@ -532,18 +533,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask: */ - tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; - mask_lo = size_or_mask | tmp; + tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; + mask = size_or_mask | tmp; /* Expand tmp with high bits to all 1s: */ - hi = fls(tmp); + hi = fls64(tmp); if (hi > 0) { - tmp |= ~((1<<(hi - 1)) - 1); + tmp |= ~((1ULL<<(hi - 1)) - 1); - if (tmp != mask_lo) { + if (tmp != mask) { printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - mask_lo = tmp; + mask = tmp; } } @@ -551,8 +552,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, * This works correctly if size is a power of two, i.e. a * contiguous range: */ - *size = -mask_lo; - *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; + *size = -mask; + *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; *type = base_lo & 0xff; out_put_cpu: diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 726bf963c227..ca22b73aaa25 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -305,7 +305,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, return -EINVAL; } - if (base & size_or_mask || size & size_or_mask) { + if ((base | (base + size - 1)) >> + (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) { pr_warning("mtrr: base or size exceeds the MTRR width\n"); return -EINVAL; } @@ -583,6 +584,7 @@ static struct syscore_ops mtrr_syscore_ops = { int __initdata changed_by_mtrr_cleanup; +#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1)) /** * mtrr_bp_init - initialize mtrrs on the boot CPU * @@ -600,7 +602,7 @@ void __init mtrr_bp_init(void) if (cpu_has_mtrr) { mtrr_if = &generic_mtrr_ops; - size_or_mask = 0xff000000; /* 36 bits */ + size_or_mask = SIZE_OR_MASK_BITS(36); size_and_mask = 0x00f00000; phys_addr = 36; @@ -619,7 +621,7 @@ void __init mtrr_bp_init(void) boot_cpu_data.x86_mask == 0x4)) phys_addr = 36; - size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); + size_or_mask = SIZE_OR_MASK_BITS(phys_addr); size_and_mask = ~size_or_mask & 0xfffff00000ULL; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && boot_cpu_data.x86 == 6) { @@ -627,7 +629,7 @@ void __init mtrr_bp_init(void) * VIA C* family have Intel style MTRRs, * but don't support PAE */ - size_or_mask = 0xfff00000; /* 32 bits */ + size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; phys_addr = 32; } @@ -637,21 +639,21 @@ void __init mtrr_bp_init(void) if (cpu_has_k6_mtrr) { /* Pre-Athlon (K6) AMD CPU MTRRs */ mtrr_if = mtrr_ops[X86_VENDOR_AMD]; - size_or_mask = 0xfff00000; /* 32 bits */ + size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; case X86_VENDOR_CENTAUR: if (cpu_has_centaur_mcr) { mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; - size_or_mask = 0xfff00000; /* 32 bits */ + size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; case X86_VENDOR_CYRIX: if (cpu_has_cyrix_arr) { mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; - size_or_mask = 0xfff00000; /* 32 bits */ + size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; From dc51cd2570468bb7bcf1815a60929023316ca868 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 4 Aug 2013 16:51:49 +0800 Subject: [PATCH 0375/1368] Linux 3.10.5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b4df9b20c49f..f8349d0ca14c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 4 +SUBLEVEL = 5 EXTRAVERSION = NAME = Unicycling Gorilla From bde516f7a535b9bafa00e54a06c6538b9eb830e5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 4 Jul 2013 11:00:23 +0100 Subject: [PATCH 0376/1368] ARM: poison the vectors page commit f928d4f2a86f46b030fa0850385b4391fc2b5918 upstream. Fill the empty regions of the vectors page with an exception generating instruction. This ensures that any inappropriate branch to the vector page is appropriately trapped, rather than just encountering some code to execute. (The vectors page was filled with zero before, which corresponds with the "andeq r0, r0, r0" instruction - a no-op.) Acked-by Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/traps.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 18b32e8e4497..95bdab493f7b 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -817,9 +817,19 @@ void __init early_trap_init(void *vectors_base) extern char __vectors_start[], __vectors_end[]; extern char __kuser_helper_start[], __kuser_helper_end[]; int kuser_sz = __kuser_helper_end - __kuser_helper_start; + unsigned i; vectors_page = vectors_base; + /* + * Poison the vectors page with an undefined instruction. This + * instruction is chosen to be undefined for both ARM and Thumb + * ISAs. The Thumb version is an undefined instruction with a + * branch back to the undefined instruction. + */ + for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) + ((u32 *)vectors_base)[i] = 0xe7fddef1; + /* * Copy the vectors, stubs and kuser helpers (in entry-armv.S) * into the vector page, mapped at 0xffff0000, and ensure these From ddc8d5e58a818e543ac658a572b3155c8ea3e45d Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 4 Jul 2013 11:32:04 +0100 Subject: [PATCH 0377/1368] ARM: poison memory between kuser helpers commit 5b43e7a383d69381ffe53423e46dd0fafae07da3 upstream. Poison the memory between each kuser helper. This ensures that any branch between the kuser helpers will be appropriately trapped. Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/entry-armv.S | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 582b405befc5..f864f7e3a673 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -741,6 +741,17 @@ ENDPROC(__switch_to) #endif .endm + .macro kuser_pad, sym, size + .if (. - \sym) & 3 + .rept 4 - (. - \sym) & 3 + .byte 0 + .endr + .endif + .rept (\size - (. - \sym)) / 4 + .word 0xe7fddef1 + .endr + .endm + .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -831,18 +842,13 @@ kuser_cmpxchg64_fixup: #error "incoherent kernel configuration" #endif - /* pad to next slot */ - .rept (16 - (. - __kuser_cmpxchg64)/4) - .word 0 - .endr - - .align 5 + kuser_pad __kuser_cmpxchg64, 64 __kuser_memory_barrier: @ 0xffff0fa0 smp_dmb arm usr_ret lr - .align 5 + kuser_pad __kuser_memory_barrier, 32 __kuser_cmpxchg: @ 0xffff0fc0 @@ -915,13 +921,14 @@ kuser_cmpxchg32_fixup: #endif - .align 5 + kuser_pad __kuser_cmpxchg, 32 __kuser_get_tls: @ 0xffff0fe0 ldr r0, [pc, #(16 - 8)] @ read TLS, set in kuser_get_tls_init usr_ret lr mrc p15, 0, r0, c13, c0, 3 @ 0xffff0fe8 hardware TLS code - .rep 4 + kuser_pad __kuser_get_tls, 16 + .rep 3 .word 0 @ 0xffff0ff0 software TLS value, then .endr @ pad up to __kuser_helper_version From b85796fa1987e014a61c379e3cb753976e993a46 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 4 Jul 2013 11:40:32 +0100 Subject: [PATCH 0378/1368] ARM: move vector stubs commit 19accfd373847ac3d10623c5d20f948846299741 upstream. Move the machine vector stubs into the page above the vector page, which we can prevent from being visible to userspace. Also move the reset stub, and place the swi vector at a location that the 'ldr' can get to it. This hides pointers into the kernel which could give valuable information to attackers, and reduces the number of exploitable instructions at a fixed address. Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/Kconfig | 3 ++- arch/arm/kernel/entry-armv.S | 50 +++++++++++++++++------------------- arch/arm/kernel/traps.c | 4 +-- arch/arm/mm/mmu.c | 10 +++++++- 4 files changed, 37 insertions(+), 30 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 136f263ed47b..d2e1f71cc197 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -213,7 +213,8 @@ config VECTORS_BASE default DRAM_BASE if REMAP_VECTORS_TO_RAM default 0x00000000 help - The base address of exception vectors. + The base address of exception vectors. This must be two pages + in size. config ARM_PATCH_PHYS_VIRT bool "Patch physical to virtual translations at runtime" if EMBEDDED diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index f864f7e3a673..bc2180d3f6ec 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -943,9 +943,9 @@ __kuser_helper_end: /* * Vector stubs. * - * This code is copied to 0xffff0200 so we can use branches in the - * vectors, rather than ldr's. Note that this code must not - * exceed 0x300 bytes. + * This code is copied to 0xffff1000 so we can use branches in the + * vectors, rather than ldr's. Note that this code must not exceed + * a page size. * * Common stub entry macro: * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC @@ -994,6 +994,15 @@ ENDPROC(vector_\name) .globl __stubs_start __stubs_start: + @ This must be the first word + .word vector_swi + +vector_rst: + ARM( swi SYS_ERROR0 ) + THUMB( svc #0 ) + THUMB( nop ) + b vector_und + /* * Interrupt dispatcher */ @@ -1087,6 +1096,16 @@ __stubs_start: .align 5 +/*============================================================================= + * Address exception handler + *----------------------------------------------------------------------------- + * These aren't too critical. + * (they're not supposed to happen, and won't happen in 32-bit data mode). + */ + +vector_addrexcptn: + b vector_addrexcptn + /*============================================================================= * Undefined FIQs *----------------------------------------------------------------------------- @@ -1100,35 +1119,14 @@ __stubs_start: vector_fiq: subs pc, lr, #4 -/*============================================================================= - * Address exception handler - *----------------------------------------------------------------------------- - * These aren't too critical. - * (they're not supposed to happen, and won't happen in 32-bit data mode). - */ - -vector_addrexcptn: - b vector_addrexcptn - -/* - * We group all the following data together to optimise - * for CPUs with separate I & D caches. - */ - .align 5 - -.LCvswi: - .word vector_swi - .globl __stubs_end __stubs_end: - .equ stubs_offset, __vectors_start + 0x200 - __stubs_start + .equ stubs_offset, __vectors_start + 0x1000 - __stubs_start .globl __vectors_start __vectors_start: - ARM( swi SYS_ERROR0 ) - THUMB( svc #0 ) - THUMB( nop ) + W(b) vector_rst + stubs_offset W(b) vector_und + stubs_offset W(ldr) pc, .LCvswi + stubs_offset W(b) vector_pabt + stubs_offset diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 95bdab493f7b..a58f64e4c714 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -836,7 +836,7 @@ void __init early_trap_init(void *vectors_base) * are visible to the instruction stream. */ memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); - memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start); + memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz); /* @@ -851,6 +851,6 @@ void __init early_trap_init(void *vectors_base) memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE), sigreturn_codes, sizeof(sigreturn_codes)); - flush_icache_range(vectors, vectors + PAGE_SIZE); + flush_icache_range(vectors, vectors + PAGE_SIZE * 2); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4d409e6a552d..08b37c421a5e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1175,7 +1175,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc) /* * Allocate the vector page early. */ - vectors = early_alloc(PAGE_SIZE); + vectors = early_alloc(PAGE_SIZE * 2); early_trap_init(vectors); @@ -1225,10 +1225,18 @@ static void __init devicemaps_init(struct machine_desc *mdesc) if (!vectors_high()) { map.virtual = 0; + map.length = PAGE_SIZE * 2; map.type = MT_LOW_VECTORS; create_mapping(&map); } + /* Now create a kernel read-only mapping */ + map.pfn += 1; + map.virtual = 0xffff0000 + PAGE_SIZE; + map.length = PAGE_SIZE; + map.type = MT_LOW_VECTORS; + create_mapping(&map); + /* * Ask the machine support to map in the statically mapped devices. */ From 0477cd427e4862843b6cfb460b83ca09f265742d Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 4 Jul 2013 12:03:31 +0100 Subject: [PATCH 0379/1368] ARM: use linker magic for vectors and vector stubs commit b9b32bf70f2fb710b07c94e13afbc729afe221da upstream. Use linker magic to create the vectors and vector stubs: we can tell the linker to place them at an appropriate VMA, but keep the LMA within the kernel. This gets rid of some unnecessary symbol manipulation, and have the linker calculate the relocations appropriately. Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/entry-armv.S | 28 ++++++++++------------------ arch/arm/kernel/vmlinux.lds.S | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bc2180d3f6ec..12ac1e336645 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -992,7 +992,7 @@ ENDPROC(vector_\name) 1: .endm - .globl __stubs_start + .section .stubs, "ax", %progbits __stubs_start: @ This must be the first word .word vector_swi @@ -1119,24 +1119,16 @@ vector_addrexcptn: vector_fiq: subs pc, lr, #4 - .globl __stubs_end -__stubs_end: - - .equ stubs_offset, __vectors_start + 0x1000 - __stubs_start - - .globl __vectors_start + .section .vectors, "ax", %progbits __vectors_start: - W(b) vector_rst + stubs_offset - W(b) vector_und + stubs_offset - W(ldr) pc, .LCvswi + stubs_offset - W(b) vector_pabt + stubs_offset - W(b) vector_dabt + stubs_offset - W(b) vector_addrexcptn + stubs_offset - W(b) vector_irq + stubs_offset - W(b) vector_fiq + stubs_offset - - .globl __vectors_end -__vectors_end: + W(b) vector_rst + W(b) vector_und + W(ldr) pc, __vectors_start + 0x1000 + W(b) vector_pabt + W(b) vector_dabt + W(b) vector_addrexcptn + W(b) vector_irq + W(b) vector_fiq .data diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index a871b8e00fca..33f2ea32f5a0 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -152,6 +152,23 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_begin = .; #endif + /* + * The vectors and stubs are relocatable code, and the + * only thing that matters is their relative offsets + */ + __vectors_start = .; + .vectors 0 : AT(__vectors_start) { + *(.vectors) + } + . = __vectors_start + SIZEOF(.vectors); + __vectors_end = .; + + __stubs_start = .; + .stubs 0x1000 : AT(__stubs_start) { + *(.stubs) + } + . = __stubs_start + SIZEOF(.stubs); + __stubs_end = .; INIT_TEXT_SECTION(8) .exit.text : { From 6904e468bb92a726098a2dfcf792463e11053582 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 9 Jul 2013 01:03:17 +0100 Subject: [PATCH 0380/1368] ARM: update FIQ support for relocation of vectors commit e39e3f3ebfef03450cf7bfa7a974a8c61f7980c8 upstream. FIQ should no longer copy the FIQ code into the user visible vector page. Instead, it should use the hidden page. This change makes that happen. Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/entry-armv.S | 3 +++ arch/arm/kernel/fiq.c | 19 ++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 12ac1e336645..4d938421bb9a 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1119,6 +1119,9 @@ vector_addrexcptn: vector_fiq: subs pc, lr, #4 + .globl vector_fiq_offset + .equ vector_fiq_offset, vector_fiq + .section .vectors, "ax", %progbits __vectors_start: W(b) vector_rst diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index 2adda11f712f..25442f451148 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -47,6 +47,11 @@ #include #include +#define FIQ_OFFSET ({ \ + extern void *vector_fiq_offset; \ + (unsigned)&vector_fiq_offset; \ + }) + static unsigned long no_fiq_insn; /* Default reacquire function @@ -80,13 +85,16 @@ int show_fiq_list(struct seq_file *p, int prec) void set_fiq_handler(void *start, unsigned int length) { #if defined(CONFIG_CPU_USE_DOMAINS) - memcpy((void *)0xffff001c, start, length); + void *base = (void *)0xffff0000; #else - memcpy(vectors_page + 0x1c, start, length); + void *base = vectors_page; #endif - flush_icache_range(0xffff001c, 0xffff001c + length); + unsigned offset = FIQ_OFFSET; + + memcpy(base + offset, start, length); + flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length); if (!vectors_high()) - flush_icache_range(0x1c, 0x1c + length); + flush_icache_range(offset, offset + length); } int claim_fiq(struct fiq_handler *f) @@ -144,6 +152,7 @@ EXPORT_SYMBOL(disable_fiq); void __init init_FIQ(int start) { - no_fiq_insn = *(unsigned long *)0xffff001c; + unsigned offset = FIQ_OFFSET; + no_fiq_insn = *(unsigned long *)(0xffff0000 + offset); fiq_start = start; } From 7c5db81779e0ab75fc2d71397911c546046f922f Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 23 Jul 2013 18:37:00 +0100 Subject: [PATCH 0381/1368] ARM: allow kuser helpers to be removed from the vector page commit f6f91b0d9fd971c630cef908dde8fe8795aefbf8 upstream. Provide a kernel configuration option to allow the kernel user helpers to be removed from the vector page, thereby preventing their use with ROP (return orientated programming) attacks. This option is only visible for CPU architectures which natively support all the operations which kernel user helpers would normally provide, and must be enabled with caution. Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/entry-armv.S | 3 +++ arch/arm/kernel/traps.c | 23 ++++++++++++++--------- arch/arm/mm/Kconfig | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 4d938421bb9a..d43c7e54ec6c 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -752,6 +752,7 @@ ENDPROC(__switch_to) .endr .endm +#ifdef CONFIG_KUSER_HELPERS .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -938,6 +939,8 @@ __kuser_helper_version: @ 0xffff0ffc .globl __kuser_helper_end __kuser_helper_end: +#endif + THUMB( .thumb ) /* diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index a58f64e4c714..7751c26f3f0f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -800,23 +800,32 @@ void __init trap_init(void) return; } -static void __init kuser_get_tls_init(unsigned long vectors) +#ifdef CONFIG_KUSER_HELPERS +static void __init kuser_init(void *vectors) { + extern char __kuser_helper_start[], __kuser_helper_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + + memcpy(vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz); + /* * vectors + 0xfe0 = __kuser_get_tls * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8 */ if (tls_emu || has_tls_reg) - memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4); + memcpy(vectors + 0xfe0, vectors + 0xfe8, 4); } +#else +static void __init kuser_init(void *vectors) +{ +} +#endif void __init early_trap_init(void *vectors_base) { unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; - extern char __kuser_helper_start[], __kuser_helper_end[]; - int kuser_sz = __kuser_helper_end - __kuser_helper_start; unsigned i; vectors_page = vectors_base; @@ -837,12 +846,8 @@ void __init early_trap_init(void *vectors_base) */ memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); - memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz); - /* - * Do processor specific fixups for the kuser helpers - */ - kuser_get_tls_init(vectors); + kuser_init(vectors_base); /* * Copy signal return handlers into the vector page, and diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 35955b54944c..2950082a531c 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -411,24 +411,28 @@ config CPU_32v3 select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select TLS_REG_EMUL if SMP || !MMU + select NEED_KUSER_HELPERS config CPU_32v4 bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select TLS_REG_EMUL if SMP || !MMU + select NEED_KUSER_HELPERS config CPU_32v4T bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select TLS_REG_EMUL if SMP || !MMU + select NEED_KUSER_HELPERS config CPU_32v5 bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select TLS_REG_EMUL if SMP || !MMU + select NEED_KUSER_HELPERS config CPU_32v6 bool @@ -756,6 +760,7 @@ config CPU_BPREDICT_DISABLE config TLS_REG_EMUL bool + select NEED_KUSER_HELPERS help An SMP system using a pre-ARMv6 processor (there are apparently a few prototypes like that in existence) and therefore access to @@ -763,11 +768,40 @@ config TLS_REG_EMUL config NEEDS_SYSCALL_FOR_CMPXCHG bool + select NEED_KUSER_HELPERS help SMP on a pre-ARMv6 processor? Well OK then. Forget about fast user space cmpxchg support. It is just not possible. +config NEED_KUSER_HELPERS + bool + +config KUSER_HELPERS + bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS + default y + help + Warning: disabling this option may break user programs. + + Provide kuser helpers in the vector page. The kernel provides + helper code to userspace in read only form at a fixed location + in the high vector page to allow userspace to be independent of + the CPU type fitted to the system. This permits binaries to be + run on ARMv4 through to ARMv7 without modification. + + However, the fixed address nature of these helpers can be used + by ROP (return orientated programming) authors when creating + exploits. + + If all of the binaries and libraries which run on your platform + are built specifically for your platform, and make no use of + these helpers, then you can turn this option off. However, + when such an binary or library is run, it will receive a SIGILL + signal, which will terminate the program. + + Say N here only if you are absolutely certain that you do not + need these helpers; otherwise, the safe option is to say Y. + config DMA_CACHE_RWFO bool "Enable read/write for ownership DMA cache maintenance" depends on CPU_V6K && SMP From a5510daad56d3b9738d475957750db9d4fc607a9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 24 Jul 2013 00:29:18 +0100 Subject: [PATCH 0382/1368] ARM: move signal handlers into a vdso-like page commit 48be69a026b2c17350a5ef18a1959a919f60be7d upstream. Move the signal handlers into a VDSO page rather than keeping them in the vectors page. This allows us to place them randomly within this page, and also map the page at a random location within userspace further protecting these code fragments from ROP attacks. The new VDSO page is also poisoned in the same way as the vector page. Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/elf.h | 4 +++ arch/arm/include/asm/mmu.h | 1 + arch/arm/kernel/process.c | 40 +++++++++++++++++++++++++++--- arch/arm/kernel/signal.c | 50 +++++++++++++++++++++++++++++++++----- arch/arm/kernel/signal.h | 12 --------- arch/arm/kernel/traps.c | 9 ------- 6 files changed, 86 insertions(+), 30 deletions(-) delete mode 100644 arch/arm/kernel/signal.h diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 38050b1c4800..9c9b30717fda 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -130,4 +130,8 @@ struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +int arch_setup_additional_pages(struct linux_binprm *, int); + #endif diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index e3d55547e755..7345e37155d5 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -8,6 +8,7 @@ typedef struct { atomic64_t id; #endif unsigned int vmalloc_seq; + unsigned long sigpage; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 6e8931ccf13e..72315e7be22b 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -435,8 +435,8 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) #ifdef CONFIG_MMU /* * The vectors page is always readable from user space for the - * atomic helpers and the signal restart code. Insert it into the - * gate_vma so that it is visible through ptrace and /proc//mem. + * atomic helpers. Insert it into the gate_vma so that it is visible + * through ptrace and /proc//mem. */ static struct vm_area_struct gate_vma = { .vm_start = 0xffff0000, @@ -468,6 +468,40 @@ int in_gate_area_no_mm(unsigned long addr) const char *arch_vma_name(struct vm_area_struct *vma) { - return (vma == &gate_vma) ? "[vectors]" : NULL; + return (vma == &gate_vma) ? "[vectors]" : + (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ? + "[sigpage]" : NULL; +} + +extern struct page *get_signal_page(void); + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page *page; + unsigned long addr; + int ret; + + page = get_signal_page(); + if (!page) + return -ENOMEM; + + down_write(&mm->mmap_sem); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &page); + + if (ret == 0) + mm->context.sigpage = addr; + + up_fail: + up_write(&mm->mmap_sem); + return ret; } #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 296786bdbb73..8515dba9534d 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include @@ -15,12 +16,11 @@ #include #include +#include #include #include #include -#include "signal.h" - /* * For ARM syscalls, we encode the syscall number into the instruction. */ @@ -40,11 +40,13 @@ #define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) #define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) -const unsigned long sigreturn_codes[7] = { +static const unsigned long sigreturn_codes[7] = { MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, }; +static unsigned long signal_return_offset; + #ifdef CONFIG_CRUNCH static int preserve_crunch_context(struct crunch_sigframe __user *frame) { @@ -397,11 +399,14 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, return 1; if (cpsr & MODE32_BIT) { + struct mm_struct *mm = current->mm; /* - * 32-bit code can use the new high-page - * signal return code support. + * 32-bit code can use the signal return page + * except when the MPU has protected the vectors + * page from PL0 */ - retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb; + retcode = mm->context.sigpage + signal_return_offset + + (idx << 2) + thumb; } else { /* * Ensure that the instruction cache sees @@ -603,3 +608,36 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } while (thread_flags & _TIF_WORK_MASK); return 0; } + +static struct page *signal_page; + +struct page *get_signal_page(void) +{ + if (!signal_page) { + unsigned long ptr; + unsigned offset; + void *addr; + + signal_page = alloc_pages(GFP_KERNEL, 0); + + if (!signal_page) + return NULL; + + addr = page_address(signal_page); + + /* Give the signal return code some randomness */ + offset = 0x200 + (get_random_int() & 0x7fc); + signal_return_offset = offset; + + /* + * Copy signal return handlers into the vector page, and + * set sigreturn to be a pointer to these. + */ + memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); + + ptr = (unsigned long)addr + offset; + flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); + } + + return signal_page; +} diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h deleted file mode 100644 index 5ff067b7c752..000000000000 --- a/arch/arm/kernel/signal.h +++ /dev/null @@ -1,12 +0,0 @@ -/* - * linux/arch/arm/kernel/signal.h - * - * Copyright (C) 2005-2009 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define KERN_SIGRETURN_CODE (CONFIG_VECTORS_BASE + 0x00000500) - -extern const unsigned long sigreturn_codes[7]; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 7751c26f3f0f..6b9567e19bdc 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -35,8 +35,6 @@ #include #include -#include "signal.h" - static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; void *vectors_page; @@ -849,13 +847,6 @@ void __init early_trap_init(void *vectors_base) kuser_init(vectors_base); - /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. - */ - memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE), - sigreturn_codes, sizeof(sigreturn_codes)); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); } From 75bc4446e0d553aceeb632ae05878786d6760e47 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Jul 2013 21:58:56 +0100 Subject: [PATCH 0383/1368] ARM: make vectors page inaccessible from userspace commit a5463cd3435475386cbbe7b06e01292ac169d36f upstream. If kuser helpers are not provided by the kernel, disable user access to the vectors page. With the kuser helpers gone, there is no reason for this page to be visible to userspace. Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/page.h | 2 ++ arch/arm/kernel/process.c | 7 ++++++- arch/arm/mm/mmu.c | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 812a4944e783..cbdc7a21f869 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -142,7 +142,9 @@ extern void __cpu_copy_user_highpage(struct page *to, struct page *from, #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) extern void copy_page(void *to, const void *from); +#ifdef CONFIG_KUSER_HELPERS #define __HAVE_ARCH_GATE_AREA 1 +#endif #ifdef CONFIG_ARM_LPAE #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 72315e7be22b..1ed29cc22979 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -433,6 +433,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } #ifdef CONFIG_MMU +#ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the * atomic helpers. Insert it into the gate_vma so that it is visible @@ -465,10 +466,14 @@ int in_gate_area_no_mm(unsigned long addr) { return in_gate_area(NULL, addr); } +#define is_gate_vma(vma) ((vma) = &gate_vma) +#else +#define is_gate_vma(vma) 0 +#endif const char *arch_vma_name(struct vm_area_struct *vma) { - return (vma == &gate_vma) ? "[vectors]" : + return is_gate_vma(vma) ? "[vectors]" : (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ? "[sigpage]" : NULL; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 08b37c421a5e..daf336fe0be8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1220,7 +1220,11 @@ static void __init devicemaps_init(struct machine_desc *mdesc) map.pfn = __phys_to_pfn(virt_to_phys(vectors)); map.virtual = 0xffff0000; map.length = PAGE_SIZE; +#ifdef CONFIG_KUSER_HELPERS map.type = MT_HIGH_VECTORS; +#else + map.type = MT_LOW_VECTORS; +#endif create_mapping(&map); if (!vectors_high()) { From 17ef32956a8481fba02751e14c17c648c9f472e6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 3 Aug 2013 10:30:05 +0100 Subject: [PATCH 0384/1368] ARM: fix a cockup in 48be69a02 (ARM: move signal handlers into a vdso-like page) commit e0d407564b532d978b03ceccebd224a05d02f111 upstream. Unfortunately, I never committed the fix to a nasty oops which can occur as a result of that commit: ------------[ cut here ]------------ kernel BUG at /home/olof/work/batch/include/linux/mm.h:414! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 490 Comm: killall5 Not tainted 3.11.0-rc3-00288-gabe0308 #53 task: e90acac0 ti: e9be8000 task.ti: e9be8000 PC is at special_mapping_fault+0xa4/0xc4 LR is at __do_fault+0x68/0x48c This doesn't show up unless you do quite a bit of testing; a simple boot test does not do this, so all my nightly tests were passing fine. The reason for this is that install_special_mapping() expects the page array to stick around, and as this was only inserting one page which was stored on the kernel stack, that's why this was blowing up. Reported-by: Olof Johansson Tested-by: Olof Johansson Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/process.c | 9 +++++---- arch/arm/kernel/signal.c | 41 ++++++++++++++++++--------------------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 1ed29cc22979..5bc2615268d4 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -478,17 +478,18 @@ const char *arch_vma_name(struct vm_area_struct *vma) "[sigpage]" : NULL; } +static struct page *signal_page; extern struct page *get_signal_page(void); int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - struct page *page; unsigned long addr; int ret; - page = get_signal_page(); - if (!page) + if (!signal_page) + signal_page = get_signal_page(); + if (!signal_page) return -ENOMEM; down_write(&mm->mmap_sem); @@ -500,7 +501,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, - &page); + &signal_page); if (ret == 0) mm->context.sigpage = addr; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 8515dba9534d..8fedf05b06dc 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -609,35 +609,32 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) return 0; } -static struct page *signal_page; - struct page *get_signal_page(void) { - if (!signal_page) { - unsigned long ptr; - unsigned offset; - void *addr; + unsigned long ptr; + unsigned offset; + struct page *page; + void *addr; - signal_page = alloc_pages(GFP_KERNEL, 0); + page = alloc_pages(GFP_KERNEL, 0); - if (!signal_page) - return NULL; + if (!page) + return NULL; - addr = page_address(signal_page); + addr = page_address(page); - /* Give the signal return code some randomness */ - offset = 0x200 + (get_random_int() & 0x7fc); - signal_return_offset = offset; + /* Give the signal return code some randomness */ + offset = 0x200 + (get_random_int() & 0x7fc); + signal_return_offset = offset; - /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. - */ - memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); + /* + * Copy signal return handlers into the vector page, and + * set sigreturn to be a pointer to these. + */ + memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); - ptr = (unsigned long)addr + offset; - flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); - } + ptr = (unsigned long)addr + offset; + flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); - return signal_page; + return page; } From 0a73f943a52c70cc33b29306433fb11cd9a854fc Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 3 Aug 2013 10:39:51 +0100 Subject: [PATCH 0385/1368] ARM: fix nommu builds with 48be69a02 (ARM: move signal handlers into a vdso-like page) commit 8c0cc8a5d90bc7373a7a9e7f7a40eb41f51e03fc upstream. Olof reports that noMMU builds error out with: arch/arm/kernel/signal.c: In function 'setup_return': arch/arm/kernel/signal.c:413:25: error: 'mm_context_t' has no member named 'sigpage' This shows one of the evilnesses of IS_ENABLED(). Get rid of it here and replace it with #ifdef's - and as no noMMU platform can make use of sigpage, depend on CONIFG_MMU not CONFIG_ARM_MPU. Reported-by: Olof Johansson Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/elf.h | 2 ++ arch/arm/kernel/signal.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 9c9b30717fda..56211f2084ef 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -130,8 +130,10 @@ struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +#ifdef CONFIG_MMU #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; int arch_setup_additional_pages(struct linux_binprm *, int); +#endif #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 8fedf05b06dc..5a42c12767af 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -398,6 +398,7 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, __put_user(sigreturn_codes[idx+1], rc+1)) return 1; +#ifdef CONFIG_MMU if (cpsr & MODE32_BIT) { struct mm_struct *mm = current->mm; /* @@ -407,7 +408,9 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ retcode = mm->context.sigpage + signal_return_offset + (idx << 2) + thumb; - } else { + } else +#endif + { /* * Ensure that the instruction cache sees * the return code written onto the stack. From 3d67947d3a8ecd73a811e56613cc33d0a2126584 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 21 Jul 2013 03:30:11 +0300 Subject: [PATCH 0386/1368] powerpc/windfarm: Fix noisy slots-fan on Xserve (rm31) commit fe956a1d4081ce1a959f87df397a15e252201f10 upstream. slots-fan on G5 Xserve is always running at full speed with windfarm_rm31 driver, resulting in a very high acoustic noise level. It seems the fan parameters are incorrect, and have been copied from the Drive Bay fan (RPM, not present on rm31) of the legacy therm_pm72 driver. This patch changes the parameters to match the Slots fan (PWM) of therm_pm72. With the patch, slots-fan speed drops from 99% to 19% during normal use, and slots-temp settle to ~42'C. Signed-off-by: Aaro Koskinen Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- drivers/macintosh/windfarm_rm31.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/macintosh/windfarm_rm31.c b/drivers/macintosh/windfarm_rm31.c index 0b9a79b2f48a..82fc86a90c1a 100644 --- a/drivers/macintosh/windfarm_rm31.c +++ b/drivers/macintosh/windfarm_rm31.c @@ -439,15 +439,15 @@ static void backside_setup_pid(void) /* Slots fan */ static const struct wf_pid_param slots_param = { - .interval = 5, - .history_len = 2, - .gd = 30 << 20, - .gp = 5 << 20, - .gr = 0, - .itarget = 40 << 16, - .additive = 1, - .min = 300, - .max = 4000, + .interval = 1, + .history_len = 20, + .gd = 0, + .gp = 0, + .gr = 0x00100000, + .itarget = 3200000, + .additive = 0, + .min = 20, + .max = 100, }; static void slots_fan_tick(void) From 340631138430a7ca215ca76ae74ef70f0be7ac20 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Jul 2013 14:26:19 +0100 Subject: [PATCH 0387/1368] ARM: 7784/1: mm: ensure SMP alternates assemble to exactly 4 bytes with Thumb-2 commit bf3f0f332f76a85ff3a0b393aaded5a8533769c0 upstream. Commit ae8a8b9553bd ("ARM: 7691/1: mm: kill unused TLB_CAN_READ_FROM_L1_CACHE and use ALT_SMP instead") added early function returns for page table cache flushing operations on ARMv7 SMP CPUs. Unfortunately, when targetting Thumb-2, these `mov pc, lr' sequences assemble to 2 bytes which can lead to corruption of the instruction stream after code patching. This patch fixes the alternates to use wide (32-bit) instructions for Thumb-2, therefore ensuring that the patching code works correctly. Signed-off-by: Will Deacon Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7-2level.S | 2 +- arch/arm/mm/proc-v7-3level.S | 2 +- arch/arm/mm/proc-v7.S | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 9704097c450e..b3997c70af32 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -110,7 +110,7 @@ ENTRY(cpu_v7_set_pte_ext) ARM( str r3, [r0, #2048]! ) THUMB( add r0, r0, #2048 ) THUMB( str r3, [r0] ) - ALT_SMP(mov pc,lr) + ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif mov pc, lr diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 363027e811d6..6ba4bd9118f2 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -73,7 +73,7 @@ ENTRY(cpu_v7_set_pte_ext) tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY 1: strd r2, r3, [r0] - ALT_SMP(mov pc, lr) + ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif mov pc, lr diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index e35fec34453e..5fbccee5f644 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -75,13 +75,14 @@ ENTRY(cpu_v7_do_idle) ENDPROC(cpu_v7_do_idle) ENTRY(cpu_v7_dcache_clean_area) - ALT_SMP(mov pc, lr) @ MP extensions imply L1 PTW - ALT_UP(W(nop)) - dcache_line_size r2, r3 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + ALT_SMP(W(nop)) @ MP extensions imply L1 PTW + ALT_UP_B(1f) + mov pc, lr +1: dcache_line_size r2, r3 +2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, r2 subs r1, r1, r2 - bhi 1b + bhi 2b dsb mov pc, lr ENDPROC(cpu_v7_dcache_clean_area) From 8271eb9ffaaa3390fb0478f94c88f91623b5af78 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 23 Jul 2013 16:15:36 +0100 Subject: [PATCH 0388/1368] ARM: 7790/1: Fix deferred mm switch on VIVT processors commit bdae73cd374e28db544fdd9b77de689a36e3c129 upstream. As of commit b9d4d42ad9 (ARM: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW on pre-ARMv6 CPUs), the mm switching on VIVT processors is done in the finish_arch_post_lock_switch() function to avoid whole cache flushing with interrupts disabled. The need for deferred mm switch is stored as a thread flag (TIF_SWITCH_MM). However, with preemption enabled, we can have another thread switch before finish_arch_post_lock_switch(). If the new thread has the same mm as the previous 'next' thread, the scheduler will not call switch_mm() and the TIF_SWITCH_MM flag won't be set for the new thread. This patch moves the switch pending flag to the mm_context_t structure since this is specific to the mm rather than thread. Signed-off-by: Catalin Marinas Reported-by: Marc Kleine-Budde Tested-by: Marc Kleine-Budde Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/mmu.h | 2 ++ arch/arm/include/asm/mmu_context.h | 20 ++++++++++++++++---- arch/arm/include/asm/thread_info.h | 1 - 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 7345e37155d5..6f18da09668b 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -6,6 +6,8 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID atomic64_t id; +#else + int switch_pending; #endif unsigned int vmalloc_seq; unsigned long sigpage; diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index dc90203c6ddb..e0b10f19d679 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -55,7 +55,7 @@ static inline void check_and_switch_context(struct mm_struct *mm, * on non-ASID CPUs, the old mm will remain valid until the * finish_arch_post_lock_switch() call. */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); + mm->context.switch_pending = 1; else cpu_switch_mm(mm->pgd, mm); } @@ -64,9 +64,21 @@ static inline void check_and_switch_context(struct mm_struct *mm, finish_arch_post_lock_switch static inline void finish_arch_post_lock_switch(void) { - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { - struct mm_struct *mm = current->mm; - cpu_switch_mm(mm->pgd, mm); + struct mm_struct *mm = current->mm; + + if (mm && mm->context.switch_pending) { + /* + * Preemption must be disabled during cpu_switch_mm() as we + * have some stateful cache flush implementations. Check + * switch_pending again in case we were preempted and the + * switch to this mm was already done. + */ + preempt_disable(); + if (mm->context.switch_pending) { + mm->context.switch_pending = 0; + cpu_switch_mm(mm->pgd, mm); + } + preempt_enable_no_resched(); } } diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 1995d1a84060..f00b5692cd9d 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -156,7 +156,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 -#define TIF_SWITCH_MM 22 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) From 8148d0952b58d0664c3a3ddffbe6ded35ad4c790 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 25 Jul 2013 11:44:48 +0100 Subject: [PATCH 0389/1368] ARM: 7791/1: a.out: remove partial a.out support commit acfdd4b1f7590d02e9bae3b73bdbbc4a31b05d38 upstream. a.out support on ARM requires that argc, argv and envp are passed in r0-r2 respectively, which requires hacking load_aout_binary to prevent argc being clobbered by the return code. Whilst mainline kernels do set the registers up in start_thread, the aout loader has never carried the hack in mainline. Initialising the registers in this way actually goes against the libc expectations for ELF binaries, where argc, argv and envp are passed on the stack, with r0 being used to hold a pointer to an exit function for cleaning up after the dynamic linker if required. If the pointer is NULL, then it is ignored. When execing an ELF binary, Linux currently zeroes r0, then sets it to argc and then finally clobbers it with the return value of the execve syscall, so we actually end up with: r0 = 0 stack[0] = argc r1 = stack[1] = argv r2 = stack[2] = envp libc treats r1 and r2 as undefined. The clobbering of r0 by sys_execve works for user-spawned threads, but when executing an ELF binary from a kernel thread (via call_usermodehelper), the execve is performed on the ret_from_fork path, which restores r0 from the saved pt_regs, resulting in argc being presented to the C library. This has horrible consequences when the application exits, since we have an exit function registered using argc, resulting in a jump to hyperspace. This patch solves the problem by removing the partial a.out support from arch/arm/ altogether. Signed-off-by: Will Deacon Cc: Ashish Sangwan Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/Kconfig | 1 - arch/arm/include/asm/a.out-core.h | 45 ------------------------------- arch/arm/include/asm/processor.h | 4 --- arch/arm/include/uapi/asm/Kbuild | 1 - arch/arm/include/uapi/asm/a.out.h | 34 ----------------------- 5 files changed, 85 deletions(-) delete mode 100644 arch/arm/include/asm/a.out-core.h delete mode 100644 arch/arm/include/uapi/asm/a.out.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d2e1f71cc197..18a9f5ef643a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -19,7 +19,6 @@ config ARM select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HARDIRQS_SW_RESEND - select HAVE_AOUT select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h deleted file mode 100644 index 92f10cb5c70c..000000000000 --- a/arch/arm/include/asm/a.out-core.h +++ /dev/null @@ -1,45 +0,0 @@ -/* a.out coredump register dumper - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _ASM_A_OUT_CORE_H -#define _ASM_A_OUT_CORE_H - -#ifdef __KERNEL__ - -#include -#include - -/* - * fill in the user structure for an a.out core dump - */ -static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) -{ - struct task_struct *tsk = current; - - dump->magic = CMAGIC; - dump->start_code = tsk->mm->start_code; - dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1); - - dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT; - dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT; - dump->u_ssize = 0; - - memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg)); - - if (dump->start_stack < 0x04000000) - dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT; - - dump->regs = *regs; - dump->u_fpvalid = dump_fpu (regs, &dump->u_fp); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_A_OUT_CORE_H */ diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 06e7d509eaac..413f3876341c 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -54,7 +54,6 @@ struct thread_struct { #define start_thread(regs,pc,sp) \ ({ \ - unsigned long *stack = (unsigned long *)sp; \ memset(regs->uregs, 0, sizeof(regs->uregs)); \ if (current->personality & ADDR_LIMIT_32BIT) \ regs->ARM_cpsr = USR_MODE; \ @@ -65,9 +64,6 @@ struct thread_struct { regs->ARM_cpsr |= PSR_ENDSTATE; \ regs->ARM_pc = pc & ~1; /* pc */ \ regs->ARM_sp = sp; /* sp */ \ - regs->ARM_r2 = stack[2]; /* r2 (envp) */ \ - regs->ARM_r1 = stack[1]; /* r1 (argv) */ \ - regs->ARM_r0 = stack[0]; /* r0 (argc) */ \ nommu_start_thread(regs); \ }) diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 47bcb2d254af..18d76fd5a2af 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -1,7 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -header-y += a.out.h header-y += byteorder.h header-y += fcntl.h header-y += hwcap.h diff --git a/arch/arm/include/uapi/asm/a.out.h b/arch/arm/include/uapi/asm/a.out.h deleted file mode 100644 index 083894b2e3bc..000000000000 --- a/arch/arm/include/uapi/asm/a.out.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __ARM_A_OUT_H__ -#define __ARM_A_OUT_H__ - -#include -#include - -struct exec -{ - __u32 a_info; /* Use macros N_MAGIC, etc for access */ - __u32 a_text; /* length of text, in bytes */ - __u32 a_data; /* length of data, in bytes */ - __u32 a_bss; /* length of uninitialized data area for file, in bytes */ - __u32 a_syms; /* length of symbol table data in file, in bytes */ - __u32 a_entry; /* start address */ - __u32 a_trsize; /* length of relocation info for text, in bytes */ - __u32 a_drsize; /* length of relocation info for data, in bytes */ -}; - -/* - * This is always the same - */ -#define N_TXTADDR(a) (0x00008000) - -#define N_TRSIZE(a) ((a).a_trsize) -#define N_DRSIZE(a) ((a).a_drsize) -#define N_SYMSIZE(a) ((a).a_syms) - -#define M_ARM 103 - -#ifndef LIBRARY_START_TEXT -#define LIBRARY_START_TEXT (0x00c00000) -#endif - -#endif /* __A_OUT_GNU_H__ */ From 027798707afb90e6f08144955813ba575743b04d Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Wed, 24 Jul 2013 20:13:21 -0500 Subject: [PATCH 0390/1368] powerpc: VPHN topology change updates all siblings commit 3be7db6ab45b21345386d1a466da133b19cde5e4 upstream. When an associativity level change is found for one thread, the siblings threads need to be updated as well. This is done today for PRRN in stage_topology_update() but is missing for VPHN in update_cpu_associativity_changes_mask(). This patch will correctly update all thread siblings during a topology change. Without this patch a topology update can result in a CPU in init_sched_groups_power() getting stuck indefinitely in a loop. This loop is built in build_sched_groups(). As a result of the thread moving to a node separate from its siblings the struct sched_group will have its next pointer set to point to itself rather than the sched_group struct of the next thread. This happens because we have a domain without the SD_OVERLAP flag, which is correct, and a topology that doesn't conform with reality (threads on the same core assigned to different numa nodes). When this list is traversed by init_sched_groups_power() it will reach the thread's sched_group structure and loop indefinitely; the cpu will be stuck at this point. The bug was exposed when VPHN was enabled in commit b7abef0 (v3.9). Reported-by: Jan Stancek Signed-off-by: Robert Jennings Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/smp.h | 4 +++ arch/powerpc/mm/numa.c | 57 +++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index ffbaabebcdca..48cfc858abd6 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -145,6 +145,10 @@ extern void __cpu_die(unsigned int cpu); #define smp_setup_cpu_maps() static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} +static inline const struct cpumask *cpu_sibling_mask(int cpu) +{ + return cpumask_of(cpu); +} #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2859a1f52279..cafad4017765 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1319,7 +1320,8 @@ static int update_cpu_associativity_changes_mask(void) } } if (changed) { - cpumask_set_cpu(cpu, changes); + cpumask_or(changes, changes, cpu_sibling_mask(cpu)); + cpu = cpu_last_thread_sibling(cpu); } } @@ -1427,7 +1429,7 @@ static int update_cpu_topology(void *data) if (!data) return -EINVAL; - cpu = get_cpu(); + cpu = smp_processor_id(); for (update = data; update; update = update->next) { if (cpu != update->cpu) @@ -1447,12 +1449,12 @@ static int update_cpu_topology(void *data) */ int arch_update_cpu_topology(void) { - unsigned int cpu, changed = 0; + unsigned int cpu, sibling, changed = 0; struct topology_update_data *updates, *ud; unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; cpumask_t updated_cpus; struct device *dev; - int weight, i = 0; + int weight, new_nid, i = 0; weight = cpumask_weight(&cpu_associativity_changes_mask); if (!weight) @@ -1465,19 +1467,46 @@ int arch_update_cpu_topology(void) cpumask_clear(&updated_cpus); for_each_cpu(cpu, &cpu_associativity_changes_mask) { - ud = &updates[i++]; - ud->cpu = cpu; + /* + * If siblings aren't flagged for changes, updates list + * will be too short. Skip on this update and set for next + * update. + */ + if (!cpumask_subset(cpu_sibling_mask(cpu), + &cpu_associativity_changes_mask)) { + pr_info("Sibling bits not set for associativity " + "change, cpu%d\n", cpu); + cpumask_or(&cpu_associativity_changes_mask, + &cpu_associativity_changes_mask, + cpu_sibling_mask(cpu)); + cpu = cpu_last_thread_sibling(cpu); + continue; + } + + /* Use associativity from first thread for all siblings */ vphn_get_associativity(cpu, associativity); - ud->new_nid = associativity_to_nid(associativity); + new_nid = associativity_to_nid(associativity); + if (new_nid < 0 || !node_online(new_nid)) + new_nid = first_online_node; - if (ud->new_nid < 0 || !node_online(ud->new_nid)) - ud->new_nid = first_online_node; + if (new_nid == numa_cpu_lookup_table[cpu]) { + cpumask_andnot(&cpu_associativity_changes_mask, + &cpu_associativity_changes_mask, + cpu_sibling_mask(cpu)); + cpu = cpu_last_thread_sibling(cpu); + continue; + } - ud->old_nid = numa_cpu_lookup_table[cpu]; - cpumask_set_cpu(cpu, &updated_cpus); - - if (i < weight) - ud->next = &updates[i]; + for_each_cpu(sibling, cpu_sibling_mask(cpu)) { + ud = &updates[i++]; + ud->cpu = sibling; + ud->new_nid = new_nid; + ud->old_nid = numa_cpu_lookup_table[sibling]; + cpumask_set_cpu(sibling, &updated_cpus); + if (i < weight) + ud->next = &updates[i]; + } + cpu = cpu_last_thread_sibling(cpu); } stop_machine(update_cpu_topology, &updates[0], &updated_cpus); From 6a3f7e9ec5c41da18fa1eec3cb9a2d3f2e5e14e3 Mon Sep 17 00:00:00 2001 From: Alex Ivanov Date: Wed, 10 Jul 2013 21:14:55 +0200 Subject: [PATCH 0391/1368] parisc: agp/parisc-agp: allow binding of user memory to the AGP GART MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 06f0cce43a32bd2357cea1d8733bba48693d556b upstream. Allow binding of user memory to the AGP GART on systems with HP Quicksilver AGP bus. This resolves 'bind memory failed' error seen in dmesg: [29.365973] [TTM] AGP Bind memory failed. … [29.367030] [drm] Forcing AGP to PCI mode The system doesn't more fail to bind the memory, and hence not falling back to the PCI mode (if other failures aren't detected). This is just a simple write down from the following patches: agp/amd-k7: Allow binding user memory to the AGP GART agp/hp-agp: Allow binding user memory to the AGP GART Signed-off-by: Alex Ivanov Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/char/agp/parisc-agp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index 94821ab01c6d..9576fad5d71c 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -129,7 +129,8 @@ parisc_agp_insert_memory(struct agp_memory *mem, off_t pg_start, int type) off_t j, io_pg_start; int io_pg_count; - if (type != 0 || mem->type != 0) { + if (type != mem->type || + agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) { return -EINVAL; } @@ -175,7 +176,8 @@ parisc_agp_remove_memory(struct agp_memory *mem, off_t pg_start, int type) struct _parisc_agp_info *info = &parisc_agp_info; int i, io_pg_start, io_pg_count; - if (type != 0 || mem->type != 0) { + if (type != mem->type || + agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) { return -EINVAL; } From b81ed4057ca3d8a069b1d7f7919263b31776d208 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 23 Jul 2013 12:27:52 -0400 Subject: [PATCH 0392/1368] parisc: Fix cache routines to ignore vma's with an invalid pfn commit 50861f5a02dbf939c27d35a26c472885e2844188 upstream. The parisc architecture does not have a pte special bit. As a result, special mappings are handled with the VM_PFNMAP and VM_MIXEDMAP flags. VM_MIXEDMAP mappings may or may not have a "struct page" backing. When pfn_valid() is false, there is no "struct page" backing. Otherwise, they are treated as normal pages. The FireGL driver uses the VM_MIXEDMAP without a backing "struct page". This treatment caused a panic due to a TLB data miss in update_mmu_cache. This appeared to be in the code generated for page_address(). We were in fact using a very circular bit of code to determine the physical address of the PFN in various cache routines. This wasn't valid when there was no "struct page" backing. The needed address can in fact be determined simply from the PFN itself without using the "struct page". The attached patch updates update_mmu_cache(), flush_cache_mm(), flush_cache_range() and flush_cache_page() to check pfn_valid() and to directly compute the PFN physical and virtual addresses. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/cache.c | 133 +++++++++++++++++++------------------ 1 file changed, 70 insertions(+), 63 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 2e65aa54bd10..c035673209f7 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -71,18 +71,27 @@ flush_cache_all_local(void) } EXPORT_SYMBOL(flush_cache_all_local); +/* Virtual address of pfn. */ +#define pfn_va(pfn) __va(PFN_PHYS(pfn)) + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - struct page *page = pte_page(*ptep); + unsigned long pfn = pte_pfn(*ptep); + struct page *page; - if (pfn_valid(page_to_pfn(page)) && page_mapping(page) && - test_bit(PG_dcache_dirty, &page->flags)) { + /* We don't have pte special. As a result, we can be called with + an invalid pfn and we don't need to flush the kernel dcache page. + This occurs with FireGL card in C8000. */ + if (!pfn_valid(pfn)) + return; - flush_kernel_dcache_page(page); + page = pfn_to_page(pfn); + if (page_mapping(page) && test_bit(PG_dcache_dirty, &page->flags)) { + flush_kernel_dcache_page_addr(pfn_va(pfn)); clear_bit(PG_dcache_dirty, &page->flags); } else if (parisc_requires_coherency()) - flush_kernel_dcache_page(page); + flush_kernel_dcache_page_addr(pfn_va(pfn)); } void @@ -495,44 +504,42 @@ static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr) void flush_cache_mm(struct mm_struct *mm) { + struct vm_area_struct *vma; + pgd_t *pgd; + /* Flushing the whole cache on each cpu takes forever on rp3440, etc. So, avoid it if the mm isn't too big. */ - if (mm_total_size(mm) < parisc_cache_flush_threshold) { - struct vm_area_struct *vma; + if (mm_total_size(mm) >= parisc_cache_flush_threshold) { + flush_cache_all(); + return; + } - if (mm->context == mfsp(3)) { - for (vma = mm->mmap; vma; vma = vma->vm_next) { - flush_user_dcache_range_asm(vma->vm_start, - vma->vm_end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm( - vma->vm_start, vma->vm_end); - } - } else { - pgd_t *pgd = mm->pgd; - - for (vma = mm->mmap; vma; vma = vma->vm_next) { - unsigned long addr; - - for (addr = vma->vm_start; addr < vma->vm_end; - addr += PAGE_SIZE) { - pte_t *ptep = get_ptep(pgd, addr); - if (ptep != NULL) { - pte_t pte = *ptep; - __flush_cache_page(vma, addr, - page_to_phys(pte_page(pte))); - } - } - } + if (mm->context == mfsp(3)) { + for (vma = mm->mmap; vma; vma = vma->vm_next) { + flush_user_dcache_range_asm(vma->vm_start, vma->vm_end); + if ((vma->vm_flags & VM_EXEC) == 0) + continue; + flush_user_icache_range_asm(vma->vm_start, vma->vm_end); } return; } -#ifdef CONFIG_SMP - flush_cache_all(); -#else - flush_cache_all_local(); -#endif + pgd = mm->pgd; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long addr; + + for (addr = vma->vm_start; addr < vma->vm_end; + addr += PAGE_SIZE) { + unsigned long pfn; + pte_t *ptep = get_ptep(pgd, addr); + if (!ptep) + continue; + pfn = pte_pfn(*ptep); + if (!pfn_valid(pfn)) + continue; + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + } + } } void @@ -556,33 +563,32 @@ flush_user_icache_range(unsigned long start, unsigned long end) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + unsigned long addr; + pgd_t *pgd; + BUG_ON(!vma->vm_mm->context); - if ((end - start) < parisc_cache_flush_threshold) { - if (vma->vm_mm->context == mfsp(3)) { - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - } else { - unsigned long addr; - pgd_t *pgd = vma->vm_mm->pgd; - - for (addr = start & PAGE_MASK; addr < end; - addr += PAGE_SIZE) { - pte_t *ptep = get_ptep(pgd, addr); - if (ptep != NULL) { - pte_t pte = *ptep; - flush_cache_page(vma, - addr, pte_pfn(pte)); - } - } - } - } else { -#ifdef CONFIG_SMP + if ((end - start) >= parisc_cache_flush_threshold) { flush_cache_all(); -#else - flush_cache_all_local(); -#endif + return; + } + + if (vma->vm_mm->context == mfsp(3)) { + flush_user_dcache_range_asm(start, end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(start, end); + return; + } + + pgd = vma->vm_mm->pgd; + for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { + unsigned long pfn; + pte_t *ptep = get_ptep(pgd, addr); + if (!ptep) + continue; + pfn = pte_pfn(*ptep); + if (pfn_valid(pfn)) + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); } } @@ -591,9 +597,10 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long { BUG_ON(!vma->vm_mm->context); - flush_tlb_page(vma, vmaddr); - __flush_cache_page(vma, vmaddr, page_to_phys(pfn_to_page(pfn))); - + if (pfn_valid(pfn)) { + flush_tlb_page(vma, vmaddr); + __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + } } #ifdef CONFIG_PARISC_TMPALIAS From 8a08a2ffd5719ed580862a2e525b8615a8b56a6d Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 30 Jul 2013 02:02:16 +0200 Subject: [PATCH 0393/1368] parisc: Fix interrupt routing for C8000 serial ports commit dd5e6d6a3db09b16b7c222943977865eead88cc3 upstream. We can't use dev->mod_index for selecting the interrupt routing entry, because it's not an index into interrupt routing table. It will be even wrong on a machine with 2 CPUs (4 cores). But all needed information is contained in the PAT entries for the serial ports. mod[0] contains the iosapic address and mod_info has some indications for the interrupt input (at least it looks like it). This patch implements the searching for the right iosapic and uses this interrupt input information. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/parisc-device.h | 3 ++ arch/parisc/kernel/inventory.c | 1 + drivers/parisc/iosapic.c | 38 ++++++++++++++++++------- drivers/tty/serial/8250/8250_gsc.c | 3 +- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h index 9afdad6c2ffb..eaf4dc1c7294 100644 --- a/arch/parisc/include/asm/parisc-device.h +++ b/arch/parisc/include/asm/parisc-device.h @@ -23,6 +23,7 @@ struct parisc_device { /* generic info returned from pdc_pat_cell_module() */ unsigned long mod_info; /* PAT specific - Misc Module info */ unsigned long pmod_loc; /* physical Module location */ + unsigned long mod0; #endif u64 dma_mask; /* DMA mask for I/O */ struct device dev; @@ -61,4 +62,6 @@ parisc_get_drvdata(struct parisc_device *d) extern struct bus_type parisc_bus_type; +int iosapic_serial_irq(struct parisc_device *dev); + #endif /*_ASM_PARISC_PARISC_DEVICE_H_*/ diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c index 3295ef4a185d..f0b6722fc706 100644 --- a/arch/parisc/kernel/inventory.c +++ b/arch/parisc/kernel/inventory.c @@ -211,6 +211,7 @@ pat_query_module(ulong pcell_loc, ulong mod_index) /* REVISIT: who is the consumer of this? not sure yet... */ dev->mod_info = pa_pdc_cell->mod_info; /* pass to PAT_GET_ENTITY() */ dev->pmod_loc = pa_pdc_cell->mod_location; + dev->mod0 = pa_pdc_cell->mod[0]; register_parisc_device(dev); /* advertise device */ diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index e79e006eb9ab..9ee04b4b68bf 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -811,18 +811,28 @@ int iosapic_fixup_irq(void *isi_obj, struct pci_dev *pcidev) return pcidev->irq; } -static struct iosapic_info *first_isi = NULL; +static struct iosapic_info *iosapic_list; #ifdef CONFIG_64BIT -int iosapic_serial_irq(int num) +int iosapic_serial_irq(struct parisc_device *dev) { - struct iosapic_info *isi = first_isi; - struct irt_entry *irte = NULL; /* only used if PAT PDC */ + struct iosapic_info *isi; + struct irt_entry *irte; struct vector_info *vi; - int isi_line; /* line used by device */ + int cnt; + int intin; + + intin = (dev->mod_info >> 24) & 15; /* lookup IRT entry for isi/slot/pin set */ - irte = &irt_cell[num]; + for (cnt = 0; cnt < irt_num_entry; cnt++) { + irte = &irt_cell[cnt]; + if (COMPARE_IRTE_ADDR(irte, dev->mod0) && + irte->dest_iosapic_intin == intin) + break; + } + if (cnt >= irt_num_entry) + return 0; /* no irq found, force polling */ DBG_IRT("iosapic_serial_irq(): irte %p %x %x %x %x %x %x %x %x\n", irte, @@ -834,11 +844,17 @@ int iosapic_serial_irq(int num) irte->src_seg_id, irte->dest_iosapic_intin, (u32) irte->dest_iosapic_addr); - isi_line = irte->dest_iosapic_intin; + + /* search for iosapic */ + for (isi = iosapic_list; isi; isi = isi->isi_next) + if (isi->isi_hpa == dev->mod0) + break; + if (!isi) + return 0; /* no iosapic found, force polling */ /* get vector info for this input line */ - vi = isi->isi_vector + isi_line; - DBG_IRT("iosapic_serial_irq: line %d vi 0x%p\n", isi_line, vi); + vi = isi->isi_vector + intin; + DBG_IRT("iosapic_serial_irq: line %d vi 0x%p\n", iosapic_intin, vi); /* If this IRQ line has already been setup, skip it */ if (vi->irte) @@ -941,8 +957,8 @@ void *iosapic_register(unsigned long hpa) vip->irqline = (unsigned char) cnt; vip->iosapic = isi; } - if (!first_isi) - first_isi = isi; + isi->isi_next = iosapic_list; + iosapic_list = isi; return isi; } diff --git a/drivers/tty/serial/8250/8250_gsc.c b/drivers/tty/serial/8250/8250_gsc.c index bb91b4713ebd..2e3ea1a70d7b 100644 --- a/drivers/tty/serial/8250/8250_gsc.c +++ b/drivers/tty/serial/8250/8250_gsc.c @@ -31,9 +31,8 @@ static int __init serial_init_chip(struct parisc_device *dev) int err; #ifdef CONFIG_64BIT - extern int iosapic_serial_irq(int cellnum); if (!dev->irq && (dev->id.sversion == 0xad)) - dev->irq = iosapic_serial_irq(dev->mod_index-1); + dev->irq = iosapic_serial_irq(dev); #endif if (!dev->irq) { From 69325d97cb47677902ce3e873aeb1fe4abe7830f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 30 Jul 2013 17:14:34 -0400 Subject: [PATCH 0394/1368] hwmon: (max6697) fix MAX6581 ideality commit 5c52add19733eb36d8619713312f5604efef3502 upstream. Without this patch, the values for ideality (register 0x4b) and ideality selection mask (register 0x4c) are inverted. Signed-off-by: Vivien Didelot Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/max6697.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c index 328fb0353c17..a41b5f3fc506 100644 --- a/drivers/hwmon/max6697.c +++ b/drivers/hwmon/max6697.c @@ -605,12 +605,12 @@ static int max6697_init_chip(struct i2c_client *client) if (ret < 0) return ret; ret = i2c_smbus_write_byte_data(client, MAX6581_REG_IDEALITY, - pdata->ideality_mask >> 1); + pdata->ideality_value); if (ret < 0) return ret; ret = i2c_smbus_write_byte_data(client, MAX6581_REG_IDEALITY_SELECT, - pdata->ideality_value); + pdata->ideality_mask >> 1); if (ret < 0) return ret; } From c6bffb8ffe2b9e42b09f4960cc623cecf2e9d778 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 1 Aug 2013 08:38:27 +0200 Subject: [PATCH 0395/1368] ALSA: hda - Fix missing fixup for Mac Mini with STAC9221 commit 697aebab78a88c6b164cfb74d19b86817d2ccd82 upstream. A fixup for Apple Mac Mini was lost during the adaption to the generic parser because the fallback for the generic ID 8384:7680 was dropped, and it resulted in the silence output (and maybe other problems). Unfortunately, just adding the missing subsystem ID wasn't enough, in this case. The subsystem ID of this machine is 0000:0100 (what Apple thought...?), and since snd_hda_pick_fixup() doesn't take the vendor id zero into account, the driver ignored this entry. Now it's fixed to regard the vendor id zero as a valid value. Reported-and-tested-by: Linus Torvalds Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_auto_parser.c | 2 +- sound/pci/hda/patch_sigmatel.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 7c11d46b84d3..48a9d004d6d9 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -860,7 +860,7 @@ void snd_hda_pick_fixup(struct hda_codec *codec, } } if (id < 0 && quirk) { - for (q = quirk; q->subvendor; q++) { + for (q = quirk; q->subvendor || q->subdevice; q++) { unsigned int vendorid = q->subdevice | (q->subvendor << 16); unsigned int mask = 0xffff0000 | q->subdevice_mask; diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index e849e1e0d7d9..dc4833f47a2b 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -2815,6 +2815,7 @@ static const struct hda_pintbl ecs202_pin_configs[] = { /* codec SSIDs for Intel Mac sharing the same PCI SSID 8384:7680 */ static const struct snd_pci_quirk stac922x_intel_mac_fixup_tbl[] = { + SND_PCI_QUIRK(0x0000, 0x0100, "Mac Mini", STAC_INTEL_MAC_V3), SND_PCI_QUIRK(0x106b, 0x0800, "Mac", STAC_INTEL_MAC_V1), SND_PCI_QUIRK(0x106b, 0x0600, "Mac", STAC_INTEL_MAC_V2), SND_PCI_QUIRK(0x106b, 0x0700, "Mac", STAC_INTEL_MAC_V2), From d54d012b636fe58770c4a58eda41de13fac40a0e Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 29 Jul 2013 15:10:22 +0530 Subject: [PATCH 0396/1368] ALSA: compress: fix the return value for SNDRV_COMPRESS_VERSION commit a8d30608eaed6cc759b8e2e8a8bbbb42591f797f upstream. the return value of SNDRV_COMPRESS_VERSION always return default -ENOTTY as the return value was never updated for this call assign return value from put_user() Reported-by: Haynes Signed-off-by: Vinod Koul Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/compress_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 99db892d7299..98969541cbcc 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -743,7 +743,7 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg) mutex_lock(&stream->device->lock); switch (_IOC_NR(cmd)) { case _IOC_NR(SNDRV_COMPRESS_IOCTL_VERSION): - put_user(SNDRV_COMPRESS_VERSION, + retval = put_user(SNDRV_COMPRESS_VERSION, (int __user *)arg) ? -EFAULT : 0; break; case _IOC_NR(SNDRV_COMPRESS_GET_CAPS): From 15406011c343c3062cb2aa50b6173dfd87bb951b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 4 Jul 2013 11:28:51 +0200 Subject: [PATCH 0397/1368] serial/mxs-auart: fix race condition in interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d970d7fe65adff5efe75b4a73c4ffc9be57089f7 upstream. The handler needs to ack the pending events before actually handling them. Otherwise a new event might come in after it it considered non-pending or handled and is acked then without being handled. So this event is only noticed when the next interrupt happens. Without this patch an i.MX28 based machine running an rt-patched kernel regularly hangs during boot. Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mxs-auart.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 4f5f161896a1..3de0fb2712f5 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -678,11 +678,18 @@ static void mxs_auart_settermios(struct uart_port *u, static irqreturn_t mxs_auart_irq_handle(int irq, void *context) { - u32 istatus, istat; + u32 istat; struct mxs_auart_port *s = context; u32 stat = readl(s->port.membase + AUART_STAT); - istatus = istat = readl(s->port.membase + AUART_INTR); + istat = readl(s->port.membase + AUART_INTR); + + /* ack irq */ + writel(istat & (AUART_INTR_RTIS + | AUART_INTR_TXIS + | AUART_INTR_RXIS + | AUART_INTR_CTSMIS), + s->port.membase + AUART_INTR_CLR); if (istat & AUART_INTR_CTSMIS) { uart_handle_cts_change(&s->port, stat & AUART_STAT_CTS); @@ -702,12 +709,6 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context) istat &= ~AUART_INTR_TXIS; } - writel(istatus & (AUART_INTR_RTIS - | AUART_INTR_TXIS - | AUART_INTR_RXIS - | AUART_INTR_CTSMIS), - s->port.membase + AUART_INTR_CLR); - return IRQ_HANDLED; } From 9ce440addcbede83e6eb6ad944eef8a894883552 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 21 Jul 2013 10:14:15 +0800 Subject: [PATCH 0398/1368] serial: arc_uart: Fix module alias commit d5a12ea7a9e58d9e5c19d25cb668aadb396423ec upstream. Platform drivers use "platform:" prefix in module alias. Also use DRIVER_NAME in MODULE_ALIAS to make module autoloading work. Signed-off-by: Axel Lin Acked-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/arc_uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c index cbf1d155b7b2..22f280aa4f2c 100644 --- a/drivers/tty/serial/arc_uart.c +++ b/drivers/tty/serial/arc_uart.c @@ -773,6 +773,6 @@ module_init(arc_serial_init); module_exit(arc_serial_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS("plat-arcfpga/uart"); +MODULE_ALIAS("platform:" DRIVER_NAME); MODULE_AUTHOR("Vineet Gupta"); MODULE_DESCRIPTION("ARC(Synopsys) On-Chip(fpga) serial driver"); From 17e94a63470ddbd191bcc386f55f62987b58f912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 28 Jun 2013 11:49:41 +0200 Subject: [PATCH 0399/1368] serial/mxs-auart: increase time to wait for transmitter to become idle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 079a036f4283e2b0e5c26080b8c5112bc0cc1831 upstream. Without this patch the driver waits ~1 ms for the UART to become idle. At 115200n8 this time is (theoretically) enough to transfer 11.5 characters (= 115200 bits/s / (10 Bits/char) * 1ms). As the mxs-auart has a fifo size of 16 characters the clock is gated too early. The problem is worse for lower baud rates. This only happens to really shut down the transmitter in the middle of a transfer if /dev/ttyAPPx isn't opened in userspace (e.g. by a getty) but was at least once (because the bootloader doesn't disable the transmitter). So increase the timeout to 20 ms which should be enough for 9600n8, too. Moreover skip gating the clock if the timeout is elapsed. Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mxs-auart.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 3de0fb2712f5..f85b8e6d0346 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -851,7 +851,7 @@ auart_console_write(struct console *co, const char *str, unsigned int count) struct mxs_auart_port *s; struct uart_port *port; unsigned int old_ctrl0, old_ctrl2; - unsigned int to = 1000; + unsigned int to = 20000; if (co->index >= MXS_AUART_PORTS || co->index < 0) return; @@ -872,18 +872,23 @@ auart_console_write(struct console *co, const char *str, unsigned int count) uart_console_write(port, str, count, mxs_auart_console_putchar); - /* - * Finally, wait for transmitter to become empty - * and restore the TCR - */ + /* Finally, wait for transmitter to become empty ... */ while (readl(port->membase + AUART_STAT) & AUART_STAT_BUSY) { + udelay(1); if (!to--) break; - udelay(1); } - writel(old_ctrl0, port->membase + AUART_CTRL0); - writel(old_ctrl2, port->membase + AUART_CTRL2); + /* + * ... and restore the TCR if we waited long enough for the transmitter + * to be idle. This might keep the transmitter enabled although it is + * unused, but that is better than to disable it while it is still + * transmitting. + */ + if (!(readl(port->membase + AUART_STAT) & AUART_STAT_BUSY)) { + writel(old_ctrl0, port->membase + AUART_CTRL0); + writel(old_ctrl2, port->membase + AUART_CTRL2); + } clk_disable(s->clk); } From 9b0f8f01765acdf7c84078b63d2e34c2b2dcafa8 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 23 Jul 2013 10:24:50 +0200 Subject: [PATCH 0400/1368] dma: pl330: Fix cyclic transfers commit fc51446021f42aca8906e701fc2292965aafcb15 upstream. Allocate a descriptor for each period of a cyclic transfer, not just the first. Also since the callback needs to be called for each finished period make sure to initialize the callback and callback_param fields of each descriptor in a cyclic transfer. Signed-off-by: Lars-Peter Clausen Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 93 ++++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 26 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 7ec82f0667eb..4c2f465be339 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2527,6 +2527,10 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) /* Assign cookies to all nodes */ while (!list_empty(&last->node)) { desc = list_entry(last->node.next, struct dma_pl330_desc, node); + if (pch->cyclic) { + desc->txd.callback = last->txd.callback; + desc->txd.callback_param = last->txd.callback_param; + } dma_cookie_assign(&desc->txd); @@ -2710,45 +2714,82 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( size_t period_len, enum dma_transfer_direction direction, unsigned long flags, void *context) { - struct dma_pl330_desc *desc; + struct dma_pl330_desc *desc = NULL, *first = NULL; struct dma_pl330_chan *pch = to_pchan(chan); + struct dma_pl330_dmac *pdmac = pch->dmac; + unsigned int i; dma_addr_t dst; dma_addr_t src; - desc = pl330_get_desc(pch); - if (!desc) { - dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n", - __func__, __LINE__); + if (len % period_len != 0) return NULL; - } - switch (direction) { - case DMA_MEM_TO_DEV: - desc->rqcfg.src_inc = 1; - desc->rqcfg.dst_inc = 0; - desc->req.rqtype = MEMTODEV; - src = dma_addr; - dst = pch->fifo_addr; - break; - case DMA_DEV_TO_MEM: - desc->rqcfg.src_inc = 0; - desc->rqcfg.dst_inc = 1; - desc->req.rqtype = DEVTOMEM; - src = pch->fifo_addr; - dst = dma_addr; - break; - default: + if (!is_slave_direction(direction)) { dev_err(pch->dmac->pif.dev, "%s:%d Invalid dma direction\n", __func__, __LINE__); return NULL; } - desc->rqcfg.brst_size = pch->burst_sz; - desc->rqcfg.brst_len = 1; + for (i = 0; i < len / period_len; i++) { + desc = pl330_get_desc(pch); + if (!desc) { + dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n", + __func__, __LINE__); + + if (!first) + return NULL; + + spin_lock_irqsave(&pdmac->pool_lock, flags); + + while (!list_empty(&first->node)) { + desc = list_entry(first->node.next, + struct dma_pl330_desc, node); + list_move_tail(&desc->node, &pdmac->desc_pool); + } + + list_move_tail(&first->node, &pdmac->desc_pool); + + spin_unlock_irqrestore(&pdmac->pool_lock, flags); + + return NULL; + } + + switch (direction) { + case DMA_MEM_TO_DEV: + desc->rqcfg.src_inc = 1; + desc->rqcfg.dst_inc = 0; + desc->req.rqtype = MEMTODEV; + src = dma_addr; + dst = pch->fifo_addr; + break; + case DMA_DEV_TO_MEM: + desc->rqcfg.src_inc = 0; + desc->rqcfg.dst_inc = 1; + desc->req.rqtype = DEVTOMEM; + src = pch->fifo_addr; + dst = dma_addr; + break; + default: + break; + } + + desc->rqcfg.brst_size = pch->burst_sz; + desc->rqcfg.brst_len = 1; + fill_px(&desc->px, dst, src, period_len); + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->node); + + dma_addr += period_len; + } + + if (!desc) + return NULL; pch->cyclic = true; - - fill_px(&desc->px, dst, src, period_len); + desc->txd.flags = flags; return &desc->txd; } From d614a64a0193ecc3c8d19774e9143173ad11af12 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Jul 2013 11:55:17 +0200 Subject: [PATCH 0401/1368] USB: mos7840: fix race in register handling commit d8a083cc746664916d9d36ed9e4d08a29525f245 upstream. Fix race in mos7840_get_reg which unconditionally manipulated the control urb (which may already be in use) by adding a control-urb busy flag. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 62b86a685082..4bf9223f9244 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -183,6 +183,10 @@ #define LED_ON_MS 500 #define LED_OFF_MS 500 +enum mos7840_flag { + MOS7840_FLAG_CTRL_BUSY, +}; + static int device_type; static const struct usb_device_id id_table[] = { @@ -241,6 +245,8 @@ struct moschip_port { bool led_flag; struct timer_list led_timer1; /* Timer for LED on */ struct timer_list led_timer2; /* Timer for LED off */ + + unsigned long flags; }; /* @@ -467,10 +473,10 @@ static void mos7840_control_callback(struct urb *urb) case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); - return; + goto out; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); - return; + goto out; } dev_dbg(dev, "%s urb buffer size is %d\n", __func__, urb->actual_length); @@ -483,6 +489,8 @@ static void mos7840_control_callback(struct urb *urb) mos7840_handle_new_msr(mos7840_port, regval); else if (mos7840_port->MsrLsr == 1) mos7840_handle_new_lsr(mos7840_port, regval); +out: + clear_bit_unlock(MOS7840_FLAG_CTRL_BUSY, &mos7840_port->flags); } static int mos7840_get_reg(struct moschip_port *mcs, __u16 Wval, __u16 reg, @@ -493,6 +501,9 @@ static int mos7840_get_reg(struct moschip_port *mcs, __u16 Wval, __u16 reg, unsigned char *buffer = mcs->ctrl_buf; int ret; + if (test_and_set_bit_lock(MOS7840_FLAG_CTRL_BUSY, &mcs->flags)) + return -EBUSY; + dr->bRequestType = MCS_RD_RTYPE; dr->bRequest = MCS_RDREQ; dr->wValue = cpu_to_le16(Wval); /* 0 */ @@ -504,6 +515,9 @@ static int mos7840_get_reg(struct moschip_port *mcs, __u16 Wval, __u16 reg, mos7840_control_callback, mcs); mcs->control_urb->transfer_buffer_length = 2; ret = usb_submit_urb(mcs->control_urb, GFP_ATOMIC); + if (ret) + clear_bit_unlock(MOS7840_FLAG_CTRL_BUSY, &mcs->flags); + return ret; } From ed183fba4ecbc327570cd0655916cebaae918dcd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Jul 2013 11:55:18 +0200 Subject: [PATCH 0402/1368] USB: mos7840: fix device-type detection commit 40c24f2893ba0ba7df485871f6aac0c197ceef5b upstream. Fix race in device-type detection introduced by commit 0eafe4de ("USB: serial: mos7840: add support for MCS7810 devices") which used a static variable to hold the device type. Move type detection to probe and use serial data to store the device type. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 75 +++++++++++++++++------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 4bf9223f9244..5f3b1ba38241 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -187,8 +187,6 @@ enum mos7840_flag { MOS7840_FLAG_CTRL_BUSY, }; -static int device_type; - static const struct usb_device_id id_table[] = { {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)}, {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)}, @@ -839,18 +837,6 @@ static void mos7840_bulk_out_data_callback(struct urb *urb) /************************************************************************/ /* D R I V E R T T Y I N T E R F A C E F U N C T I O N S */ /************************************************************************/ -#ifdef MCSSerialProbe -static int mos7840_serial_probe(struct usb_serial *serial, - const struct usb_device_id *id) -{ - - /*need to implement the mode_reg reading and updating\ - structures usb_serial_ device_type\ - (i.e num_ports, num_bulkin,bulkout etc) */ - /* Also we can update the changes attach */ - return 1; -} -#endif /***************************************************************************** * mos7840_open @@ -2216,38 +2202,48 @@ static int mos7810_check(struct usb_serial *serial) return 0; } -static int mos7840_calc_num_ports(struct usb_serial *serial) +static int mos7840_probe(struct usb_serial *serial, + const struct usb_device_id *id) { - __u16 data = 0x00; + u16 product = serial->dev->descriptor.idProduct; u8 *buf; - int mos7840_num_ports; + int device_type; + + if (product == MOSCHIP_DEVICE_ID_7810 || + product == MOSCHIP_DEVICE_ID_7820) { + device_type = product; + goto out; + } buf = kzalloc(VENDOR_READ_LENGTH, GFP_KERNEL); - if (buf) { - usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), + if (!buf) + return -ENOMEM; + + usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), MCS_RDREQ, MCS_RD_RTYPE, 0, GPIO_REGISTER, buf, VENDOR_READ_LENGTH, MOS_WDR_TIMEOUT); - data = *buf; - kfree(buf); - } - if (serial->dev->descriptor.idProduct == MOSCHIP_DEVICE_ID_7810 || - serial->dev->descriptor.idProduct == MOSCHIP_DEVICE_ID_7820) { - device_type = serial->dev->descriptor.idProduct; - } else { - /* For a MCS7840 device GPIO0 must be set to 1 */ - if ((data & 0x01) == 1) - device_type = MOSCHIP_DEVICE_ID_7840; - else if (mos7810_check(serial)) - device_type = MOSCHIP_DEVICE_ID_7810; - else - device_type = MOSCHIP_DEVICE_ID_7820; - } + /* For a MCS7840 device GPIO0 must be set to 1 */ + if (buf[0] & 0x01) + device_type = MOSCHIP_DEVICE_ID_7840; + else if (mos7810_check(serial)) + device_type = MOSCHIP_DEVICE_ID_7810; + else + device_type = MOSCHIP_DEVICE_ID_7820; + + kfree(buf); +out: + usb_set_serial_data(serial, (void *)device_type); + + return 0; +} + +static int mos7840_calc_num_ports(struct usb_serial *serial) +{ + int device_type = (int)usb_get_serial_data(serial); + int mos7840_num_ports; mos7840_num_ports = (device_type >> 4) & 0x000F; - serial->num_bulk_in = mos7840_num_ports; - serial->num_bulk_out = mos7840_num_ports; - serial->num_ports = mos7840_num_ports; return mos7840_num_ports; } @@ -2255,6 +2251,7 @@ static int mos7840_calc_num_ports(struct usb_serial *serial) static int mos7840_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; + int device_type = (int)usb_get_serial_data(serial); struct moschip_port *mos7840_port; int status; int pnum; @@ -2513,9 +2510,7 @@ static struct usb_serial_driver moschip7840_4port_device = { .throttle = mos7840_throttle, .unthrottle = mos7840_unthrottle, .calc_num_ports = mos7840_calc_num_ports, -#ifdef MCSSerialProbe - .probe = mos7840_serial_probe, -#endif + .probe = mos7840_probe, .ioctl = mos7840_ioctl, .set_termios = mos7840_set_termios, .break_ctl = mos7840_break, From 62a2cfc242dd948a1edfe46681f553276a48cc7f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Jul 2013 11:55:19 +0200 Subject: [PATCH 0403/1368] USB: mos7840: fix race in led handling commit 05cf0dec5ccc696a7636c84b265b477173498156 upstream. Fix race in LED handling introduced by commit 0eafe4de ("USB: serial: mos7840: add support for MCS7810 devices") which reused the port control urb for manipulating the LED without making sure that the urb is not already in use. This could lead to the control urb being manipulated while in flight. Fix by adding a dedicated LED urb and ctrlrequest along with a LED-busy flag to handle concurrency. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 59 ++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 5f3b1ba38241..8ef5fd8cd2d7 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -185,6 +185,7 @@ enum mos7840_flag { MOS7840_FLAG_CTRL_BUSY, + MOS7840_FLAG_LED_BUSY, }; static const struct usb_device_id id_table[] = { @@ -240,9 +241,10 @@ struct moschip_port { /* For device(s) with LED indicator */ bool has_led; - bool led_flag; struct timer_list led_timer1; /* Timer for LED on */ struct timer_list led_timer2; /* Timer for LED off */ + struct urb *led_urb; + struct usb_ctrlrequest *led_dr; unsigned long flags; }; @@ -542,7 +544,7 @@ static void mos7840_set_led_async(struct moschip_port *mcs, __u16 wval, __u16 reg) { struct usb_device *dev = mcs->port->serial->dev; - struct usb_ctrlrequest *dr = mcs->dr; + struct usb_ctrlrequest *dr = mcs->led_dr; dr->bRequestType = MCS_WR_RTYPE; dr->bRequest = MCS_WRREQ; @@ -550,10 +552,10 @@ static void mos7840_set_led_async(struct moschip_port *mcs, __u16 wval, dr->wIndex = cpu_to_le16(reg); dr->wLength = cpu_to_le16(0); - usb_fill_control_urb(mcs->control_urb, dev, usb_sndctrlpipe(dev, 0), + usb_fill_control_urb(mcs->led_urb, dev, usb_sndctrlpipe(dev, 0), (unsigned char *)dr, NULL, 0, mos7840_set_led_callback, NULL); - usb_submit_urb(mcs->control_urb, GFP_ATOMIC); + usb_submit_urb(mcs->led_urb, GFP_ATOMIC); } static void mos7840_set_led_sync(struct usb_serial_port *port, __u16 reg, @@ -579,7 +581,19 @@ static void mos7840_led_flag_off(unsigned long arg) { struct moschip_port *mcs = (struct moschip_port *) arg; - mcs->led_flag = false; + clear_bit_unlock(MOS7840_FLAG_LED_BUSY, &mcs->flags); +} + +static void mos7840_led_activity(struct usb_serial_port *port) +{ + struct moschip_port *mos7840_port = usb_get_serial_port_data(port); + + if (test_and_set_bit_lock(MOS7840_FLAG_LED_BUSY, &mos7840_port->flags)) + return; + + mos7840_set_led_async(mos7840_port, 0x0301, MODEM_CONTROL_REGISTER); + mod_timer(&mos7840_port->led_timer1, + jiffies + msecs_to_jiffies(LED_ON_MS)); } /***************************************************************************** @@ -779,14 +793,8 @@ static void mos7840_bulk_in_callback(struct urb *urb) return; } - /* Turn on LED */ - if (mos7840_port->has_led && !mos7840_port->led_flag) { - mos7840_port->led_flag = true; - mos7840_set_led_async(mos7840_port, 0x0301, - MODEM_CONTROL_REGISTER); - mod_timer(&mos7840_port->led_timer1, - jiffies + msecs_to_jiffies(LED_ON_MS)); - } + if (mos7840_port->has_led) + mos7840_led_activity(port); mos7840_port->read_urb_busy = true; retval = usb_submit_urb(mos7840_port->read_urb, GFP_ATOMIC); @@ -1467,13 +1475,8 @@ static int mos7840_write(struct tty_struct *tty, struct usb_serial_port *port, data1 = urb->transfer_buffer; dev_dbg(&port->dev, "bulkout endpoint is %d\n", port->bulk_out_endpointAddress); - /* Turn on LED */ - if (mos7840_port->has_led && !mos7840_port->led_flag) { - mos7840_port->led_flag = true; - mos7840_set_led_sync(port, MODEM_CONTROL_REGISTER, 0x0301); - mod_timer(&mos7840_port->led_timer1, - jiffies + msecs_to_jiffies(LED_ON_MS)); - } + if (mos7840_port->has_led) + mos7840_led_activity(port); /* send it down the pipe */ status = usb_submit_urb(urb, GFP_ATOMIC); @@ -2429,6 +2432,14 @@ static int mos7840_port_probe(struct usb_serial_port *port) if (device_type == MOSCHIP_DEVICE_ID_7810) { mos7840_port->has_led = true; + mos7840_port->led_urb = usb_alloc_urb(0, GFP_KERNEL); + mos7840_port->led_dr = kmalloc(sizeof(*mos7840_port->led_dr), + GFP_KERNEL); + if (!mos7840_port->led_urb || !mos7840_port->led_dr) { + status = -ENOMEM; + goto error; + } + init_timer(&mos7840_port->led_timer1); mos7840_port->led_timer1.function = mos7840_led_off; mos7840_port->led_timer1.expires = @@ -2441,8 +2452,6 @@ static int mos7840_port_probe(struct usb_serial_port *port) jiffies + msecs_to_jiffies(LED_OFF_MS); mos7840_port->led_timer2.data = (unsigned long)mos7840_port; - mos7840_port->led_flag = false; - /* Turn off LED */ mos7840_set_led_sync(port, MODEM_CONTROL_REGISTER, 0x0300); } @@ -2464,6 +2473,8 @@ static int mos7840_port_probe(struct usb_serial_port *port) } return 0; error: + kfree(mos7840_port->led_dr); + usb_free_urb(mos7840_port->led_urb); kfree(mos7840_port->dr); kfree(mos7840_port->ctrl_buf); usb_free_urb(mos7840_port->control_urb); @@ -2484,6 +2495,10 @@ static int mos7840_port_remove(struct usb_serial_port *port) del_timer_sync(&mos7840_port->led_timer1); del_timer_sync(&mos7840_port->led_timer2); + + usb_kill_urb(mos7840_port->led_urb); + usb_free_urb(mos7840_port->led_urb); + kfree(mos7840_port->led_dr); } usb_kill_urb(mos7840_port->control_urb); usb_free_urb(mos7840_port->control_urb); From 499c97f9a9ba379df5b246fe73c69c458bbcacd7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 27 Jul 2013 13:34:42 +0200 Subject: [PATCH 0404/1368] USB: mos7840: fix pointer casts commit 683a0e4d7971c3186dc4d429027debfe309129aa upstream. Silence compiler warnings on 64-bit systems introduced by commit 05cf0dec ("USB: mos7840: fix race in led handling") which uses the usb-serial data pointer to temporarily store the device type during probe but failed to add the required casts. [gregkh - change uintptr_t to unsigned long] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 8ef5fd8cd2d7..b92d3338d055 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -2236,14 +2236,14 @@ static int mos7840_probe(struct usb_serial *serial, kfree(buf); out: - usb_set_serial_data(serial, (void *)device_type); + usb_set_serial_data(serial, (void *)(unsigned long)device_type); return 0; } static int mos7840_calc_num_ports(struct usb_serial *serial) { - int device_type = (int)usb_get_serial_data(serial); + int device_type = (unsigned long)usb_get_serial_data(serial); int mos7840_num_ports; mos7840_num_ports = (device_type >> 4) & 0x000F; @@ -2254,7 +2254,7 @@ static int mos7840_calc_num_ports(struct usb_serial *serial) static int mos7840_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; - int device_type = (int)usb_get_serial_data(serial); + int device_type = (unsigned long)usb_get_serial_data(serial); struct moschip_port *mos7840_port; int status; int pnum; From c0e4964618515104556da8bd96cc6604ab8286e2 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 2 Jul 2013 13:35:35 +0300 Subject: [PATCH 0405/1368] iwlwifi: mvm: fix L2P BA ressources leak commit 93a426673fbfeae7fa6b27008828e2ac4c08dbee upstream. We didn't release the Rx AMPDU ressources properly. This bug led to firmware assert after 16 BA sessions. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/sta.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 5c664ed54400..1ddad1e797b9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -621,8 +621,12 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, cmd.mac_id_n_color = cpu_to_le32(mvm_sta->mac_id_n_color); cmd.sta_id = mvm_sta->sta_id; cmd.add_modify = STA_MODE_MODIFY; - cmd.add_immediate_ba_tid = (u8) tid; - cmd.add_immediate_ba_ssn = cpu_to_le16(ssn); + if (start) { + cmd.add_immediate_ba_tid = (u8) tid; + cmd.add_immediate_ba_ssn = cpu_to_le16(ssn); + } else { + cmd.remove_immediate_ba_tid = (u8) tid; + } cmd.modify_mask = start ? STA_MODIFY_ADD_BA_TID : STA_MODIFY_REMOVE_BA_TID; From d9bd9f5e484d91b89f022d40a925f2870502e4af Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Thu, 4 Jul 2013 15:17:48 +0300 Subject: [PATCH 0406/1368] iwlwifi: mvm: fix bug in scan ssid commit fe04e83706037802c502ea41c0d1799ec35fc0d7 upstream. Increment index in each iteration. Without this increment we are overriding the added SSIDs and we will send only the last SSId and (n_ssids - 1) broadcast probes. Signed-off-by: David Spinadel Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index 2476e43799d5..08fbfe0fca62 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -137,8 +137,8 @@ static void iwl_mvm_scan_fill_ssids(struct iwl_scan_cmd *cmd, { int fw_idx, req_idx; - fw_idx = 0; - for (req_idx = req->n_ssids - 1; req_idx > 0; req_idx--) { + for (req_idx = req->n_ssids - 1, fw_idx = 0; req_idx > 0; + req_idx--, fw_idx++) { cmd->direct_scan[fw_idx].id = WLAN_EID_SSID; cmd->direct_scan[fw_idx].len = req->ssids[req_idx].ssid_len; memcpy(cmd->direct_scan[fw_idx].ssid, From 139ce60dd49cae12137947393917efb10bbcc219 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Jul 2013 15:55:29 +0200 Subject: [PATCH 0407/1368] iwlwifi: mvm: refuse connection to APs with BI < 16 commit 48bc13072109ea58465542aa1ee31b4e1065468a upstream. Due to a firmware bug, it crashes when the beacon interval is smaller than 16. Avoid this by refusing the station state change creating the AP station, causing mac80211 to abandon the attempt to connect to the AP, and eventually wpa_s to blacklist it. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index a5eb8c82f16a..b7e95b0a42b7 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -987,6 +987,21 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, mutex_lock(&mvm->mutex); if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) { + /* + * Firmware bug - it'll crash if the beacon interval is less + * than 16. We can't avoid connecting at all, so refuse the + * station state change, this will cause mac80211 to abandon + * attempts to connect to this AP, and eventually wpa_s will + * blacklist the AP... + */ + if (vif->type == NL80211_IFTYPE_STATION && + vif->bss_conf.beacon_int < 16) { + IWL_ERR(mvm, + "AP %pM beacon interval is %d, refusing due to firmware bug!\n", + sta->addr, vif->bss_conf.beacon_int); + ret = -EINVAL; + goto out_unlock; + } ret = iwl_mvm_add_sta(mvm, vif, sta); } else if (old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_AUTH) { @@ -1015,6 +1030,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, } else { ret = -EIO; } + out_unlock: mutex_unlock(&mvm->mutex); return ret; From 19a646b98d81a2b111d328ceca85f7e4b069b36a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 18 Jul 2013 19:11:26 +0300 Subject: [PATCH 0408/1368] iwlwifi: add DELL SKU for 5150 HMC commit a1923f1d4723e5757cefdd60f7c7ab30e472007a upstream. This SKU was missing in the list of supported devices https://bugzilla.kernel.org/show_bug.cgi?id=60577 Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 8cb53ec2b77b..5283b5552e6f 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -129,6 +129,7 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { {IWL_PCI_DEVICE(0x423C, 0x1306, iwl5150_abg_cfg)}, /* Half Mini Card */ {IWL_PCI_DEVICE(0x423C, 0x1221, iwl5150_agn_cfg)}, /* Mini Card */ {IWL_PCI_DEVICE(0x423C, 0x1321, iwl5150_agn_cfg)}, /* Half Mini Card */ + {IWL_PCI_DEVICE(0x423C, 0x1326, iwl5150_abg_cfg)}, /* Half Mini Card */ {IWL_PCI_DEVICE(0x423D, 0x1211, iwl5150_agn_cfg)}, /* Mini Card */ {IWL_PCI_DEVICE(0x423D, 0x1311, iwl5150_agn_cfg)}, /* Half Mini Card */ From fae60f7985950511751a2bd8adc71ba1049753da Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Jul 2013 13:55:51 +0200 Subject: [PATCH 0409/1368] iwlwifi: mvm: fix flushing not started aggregation sessions commit b6658ff80c43bcf84be0bbe371c88af1452e7776 upstream. When a not fully started aggregation session is destroyed and flushed, we get a warning, e.g. WARNING: at drivers/net/wireless/iwlwifi/pcie/tx.c:1142 iwl_trans_pcie_txq_disable+0x11c/0x160 queue 16 not used Modules linked in: [...] Pid: 5135, comm: hostapd Tainted: G W O 3.5.0 #10 Call Trace: wlan0: driver sets block=0 for sta 00:03:7f:10:44:d3 [] warn_slowpath_common+0x72/0xa0 [] warn_slowpath_fmt+0x47/0x50 [] iwl_trans_pcie_txq_disable+0x11c/0x160 [iwlwifi] [] iwl_mvm_sta_tx_agg_flush+0xe9/0x150 [iwlmvm] [] iwl_mvm_mac_ampdu_action+0xf3/0x1e0 [iwlmvm] [] ___ieee80211_stop_tx_ba_session+0x193/0x920 [mac80211] [] __ieee80211_stop_tx_ba_session+0x48/0x70 [mac80211] [] ieee80211_sta_tear_down_BA_sessions+0x4f/0x80 [mac80211] [] __sta_info_destroy+0x66/0x370 [mac80211] [] sta_info_destroy_addr_bss+0x44/0x70 [mac80211] [] ieee80211_del_station+0x26/0x50 [mac80211] [] nl80211_del_station+0x85/0x200 [cfg80211] when a station deauthenticated from us without fully setting up the aggregation session. Fix this by checking the aggregation state before removing the hardware queue. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/sta.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 1ddad1e797b9..736b50bc962c 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -898,6 +898,7 @@ int iwl_mvm_sta_tx_agg_flush(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_sta *mvmsta = (void *)sta->drv_priv; struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid]; u16 txq_id; + enum iwl_mvm_agg_state old_state; /* * First set the agg state to OFF to avoid calling @@ -907,13 +908,17 @@ int iwl_mvm_sta_tx_agg_flush(struct iwl_mvm *mvm, struct ieee80211_vif *vif, txq_id = tid_data->txq_id; IWL_DEBUG_TX_QUEUES(mvm, "Flush AGG: sta %d tid %d q %d state %d\n", mvmsta->sta_id, tid, txq_id, tid_data->state); + old_state = tid_data->state; tid_data->state = IWL_AGG_OFF; spin_unlock_bh(&mvmsta->lock); - if (iwl_mvm_flush_tx_path(mvm, BIT(txq_id), true)) - IWL_ERR(mvm, "Couldn't flush the AGG queue\n"); + if (old_state >= IWL_AGG_ON) { + if (iwl_mvm_flush_tx_path(mvm, BIT(txq_id), true)) + IWL_ERR(mvm, "Couldn't flush the AGG queue\n"); + + iwl_trans_txq_disable(mvm->trans, tid_data->txq_id); + } - iwl_trans_txq_disable(mvm->trans, tid_data->txq_id); mvm->queue_to_mac80211[tid_data->txq_id] = IWL_INVALID_MAC80211_QUEUE; From 6edab054ac50f1c248eacf4d750dcbf5ff4f00b6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 19 Jul 2013 20:16:17 +0200 Subject: [PATCH 0410/1368] ath9k_htc: do some initial hardware configuration commit dc2a87f519a4d8cb376ab54f22b6b98a943b51ce upstream. Currently we configure harwdare and clock, only after interface start. In this case, if we reload module or reboot PC without configuring adapter, firmware will freeze. There is no software way to reset adpter. This patch add initial configuration and set it in disabled state, to avoid this freeze. Behaviour of this patch should be similar to: ifconfig wlan0 up; ifconfig wlan0 down. Bug: https://github.com/qca/open-ath9k-htc-firmware/issues/1 Tested-by: Bo Shi Signed-off-by: Oleksij Rempel Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index a47f5e05fc04..3b202ffb3257 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -846,6 +846,7 @@ static int ath9k_init_device(struct ath9k_htc_priv *priv, if (error != 0) goto err_rx; + ath9k_hw_disable(priv->ah); #ifdef CONFIG_MAC80211_LEDS /* must be initialized before ieee80211_register_hw */ priv->led_cdev.default_trigger = ieee80211_create_tpt_led_trigger(priv->hw, From e59f8a9d54603dcadbee294849988d1876d4bd38 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 19 Jul 2013 20:16:18 +0200 Subject: [PATCH 0411/1368] ath9k_htc: reboot firmware if it was loaded commit 4928bd2ef8ece262f4f314630219999a91eaa440 upstream. Currently ath9k_htc will reboot firmware only if interface was ever started. Which lead to the problem in case where interface was never started but module need to be reloaded. This patch will partially fix bug "ath9k_htc: Target is unresponsive" https://github.com/qca/open-ath9k-htc-firmware/issues/1 Reproduction case: - plug adapter - make sure nothing will touch it. Stop Networkmanager or blacklist mac address of this adapter. - rmmod ath9k_htc; sleep 1; modprobe ath9k_htc Signed-off-by: Oleksij Rempel Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hif_usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index f5dda84176c3..75a6376ed289 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -1289,7 +1289,9 @@ static void ath9k_hif_usb_disconnect(struct usb_interface *interface) usb_set_intfdata(interface, NULL); - if (!unplugged && (hif_dev->flags & HIF_USB_START)) + /* If firmware was loaded we should drop it + * go back to first stage bootloader. */ + if (!unplugged && (hif_dev->flags & HIF_USB_READY)) ath9k_hif_usb_reboot(udev); kfree(hif_dev); From c8942c6aac25cc5e98161c1a6b84b6ae10bff945 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 25 Jun 2013 09:17:17 +0200 Subject: [PATCH 0412/1368] nl80211: fix mgmt tx status and testmode reporting for netns commit a0ec570f4f69c4cb700d743a915096c2c8f56a99 upstream. These two events were sent to the default network namespace. This caused AP mode in a non-default netns to not work correctly. Mgmt tx status was multicasted to a different (default) netns instead of the one the AP was in. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b14b7e3cb6e6..db8ead94ff7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6588,12 +6588,14 @@ EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) { + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; nla_nest_end(skb, data); genlmsg_end(skb, hdr); - genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0, + nl80211_testmode_mcgrp.id, gfp); } EXPORT_SYMBOL(cfg80211_testmode_event); #endif @@ -10028,7 +10030,8 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, genlmsg_end(msg, hdr); - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: From 57fe9f79b907bdb8ef1fd8b31824f8901581369e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 15 Jul 2013 14:35:06 +0200 Subject: [PATCH 0413/1368] mac80211/minstrel: fix NULL pointer dereference issue commit 5c9fc93bc9bc417418fc1b6366833ae6a07b804d upstream. When priv_sta == NULL, mi->prev_sample is dereferenced too early. Move the assignment further down, after the rate_control_send_low call. Reported-by: Krzysztof Mazur Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rc80211_minstrel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ac7ef5414bde..e6512e2ffd20 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -290,7 +290,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, struct minstrel_rate *msr, *mr; unsigned int ndx; bool mrr_capable; - bool prev_sample = mi->prev_sample; + bool prev_sample; int delta; int sampling_ratio; @@ -314,6 +314,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, (mi->sample_count + mi->sample_deferred / 2); /* delta < 0: no sampling required */ + prev_sample = mi->prev_sample; mi->prev_sample = false; if (delta < 0 || (!mrr_capable && prev_sample)) return; From ef47a5e4f1aaf1d0e2e6875e34b2c9595897bef6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 28 Jun 2013 21:04:35 +0200 Subject: [PATCH 0414/1368] mac80211/minstrel_ht: fix cck rate sampling commit 1cd158573951f737fbc878a35cb5eb47bf9af3d5 upstream. The CCK group needs special treatment to set the right flags and rate index. Add this missing check to prevent setting broken rates for tx packets. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rc80211_minstrel_ht.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5b2d3012b983..f5aed963b22e 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -804,10 +804,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + rate->count = 1; + + if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); + rate->idx = mp->cck_rates[idx]; + rate->flags = 0; + return; + } + rate->idx = sample_idx % MCS_GROUP_RATES + (sample_group->streams - 1) * MCS_GROUP_RATES; rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; - rate->count = 1; } static void From fd14e26686ae1729c25cf9c938eed826eb825e9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jul 2013 22:33:26 +0200 Subject: [PATCH 0415/1368] mac80211: fix duplicate retransmission detection commit 6b0f32745dcfba01d7be33acd1b40306c7a914c6 upstream. The duplicate retransmission detection code in mac80211 erroneously attempts to do the check for every frame, even frames that don't have a sequence control field or that don't use it (QoS-Null frames.) This is problematic because it causes the code to access data beyond the end of the SKB and depending on the data there will drop packets erroneously. Correct the code to not do duplicate detection for such frames. I found this error while testing AP powersave, it lead to retransmitted PS-Poll frames being dropped entirely as the data beyond the end of the SKB was always zero. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8e2952620256..83f6d29202aa 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -932,8 +932,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + /* + * Drop duplicate 802.11 retransmissions + * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") + */ + if (rx->skb->len >= 24 && rx->sta && + !ieee80211_is_ctl(hdr->frame_control) && + !ieee80211_is_qos_nullfunc(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { From 090577c961e0843004762905bfcee2d6bf262722 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Jul 2013 10:43:31 +0200 Subject: [PATCH 0416/1368] mac80211: fix ethtool stats for non-station interfaces commit e13bae4f807401729b3f27c7e882a96b8b292809 upstream. As reported in https://bugzilla.kernel.org/show_bug.cgi?id=60514, the station loop never initialises 'sinfo' and therefore adds up a stack values, leaking stack information (the number of times it adds values is easily obtained another way.) Fix this by initialising the sinfo for each station to add. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4fdb306e42e0..ae36f8e11ae4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -652,6 +652,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, if (sta->sdata->dev != dev) continue; + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); i = 0; ADD_STA_STATS(sta); } From e30649e9650df1b772562525924c1f7453bf18d2 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 23 Jul 2013 13:56:50 +0200 Subject: [PATCH 0417/1368] mac80211: fix monitor interface suspend crash regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cd34f647a78e7f2296fcb72392b9e5c832793e65 upstream. My commit: commit 12e7f517029dad819c45eca9ca01fdb9ba57616b Author: Stanislaw Gruszka Date: Thu Feb 28 10:55:26 2013 +0100 mac80211: cleanup generic suspend/resume procedures removed check for deleting MONITOR and AP_VLAN when suspend. That can cause a crash (i.e. in iwlagn_mac_remove_interface()) since we remove interface in the driver that we did not add before. Reference: http://marc.info/?l=linux-kernel&m=137391815113860&w=2 Bisected-by: Ortwin Glück Reported-and-tested-by: Ortwin Glück Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/pm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7fc5d0d8149a..340126204343 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -99,10 +99,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } mutex_unlock(&local->sta_mtx); - /* remove all interfaces */ + /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata) || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; + drv_remove_interface(local, sdata); } From 82056f561468aa59e697436742d460d939778175 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 26 Jul 2013 05:46:35 -0700 Subject: [PATCH 0418/1368] ixgbe: Fix Tx Hang issue with lldpad on 82598EB commit 1eb9ac14c34a948bf1538bfb9034e8ab29099a64 upstream. This patch fixes an issue with the 82598EB device, where lldpad is causing Tx Hangs on the card as soon as it attempts to configure DCB for the device. The adapter will continually Tx hang and reset in a loop. Signed-off-by: Jacob Keller Tested-by: Phil Schmitt Tested-by: Jack Morgan Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c index ac780770863d..7a77f37a7cbc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c @@ -108,9 +108,8 @@ s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw, /* Enable arbiter */ reg &= ~IXGBE_DPMCS_ARBDIS; - /* Enable DFP and Recycle mode */ - reg |= (IXGBE_DPMCS_TDPAC | IXGBE_DPMCS_TRM); reg |= IXGBE_DPMCS_TSOEF; + /* Configure Max TSO packet size 34KB including payload and headers */ reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT); From 0a8567af5be153e47347546ca0f772c206f1be8a Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 20 Jul 2013 21:46:48 -0500 Subject: [PATCH 0419/1368] ath: wil6210: Fix build error commit 5d21608a592a9afcac8d82c6478a564e911ce70b upstream. Building driver wil6210 in 3.10 and 3.11 kernels yields the following errors: CC [M] drivers/net/wireless/ath/wil6210/debugfs.o drivers/net/wireless/ath/wil6210/debugfs.c: In function 'wil_print_ring': drivers/net/wireless/ath/wil6210/debugfs.c:163:11: error: pointer targets in passing argument 5 of 'hex_dump_to_buffer' differ in signedness [-Werror=pointer-sign] false); ^ In file included from include/linux/kernel.h:13:0, from include/linux/cache.h:4, from include/linux/time.h:4, from include/linux/stat.h:18, from include/linux/module.h:10, from drivers/net/wireless/ath/wil6210/debugfs.c:17: include/linux/printk.h:361:13: note: expected 'char *' but argument is of type 'unsigned char *' extern void hex_dump_to_buffer(const void *buf, size_t len, ^ drivers/net/wireless/ath/wil6210/debugfs.c: In function 'wil_txdesc_debugfs_show': drivers/net/wireless/ath/wil6210/debugfs.c:429:10: error: pointer targets in passing argument 5 of 'hex_dump_to_buffer' differ in signedness [-Werror=pointer-sign] sizeof(printbuf), false); ^ In file included from include/linux/kernel.h:13:0, from include/linux/cache.h:4, from include/linux/time.h:4, from include/linux/stat.h:18, from include/linux/module.h:10, from drivers/net/wireless/ath/wil6210/debugfs.c:17: include/linux/printk.h:361:13: note: expected 'char *' but argument is of type 'unsigned char *' extern void hex_dump_to_buffer(const void *buf, size_t len, ^ cc1: all warnings being treated as errors make[5]: *** [drivers/net/wireless/ath/wil6210/debugfs.o] Error 1 make[4]: *** [drivers/net/wireless/ath/wil6210] Error 2 make[3]: *** [drivers/net/wireless/ath] Error 2 make[2]: *** [drivers/net/wireless] Error 2 make[1]: *** [drivers/net] Error 2 make: *** [drivers] Error 2 These errors are fixed by changing the type of the buffer from "unsigned char *" to "char *". Reported-by: Thomas Fjellstrom Tested-by: Thomas Fjellstrom Signed-off-by: Larry Finger Cc: Thomas Fjellstrom Signed-off-by: Vladimir Kondratiev Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/wil6210/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index 727b1f53e6ad..d57e5be32ee0 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -145,7 +145,7 @@ static void wil_print_ring(struct seq_file *s, const char *prefix, le16_to_cpu(hdr.type), hdr.flags); if (len <= MAX_MBOXITEM_SIZE) { int n = 0; - unsigned char printbuf[16 * 3 + 2]; + char printbuf[16 * 3 + 2]; unsigned char databuf[MAX_MBOXITEM_SIZE]; void __iomem *src = wmi_buffer(wil, d.addr) + sizeof(struct wil6210_mbox_hdr); @@ -416,7 +416,7 @@ static int wil_txdesc_debugfs_show(struct seq_file *s, void *data) seq_printf(s, " SKB = %p\n", skb); if (skb) { - unsigned char printbuf[16 * 3 + 2]; + char printbuf[16 * 3 + 2]; int i = 0; int len = skb_headlen(skb); void *p = skb->data; From 4008b2c77e1b7175d3ed17e99e1afdc0ec531427 Mon Sep 17 00:00:00 2001 From: Thomas Loo Date: Wed, 3 Jul 2013 02:53:54 +0200 Subject: [PATCH 0420/1368] Bluetooth: ath3k: Add support for Fujitsu Lifebook UH5x2 [04c5:1330] commit 84eb2ae1807dd1467bf6f500fc69ae61f1907b75 upstream. The Fujitsu Lifebook UH552/UH572 ships with a Qualcomm AR9462/AR3012 WLAN/BT-Combo card. Add device ID to the ath3k driver to enable the bluetooth side of things. Patch against v3.10. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04c5 ProdID=1330 Rev=00.02 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Thomas Loo Signed-off-by: Gustavo Padovan Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 11f467c00d0a..b22376dee2c0 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -91,6 +91,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0489, 0xe04e) }, { USB_DEVICE(0x0489, 0xe056) }, { USB_DEVICE(0x0489, 0xe04d) }, + { USB_DEVICE(0x04c5, 0x1330) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, @@ -128,6 +129,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 7a7e5f8ecadc..8b8b85dced76 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -151,6 +151,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, From 6bc14767620b9ef79140de5fe73cd25006e2da5f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 8 Jul 2013 10:27:23 +0200 Subject: [PATCH 0421/1368] Bluetooth: ath3k: don't use stack memory for DMA commit 517828a87994f41af6ae5a0f96f0f069f05baa81 upstream. Memory allocated by vmalloc (including stack) can not be used for DMA, i.e. data pointer on usb_control_msg() should not point to stack memory. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=977558 Reported-and-tested-by: Andy Lawrence Signed-off-by: Stanislaw Gruszka Signed-off-by: Gustavo Padovan Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index b22376dee2c0..6f17e4dec441 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -195,24 +195,44 @@ static int ath3k_load_firmware(struct usb_device *udev, static int ath3k_get_state(struct usb_device *udev, unsigned char *state) { - int pipe = 0; + int ret, pipe = 0; + char *buf; + + buf = kmalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + return -ENOMEM; pipe = usb_rcvctrlpipe(udev, 0); - return usb_control_msg(udev, pipe, ATH3K_GETSTATE, - USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, - state, 0x01, USB_CTRL_SET_TIMEOUT); + ret = usb_control_msg(udev, pipe, ATH3K_GETSTATE, + USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, + buf, sizeof(*buf), USB_CTRL_SET_TIMEOUT); + + *state = *buf; + kfree(buf); + + return ret; } static int ath3k_get_version(struct usb_device *udev, struct ath3k_version *version) { - int pipe = 0; + int ret, pipe = 0; + struct ath3k_version *buf; + const int size = sizeof(*buf); + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; pipe = usb_rcvctrlpipe(udev, 0); - return usb_control_msg(udev, pipe, ATH3K_GETVERSION, - USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, version, - sizeof(struct ath3k_version), - USB_CTRL_SET_TIMEOUT); + ret = usb_control_msg(udev, pipe, ATH3K_GETVERSION, + USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, + buf, size, USB_CTRL_SET_TIMEOUT); + + memcpy(version, buf, size); + kfree(buf); + + return ret; } static int ath3k_load_fwfile(struct usb_device *udev, From 44c4127ace6fc02ff4e86318c11d90600874463e Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Mon, 15 Jul 2013 09:29:03 +0530 Subject: [PATCH 0422/1368] Bluetooth: ath3k: Add support for ID 0x13d3/0x3402 commit 5b77a1f3d7b7360dc2b7c6d2188d39b9f8432907 upstream. T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 5 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3402 Rev= 0.02 S: Manufacturer=Atheros Communications S: Product=Bluetooth USB Host Controller S: SerialNumber=Alaska Day 2006 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Bug: https://bugzilla.kernel.org/show_bug.cgi?id=59701 Signed-off-by: Sujith Manoharan Signed-off-by: Gustavo Padovan Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 6f17e4dec441..81973eccce52 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -92,6 +92,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0489, 0xe056) }, { USB_DEVICE(0x0489, 0xe04d) }, { USB_DEVICE(0x04c5, 0x1330) }, + { USB_DEVICE(0x13d3, 0x3402) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, @@ -130,6 +131,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 8b8b85dced76..d71322f18618 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -152,6 +152,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, From df235047f274553f8942286fef812fde4ec32a60 Mon Sep 17 00:00:00 2001 From: AceLan Kao Date: Wed, 17 Jul 2013 11:27:40 +0800 Subject: [PATCH 0423/1368] Bluetooth: Add support for Atheros [0cf3:3121] commit 1ebd0b21ab14efb75950079840eac29afea2a26e upstream. Add support for the AR3012 chip. T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=01 Dev#= 6 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=3121 Rev=00.02 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: AceLan Kao Signed-off-by: Gustavo Padovan Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 81973eccce52..d3a2d6fafffb 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -93,6 +93,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0489, 0xe04d) }, { USB_DEVICE(0x04c5, 0x1330) }, { USB_DEVICE(0x13d3, 0x3402) }, + { USB_DEVICE(0x0cf3, 0x3121) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, @@ -132,6 +133,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d71322f18618..20b5b19f897e 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -153,6 +153,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, From eba6a13b3ca37f4b2c8abb7793f0931a5bfadc7b Mon Sep 17 00:00:00 2001 From: AceLan Kao Date: Thu, 20 Jun 2013 13:38:45 +0800 Subject: [PATCH 0424/1368] Bluetooth: Add support for Atheros [0cf3:e003] commit 1d5b569ef85d013a775560a90050dc630614c045 upstream. Add support for the AR9462 chip T: Bus=02 Lev=02 Prnt=02 Port=04 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e003 Rev=00.02 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: AceLan Kao Signed-off-by: Gustavo Padovan Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index d3a2d6fafffb..a12b923bbaca 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -94,6 +94,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04c5, 0x1330) }, { USB_DEVICE(0x13d3, 0x3402) }, { USB_DEVICE(0x0cf3, 0x3121) }, + { USB_DEVICE(0x0cf3, 0xe003) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, @@ -134,6 +135,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 20b5b19f897e..6696d2918f66 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -154,6 +154,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, From c519774d4e636eeea2c1490c179fb898bbc61c17 Mon Sep 17 00:00:00 2001 From: "Cho, Yu-Chen" Date: Tue, 4 Jun 2013 21:40:26 +0800 Subject: [PATCH 0425/1368] Bluetooth: Add support for Mediatek Bluetooth device [0e8d:763f] commit 178c059e7640aa8e50213400c6f3dde00189d979 upstream. This patch adds support for Mediatek Bluetooth device T: Bus=02 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0e8d ProdID=763f Rev= 1.00 S: Manufacturer=MediaTek S: Product=BT S: SerialNumber=1.0 C:* #Ifs= 2 Cfg#= 1 Atr=a0 MxPwr=450mA A: FirstIf#= 0 IfCount= 2 Cls=ff(vend.) Sub=ff Prot=ff I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms I: If#= 1 Alt= 6 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=03(O) Atr=01(Isoc) MxPS= 63 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 63 Ivl=1ms Signed-off-by: Cho, Yu-Chen Signed-off-by: Gustavo Padovan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 6696d2918f66..b852884397b9 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -57,6 +57,9 @@ static struct usb_device_id btusb_table[] = { /* Apple-specific (Broadcom) devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x05ac, 0xff, 0x01, 0x01) }, + /* MediaTek MT76x0E */ + { USB_DEVICE(0x0e8d, 0x763f) }, + /* Broadcom SoftSailing reporting vendor specific */ { USB_DEVICE(0x0a5c, 0x21e1) }, From ea788759177526b7a103baed9c711d284d822fe5 Mon Sep 17 00:00:00 2001 From: Adam Lee Date: Wed, 10 Jul 2013 10:02:12 +0800 Subject: [PATCH 0426/1368] Bluetooth: fix wrong use of PTR_ERR() in btusb commit d9c78e9738ccd0017b10b8f44462aafb61904a4a upstream. PTR_ERR() returns a signed long type value which is limited by IS_ERR(), it must be a negative number whose range is [-MAX_ERRNO, 0). The bug here returns negative numbers as error codes, then check it by "if (ret < 0)", but -PTR_ERR() is actually positive. The wrong use here leads to failure as below, even panic. [ 12.958920] Bluetooth: hci0 command 0xfc8e tx timeout [ 14.961765] Bluetooth: hci0 command 0xfc8e tx timeout [ 16.964688] Bluetooth: hci0 command 0xfc8e tx timeout [ 20.954501] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-110) [ 22.957358] Bluetooth: hci0 command 0xfc8e tx timeout [ 30.948922] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-110) [ 32.951780] Bluetooth: hci0 command 0xfc8e tx timeout [ 40.943359] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-110) [ 42.946219] Bluetooth: hci0 command 0xfc8e tx timeout [ 50.937812] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-110) [ 52.940670] Bluetooth: hci0 command 0xfc8e tx timeout [ 60.932236] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-110) [ 62.935092] Bluetooth: hci0 command 0xfc8e tx timeout [ 70.926688] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-110) [ 72.929545] Bluetooth: hci0 command 0xfc8e tx timeout [ 80.921111] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-110) [ 82.923969] Bluetooth: hci0 command 0xfc2f tx timeout [ 90.915542] Bluetooth: hci0 sending Intel patch command (0xfc2f) failed (-110) [ 92.918406] Bluetooth: hci0 command 0xfc11 tx timeout [ 100.909955] Bluetooth: hci0 sending Intel patch command (0xfc11) failed (-110) [ 102.912858] Bluetooth: hci0 command 0xfc60 tx timeout [ 110.904394] Bluetooth: hci0 sending Intel patch command (0xfc60) failed (-110) [ 112.907293] Bluetooth: hci0 command 0xfc11 tx timeout [ 120.898831] Bluetooth: hci0 exiting Intel manufacturer mode failed (-110) [ 120.904757] bluetoothd[1030]: segfault at 4 ip 00007f8b2eb55236 sp 00007fff53ff6920 error 4 in bluetoothd[7f8b2eaff000+cb000] Signed-off-by: Adam Lee Signed-off-by: Gustavo Padovan Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b852884397b9..d0b3d900d452 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1099,7 +1099,7 @@ static int btusb_setup_intel_patching(struct hci_dev *hdev, if (IS_ERR(skb)) { BT_ERR("%s sending Intel patch command (0x%4.4x) failed (%ld)", hdev->name, cmd->opcode, PTR_ERR(skb)); - return -PTR_ERR(skb); + return PTR_ERR(skb); } /* It ensures that the returned event matches the event data read from @@ -1151,7 +1151,7 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (IS_ERR(skb)) { BT_ERR("%s sending initial HCI reset command failed (%ld)", hdev->name, PTR_ERR(skb)); - return -PTR_ERR(skb); + return PTR_ERR(skb); } kfree_skb(skb); @@ -1165,7 +1165,7 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (IS_ERR(skb)) { BT_ERR("%s reading Intel fw version command failed (%ld)", hdev->name, PTR_ERR(skb)); - return -PTR_ERR(skb); + return PTR_ERR(skb); } if (skb->len != sizeof(*ver)) { @@ -1223,7 +1223,7 @@ static int btusb_setup_intel(struct hci_dev *hdev) BT_ERR("%s entering Intel manufacturer mode failed (%ld)", hdev->name, PTR_ERR(skb)); release_firmware(fw); - return -PTR_ERR(skb); + return PTR_ERR(skb); } if (skb->data[0]) { @@ -1280,7 +1280,7 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (IS_ERR(skb)) { BT_ERR("%s exiting Intel manufacturer mode failed (%ld)", hdev->name, PTR_ERR(skb)); - return -PTR_ERR(skb); + return PTR_ERR(skb); } kfree_skb(skb); @@ -1296,7 +1296,7 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (IS_ERR(skb)) { BT_ERR("%s exiting Intel manufacturer mode failed (%ld)", hdev->name, PTR_ERR(skb)); - return -PTR_ERR(skb); + return PTR_ERR(skb); } kfree_skb(skb); @@ -1314,7 +1314,7 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (IS_ERR(skb)) { BT_ERR("%s exiting Intel manufacturer mode failed (%ld)", hdev->name, PTR_ERR(skb)); - return -PTR_ERR(skb); + return PTR_ERR(skb); } kfree_skb(skb); From 9b9f570a271aca8b09d7e5325da0aa836a703ede Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Jun 2013 10:11:19 -0400 Subject: [PATCH 0427/1368] svcrpc: fix gss-proxy xdr decoding oops commit dc43376c26cef74226174a2394f37f2a3f8a8639 upstream. Uninitialized stack data was being used as the destination for memcpy's. Longer term we'll just delete some of this code; all we're doing is skipping over xdr that we don't care about. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 357f613df7ff..3c85d1c8a028 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -430,7 +430,7 @@ static int dummy_enc_nameattr_array(struct xdr_stream *xdr, static int dummy_dec_nameattr_array(struct xdr_stream *xdr, struct gssx_name_attr_array *naa) { - struct gssx_name_attr dummy; + struct gssx_name_attr dummy = { .attr = {.len = 0} }; u32 count, i; __be32 *p; @@ -493,12 +493,13 @@ static int gssx_enc_name(struct xdr_stream *xdr, return err; } + static int gssx_dec_name(struct xdr_stream *xdr, struct gssx_name *name) { - struct xdr_netobj dummy_netobj; - struct gssx_name_attr_array dummy_name_attr_array; - struct gssx_option_array dummy_option_array; + struct xdr_netobj dummy_netobj = { .len = 0 }; + struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 }; + struct gssx_option_array dummy_option_array = { .count = 0 }; int err; /* name->display_name */ From 43110b3d902b62b9cd12aac237fcb471b4561e46 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Jun 2013 16:06:44 -0400 Subject: [PATCH 0428/1368] svcrpc: fix gss_rpc_upcall create error commit 9f96392b0ae6aefc02a9b900c3f4889dfafc8402 upstream. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index d304f41260f2..1e1ccf539fac 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -120,7 +120,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL gssproxy " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); *_clnt = NULL; goto out; } From 7b36b789452df175f3bf7c8ffc929f1dd378f164 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 31 Jul 2013 14:11:14 -0400 Subject: [PATCH 0429/1368] svcrpc: fix kfree oops in gss-proxy code commit 743e217129f69aab074abe520a464fd0c6b1cca1 upstream. mech_oid.data is an array, not kmalloc()'d memory. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 1e1ccf539fac..af7ffd447fee 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -328,7 +328,6 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data) kfree(data->in_handle.data); kfree(data->out_handle.data); kfree(data->out_token.data); - kfree(data->mech_oid.data); free_svc_cred(&data->creds); } From 5c2c155d709f5bd3e8c1e9defddf8d74b1677eb1 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Sun, 28 Jul 2013 13:17:22 +0200 Subject: [PATCH 0430/1368] rt2x00: fix stop queue commit e2288b66fe7ff0288382b2af671b4da558b44472 upstream. Since we clear QUEUE_STARTED in rt2x00queue_stop_queue(), following call to rt2x00queue_pause_queue() reduce to noop, i.e we do not stop queue in mac80211. To fix that introduce rt2x00queue_pause_queue_nocheck() function, which will stop queue in mac80211 directly. Note that rt2x00_start_queue() explicitly set QUEUE_PAUSED bit. Note also that reordering operations i.e. first call to rt2x00queue_pause_queue() and then clear QUEUE_STARTED bit, will race with rt2x00queue_unpause_queue(), so calling ieee80211_stop_queue() directly is the only available solution to fix the problem without major rework. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rt2x00/rt2x00queue.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 2c12311467a9..d955741e48ff 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -936,13 +936,8 @@ void rt2x00queue_index_inc(struct queue_entry *entry, enum queue_index index) spin_unlock_irqrestore(&queue->index_lock, irqflags); } -void rt2x00queue_pause_queue(struct data_queue *queue) +void rt2x00queue_pause_queue_nocheck(struct data_queue *queue) { - if (!test_bit(DEVICE_STATE_PRESENT, &queue->rt2x00dev->flags) || - !test_bit(QUEUE_STARTED, &queue->flags) || - test_and_set_bit(QUEUE_PAUSED, &queue->flags)) - return; - switch (queue->qid) { case QID_AC_VO: case QID_AC_VI: @@ -958,6 +953,15 @@ void rt2x00queue_pause_queue(struct data_queue *queue) break; } } +void rt2x00queue_pause_queue(struct data_queue *queue) +{ + if (!test_bit(DEVICE_STATE_PRESENT, &queue->rt2x00dev->flags) || + !test_bit(QUEUE_STARTED, &queue->flags) || + test_and_set_bit(QUEUE_PAUSED, &queue->flags)) + return; + + rt2x00queue_pause_queue_nocheck(queue); +} EXPORT_SYMBOL_GPL(rt2x00queue_pause_queue); void rt2x00queue_unpause_queue(struct data_queue *queue) @@ -1019,7 +1023,7 @@ void rt2x00queue_stop_queue(struct data_queue *queue) return; } - rt2x00queue_pause_queue(queue); + rt2x00queue_pause_queue_nocheck(queue); queue->rt2x00dev->ops->lib->stop_queue(queue); From 558c62c2c0883b3c4f58c3baff021856eb4cb95b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Mo=C5=84?= Date: Tue, 23 Jul 2013 07:42:49 +0200 Subject: [PATCH 0431/1368] mwifiex: Add missing endian conversion. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 83e612f632c3897be29ef02e0472f6d63e258378 upstream. Both type and pkt_len variables are in host endian and these should be in Little Endian in the payload. Signed-off-by: Tomasz Moń Acked-by: Bing Zhao Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/sdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mwifiex/sdio.c b/drivers/net/wireless/mwifiex/sdio.c index 363ba31b58bf..139c9581aecc 100644 --- a/drivers/net/wireless/mwifiex/sdio.c +++ b/drivers/net/wireless/mwifiex/sdio.c @@ -1441,8 +1441,8 @@ static int mwifiex_sdio_host_to_card(struct mwifiex_adapter *adapter, /* Allocate buffer and copy payload */ blk_size = MWIFIEX_SDIO_BLOCK_SIZE; buf_block_len = (pkt_len + blk_size - 1) / blk_size; - *(u16 *) &payload[0] = (u16) pkt_len; - *(u16 *) &payload[2] = type; + *(__le16 *)&payload[0] = cpu_to_le16((u16)pkt_len); + *(__le16 *)&payload[2] = cpu_to_le16(type); /* * This is SDIO specific header From 440166b9bb8b4fcd182531a65fd75816066f82f9 Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Mon, 29 Jul 2013 16:32:37 -0700 Subject: [PATCH 0432/1368] mwifiex: check for bss_role instead of bss_mode for STA operations commit 953b3539ef9301b8ef73f4b6e2fd824b86aae65a upstream. This patch fixes an issue wherein association would fail on P2P interfaces. This happened because we are checking priv->mode against NL80211_IFTYPE_STATION. While this check is correct for infrastructure stations, it would fail P2P clients for which mode is NL80211_IFTYPE_P2P_CLIENT. Better check would be bss_role which has only 2 values: STA/AP. Signed-off-by: Avinash Patil Signed-off-by: Stone Piao Signed-off-by: Bing Zhao Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/cfg80211.c | 4 ++-- drivers/net/wireless/mwifiex/join.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index e42b266a023a..e7f7cdfafd51 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -1668,9 +1668,9 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); int ret; - if (priv->bss_mode != NL80211_IFTYPE_STATION) { + if (GET_BSS_ROLE(priv) != MWIFIEX_BSS_ROLE_STA) { wiphy_err(wiphy, - "%s: reject infra assoc request in non-STA mode\n", + "%s: reject infra assoc request in non-STA role\n", dev->name); return -EINVAL; } diff --git a/drivers/net/wireless/mwifiex/join.c b/drivers/net/wireless/mwifiex/join.c index 6bcb66e6e97c..96bda6ca316d 100644 --- a/drivers/net/wireless/mwifiex/join.c +++ b/drivers/net/wireless/mwifiex/join.c @@ -1290,8 +1290,10 @@ int mwifiex_associate(struct mwifiex_private *priv, { u8 current_bssid[ETH_ALEN]; - /* Return error if the adapter or table entry is not marked as infra */ - if ((priv->bss_mode != NL80211_IFTYPE_STATION) || + /* Return error if the adapter is not STA role or table entry + * is not marked as infra. + */ + if ((GET_BSS_ROLE(priv) != MWIFIEX_BSS_ROLE_STA) || (bss_desc->bss_mode != NL80211_IFTYPE_STATION)) return -1; From 14d33c9b86b368fd6cab62e61074fa115018f1f8 Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Mon, 29 Jul 2013 16:32:38 -0700 Subject: [PATCH 0433/1368] mwifiex: fix wrong data rates in P2P client commit 237b2ac8ac89a6b0120decdd05c7bf4637deb98a upstream. This patch fixes an issue wherein adhoc rates were being copied into association request from P2P client. Signed-off-by: Avinash Patil Signed-off-by: Stone Piao Signed-off-by: Bing Zhao Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/cfp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mwifiex/cfp.c b/drivers/net/wireless/mwifiex/cfp.c index 988552dece75..5178c4630d89 100644 --- a/drivers/net/wireless/mwifiex/cfp.c +++ b/drivers/net/wireless/mwifiex/cfp.c @@ -415,7 +415,8 @@ u32 mwifiex_get_supported_rates(struct mwifiex_private *priv, u8 *rates) u32 k = 0; struct mwifiex_adapter *adapter = priv->adapter; - if (priv->bss_mode == NL80211_IFTYPE_STATION) { + if (priv->bss_mode == NL80211_IFTYPE_STATION || + priv->bss_mode == NL80211_IFTYPE_P2P_CLIENT) { switch (adapter->config_bands) { case BAND_B: dev_dbg(adapter->dev, "info: infra band=%d " From b74c38d5b6af46482480f6f8b83633e2971a0ed8 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:22 +0800 Subject: [PATCH 0434/1368] zram: avoid invalid memory access in zram_exit() commit 6030ea9b35971a4200062f010341ab832e878ac9 upstream. Memory for zram->disk object may have already been freed after returning from destroy_device(zram), then it's unsafe for zram_reset_device(zram) to access zram->disk again. We can't solve this bug by flipping the order of destroy_device(zram) and zram_reset_device(zram), that will cause deadlock issues to the zram sysfs handler. So fix it by holding an extra reference to zram->disk before calling destroy_device(zram). Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index e34e3fe0ae2e..ee6b67d77340 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -727,8 +727,10 @@ static void __exit zram_exit(void) for (i = 0; i < num_devices; i++) { zram = &zram_devices[i]; + get_disk(zram->disk); destroy_device(zram); zram_reset_device(zram); + put_disk(zram->disk); } unregister_blkdev(zram_major, "zram"); From 6349a112f4059ab32c1bb2fb644a2e63a620aeb5 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:23 +0800 Subject: [PATCH 0435/1368] zram: use zram->lock to protect zram_free_page() in swap free notify path commit 57ab048532c0d975538cebd4456491b5c34248f4 upstream. zram_slot_free_notify() is free-running without any protection from concurrent operations. So there are race conditions between zram_bvec_read()/zram_bvec_write() and zram_slot_free_notify(), and possible consequences include: 1) Trigger BUG_ON(!handle) on zram_bvec_write() side. 2) Access to freed pages on zram_bvec_read() side. 3) Break some fields (bad_compress, good_compress, pages_stored) in zram->stats if the swap layer makes concurrently call to zram_slot_free_notify(). So enhance zram_slot_free_notify() to acquire writer lock on zram->lock before calling zram_free_page(). Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zram/zram_drv.c | 2 ++ drivers/staging/zram/zram_drv.h | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index ee6b67d77340..0738f6c89d27 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -582,7 +582,9 @@ static void zram_slot_free_notify(struct block_device *bdev, struct zram *zram; zram = bdev->bd_disk->private_data; + down_write(&zram->lock); zram_free_page(zram, index); + up_write(&zram->lock); zram_stat64_inc(zram, &zram->stats.notify_free); } diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index 2d1a3f1e8edb..d542eee81357 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -93,8 +93,9 @@ struct zram_meta { struct zram { struct zram_meta *meta; spinlock_t stat64_lock; /* protect 64-bit stats */ - struct rw_semaphore lock; /* protect compression buffers and table - * against concurrent read and writes */ + struct rw_semaphore lock; /* protect compression buffers, table, + * 32bit stat counters against concurrent + * notifications, reads and writes */ struct request_queue *queue; struct gendisk *disk; int init_done; From 916c68df544806585f63cc157149b9d9559ad500 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:24 +0800 Subject: [PATCH 0436/1368] zram: destroy all devices on error recovery path in zram_init() commit 39a9b8ac9333e4268ecff7da6c9d1ab3823ff243 upstream. On error recovery path of zram_init(), it leaks the zram device object causing the failure. So change create_device() to free allocated resources on error path. Signed-off-by: Jiang Liu Acked-by: Minchan Kim Acked-by: Jerome Marchand Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zram/zram_drv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 0738f6c89d27..2ca6dc9a593e 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -595,7 +595,7 @@ static const struct block_device_operations zram_devops = { static int create_device(struct zram *zram, int device_id) { - int ret = 0; + int ret = -ENOMEM; init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); @@ -605,7 +605,6 @@ static int create_device(struct zram *zram, int device_id) if (!zram->queue) { pr_err("Error allocating disk queue for device %d\n", device_id); - ret = -ENOMEM; goto out; } @@ -615,11 +614,9 @@ static int create_device(struct zram *zram, int device_id) /* gendisk structure */ zram->disk = alloc_disk(1); if (!zram->disk) { - blk_cleanup_queue(zram->queue); pr_warn("Error allocating disk structure for device %d\n", device_id); - ret = -ENOMEM; - goto out; + goto out_free_queue; } zram->disk->major = zram_major; @@ -648,11 +645,17 @@ static int create_device(struct zram *zram, int device_id) &zram_disk_attr_group); if (ret < 0) { pr_warn("Error creating sysfs group"); - goto out; + goto out_free_disk; } zram->init_done = 0; + return 0; +out_free_disk: + del_gendisk(zram->disk); + put_disk(zram->disk); +out_free_queue: + blk_cleanup_queue(zram->queue); out: return ret; } From f7b071d02492426c5b5c1086fe082ba7a0de6de8 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:25 +0800 Subject: [PATCH 0437/1368] zram: avoid double free in function zram_bvec_write() commit 65c484609a3b25c35e4edcd5f2c38f98f5226093 upstream. When doing a patial write and the whole page is filled with zero, zram_bvec_write() will free uncmem twice. Signed-off-by: Jiang Liu Acked-by: Minchan Kim Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zram/zram_drv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 2ca6dc9a593e..27ab8241deb9 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -272,8 +272,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); - if (is_partial_io(bvec)) - kfree(uncmem); zram->stats.pages_zero++; zram_set_flag(meta, index, ZRAM_ZERO); ret = 0; From 408fd687175ed6a1fea8cb901882c369ce1d4478 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:26 +0800 Subject: [PATCH 0438/1368] zram: avoid access beyond the zram device commit 12a7ad3b810e77137d0caf97a6dd97591e075b30 upstream. Function valid_io_request() should verify the entire request are within the zram device address range. Otherwise it may cause invalid memory access when accessing/modifying zram->meta->table[index] because the 'index' is out of range. Then it may access non-exist memory, randomly modify memory belong to other subsystems, which is hard to track down. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zram/zram_drv.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 27ab8241deb9..1742ce55b4a9 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -420,13 +420,20 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) */ static inline int valid_io_request(struct zram *zram, struct bio *bio) { - if (unlikely( - (bio->bi_sector >= (zram->disksize >> SECTOR_SHIFT)) || - (bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)) || - (bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))) { + u64 start, end, bound; + /* unaligned request */ + if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) + return 0; + if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) + return 0; + + start = bio->bi_sector; + end = start + (bio->bi_size >> SECTOR_SHIFT); + bound = zram->disksize >> SECTOR_SHIFT; + /* out of range range */ + if (unlikely(start >= bound || end >= bound || start > end)) return 0; - } /* I/O request is valid */ return 1; From c181b464309da49333b85aa9df1eafe2678f1da3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:27 +0800 Subject: [PATCH 0439/1368] zram: protect sysfs handler from invalid memory access commit 5863e10b441e7ea4b492f930f1be180a97d026f3 upstream. Use zram->init_lock to protect access to zram->meta, otherwise it may cause invalid memory access if zram->meta has been freed by zram_reset_device(). This issue may be triggered by: Thread 1: while true; do cat mem_used_total; done Thread 2: while true; do echo 8M > disksize; echo 1 > reset; done Signed-off-by: Jiang Liu Acked-by: Minchan Kim Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zram/zram_sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c index e6a929d452f7..dc76a3dba1b8 100644 --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -188,8 +188,10 @@ static ssize_t mem_used_total_show(struct device *dev, struct zram *zram = dev_to_zram(dev); struct zram_meta *meta = zram->meta; + down_read(&zram->init_lock); if (zram->init_done) val = zs_get_total_size_bytes(meta->mem_pool); + up_read(&zram->init_lock); return sprintf(buf, "%llu\n", val); } From 6813e1d7dcf1db51970149e22f1524336a0bfa24 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 30 Jul 2013 14:00:42 +0200 Subject: [PATCH 0440/1368] ACPI / battery: Fix parsing _BIX return value commit 016d5baad04269e8559332df05f89bd95b52d6ad upstream. The _BIX method returns extended battery info as a package. According the ACPI spec (ACPI 5, Section 10.2.2.2), the first member of that package should be "Revision". However, the current ACPI battery driver treats the first member as "Power Unit" which should be the second member. This causes the result of _BIX return data parsing to be incorrect. Fix this by adding a new member called 'revision' to struct acpi_battery and adding the offsetof() information on it to extended_info_offsets[] as the first row. [rjw: Changelog] Reported-and-tested-by: Jan Hoffmann References: http://bugzilla.kernel.org/show_bug.cgi?id=60519 Signed-off-by: Lan Tianyu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/battery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index e7100459ac4a..95332717e4f5 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -117,6 +117,7 @@ struct acpi_battery { struct acpi_device *device; struct notifier_block pm_nb; unsigned long update_time; + int revision; int rate_now; int capacity_now; int voltage_now; @@ -359,6 +360,7 @@ static struct acpi_offsets info_offsets[] = { }; static struct acpi_offsets extended_info_offsets[] = { + {offsetof(struct acpi_battery, revision), 0}, {offsetof(struct acpi_battery, power_unit), 0}, {offsetof(struct acpi_battery, design_capacity), 0}, {offsetof(struct acpi_battery, full_charge_capacity), 0}, From 657e142082bec684725383eccd54e4ace5a2c293 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 27 Jul 2013 01:13:26 +0200 Subject: [PATCH 0441/1368] Revert "cpuidle: Quickly notice prediction failure in general case" commit 228b30234f258a193317874854eee1ca7807186e upstream. Revert commit e11538d1 (cpuidle: Quickly notice prediction failure in general case), since it depends on commit 69a37be (cpuidle: Quickly notice prediction failure for repeat mode) that has been identified as the source of a significant performance regression in v3.8 and later. Requested-by: Jeremy Eder Tested-by: Len Brown Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/governors/menu.c | 35 +------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index fe343a06b7da..b69a87e22155 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -34,7 +34,7 @@ static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); static DEFINE_PER_CPU(int, hrtimer_status); /* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; +enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; /* * Concepts and ideas behind the menu governor @@ -116,13 +116,6 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; * */ -/* - * The C-state residency is so long that is is worthwhile to exit - * from the shallow C-state and re-enter into a deeper C-state. - */ -static unsigned int perfect_cstate_ms __read_mostly = 30; -module_param(perfect_cstate_ms, uint, 0000); - struct menu_device { int last_state_idx; int needs_update; @@ -223,16 +216,6 @@ EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); - struct menu_device *data = &per_cpu(menu_devices, cpu); - - /* In general case, the expected residency is much larger than - * deepest C-state target residency, but prediction logic still - * predicts a small predicted residency, so the prediction - * history is totally broken if the timer is triggered. - * So reset the correction factor. - */ - if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL) - data->correction_factor[data->bucket] = RESOLUTION * DECAY; per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; @@ -389,7 +372,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* not deepest C-state chosen for low predicted residency */ if (low_predicted) { unsigned int timer_us = 0; - unsigned int perfect_us = 0; /* * Set a timer to detect whether this sleep is much @@ -400,28 +382,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) */ timer_us = 2 * (data->predicted_us + MAX_DEVIATION); - perfect_us = perfect_cstate_ms * 1000; - if (repeat && (4 * timer_us < data->expected_us)) { RCU_NONIDLE(hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us), HRTIMER_MODE_REL_PINNED)); /* In repeat case, menu hrtimer is started */ per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; - } else if (perfect_us < data->expected_us) { - /* - * The next timer is long. This could be because - * we did not make a useful prediction. - * In that case, it makes sense to re-enter - * into a deeper C-state after some time. - */ - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In general case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; } - } return data->last_state_idx; From e9ef4410a75bea04e5f9e30c41fba0ebfa9b7559 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 30 Jul 2013 00:32:00 +0200 Subject: [PATCH 0442/1368] cpufreq: Fix cpufreq driver module refcount balance after suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2a99859932281ed6c2ecdd988855f8f6838f6743 upstream. Since cpufreq_cpu_put() called by __cpufreq_remove_dev() drops the driver module refcount, __cpufreq_remove_dev() causes that refcount to become negative for the cpufreq driver after a suspend/resume cycle. This is not the only bad thing that happens there, however, because kobject_put() should only be called for the policy kobject at this point if the CPU is not the last one for that policy. Namely, if the given CPU is the last one for that policy, the policy kobject's refcount should be 1 at this point, as set by cpufreq_add_dev_interface(), and only needs to be dropped once for the kobject to go away. This actually happens under the cpu == 1 check, so it need not be done before by cpufreq_cpu_put(). On the other hand, if the given CPU is not the last one for that policy, this means that cpufreq_add_policy_cpu() has been called at least once for that policy and cpufreq_cpu_get() has been called for it too. To balance that cpufreq_cpu_get(), we need to call cpufreq_cpu_put() in that case. Thus, to fix the described problem and keep the reference counters balanced in both cases, move the cpufreq_cpu_get() call in __cpufreq_remove_dev() to the code path executed only for CPUs that share the policy with other CPUs. Reported-and-tested-by: Toralf Förster Signed-off-by: Rafael J. Wysocki Reviewed-by: Srivatsa S. Bhat Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 178fe7a69056..648554742a99 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1075,14 +1075,11 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif __func__, cpu_dev->id, cpu); } - if ((cpus == 1) && (cpufreq_driver->target)) - __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); - - pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); - cpufreq_cpu_put(data); - /* If cpu is last user of policy, free policy */ if (cpus == 1) { + if (cpufreq_driver->target) + __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); + lock_policy_rwsem_read(cpu); kobj = &data->kobj; cmp = &data->kobj_unregister; @@ -1103,9 +1100,13 @@ static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif free_cpumask_var(data->related_cpus); free_cpumask_var(data->cpus); kfree(data); - } else if (cpufreq_driver->target) { - __cpufreq_governor(data, CPUFREQ_GOV_START); - __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); + } else { + pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); + cpufreq_cpu_put(data); + if (cpufreq_driver->target) { + __cpufreq_governor(data, CPUFREQ_GOV_START); + __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); + } } per_cpu(cpufreq_policy_cpu, cpu) = -1; From d201a0b94daa5d8f7126c81678ccc04f9215772a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 27 Jul 2013 01:41:34 +0200 Subject: [PATCH 0443/1368] Revert "cpuidle: Quickly notice prediction failure for repeat mode" commit 148519120c6d1f19ad53349683aeae9f228b0b8d upstream. Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for repeat mode), because it has been identified as the source of a significant performance regression in v3.8 and later as explained by Jeremy Eder: We believe we've identified a particular commit to the cpuidle code that seems to be impacting performance of variety of workloads. The simplest way to reproduce is using netperf TCP_RR test, so we're using that, on a pair of Sandy Bridge based servers. We also have data from a large database setup where performance is also measurably/positively impacted, though that test data isn't easily share-able. Included below are test results from 3 test kernels: kernel reverts ----------------------------------------------------------- 1) vanilla upstream (no reverts) 2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c 3) test reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 e11538d1f03914eb92af5a1a378375c05ae8520c In summary, netperf TCP_RR numbers improve by approximately 4% after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. When 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency never seems to get above 40%. Taking that patch out gets C0 near 100% quite often, and performance increases. The below data are histograms representing the %c0 residency @ 1-second sample rates (using turbostat), while under netperf test. - If you look at the first 4 histograms, you can see %c0 residency almost entirely in the 30,40% bin. - The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4, shows %c0 in the 80,90,100% bins. Below each kernel name are netperf TCP_RR trans/s numbers for the particular kernel that can be disclosed publicly, comparing the 3 test kernels. We ran a 4th test with the vanilla kernel where we've also set /dev/cpu_dma_latency=0 to show overall impact boosting single-threaded TCP_RR performance over 11% above baseline. 3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0): TCP_RR trans/s 54323.78 ----------------------------------------------------------- 3.10-rc2 vanilla RX (no reverts) TCP_RR trans/s 48192.47 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 1]: * 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 49]: ************************************************* 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 perfteam2 RX (reverts commit e11538d1f03914eb92af5a1a378375c05ae8520c) TCP_RR trans/s 49698.69 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 2]: ** 40.0000 - 50.0000 [ 58]: ********************************************************** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 and e11538d1f03914eb92af5a1a378375c05ae8520c) TCP_RR trans/s 47766.95 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 27]: *************************** 40.0000 - 50.0000 [ 2]: ** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 2]: ** 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 28]: **************************** Sender: 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 1]: * 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 3]: *** 80.0000 - 90.0000 [ 7]: ******* 90.0000 - 100.0000 [ 38]: ************************************** These results demonstrate gaining back the tendency of the CPU to stay in more responsive, performant C-states (and thus yield measurably better performance), by reverting commit 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. Requested-by: Jeremy Eder Tested-by: Len Brown Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/governors/menu.c | 73 ++------------------------------ include/linux/tick.h | 6 --- kernel/time/tick-sched.c | 9 +--- 3 files changed, 6 insertions(+), 82 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b69a87e22155..bc580b67a652 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -28,13 +28,6 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 -/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ -#define MAX_DEVIATION 60 - -static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); -static DEFINE_PER_CPU(int, hrtimer_status); -/* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; /* * Concepts and ideas behind the menu governor @@ -198,42 +191,17 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } -/* Cancel the hrtimer if it is not triggered yet */ -void menu_hrtimer_cancel(void) -{ - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); - - /* The timer is still not time out*/ - if (per_cpu(hrtimer_status, cpu)) { - hrtimer_cancel(hrtmr); - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - } -} -EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); - -/* Call back for hrtimer is triggered */ -static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) -{ - int cpu = smp_processor_id(); - - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - - return HRTIMER_NORESTART; -} - /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static u32 get_typical_interval(struct menu_device *data) +static void get_typical_interval(struct menu_device *data) { int i = 0, divisor = 0; uint64_t max = 0, avg = 0, stddev = 0; int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ - unsigned int ret = 0; again: @@ -274,16 +242,13 @@ static u32 get_typical_interval(struct menu_device *data) if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) || stddev <= 20) { data->predicted_us = avg; - ret = 1; - return ret; + return; } else if ((divisor * 4) > INTERVALS * 3) { /* Exclude the max interval */ thresh = max - 1; goto again; } - - return ret; } /** @@ -298,9 +263,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int i; int multiplier; struct timespec t; - int repeat = 0, low_predicted = 0; - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); @@ -335,7 +297,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - repeat = get_typical_interval(data); + get_typical_interval(data); /* * We want to default to C1 (hlt), not to busy polling @@ -356,10 +318,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) { - low_predicted = 1; + if (s->target_residency > data->predicted_us) continue; - } if (s->exit_latency > latency_req) continue; if (s->exit_latency * multiplier > data->predicted_us) @@ -369,28 +329,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->exit_us = s->exit_latency; } - /* not deepest C-state chosen for low predicted residency */ - if (low_predicted) { - unsigned int timer_us = 0; - - /* - * Set a timer to detect whether this sleep is much - * longer than repeat mode predicted. If the timer - * triggers, the code will evaluate whether to put - * the CPU into a deeper C-state. - * The timer is cancelled on CPU wakeup. - */ - timer_us = 2 * (data->predicted_us + MAX_DEVIATION); - - if (repeat && (4 * timer_us < data->expected_us)) { - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In repeat case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; - } - } - return data->last_state_idx; } @@ -481,9 +419,6 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); - struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); - hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->function = menu_hrtimer_notify; memset(data, 0, sizeof(struct menu_device)); diff --git a/include/linux/tick.h b/include/linux/tick.h index 9180f4b85e6d..62bd8b72873c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -174,10 +174,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk) { } #endif -# ifdef CONFIG_CPU_IDLE_GOV_MENU -extern void menu_hrtimer_cancel(void); -# else -static inline void menu_hrtimer_cancel(void) {} -# endif /* CONFIG_CPU_IDLE_GOV_MENU */ - #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0cf1c1453181..4251374578bc 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -832,13 +832,10 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); - if (ts->inidle) { - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); + if (ts->inidle) __tick_nohz_idle_enter(ts); - } else { + else tick_nohz_full_stop_tick(ts); - } } /** @@ -936,8 +933,6 @@ void tick_nohz_idle_exit(void) ts->inidle = 0; - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); if (ts->idle_active || ts->tick_stopped) now = ktime_get(); From 5966904487a9a0cd4b9b6ac924cbac44704eb908 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 19 Jul 2013 12:14:16 -0700 Subject: [PATCH 0444/1368] PCI: pciehp: Fix null pointer deref when hot-removing SR-IOV device commit 29ed1f29b68a8395d5679b3c4e38352b617b3236 upstream. Hot-removing a device with SR-IOV enabled causes a null pointer dereference in v3.9 and v3.10. This is a regression caused by ba518e3c17 ("PCI: pciehp: Iterate over all devices in slot, not functions 0-7"). When we iterate over the bus->devices list, we first remove the PF, which also removes all the VFs from the list. Then the list iterator blows up because more than just the current entry was removed from the list. ac205b7bb7 ("PCI: make sriov work with hotplug remove") works around a similar problem in pci_stop_bus_devices() by iterating over the list in reverse, so the VFs are stopped and removed from the list first, before the PF. This patch changes pciehp_unconfigure_device() to iterate over the list in reverse, too. [bhelgaas: bugzilla, changelog] Reference: https://bugzilla.kernel.org/show_bug.cgi?id=60604 Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Acked-by: Yijing Wang Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pciehp_pci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c index aac7a40e4a4a..0e0d0f7f63fd 100644 --- a/drivers/pci/hotplug/pciehp_pci.c +++ b/drivers/pci/hotplug/pciehp_pci.c @@ -92,7 +92,14 @@ int pciehp_unconfigure_device(struct slot *p_slot) if (ret) presence = 0; - list_for_each_entry_safe(dev, temp, &parent->devices, bus_list) { + /* + * Stopping an SR-IOV PF device removes all the associated VFs, + * which will update the bus->devices list and confuse the + * iterator. Therefore, iterate in reverse so we remove the VFs + * first, then the PF. We do the same in pci_stop_bus_device(). + */ + list_for_each_entry_safe_reverse(dev, temp, &parent->devices, + bus_list) { pci_dev_get(dev); if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) { pci_read_config_byte(dev, PCI_BRIDGE_CONTROL, &bctl); From 5f84f7159089e80f66733f8a196bdff2e0bc4f72 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 25 Jul 2013 06:31:38 -0700 Subject: [PATCH 0445/1368] PCI: Retry allocation of only the resource type that failed commit aa914f5ec25e4371ba18b312971314be1b9b1076 upstream. Ben Herrenschmidt reported the following problem: - The bus has space for all desired MMIO resources, including optional space for SR-IOV devices - We attempt to allocate I/O port space, but it fails because the bus has no I/O space - Because of the I/O allocation failure, we retry MMIO allocation, requesting only the required space, without the optional SR-IOV space This means we don't allocate the optional SR-IOV space, even though we could. This is related to 0c5be0cb0e ("PCI: Retry on IORESOURCE_IO type allocations"). This patch changes how we handle allocation failures. We will now retry allocation of only the resource type that failed. If MMIO allocation fails, we'll retry only MMIO allocation. If I/O port allocation fails, we'll retry only I/O port allocation. [bhelgaas: changelog] Reference: https://lkml.kernel.org/r/1367712653.11982.19.camel@pasglop Reported-by: Benjamin Herrenschmidt Tested-by: Gavin Shan Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/setup-bus.c | 69 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index d254e2379533..64a7de22d9af 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -300,6 +300,47 @@ static void assign_requested_resources_sorted(struct list_head *head, } } +static unsigned long pci_fail_res_type_mask(struct list_head *fail_head) +{ + struct pci_dev_resource *fail_res; + unsigned long mask = 0; + + /* check failed type */ + list_for_each_entry(fail_res, fail_head, list) + mask |= fail_res->flags; + + /* + * one pref failed resource will set IORESOURCE_MEM, + * as we can allocate pref in non-pref range. + * Will release all assigned non-pref sibling resources + * according to that bit. + */ + return mask & (IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH); +} + +static bool pci_need_to_release(unsigned long mask, struct resource *res) +{ + if (res->flags & IORESOURCE_IO) + return !!(mask & IORESOURCE_IO); + + /* check pref at first */ + if (res->flags & IORESOURCE_PREFETCH) { + if (mask & IORESOURCE_PREFETCH) + return true; + /* count pref if its parent is non-pref */ + else if ((mask & IORESOURCE_MEM) && + !(res->parent->flags & IORESOURCE_PREFETCH)) + return true; + else + return false; + } + + if (res->flags & IORESOURCE_MEM) + return !!(mask & IORESOURCE_MEM); + + return false; /* should not get here */ +} + static void __assign_resources_sorted(struct list_head *head, struct list_head *realloc_head, struct list_head *fail_head) @@ -312,11 +353,24 @@ static void __assign_resources_sorted(struct list_head *head, * if could do that, could get out early. * if could not do that, we still try to assign requested at first, * then try to reassign add_size for some resources. + * + * Separate three resource type checking if we need to release + * assigned resource after requested + add_size try. + * 1. if there is io port assign fail, will release assigned + * io port. + * 2. if there is pref mmio assign fail, release assigned + * pref mmio. + * if assigned pref mmio's parent is non-pref mmio and there + * is non-pref mmio assign fail, will release that assigned + * pref mmio. + * 3. if there is non-pref mmio assign fail or pref mmio + * assigned fail, will release assigned non-pref mmio. */ LIST_HEAD(save_head); LIST_HEAD(local_fail_head); struct pci_dev_resource *save_res; - struct pci_dev_resource *dev_res; + struct pci_dev_resource *dev_res, *tmp_res; + unsigned long fail_type; /* Check if optional add_size is there */ if (!realloc_head || list_empty(realloc_head)) @@ -348,6 +402,19 @@ static void __assign_resources_sorted(struct list_head *head, return; } + /* check failed type */ + fail_type = pci_fail_res_type_mask(&local_fail_head); + /* remove not need to be released assigned res from head list etc */ + list_for_each_entry_safe(dev_res, tmp_res, head, list) + if (dev_res->res->parent && + !pci_need_to_release(fail_type, dev_res->res)) { + /* remove it from realloc_head list */ + remove_from_list(realloc_head, dev_res->res); + remove_from_list(&save_head, dev_res->res); + list_del(&dev_res->list); + kfree(dev_res); + } + free_list(&local_fail_head); /* Release assigned resource */ list_for_each_entry(dev_res, head, list) From d56b0022c1cc6c63ab85f3bebcd8bf3dfe4be5ca Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Jul 2013 14:20:11 -0400 Subject: [PATCH 0446/1368] drm/radeon: Disable dma rings for bo moves on r6xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit aeea40cbf9388fc829e66fa049f64d97fd72e118 upstream. They still seem to cause instability on some r6xx parts. As a follow up, we can switch to using CP DMA for bo moves on r6xx as a lighter weight alternative to using the 3D engine. A version of this patch should also go to stable kernels. Tested-by: J.N. Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_asic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index a2802b47ee95..de36c4722423 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -986,8 +986,8 @@ static struct radeon_asic r600_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &r600_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_dma, - .copy_ring_index = R600_RING_TYPE_DMA_INDEX, + .copy = &r600_copy_blit, + .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1074,8 +1074,8 @@ static struct radeon_asic rs780_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &r600_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_dma, - .copy_ring_index = R600_RING_TYPE_DMA_INDEX, + .copy = &r600_copy_blit, + .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, From 7b2b160da7661bb2ade3f924b1bd3e3084e53341 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Thu, 2 May 2013 10:58:50 +0200 Subject: [PATCH 0447/1368] xen-blkfront: use a different scatterlist for each request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b7649158a0d241f8d53d13ff7441858539e16656 upstream. In blkif_queue_request blkfront iterates over the scatterlist in order to set the segments of the request, and in blkif_completion blkfront iterates over the raw request, which makes it hard to know the exact position of the source and destination memory positions. This can be solved by allocating a scatterlist for each request, that will be keep until the request is finished, allowing us to copy the data back to the original memory without having to iterate over the raw request. Oracle-Bug: 16660413 - LARGE ASYNCHRONOUS READS APPEAR BROKEN ON 2.6.39-400 CC: stable@vger.kernel.org Signed-off-by: Roger Pau Monné Reported-and-Tested-by: Anne Milicia Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d89ef86220f4..69b45fc97276 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -75,6 +75,7 @@ struct blk_shadow { struct blkif_request req; struct request *request; struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; static DEFINE_MUTEX(blkfront_mutex); @@ -98,7 +99,6 @@ struct blkfront_info enum blkif_state connected; int ring_ref; struct blkif_front_ring ring; - struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; @@ -422,11 +422,11 @@ static int blkif_queue_request(struct request *req) ring_req->u.discard.flag = 0; } else { ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, - info->sg); + info->shadow[id].sg); BUG_ON(ring_req->u.rw.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); - for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { + for_each_sg(info->shadow[id].sg, sg, ring_req->u.rw.nr_segments, i) { fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; @@ -867,12 +867,12 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, struct blkif_response *bret) { int i = 0; - struct bio_vec *bvec; - struct req_iterator iter; - unsigned long flags; + struct scatterlist *sg; char *bvec_data; void *shared_data; - unsigned int offset = 0; + int nseg; + + nseg = s->req.u.rw.nr_segments; if (bret->operation == BLKIF_OP_READ) { /* @@ -881,19 +881,16 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, * than PAGE_SIZE, we have to keep track of the current offset, * to be sure we are copying the data from the right shared page. */ - rq_for_each_segment(bvec, s->request, iter) { - BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); - if (bvec->bv_offset < offset) - i++; - BUG_ON(i >= s->req.u.rw.nr_segments); + for_each_sg(s->sg, sg, nseg, i) { + BUG_ON(sg->offset + sg->length > PAGE_SIZE); shared_data = kmap_atomic( pfn_to_page(s->grants_used[i]->pfn)); - bvec_data = bvec_kmap_irq(bvec, &flags); - memcpy(bvec_data, shared_data + bvec->bv_offset, - bvec->bv_len); - bvec_kunmap_irq(bvec_data, &flags); + bvec_data = kmap_atomic(sg_page(sg)); + memcpy(bvec_data + sg->offset, + shared_data + sg->offset, + sg->length); + kunmap_atomic(bvec_data); kunmap_atomic(shared_data); - offset = bvec->bv_offset + bvec->bv_len; } } /* Add the persistent grant into the list of free grants */ @@ -1022,7 +1019,7 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { struct blkif_sring *sring; - int err; + int err, i; info->ring_ref = GRANT_INVALID_REF; @@ -1034,7 +1031,8 @@ static int setup_blkring(struct xenbus_device *dev, SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + for (i = 0; i < BLK_RING_SIZE; i++) + sg_init_table(info->shadow[i].sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); /* Allocate memory for grants */ err = fill_grant_buffer(info, BLK_RING_SIZE * From 6b9f3486bb756d0aa6c00d7f745c8ca65ff35f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Fri, 12 Jul 2013 13:54:41 -0700 Subject: [PATCH 0448/1368] drm/i915: Preserve the DDI_A_4_LANES bit from the bios MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bcf53de4e60d9000b82f541d654529e2902a4c2c upstream. Otherwise the DDI_A_4_LANES bit gets lost and we can't use > 2 lanes on eDP. This fixes eDP on hsw with > 2 lanes. Also s/port_reversal/saved_port_bits/ since the current name is confusing. Signed-off-by: Stéphane Marchesin Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter Cc: Zhouping Liu Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_ddi.c | 10 ++++++---- drivers/gpu/drm/i915/intel_drv.h | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index fb961bb81903..16e674af4d57 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -684,7 +684,7 @@ static void intel_ddi_mode_set(struct drm_encoder *encoder, struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); - intel_dp->DP = intel_dig_port->port_reversal | + intel_dp->DP = intel_dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW; switch (intel_dp->lane_count) { case 1: @@ -1324,7 +1324,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder) * enabling the port. */ I915_WRITE(DDI_BUF_CTL(port), - intel_dig_port->port_reversal | DDI_BUF_CTL_ENABLE); + intel_dig_port->saved_port_bits | + DDI_BUF_CTL_ENABLE); } else if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -1543,8 +1544,9 @@ void intel_ddi_init(struct drm_device *dev, enum port port) intel_encoder->get_hw_state = intel_ddi_get_hw_state; intel_dig_port->port = port; - intel_dig_port->port_reversal = I915_READ(DDI_BUF_CTL(port)) & - DDI_BUF_PORT_REVERSAL; + intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & + (DDI_BUF_PORT_REVERSAL | + DDI_A_4_LANES); if (hdmi_connector) intel_dig_port->hdmi.hdmi_reg = DDI_BUF_CTL(port); intel_dig_port->dp.output_reg = DDI_BUF_CTL(port); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 624a9e6b8d71..7cd55843e73e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -426,7 +426,7 @@ struct intel_dp { struct intel_digital_port { struct intel_encoder base; enum port port; - u32 port_reversal; + u32 saved_port_bits; struct intel_dp dp; struct intel_hdmi hdmi; }; From 0297fe5a50da955224e6e26c320383760edce01b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Jul 2013 15:59:40 -0700 Subject: [PATCH 0449/1368] fanotify: info leak in copy_event_to_user() commit de1e0c40aceb9d5bff09c3a3b97b2f1b178af53f upstream. The ->reserved field isn't cleared so we leak one byte of stack information to userspace. Signed-off-by: Dan Carpenter Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- fs/notify/fanotify/fanotify_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6c80083a984f..77cc85dd0db0 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -122,6 +122,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; + metadata->reserved = 0; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); if (unlikely(event->mask & FAN_Q_OVERFLOW)) From a09a1b87004a8d574b41e56469005d70fdd2c201 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 18 Jun 2013 18:40:19 +0800 Subject: [PATCH 0450/1368] cgroup: fix umount vs cgroup_cfts_commit() race commit 084457f284abf6789d90509ee11dae383842b23b upstream. cgroup_cfts_commit() uses dget() to keep cgroup alive after cgroup_mutex is dropped, but dget() won't prevent cgroupfs from being umounted. When the race happens, vfs will see some dentries with non-zero refcnt while umount is in process. Keep running this: mount -t cgroup -o blkio xxx /cgroup umount /cgroup And this: modprobe cfq-iosched rmmod cfs-iosched After a while, the BUG() in shrink_dcache_for_umount_subtree() may be triggered: BUG: Dentry xxx{i=0,n=blkio.yyy} still in use (1) [umount of cgroup cgroup] Signed-off-by: Li Zefan Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c6e77ef2a0a6..2e9b387971d1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2769,13 +2769,17 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, { LIST_HEAD(pending); struct cgroup *cgrp, *n; + struct super_block *sb = ss->root->sb; /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ - if (cfts && ss->root != &rootnode) { + if (cfts && ss->root != &rootnode && + atomic_inc_not_zero(&sb->s_active)) { list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) { dget(cgrp->dentry); list_add_tail(&cgrp->cft_q_node, &pending); } + } else { + sb = NULL; } mutex_unlock(&cgroup_mutex); @@ -2798,6 +2802,9 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, dput(cgrp->dentry); } + if (sb) + deactivate_super(sb); + mutex_unlock(&cgroup_cft_mutex); } From 8ecbcde519f31f5ac76cfe34c99369da1c8a1d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 12 Jul 2013 10:18:09 -0400 Subject: [PATCH 0451/1368] drm/radeon: never unpin UVD bo v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9cc2e0e9f13315559c85c9f99f141e420967c955 upstream. Changing the UVD BOs offset on suspend/resume doesn't work because the VCPU internally keeps pointers to it. Just keep it always pinned and save the content manually. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=66425 v2: fix compiler warning v3: fix CIK support v4: rebased for 3.10-stable tree Note: a version of this patch needs to go to stable. Signed-off-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon.h | 3 +- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 124 +++++++++++++------------- drivers/gpu/drm/radeon/rv770.c | 2 +- 4 files changed, 64 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index f4dcfdd5a28b..aad18e676826 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1145,6 +1145,8 @@ struct radeon_uvd { struct radeon_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; + void *saved_bo; + unsigned fw_size; atomic_t handles[RADEON_MAX_UVD_HANDLES]; struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; struct delayed_work idle_work; @@ -1684,7 +1686,6 @@ struct radeon_device { const struct firmware *rlc_fw; /* r6/700 RLC firmware */ const struct firmware *mc_fw; /* NI MC firmware */ const struct firmware *ce_fw; /* SI CE firmware */ - const struct firmware *uvd_fw; /* UVD firmware */ struct r600_blit r600_blit; struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index ddb8f8e04eb5..7ddb0efe2408 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -782,7 +782,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } else { /* put fence directly behind firmware */ - index = ALIGN(rdev->uvd_fw->size, 8); + index = ALIGN(rdev->uvd.fw_size, 8); rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index; rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index; } diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index cad735dd02c6..1b3a91bf1a9d 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -55,6 +55,7 @@ static void radeon_uvd_idle_work_handler(struct work_struct *work); int radeon_uvd_init(struct radeon_device *rdev) { struct platform_device *pdev; + const struct firmware *fw; unsigned long bo_size; const char *fw_name; int i, r; @@ -104,7 +105,7 @@ int radeon_uvd_init(struct radeon_device *rdev) return -EINVAL; } - r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev); + r = request_firmware(&fw, fw_name, &pdev->dev); if (r) { dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", fw_name); @@ -114,7 +115,7 @@ int radeon_uvd_init(struct radeon_device *rdev) platform_device_unregister(pdev); - bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + + bo_size = RADEON_GPU_PAGE_ALIGN(fw->size + 8) + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); @@ -123,64 +124,6 @@ int radeon_uvd_init(struct radeon_device *rdev) return r; } - r = radeon_uvd_resume(rdev); - if (r) - return r; - - memset(rdev->uvd.cpu_addr, 0, bo_size); - memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); - - r = radeon_uvd_suspend(rdev); - if (r) - return r; - - for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { - atomic_set(&rdev->uvd.handles[i], 0); - rdev->uvd.filp[i] = NULL; - } - - return 0; -} - -void radeon_uvd_fini(struct radeon_device *rdev) -{ - radeon_uvd_suspend(rdev); - radeon_bo_unref(&rdev->uvd.vcpu_bo); -} - -int radeon_uvd_suspend(struct radeon_device *rdev) -{ - int r; - - if (rdev->uvd.vcpu_bo == NULL) - return 0; - - r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); - if (!r) { - radeon_bo_kunmap(rdev->uvd.vcpu_bo); - radeon_bo_unpin(rdev->uvd.vcpu_bo); - rdev->uvd.cpu_addr = NULL; - if (!radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_CPU, NULL)) { - radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); - } - radeon_bo_unreserve(rdev->uvd.vcpu_bo); - - if (rdev->uvd.cpu_addr) { - radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); - } else { - rdev->fence_drv[R600_RING_TYPE_UVD_INDEX].cpu_addr = NULL; - } - } - return r; -} - -int radeon_uvd_resume(struct radeon_device *rdev) -{ - int r; - - if (rdev->uvd.vcpu_bo == NULL) - return -EINVAL; - r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); if (r) { radeon_bo_unref(&rdev->uvd.vcpu_bo); @@ -188,10 +131,6 @@ int radeon_uvd_resume(struct radeon_device *rdev) return r; } - /* Have been pin in cpu unmap unpin */ - radeon_bo_kunmap(rdev->uvd.vcpu_bo); - radeon_bo_unpin(rdev->uvd.vcpu_bo); - r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, &rdev->uvd.gpu_addr); if (r) { @@ -209,6 +148,63 @@ int radeon_uvd_resume(struct radeon_device *rdev) radeon_bo_unreserve(rdev->uvd.vcpu_bo); + rdev->uvd.fw_size = fw->size; + memset(rdev->uvd.cpu_addr, 0, bo_size); + memcpy(rdev->uvd.cpu_addr, fw->data, fw->size); + + release_firmware(fw); + + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { + atomic_set(&rdev->uvd.handles[i], 0); + rdev->uvd.filp[i] = NULL; + } + + return 0; +} + +void radeon_uvd_fini(struct radeon_device *rdev) +{ + int r; + + if (rdev->uvd.vcpu_bo == NULL) + return; + + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); + if (!r) { + radeon_bo_kunmap(rdev->uvd.vcpu_bo); + radeon_bo_unpin(rdev->uvd.vcpu_bo); + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + } + + radeon_bo_unref(&rdev->uvd.vcpu_bo); +} + +int radeon_uvd_suspend(struct radeon_device *rdev) +{ + unsigned size; + + if (rdev->uvd.vcpu_bo == NULL) + return 0; + + size = radeon_bo_size(rdev->uvd.vcpu_bo); + rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); + memcpy(rdev->uvd.saved_bo, rdev->uvd.cpu_addr, size); + + return 0; +} + +int radeon_uvd_resume(struct radeon_device *rdev) +{ + if (rdev->uvd.vcpu_bo == NULL) + return -EINVAL; + + if (rdev->uvd.saved_bo != NULL) { + unsigned size = radeon_bo_size(rdev->uvd.vcpu_bo); + memcpy(rdev->uvd.cpu_addr, rdev->uvd.saved_bo, size); + kfree(rdev->uvd.saved_bo); + rdev->uvd.saved_bo = NULL; + } + return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 4a62ad2e5399..30ea14e8854c 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -813,7 +813,7 @@ int rv770_uvd_resume(struct radeon_device *rdev) /* programm the VCPU memory controller bits 0-27 */ addr = rdev->uvd.gpu_addr >> 3; - size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3; WREG32(UVD_VCPU_CACHE_OFFSET0, addr); WREG32(UVD_VCPU_CACHE_SIZE0, size); From 62fda513d368032d8f6b0375004032d5c0c6060d Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 26 Jul 2013 09:11:56 -0700 Subject: [PATCH 0452/1368] x86, fpu: correct the asm constraints for fxsave, unbreak mxcsr.daz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit eaa5a990191d204ba0f9d35dbe5505ec2cdd1460 upstream. GCC will optimize mxcsr_feature_mask_init in arch/x86/kernel/i387.c: memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); asm volatile("fxsave %0" : : "m" (fx_scratch)); mask = fx_scratch.mxcsr_mask; if (mask == 0) mask = 0x0000ffbf; to memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); asm volatile("fxsave %0" : : "m" (fx_scratch)); mask = 0x0000ffbf; since asm statement doesn’t say it will update fx_scratch. As the result, the DAZ bit will be cleared. This patch fixes it. This bug dates back to at least kernel 2.6.12. Signed-off-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/i387.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index cb339097b9ea..f7ea30dce238 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -116,7 +116,7 @@ static void __cpuinit mxcsr_feature_mask_init(void) if (cpu_has_fxsr) { memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (fx_scratch)); + asm volatile("fxsave %0" : "+m" (fx_scratch)); mask = fx_scratch.mxcsr_mask; if (mask == 0) mask = 0x0000ffbf; From 98ad9de66428f3dedb2081c7e0a505489d6feaf4 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 28 Jun 2013 12:37:45 +0800 Subject: [PATCH 0453/1368] Btrfs: fix crash regarding to ulist_add_merge commit 35f0399db6658f465b00893bdd13b992a0acfef0 upstream. Several users reported this crash of NULL pointer or general protection, the story is that we add a rbtree for speedup ulist iteration, and we use krealloc() to address ulist growth, and krealloc() use memcpy to copy old data to new memory area, so it's OK for an array as it doesn't use pointers while it's not OK for a rbtree as it uses pointers. So krealloc() will mess up our rbtree and it ends up with crash. Reviewed-by: Wang Shilong Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Cc: BJ Quinn Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ulist.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 7b417e20efe2..b0a523b2c60e 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -205,6 +205,10 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, u64 new_alloced = ulist->nodes_alloced + 128; struct ulist_node *new_nodes; void *old = NULL; + int i; + + for (i = 0; i < ulist->nnodes; i++) + rb_erase(&ulist->nodes[i].rb_node, &ulist->root); /* * if nodes_alloced == ULIST_SIZE no memory has been allocated @@ -224,6 +228,17 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, ulist->nodes = new_nodes; ulist->nodes_alloced = new_alloced; + + /* + * krealloc actually uses memcpy, which does not copy rb_node + * pointers, so we have to do it ourselves. Otherwise we may + * be bitten by crashes. + */ + for (i = 0; i < ulist->nnodes; i++) { + ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]); + if (ret < 0) + return ret; + } } ulist->nodes[ulist->nnodes].val = val; ulist->nodes[ulist->nnodes].aux = aux; From 21dd5ff6d6bd6be287e3e627f623075085eabb08 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 30 Apr 2013 14:01:45 +0200 Subject: [PATCH 0454/1368] drm/i915: make SDVO TV-out work for multifunction devices commit 09ede5414f0215461c933032630bf9c3a61a8ba3 upstream. We need to track this correctly. While at it shovel the boolean to track whether the sdvo is in tv mode or not into pipe_config. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36997 Tested-by: Pierre Assal Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=63609 Tested-by: cancan,feng Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e1f4e6edaa46..eea5982657a8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4333,7 +4333,8 @@ static void vlv_update_pll(struct intel_crtc *crtc) static void i9xx_update_pll(struct intel_crtc *crtc, intel_clock_t *reduced_clock, - int num_connectors) + int num_connectors, + bool needs_tv_clock) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -4391,7 +4392,7 @@ static void i9xx_update_pll(struct intel_crtc *crtc, if (INTEL_INFO(dev)->gen >= 4) dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT); - if (is_sdvo && intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_TVOUT)) + if (is_sdvo && needs_tv_clock) dpll |= PLL_REF_INPUT_TVCLKINBC; else if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_TVOUT)) /* XXX: just matching BIOS for now */ @@ -4716,7 +4717,8 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, else i9xx_update_pll(intel_crtc, has_reduced_clock ? &reduced_clock : NULL, - num_connectors); + num_connectors, + is_sdvo && is_tv); /* Set up the display plane register */ dspcntr = DISPPLANE_GAMMA_ENABLE; From 72e2729ea923cbbbeda03e5fce14f50b0707f724 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 24 Jul 2013 10:35:33 +0200 Subject: [PATCH 0455/1368] s390: add support for IBM zBC12 machine commit 594712276e737961d30e11eae80d403b2b3815df upstream. Just add the new model number where appropiate. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/Kconfig | 7 ++++--- arch/s390/kernel/setup.c | 1 + arch/s390/mm/init.c | 1 + arch/s390/oprofile/init.c | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index da183c5a103c..97dcbea97a1c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -227,11 +227,12 @@ config MARCH_Z196 not work on older machines. config MARCH_ZEC12 - bool "IBM zEC12" + bool "IBM zBC12 and zEC12" select HAVE_MARCH_ZEC12_FEATURES if 64BIT help - Select this to enable optimizations for IBM zEC12 (2827 series). The - kernel will be slightly faster but will not work on older machines. + Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and + 2827 series). The kernel will be slightly faster but will not work on + older machines. endchoice diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0a49095104c9..8ad9413148bf 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -998,6 +998,7 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z196"); break; case 0x2827: + case 0x2828: strcpy(elf_platform, "zEC12"); break; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 89ebae4008f2..eba15f18fd38 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -69,6 +69,7 @@ static void __init setup_zero_pages(void) order = 2; break; case 0x2827: /* zEC12 */ + case 0x2828: /* zEC12 */ default: order = 5; break; diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index ffeb17ce7f31..930783d2c99b 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -440,7 +440,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) switch (id.machine) { case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; - case 0x2827: ops->cpu_type = "s390/zEC12"; break; + case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; default: return -ENODEV; } } From fa2446744dd5a99a3938aad9faade9b8928aec48 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 25 Jul 2013 10:18:17 +0200 Subject: [PATCH 0456/1368] s390/bitops: fix find_next_bit_left commit 3b0040a47ad63f7147e9e7d2febb61a3b564bb90 upstream. The find_next_bit_left function is broken if used with an offset which is not a multiple of 64. The shift to mask the bits of a 64-bit word not to search is in the wrong direction, the result can be either a bit found smaller than the offset or failure to find a set bit. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 4d8604e311f3..7d4676758733 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -693,7 +693,7 @@ static inline int find_next_bit_left(const unsigned long *addr, size -= offset; p = addr + offset / BITS_PER_LONG; if (bit) { - set = __flo_word(0, *p & (~0UL << bit)); + set = __flo_word(0, *p & (~0UL >> bit)); if (set >= size) return size + offset; if (set < BITS_PER_LONG) From 73b8bd6de83c0ca182622f83d31a1b0a137281fe Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 1 Aug 2013 09:56:36 +0800 Subject: [PATCH 0457/1368] workqueue: copy workqueue_attrs with all fields commit 2865a8fb44cc32420407362cbda80c10fa09c6b2 upstream. $echo '0' > /sys/bus/workqueue/devices/xxx/numa $cat /sys/bus/workqueue/devices/xxx/numa I got 1. It should be 0, the reason is copy_workqueue_attrs() called in apply_workqueue_attrs() doesn't copy no_numa field. Fix it by making copy_workqueue_attrs() copy ->no_numa too. This would also make get_unbound_pool() set a pool's ->no_numa attribute according to the workqueue attributes used when the pool was created. While harmelss, as ->no_numa isn't a pool attribute, this is a bit confusing. Clear it explicitly. tj: Updated description and comments a bit. Signed-off-by: Shaohua Li Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ee8e29a2320c..6f019219ddbd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3398,6 +3398,12 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, { to->nice = from->nice; cpumask_copy(to->cpumask, from->cpumask); + /* + * Unlike hash and equality test, this function doesn't ignore + * ->no_numa as it is used for both pool and wq attrs. Instead, + * get_unbound_pool() explicitly clears ->no_numa after copying. + */ + to->no_numa = from->no_numa; } /* hash value of the content of @attr */ @@ -3565,6 +3571,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ copy_workqueue_attrs(pool->attrs, attrs); + /* + * no_numa isn't a worker_pool attribute, always clear it. See + * 'struct workqueue_attrs' comments for detail. + */ + pool->attrs->no_numa = false; + /* if cpumask is contained inside a NUMA node, we belong to that node */ if (wq_numa_enabled) { for_each_node(node) { From c98ebcb618645d40cfec14b0534ff32126c114ce Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 6 Aug 2013 19:38:55 +0200 Subject: [PATCH 0458/1368] userns: unshare_userns(&cred) should not populate cred on failure commit 6160968cee8b90a5dd95318d716e31d7775c4ef3 upstream. unshare_userns(new_cred) does *new_cred = prepare_creds() before create_user_ns() which can fail. However, the caller expects that it doesn't need to take care of new_cred if unshare_userns() fails. We could change the single caller, sys_unshare(), but I think it would be more clean to avoid the side effects on failure, so with this patch unshare_userns() does put_cred() itself and initializes *new_cred only if create_user_ns() succeeeds. Signed-off-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index d8c30db06c5b..6e50a44610ee 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -105,16 +105,21 @@ int create_user_ns(struct cred *new) int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) { struct cred *cred; + int err = -ENOMEM; if (!(unshare_flags & CLONE_NEWUSER)) return 0; cred = prepare_creds(); - if (!cred) - return -ENOMEM; + if (cred) { + err = create_user_ns(cred); + if (err) + put_cred(cred); + else + *new_cred = cred; + } - *new_cred = cred; - return create_user_ns(cred); + return err; } void free_user_ns(struct user_namespace *ns) From 5c5f9cd2b539879c4291fa24773795314e777354 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Aug 2013 18:55:32 +0200 Subject: [PATCH 0459/1368] userns: limit the maximum depth of user_namespace->parent chain commit 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8 upstream. Ensure that user_namespace->parent chain can't grow too much. Currently we use the hardroded 32 as limit. Reported-by: Andy Lutomirski Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/user_namespace.h | 1 + kernel/user_namespace.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b6b215f13b45..14105c26a836 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -23,6 +23,7 @@ struct user_namespace { struct uid_gid_map projid_map; atomic_t count; struct user_namespace *parent; + int level; kuid_t owner; kgid_t group; unsigned int proc_inum; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 6e50a44610ee..9064b919a406 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -62,6 +62,9 @@ int create_user_ns(struct cred *new) kgid_t group = new->egid; int ret; + if (parent_ns->level > 32) + return -EUSERS; + /* * Verify that we can not violate the policy of which files * may be accessed that is specified by the root directory, @@ -92,6 +95,7 @@ int create_user_ns(struct cred *new) atomic_set(&ns->count, 1); /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; + ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; From 8d60f48be745bdaf0630b87e5720b2a9bbc16f33 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 17 Jul 2013 07:13:59 -0400 Subject: [PATCH 0460/1368] x86/iommu/vt-d: Expand interrupt remapping quirk to cover x58 chipset commit 803075dba31c17af110e1d9a915fe7262165b213 upstream. Recently we added an early quirk to detect 5500/5520 chipsets with early revisions that had problems with irq draining with interrupt remapping enabled: commit 03bbcb2e7e292838bb0244f5a7816d194c911d62 Author: Neil Horman Date: Tue Apr 16 16:38:32 2013 -0400 iommu/vt-d: add quirk for broken interrupt remapping on 55XX chipsets It turns out this same problem is present in the intel X58 chipset as well. See errata 69 here: http://www.intel.com/content/www/us/en/chipsets/x58-express-specification-update.html This patch extends the pci early quirk so that the chip devices/revisions specified in the above update are also covered in the same way: Signed-off-by: Neil Horman Reviewed-by: Jan Beulich Acked-by: Donald Dutile Cc: Joerg Roedel Cc: Andrew Cooper Cc: Malcolm Crossley Cc: Prarit Bhargava Cc: Don Zickus Link: http://lkml.kernel.org/r/1374059639-8631-1-git-send-email-nhorman@tuxdriver.com [ Small edits. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/early-quirks.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 94ab6b90dd3f..63bdb29b2549 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -196,15 +196,23 @@ static void __init ati_bugs_contd(int num, int slot, int func) static void __init intel_remapping_check(int num, int slot, int func) { u8 revision; + u16 device; + device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); /* - * Revision 0x13 of this chipset supports irq remapping - * but has an erratum that breaks its behavior, flag it as such + * Revision 13 of all triggering devices id in this quirk have + * a problem draining interrupts when irq remapping is enabled, + * and should be flagged as broken. Additionally revisions 0x12 + * and 0x22 of device id 0x3405 has this problem. */ if (revision == 0x13) set_irq_remapping_broken(); + else if ((device == 0x3405) && + ((revision == 0x12) || + (revision == 0x22))) + set_irq_remapping_broken(); } @@ -239,6 +247,8 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, { PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST, PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, + { PCI_VENDOR_ID_INTEL, 0x3405, PCI_CLASS_BRIDGE_HOST, + PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, {} From f2281d42f6edb5934406cc1d32fa982e05b2ed41 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 29 Jul 2013 20:54:09 +0200 Subject: [PATCH 0461/1368] spi: spi-davinci: Fix direction in dma_map_single() commit 89c66ee890af18500fa4598db300cc07c267f900 upstream. Commit 048177ce3b3962852fd34a7e04938959271c7e70 (spi: spi-davinci: convert to DMA engine API) introduced a regression: dma_map_single() is called with direction DMA_FROM_DEVICE for rx and for tx. Signed-off-by: Christian Eggers Acked-by: Matt Porter Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-davinci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index 50b13c9b1ab6..df0aacc6fc3b 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -610,7 +610,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t) else buf = (void *)t->tx_buf; t->tx_dma = dma_map_single(&spi->dev, buf, - t->len, DMA_FROM_DEVICE); + t->len, DMA_TO_DEVICE); if (!t->tx_dma) { ret = -EFAULT; goto err_tx_map; From 773c8fef67aa54c6533b26d5a162a1a3531e1d9f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 Jul 2013 08:48:05 +0300 Subject: [PATCH 0462/1368] arcnet: cleanup sizeof parameter [ Upstream commit 087d273caf4f7d3f2159256f255f1f432bc84a5b ] This patch doesn't change the compiled code because ARC_HDR_SIZE is 4 and sizeof(int) is 4, but the intent was to use the header size and not the sizeof the header size. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/arcnet/arcnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index a746ba272f04..a956053608f9 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -1007,7 +1007,7 @@ static void arcnet_rx(struct net_device *dev, int bufnum) soft = &pkt.soft.rfc1201; - lp->hw.copy_from_card(dev, bufnum, 0, &pkt, sizeof(ARC_HDR_SIZE)); + lp->hw.copy_from_card(dev, bufnum, 0, &pkt, ARC_HDR_SIZE); if (pkt.hard.offset[0]) { ofs = pkt.hard.offset[0]; length = 256 - ofs; From c41d4725934c8a747c8157252ea53d76a0fdaece Mon Sep 17 00:00:00 2001 From: Michal Tesar Date: Fri, 19 Jul 2013 14:09:01 +0200 Subject: [PATCH 0463/1368] sysctl net: Keep tcp_syn_retries inside the boundary [ Upstream commit 651e92716aaae60fc41b9652f54cb6803896e0da ] Limit the min/max value passed to the /proc/sys/net/ipv4/tcp_syn_retries. Signed-off-by: Michal Tesar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fa2f63fc453b..3f25e75ae692 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,6 +36,8 @@ static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; static int ip_ttl_min = 1; static int ip_ttl_max = 255; +static int tcp_syn_retries_min = 1; +static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; @@ -331,7 +333,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", From 9ff132a60d24baf89dd1af156a4659287ad0c531 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 23 Jul 2013 00:17:25 +0100 Subject: [PATCH 0464/1368] sfc: Enable RX scatter for flows steered by RFS [ Upstream commit 7aa0076c497d6f0d5d957b431d0d80e1e9780274 ] Received packets are only scattered if this is enabled in both the matching filter and the receiving queue. This was not being done for filters inserted for RFS, so any packet requiring more than a single descriptor was dropped. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c index 2397f0e8d3eb..2738b5f1c81f 100644 --- a/drivers/net/ethernet/sfc/filter.c +++ b/drivers/net/ethernet/sfc/filter.c @@ -1196,7 +1196,9 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + 4 * ip->ihl + 4); ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); - efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT, 0, rxq_index); + efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT, + efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, + rxq_index); rc = efx_filter_set_ipv4_full(&spec, ip->protocol, ip->daddr, ports[1], ip->saddr, ports[0]); if (rc) From 69af51cb5c56f310d9449150101bf25bd0d8a52d Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 22 Jul 2013 23:45:53 +0200 Subject: [PATCH 0465/1368] ipv6: take rtnl_lock and mark mrt6 table as freed on namespace cleanup [ Upstream commit 905a6f96a1b18e490a75f810d733ced93c39b0e5 ] Otherwise we end up dereferencing the already freed net->ipv6.mrt pointer which leads to a panic (from Srivatsa S. Bhat): BUG: unable to handle kernel paging request at ffff882018552020 IP: [] ip6mr_sk_done+0x32/0xb0 [ipv6] PGD 290a067 PUD 207ffe0067 PMD 207ff1d067 PTE 8000002018552060 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: ebtable_nat ebtables nfs fscache nf_conntrack_ipv4 nf_defrag_ipv4 ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables nfsd lockd nfs_acl exportfs auth_rpcgss autofs4 sunrpc 8021q garp bridge stp llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter +ip6_tables ipv6 vfat fat vhost_net macvtap macvlan vhost tun kvm_intel kvm uinput iTCO_wdt iTCO_vendor_support cdc_ether usbnet mii microcode i2c_i801 i2c_core lpc_ich mfd_core shpchp ioatdma dca mlx4_core be2net wmi acpi_cpufreq mperf ext4 jbd2 mbcache dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 7 Comm: kworker/u33:0 Not tainted 3.11.0-rc1-ea45e-a #4 Hardware name: IBM -[8737R2A]-/00Y2738, BIOS -[B2E120RUS-1.20]- 11/30/2012 Workqueue: netns cleanup_net task: ffff8810393641c0 ti: ffff881039366000 task.ti: ffff881039366000 RIP: 0010:[] [] ip6mr_sk_done+0x32/0xb0 [ipv6] RSP: 0018:ffff881039367bd8 EFLAGS: 00010286 RAX: ffff881039367fd8 RBX: ffff882018552000 RCX: dead000000200200 RDX: 0000000000000000 RSI: ffff881039367b68 RDI: ffff881039367b68 RBP: ffff881039367bf8 R08: ffff881039367b68 R09: 2222222222222222 R10: 2222222222222222 R11: 2222222222222222 R12: ffff882015a7a040 R13: ffff882014eb89c0 R14: ffff8820289e2800 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88103fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff882018552020 CR3: 0000000001c0b000 CR4: 00000000000407f0 Stack: ffff881039367c18 ffff882014eb89c0 ffff882015e28c00 0000000000000000 ffff881039367c18 ffffffffa034d9d1 ffff8820289e2800 ffff882014eb89c0 ffff881039367c58 ffffffff815bdecb ffffffff815bddf2 ffff882014eb89c0 Call Trace: [] rawv6_close+0x21/0x40 [ipv6] [] inet_release+0xfb/0x220 [] ? inet_release+0x22/0x220 [] inet6_release+0x3f/0x50 [ipv6] [] sock_release+0x29/0xa0 [] sk_release_kernel+0x30/0x70 [] icmpv6_sk_exit+0x3b/0x80 [ipv6] [] ops_exit_list+0x39/0x60 [] cleanup_net+0xfb/0x1a0 [] process_one_work+0x1da/0x610 [] ? process_one_work+0x169/0x610 [] worker_thread+0x120/0x3a0 [] ? process_one_work+0x610/0x610 [] kthread+0xee/0x100 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x70/0x70 Code: 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 4c 8b 67 30 49 89 fd e8 db 3c 1e e1 49 8b 9c 24 90 08 00 00 48 85 db 74 06 <4c> 39 6b 20 74 20 bb f3 ff ff ff e8 8e 3c 1e e1 89 d8 4c 8b 65 RIP [] ip6mr_sk_done+0x32/0xb0 [ipv6] RSP CR2: ffff882018552020 Reported-by: Srivatsa S. Bhat Tested-by: Srivatsa S. Bhat Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fcf..4b421249532f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -259,10 +259,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } + rtnl_unlock(); fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else @@ -289,7 +291,10 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { + rtnl_lock(); ip6mr_free_table(net->ipv6.mrt6); + net->ipv6.mrt6 = NULL; + rtnl_unlock(); } #endif From ebb72f8df2f7ecf3fce7f848fee47d1c5721901f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jul 2013 17:15:54 -0700 Subject: [PATCH 0466/1368] usbnet: do not pretend to support SG/TSO [ Upstream commit 20f0170377264e8449b6987041f0bcc4d746d3ed ] usbnet doesn't support yet SG, so drivers should not advertise SG or TSO capabilities, as they allow TCP stack to build large TSO packets that need to be linearized and might use order-5 pages. This adds an extra copy overhead and possible allocation failures. Current code ignore skb_linearize() return code so crashes are even possible. Best is to not pretend SG/TSO is supported, and add this again when/if usbnet really supports SG for devices who could get a performance gain. Based on a prior patch from Freddy Xin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 9 ++++----- drivers/net/usb/smsc75xx.c | 12 +++--------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index bd8758fa38c1..cea1f3d0311b 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1029,10 +1029,10 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) dev->mii.supports_gmii = 1; dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO; + NETIF_F_RXCSUM; dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO; + NETIF_F_RXCSUM; /* Enable checksum offload */ *tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | @@ -1173,7 +1173,6 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) if (((skb->len + 8) % frame_size) == 0) tx_hdr2 |= 0x80008000; /* Enable padding */ - skb_linearize(skb); headroom = skb_headroom(skb); tailroom = skb_tailroom(skb); @@ -1317,10 +1316,10 @@ static int ax88179_reset(struct usbnet *dev) 1, 1, tmp); dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO; + NETIF_F_RXCSUM; dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO; + NETIF_F_RXCSUM; /* Enable checksum offload */ *tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 75409748c774..66ebbacf066f 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -45,7 +45,6 @@ #define EEPROM_MAC_OFFSET (0x01) #define DEFAULT_TX_CSUM_ENABLE (true) #define DEFAULT_RX_CSUM_ENABLE (true) -#define DEFAULT_TSO_ENABLE (true) #define SMSC75XX_INTERNAL_PHY_ID (1) #define SMSC75XX_TX_OVERHEAD (8) #define MAX_RX_FIFO_SIZE (20 * 1024) @@ -1410,17 +1409,14 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) INIT_WORK(&pdata->set_multicast, smsc75xx_deferred_multicast_write); - if (DEFAULT_TX_CSUM_ENABLE) { + if (DEFAULT_TX_CSUM_ENABLE) dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - if (DEFAULT_TSO_ENABLE) - dev->net->features |= NETIF_F_SG | - NETIF_F_TSO | NETIF_F_TSO6; - } + if (DEFAULT_RX_CSUM_ENABLE) dev->net->features |= NETIF_F_RXCSUM; dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXCSUM; + NETIF_F_RXCSUM; ret = smsc75xx_wait_ready(dev, 0); if (ret < 0) { @@ -2200,8 +2196,6 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev, { u32 tx_cmd_a, tx_cmd_b; - skb_linearize(skb); - if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) { struct sk_buff *skb2 = skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags); From 1aa1e4cd9ea8a50f610c86ea324f9de4263ac4bc Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 26 Jul 2013 11:00:10 +0200 Subject: [PATCH 0467/1368] genetlink: release cb_lock before requesting additional module [ Upstream commit c74f2b2678f40b80265dd53556f1f778c8e1823f ] Requesting external module with cb_lock taken can result in the deadlock like showed below: [ 2458.111347] Showing all locks held in the system: [ 2458.111347] 1 lock held by NetworkManager/582: [ 2458.111347] #0: (cb_lock){++++++}, at: [] genl_rcv+0x19/0x40 [ 2458.111347] 1 lock held by modprobe/603: [ 2458.111347] #0: (cb_lock){++++++}, at: [] genl_lock_all+0x15/0x30 [ 2461.579457] SysRq : Show Blocked State [ 2461.580103] task PC stack pid father [ 2461.580103] NetworkManager D ffff880034b84500 4040 582 1 0x00000080 [ 2461.580103] ffff8800197ff720 0000000000000046 00000000001d5340 ffff8800197fffd8 [ 2461.580103] ffff8800197fffd8 00000000001d5340 ffff880019631700 7fffffffffffffff [ 2461.580103] ffff8800197ff880 ffff8800197ff878 ffff880019631700 ffff880019631700 [ 2461.580103] Call Trace: [ 2461.580103] [] schedule+0x29/0x70 [ 2461.580103] [] schedule_timeout+0x1c1/0x360 [ 2461.580103] [] ? mark_held_locks+0xbb/0x140 [ 2461.580103] [] ? _raw_spin_unlock_irq+0x2c/0x50 [ 2461.580103] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 2461.580103] [] wait_for_completion_killable+0xe8/0x170 [ 2461.580103] [] ? wake_up_state+0x20/0x20 [ 2461.580103] [] call_usermodehelper_exec+0x1a5/0x210 [ 2461.580103] [] ? wait_for_completion_killable+0x3d/0x170 [ 2461.580103] [] __request_module+0x1b3/0x370 [ 2461.580103] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 2461.580103] [] ctrl_getfamily+0x159/0x190 [ 2461.580103] [] genl_family_rcv_msg+0x1f4/0x2e0 [ 2461.580103] [] ? genl_family_rcv_msg+0x2e0/0x2e0 [ 2461.580103] [] genl_rcv_msg+0x8e/0xd0 [ 2461.580103] [] netlink_rcv_skb+0xa9/0xc0 [ 2461.580103] [] genl_rcv+0x28/0x40 [ 2461.580103] [] netlink_unicast+0xdd/0x190 [ 2461.580103] [] netlink_sendmsg+0x329/0x750 [ 2461.580103] [] sock_sendmsg+0x99/0xd0 [ 2461.580103] [] ? local_clock+0x5f/0x70 [ 2461.580103] [] ? lock_release_non_nested+0x308/0x350 [ 2461.580103] [] ___sys_sendmsg+0x39e/0x3b0 [ 2461.580103] [] ? kvm_clock_read+0x2f/0x50 [ 2461.580103] [] ? sched_clock+0x9/0x10 [ 2461.580103] [] ? sched_clock_local+0x1d/0x80 [ 2461.580103] [] ? sched_clock_cpu+0xa8/0x100 [ 2461.580103] [] ? trace_hardirqs_off+0xd/0x10 [ 2461.580103] [] ? local_clock+0x5f/0x70 [ 2461.580103] [] ? lock_release_holdtime.part.28+0xf/0x1a0 [ 2461.580103] [] ? fget_light+0xf9/0x510 [ 2461.580103] [] ? fget_light+0x3c/0x510 [ 2461.580103] [] __sys_sendmsg+0x42/0x80 [ 2461.580103] [] SyS_sendmsg+0x12/0x20 [ 2461.580103] [] system_call_fastpath+0x16/0x1b [ 2461.580103] modprobe D ffff88000f2c8000 4632 603 602 0x00000080 [ 2461.580103] ffff88000f04fba8 0000000000000046 00000000001d5340 ffff88000f04ffd8 [ 2461.580103] ffff88000f04ffd8 00000000001d5340 ffff8800377d4500 ffff8800377d4500 [ 2461.580103] ffffffff81d0b260 ffffffff81d0b268 ffffffff00000000 ffffffff81d0b2b0 [ 2461.580103] Call Trace: [ 2461.580103] [] schedule+0x29/0x70 [ 2461.580103] [] rwsem_down_write_failed+0xed/0x1a0 [ 2461.580103] [] ? update_cpu_load_active+0x10/0xb0 [ 2461.580103] [] call_rwsem_down_write_failed+0x13/0x20 [ 2461.580103] [] ? down_write+0x9d/0xb2 [ 2461.580103] [] ? genl_lock_all+0x15/0x30 [ 2461.580103] [] genl_lock_all+0x15/0x30 [ 2461.580103] [] genl_register_family+0x53/0x1f0 [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] genl_register_family_with_ops+0x20/0x80 [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] nl80211_init+0x24/0xf0 [cfg80211] [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] cfg80211_init+0x43/0xdb [cfg80211] [ 2461.580103] [] do_one_initcall+0xfa/0x1b0 [ 2461.580103] [] ? set_memory_nx+0x43/0x50 [ 2461.580103] [] load_module+0x1c6f/0x27f0 [ 2461.580103] [] ? store_uevent+0x40/0x40 [ 2461.580103] [] SyS_finit_module+0x86/0xb0 [ 2461.580103] [] system_call_fastpath+0x16/0x1b [ 2461.580103] Sched Debug Version: v0.10, 3.11.0-0.rc1.git4.1.fc20.x86_64 #1 Problem start to happen after adding net-pf-16-proto-16-family-nl80211 alias name to cfg80211 module by below commit (though that commit itself is perfectly fine): commit fb4e156886ce6e8309e912d8b370d192330d19d3 Author: Marcel Holtmann Date: Sun Apr 28 16:22:06 2013 -0700 nl80211: Add generic netlink module alias for cfg80211/nl80211 Reported-and-tested-by: Jeff Layton Reported-by: Richard W.M. Jones Signed-off-by: Stanislaw Gruszka Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/genetlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2fd6dbea327a..1076fe16b122 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -877,8 +877,10 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_MODULES if (res == NULL) { genl_unlock(); + up_read(&cb_lock); request_module("net-pf-%d-proto-%d-family-%s", PF_NETLINK, NETLINK_GENERIC, name); + down_read(&cb_lock); genl_lock(); res = genl_family_find_byname(name); } From 7515b17b548cfc509ccbe3fcf8d9f004356d1845 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Jul 2013 00:16:21 -0700 Subject: [PATCH 0468/1368] net_sched: Fix stack info leak in cbq_dump_wrr(). [ Upstream commit a0db856a95a29efb1c23db55c02d9f0ff4f0db48 ] Make sure the reserved fields, and padding (if any), are fully initialized. Based upon a patch by Dan Carpenter and feedback from Joe Perches. Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cbq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 1bc210ffcba2..8ec15988b5f3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1465,6 +1465,7 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; + memset(&opt, 0, sizeof(opt)); opt.flags = 0; opt.allot = cl->allot; opt.priority = cl->priority + 1; From 6d60b440872edd1160f0f2eb97fe3e305019c99f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 28 Jul 2013 23:04:45 +0300 Subject: [PATCH 0469/1368] af_key: more info leaks in pfkey messages [ Upstream commit ff862a4668dd6dba962b1d2d8bd344afa6375683 ] This is inspired by a5cc68f3d6 "af_key: fix info leaks in notify messages". There are some struct members which don't get initialized and could disclose small amounts of private information. Acked-by: Mathias Krause Signed-off-by: Dan Carpenter Acked-by: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 9da862070dd8..ab8bd2cabfa0 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2081,6 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; @@ -3137,7 +3138,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; + pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ if (x->id.proto == IPPROTO_AH) @@ -3525,6 +3528,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; From 1ec05e6b8c709aa7f250a4ddc9fd4e423bf4cfd1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Jul 2013 10:24:04 -0700 Subject: [PATCH 0470/1368] atl1c: use custom skb allocator [ Upstream commit 7b70176421993866e616f1cbc4d0dd4054f1bf78 ] We had reports ( https://bugzilla.kernel.org/show_bug.cgi?id=54021 ) that using high order pages for skb allocations is problematic for atl1c We do not know exactly what the problem is, but we suspect that crossing 4K pages is not well supported by this hardware. Use a custom allocator, using page allocator and 2K fragments for optimal stack behavior. We might make this allocator generic in future kernels. Signed-off-by: Eric Dumazet Cc: Luis Henriques Cc: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/atl1c/atl1c.h | 3 ++ .../net/ethernet/atheros/atl1c/atl1c_main.c | 40 ++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h index b2bf324631dc..0f0556526ba9 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c.h +++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h @@ -520,6 +520,9 @@ struct atl1c_adapter { struct net_device *netdev; struct pci_dev *pdev; struct napi_struct napi; + struct page *rx_page; + unsigned int rx_page_offset; + unsigned int rx_frag_size; struct atl1c_hw hw; struct atl1c_hw_stats hw_stats; struct mii_if_info mii; /* MII interface info */ diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 0ba900762b13..11cdf1d43041 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -481,10 +481,15 @@ static int atl1c_set_mac_addr(struct net_device *netdev, void *p) static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter, struct net_device *dev) { + unsigned int head_size; int mtu = dev->mtu; adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ? roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE; + + head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + adapter->rx_frag_size = roundup_pow_of_two(head_size); } static netdev_features_t atl1c_fix_features(struct net_device *netdev, @@ -952,6 +957,10 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter) kfree(adapter->tpd_ring[0].buffer_info); adapter->tpd_ring[0].buffer_info = NULL; } + if (adapter->rx_page) { + put_page(adapter->rx_page); + adapter->rx_page = NULL; + } } /** @@ -1639,6 +1648,35 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter, skb_checksum_none_assert(skb); } +static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter) +{ + struct sk_buff *skb; + struct page *page; + + if (adapter->rx_frag_size > PAGE_SIZE) + return netdev_alloc_skb(adapter->netdev, + adapter->rx_buffer_len); + + page = adapter->rx_page; + if (!page) { + adapter->rx_page = page = alloc_page(GFP_ATOMIC); + if (unlikely(!page)) + return NULL; + adapter->rx_page_offset = 0; + } + + skb = build_skb(page_address(page) + adapter->rx_page_offset, + adapter->rx_frag_size); + if (likely(skb)) { + adapter->rx_page_offset += adapter->rx_frag_size; + if (adapter->rx_page_offset >= PAGE_SIZE) + adapter->rx_page = NULL; + else + get_page(page); + } + return skb; +} + static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter) { struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring; @@ -1660,7 +1698,7 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter) while (next_info->flags & ATL1C_BUFFER_FREE) { rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use); - skb = netdev_alloc_skb(adapter->netdev, adapter->rx_buffer_len); + skb = atl1c_alloc_skb(adapter); if (unlikely(!skb)) { if (netif_msg_rx_err(adapter)) dev_warn(&pdev->dev, "alloc rx buffer failed\n"); From 32976e6354d193eb47fd3e405cf22209782efe19 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Jul 2013 13:23:39 +0300 Subject: [PATCH 0471/1368] net_sched: info leak in atm_tc_dump_class() [ Upstream commit 8cb3b9c3642c0263d48f31d525bcee7170eedc20 ] The "pvc" struct has a hole after pvc.sap_family which is not cleared. Signed-off-by: Dan Carpenter Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_atm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ca8e0a57d945..1f9c31411f19 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -605,6 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, struct sockaddr_atmpvc pvc; int state; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; From 379731ab2b425538cf87437049ec1c1301605e46 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 30 Jul 2013 10:31:00 -0700 Subject: [PATCH 0472/1368] ndisc: Add missing inline to ndisc_addr_option_pad [ Upstream commit d9d10a30964504af834d8d250a0c76d4ae91eb1e ] Signed-off-by: Joe Perches Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ndisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 745bf741e029..5043f8b08053 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -119,7 +119,7 @@ extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may * also need a pad of 2. */ -static int ndisc_addr_option_pad(unsigned short type) +static inline int ndisc_addr_option_pad(unsigned short type) { switch (type) { case ARPHRD_INFINIBAND: return 2; From 1eec0e843a893cf2893659ac99f8bd8ab6265ba4 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 31 Jul 2013 09:03:56 -0400 Subject: [PATCH 0473/1368] 8139cp: Add dma_mapping_error checking [ Upstream commit cf3c4c03060b688cbc389ebc5065ebcce5653e96 ] Self explanitory dma_mapping_error addition to the 8139 driver, based on this: https://bugzilla.redhat.com/show_bug.cgi?id=947250 It showed several backtraces arising for dma_map_* usage without checking the return code on the mapping. Add the check and abort the rx/tx operation if its failed. Untested as I have no hardware and the reporter has wandered off, but seems pretty straightforward. Signed-off-by: Neil Horman CC: "David S. Miller" CC: Francois Romieu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/8139cp.c | 48 +++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 03523459c406..887aebea298b 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -478,7 +478,7 @@ static int cp_rx_poll(struct napi_struct *napi, int budget) while (1) { u32 status, len; - dma_addr_t mapping; + dma_addr_t mapping, new_mapping; struct sk_buff *skb, *new_skb; struct cp_desc *desc; const unsigned buflen = cp->rx_buf_sz; @@ -520,6 +520,13 @@ static int cp_rx_poll(struct napi_struct *napi, int budget) goto rx_next; } + new_mapping = dma_map_single(&cp->pdev->dev, new_skb->data, buflen, + PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&cp->pdev->dev, new_mapping)) { + dev->stats.rx_dropped++; + goto rx_next; + } + dma_unmap_single(&cp->pdev->dev, mapping, buflen, PCI_DMA_FROMDEVICE); @@ -531,12 +538,11 @@ static int cp_rx_poll(struct napi_struct *napi, int budget) skb_put(skb, len); - mapping = dma_map_single(&cp->pdev->dev, new_skb->data, buflen, - PCI_DMA_FROMDEVICE); cp->rx_skb[rx_tail] = new_skb; cp_rx_skb(cp, skb, desc); rx++; + mapping = new_mapping; rx_next: cp->rx_ring[rx_tail].opts2 = 0; @@ -716,6 +722,22 @@ static inline u32 cp_tx_vlan_tag(struct sk_buff *skb) TxVlanTag | swab16(vlan_tx_tag_get(skb)) : 0x00; } +static void unwind_tx_frag_mapping(struct cp_private *cp, struct sk_buff *skb, + int first, int entry_last) +{ + int frag, index; + struct cp_desc *txd; + skb_frag_t *this_frag; + for (frag = 0; frag+first < entry_last; frag++) { + index = first+frag; + cp->tx_skb[index] = NULL; + txd = &cp->tx_ring[index]; + this_frag = &skb_shinfo(skb)->frags[frag]; + dma_unmap_single(&cp->pdev->dev, le64_to_cpu(txd->addr), + skb_frag_size(this_frag), PCI_DMA_TODEVICE); + } +} + static netdev_tx_t cp_start_xmit (struct sk_buff *skb, struct net_device *dev) { @@ -749,6 +771,9 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, len = skb->len; mapping = dma_map_single(&cp->pdev->dev, skb->data, len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&cp->pdev->dev, mapping)) + goto out_dma_error; + txd->opts2 = opts2; txd->addr = cpu_to_le64(mapping); wmb(); @@ -786,6 +811,9 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, first_len = skb_headlen(skb); first_mapping = dma_map_single(&cp->pdev->dev, skb->data, first_len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&cp->pdev->dev, first_mapping)) + goto out_dma_error; + cp->tx_skb[entry] = skb; entry = NEXT_TX(entry); @@ -799,6 +827,11 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, mapping = dma_map_single(&cp->pdev->dev, skb_frag_address(this_frag), len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&cp->pdev->dev, mapping)) { + unwind_tx_frag_mapping(cp, skb, first_entry, entry); + goto out_dma_error; + } + eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; ctrl = eor | len | DescOwn; @@ -859,11 +892,16 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, if (TX_BUFFS_AVAIL(cp) <= (MAX_SKB_FRAGS + 1)) netif_stop_queue(dev); +out_unlock: spin_unlock_irqrestore(&cp->lock, intr_flags); cpw8(TxPoll, NormalTxPoll); return NETDEV_TX_OK; +out_dma_error: + kfree_skb(skb); + cp->dev->stats.tx_dropped++; + goto out_unlock; } /* Set or clear the multicast filter for this adaptor. @@ -1054,6 +1092,10 @@ static int cp_refill_rx(struct cp_private *cp) mapping = dma_map_single(&cp->pdev->dev, skb->data, cp->rx_buf_sz, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&cp->pdev->dev, mapping)) { + kfree_skb(skb); + goto err_out; + } cp->rx_skb[i] = skb; cp->rx_ring[i].opts2 = 0; From f870f0c92b820e063dbb6ba9bee0ee80e3dc8fd0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 1 Aug 2013 19:55:00 +0300 Subject: [PATCH 0474/1368] net/mlx4_core: Don't give VFs MAC addresses which are derived from the PF MAC [ Upstream commit 0508ad646836007e6e6b62331eee7356844eac3d ] If the user has not assigned a MAC address to a VM, then don't give it MAC which is based on the PF one. The current derivation scheme is wrong and leads to VM MAC collisions when the number of cards/hypervisors becomes big enough. Instead, just give it zeros and let them figure out what to do with that. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/fw.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 2c97901c6a6d..593177d47711 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -840,16 +840,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_CMD_NATIVE); if (!err && dev->caps.function != slave) { - /* if config MAC in DB use it */ - if (priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac) - def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac; - else { - /* set slave default_mac address */ - MLX4_GET(def_mac, outbox->buf, QUERY_PORT_MAC_OFFSET); - def_mac += slave << 8; - priv->mfunc.master.vf_admin[slave].vport[vhcr->in_modifier].mac = def_mac; - } - + def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac; MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET); /* get port type - currently only eth is enabled */ From 7abe9b04d9531916a566828049284ae515dde9ea Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 1 Aug 2013 19:55:01 +0300 Subject: [PATCH 0475/1368] net/mlx4_core: VFs must ignore the enable_64b_cqe_eqe module param [ Upstream commit b30513202c6c14120f70b2e9aa1e97d47bbc2313 ] Slaves get the 64B CQE/EQE state from QUERY_HCA, not from the module parameter. If the parameter is set to zero, the slave outputs an incorrect/irrelevant warning message that 64B CQEs/EQEs are supported but not enabled (even if the hypervisor has enabled 64B CQEs/EQEs). Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 8a434997a0df..1b195fc7f411 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -371,7 +371,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; - if (!enable_64b_cqe_eqe) { + if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { if (dev_cap->flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); From f0f5bc74eab416cea936f669e90a9d8a8dff18fd Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Tue, 23 Jul 2013 14:13:32 +0300 Subject: [PATCH 0476/1368] iwlwifi: mvm: set SSID bits for passive channels commit bb963c4a43eb5127eb0bbfa16c7a6a209b0af5db upstream. Set SSID bitmap for direct scan even on passive channels, for the passive-to-active feature. Without this patch only the SSID from probe request template is sent on passive channels, after passive-to-active switching, causing us to not find all desired networks. Remove the unused passive scan mask constant. Reviewed-by: Emmanuel Grumbach Signed-off-by: David Spinadel Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h | 1 - drivers/net/wireless/iwlwifi/mvm/scan.c | 11 ++--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h index b60d14151721..365095a0c3b3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h @@ -69,7 +69,6 @@ /* Scan Commands, Responses, Notifications */ /* Masks for iwl_scan_channel.type flags */ -#define SCAN_CHANNEL_TYPE_PASSIVE 0 #define SCAN_CHANNEL_TYPE_ACTIVE BIT(0) #define SCAN_CHANNEL_NARROW_BAND BIT(22) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index 08fbfe0fca62..8e1f6c095a9e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -176,19 +176,12 @@ static void iwl_mvm_scan_fill_channels(struct iwl_scan_cmd *cmd, struct iwl_scan_channel *chan = (struct iwl_scan_channel *) (cmd->data + le16_to_cpu(cmd->tx_cmd.len)); int i; - __le32 chan_type_value; - - if (req->n_ssids > 0) - chan_type_value = cpu_to_le32(BIT(req->n_ssids + 1) - 1); - else - chan_type_value = SCAN_CHANNEL_TYPE_PASSIVE; for (i = 0; i < cmd->channel_count; i++) { chan->channel = cpu_to_le16(req->channels[i]->hw_value); + chan->type = cpu_to_le32(BIT(req->n_ssids) - 1); if (req->channels[i]->flags & IEEE80211_CHAN_PASSIVE_SCAN) - chan->type = SCAN_CHANNEL_TYPE_PASSIVE; - else - chan->type = chan_type_value; + chan->type &= cpu_to_le32(~SCAN_CHANNEL_TYPE_ACTIVE); chan->active_dwell = cpu_to_le16(active_dwell); chan->passive_dwell = cpu_to_le16(passive_dwell); chan->iteration_count = cpu_to_le16(1); From 28f7544b8ba92b3975daa5b639ceb14ea2989d48 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 May 2013 18:58:16 +0200 Subject: [PATCH 0477/1368] iwlwifi: dvm: don't send BT_CONFIG on devices w/o Bluetooth commit 707aee401d2467baa785a697f40a6e2d9ee79ad5 upstream. The BT_CONFIG command that is sent to the device during startup will enable BT coex unless the module parameter turns it off, but on devices without Bluetooth this may cause problems, as reported in Redhat BZ 885407. Fix this by sending the BT_CONFIG command only when the device has Bluetooth. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/dvm/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/main.c b/drivers/net/wireless/iwlwifi/dvm/main.c index 74d7572e7091..a8afc7bee545 100644 --- a/drivers/net/wireless/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/iwlwifi/dvm/main.c @@ -758,7 +758,7 @@ int iwl_alive_start(struct iwl_priv *priv) BT_COEX_PRIO_TBL_EVT_INIT_CALIB2); if (ret) return ret; - } else { + } else if (priv->cfg->bt_params) { /* * default is 2-wire BT coexexistence support */ From d35fdd8cab944d7bc91cf9766dced16a1647e3cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Apr 2013 15:39:50 +0200 Subject: [PATCH 0478/1368] hwrng: bcm2835: fix MODULE_LICENSE tag commit 22e8099f4f6621b8d165e238cdef2a1cf655e159 upstream. The MODULE_LICENSE macro invocation must use either "GPL" or "GPL v2", but not "GPLv2" in order to be detected by the module loader. This fixes the allmodconfig build error: FATAL: modpost: GPL-incompatible module bcm2835-rng.ko uses GPL-only symbol 'platform_driver_unregister' Signed-off-by: Arnd Bergmann Acked-by: Lubomir Rintel Cc: Dom Cobley Cc: Stephen Warren Cc: Matt Mackall Cc: linux-rpi-kernel@lists.infradead.org Cc: Herbert Xu Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/bcm2835-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index eb7f14725ebd..43577ca780e3 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -110,4 +110,4 @@ module_platform_driver(bcm2835_rng_driver); MODULE_AUTHOR("Lubomir Rintel "); MODULE_DESCRIPTION("BCM2835 Random Number Generator (RNG) driver"); -MODULE_LICENSE("GPLv2"); +MODULE_LICENSE("GPL v2"); From 22e994efa7f9a786fc2c5e07e0b5e312f2db025e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 11 Aug 2013 18:49:02 -0700 Subject: [PATCH 0479/1368] Linux 3.10.6 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f8349d0ca14c..fd92ffba51c3 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 5 +SUBLEVEL = 6 EXTRAVERSION = -NAME = Unicycling Gorilla +NAME = TOSSUG Baby Fish # *DOCUMENTATION* # To see a list of typical targets execute "make help" From 0ac10bd036f0f3b8ce7ac2390446eab9531c72eb Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 30 Jul 2013 22:58:34 -0400 Subject: [PATCH 0480/1368] SCSI: Don't attempt to send extended INQUIRY command if skip_vpd_pages is set commit 7562523e84ddc742fe1f9db8bd76b01acca89f6b upstream. If a device has the skip_vpd_pages flag set we should simply fail the scsi_get_vpd_page() call. Signed-off-by: Martin K. Petersen Acked-by: Alan Stern Tested-by: Stuart Foster Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 3b1ea34e1f5a..eaa808e6ba91 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1031,6 +1031,9 @@ int scsi_get_vpd_page(struct scsi_device *sdev, u8 page, unsigned char *buf, { int i, result; + if (sdev->skip_vpd_pages) + goto fail; + /* Ask for all the pages supported by this device */ result = scsi_vpd_inquiry(sdev, buf, 0, buf_len); if (result) From e842dbeb5777ce9494ed838a4ba0c9af9fa7bd9e Mon Sep 17 00:00:00 2001 From: "Sumit.Saxena@lsi.com" Date: Tue, 16 Jul 2013 02:26:05 +0530 Subject: [PATCH 0481/1368] SCSI: megaraid_sas: megaraid_sas driver init fails in kdump kernel commit 6431f5d7c6025f8b007af06ea090de308f7e6881 upstream. Problem: When Hardware IOMMU is on, megaraid_sas driver initialization fails in kdump kernel with LSI MegaRAID controller(device id-0x73). Actually this issue needs fix in firmware, but for firmware running in field, this driver fix is proposed to resolve the issue. At firmware initialization time, if firmware does not come to ready state, driver will reset the adapter and retry for firmware transition to ready state unconditionally(not only executed for kdump kernel). Signed-off-by: Sumit Saxena Signed-off-by: Kashyap Desai Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 89178b833b52..9b60dc129a53 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3508,11 +3508,21 @@ static int megasas_init_fw(struct megasas_instance *instance) break; } - /* - * We expect the FW state to be READY - */ - if (megasas_transition_to_ready(instance, 0)) - goto fail_ready_state; + if (megasas_transition_to_ready(instance, 0)) { + atomic_set(&instance->fw_reset_no_pci_access, 1); + instance->instancet->adp_reset + (instance, instance->reg_set); + atomic_set(&instance->fw_reset_no_pci_access, 0); + dev_info(&instance->pdev->dev, + "megasas: FW restarted successfully from %s!\n", + __func__); + + /*waitting for about 30 second before retry*/ + ssleep(30); + + if (megasas_transition_to_ready(instance, 0)) + goto fail_ready_state; + } /* Check if MSI-X is supported while in ready state */ msix_enable = (instance->instancet->read_fw_status_reg(reg_set) & From 96e9cd3d0609e76839ae25efe1ea8b75ca81e588 Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 1 Aug 2013 11:07:18 +0930 Subject: [PATCH 0482/1368] virtio-scsi: Fix virtqueue affinity setup commit aa52aeea2725839bdd3dcce394486e9a043065e0 upstream. vscsi->num_queues counts the number of request virtqueue which does not include the control and event virtqueue. It is wrong to subtract VIRTIO_SCSI_VQ_BASE from vscsi->num_queues. This patch fixes the following panic. (qemu) device_del scsi0 BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] __virtscsi_set_affinity+0x6f/0x120 PGD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 0 PID: 659 Comm: kworker/0:1 Not tainted 3.11.0-rc2+ #1172 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Workqueue: kacpi_hotplug _handle_hotplug_event_func task: ffff88007bee1cc0 ti: ffff88007bfe4000 task.ti: ffff88007bfe4000 RIP: 0010:[] [] __virtscsi_set_affinity+0x6f/0x120 RSP: 0018:ffff88007bfe5a38 EFLAGS: 00010202 RAX: 0000000000000010 RBX: ffff880077fd0d28 RCX: 0000000000000050 RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000000 RBP: ffff88007bfe5a58 R08: ffff880077f6ff00 R09: 0000000000000001 R10: ffffffff8143e673 R11: 0000000000000001 R12: 0000000000000001 R13: ffff880077fd0800 R14: 0000000000000000 R15: ffff88007bf489b0 FS: 0000000000000000(0000) GS:ffff88007ea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000020 CR3: 0000000079f8b000 CR4: 00000000000006f0 Stack: ffff880077fd0d28 0000000000000000 ffff880077fd0800 0000000000000008 ffff88007bfe5a78 ffffffff8179b37d ffff88007bccc800 ffff88007bccc800 ffff88007bfe5a98 ffffffff8179b3b6 ffff88007bccc800 ffff880077fd0d28 Call Trace: [] virtscsi_set_affinity+0x2d/0x40 [] virtscsi_remove_vqs+0x26/0x50 [] virtscsi_remove+0x82/0xa0 [] virtio_dev_remove+0x22/0x70 [] __device_release_driver+0x69/0xd0 [] device_release_driver+0x2d/0x40 [] bus_remove_device+0x116/0x150 [] device_del+0x126/0x1e0 [] device_unregister+0x16/0x30 [] unregister_virtio_device+0x19/0x30 [] virtio_pci_remove+0x36/0x80 [] pci_device_remove+0x37/0x70 [] __device_release_driver+0x69/0xd0 [] device_release_driver+0x2d/0x40 [] bus_remove_device+0x116/0x150 [] device_del+0x126/0x1e0 [] pci_stop_bus_device+0x9c/0xb0 [] pci_stop_and_remove_bus_device+0x16/0x30 [] acpiphp_disable_slot+0x8e/0x150 [] hotplug_event_func+0xba/0x1a0 [] ? acpi_os_release_object+0xe/0x12 [] _handle_hotplug_event_func+0x31/0x70 [] process_one_work+0x183/0x500 [] worker_thread+0x122/0x400 [] ? manage_workers+0x2d0/0x2d0 [] kthread+0xce/0xe0 [] ? kthread_freezable_should_stop+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? kthread_freezable_should_stop+0x70/0x70 Code: 01 00 00 00 74 59 45 31 e4 83 bb c8 01 00 00 02 74 46 66 2e 0f 1f 84 00 00 00 00 00 49 63 c4 48 c1 e0 04 48 8b bc 0 3 10 02 00 00 <48> 8b 47 20 48 8b 80 d0 01 00 00 48 8b 40 50 48 85 c0 74 07 be RIP [] __virtscsi_set_affinity+0x6f/0x120 RSP CR2: 0000000000000020 ---[ end trace 99679331a3775f48 ]--- Signed-off-by: Asias He Reviewed-by: Wanlong Gao Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/virtio_scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 2168258fb2c3..74b88efde6ad 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -751,7 +751,7 @@ static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) vscsi->affinity_hint_set = true; } else { - for (i = 0; i < vscsi->num_queues - VIRTIO_SCSI_VQ_BASE; i++) + for (i = 0; i < vscsi->num_queues; i++) virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1); vscsi->affinity_hint_set = false; From 70aabee48d28ead09817660d35d9f53a13b99eda Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 31 Jul 2013 16:31:26 +1000 Subject: [PATCH 0483/1368] powerpc: On POWERNV enable PPC_DENORMALISATION by default commit 4e90a2a7375e86827541bda9393414c03e7721c6 upstream. We want PPC_DENORMALISATION enabled when POWERNV is enabled, so update the Kconfig. Signed-off-by: Anton Blanchard Acked-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c33e3ad2c8fd..74991fee2e62 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -572,7 +572,7 @@ config SCHED_SMT config PPC_DENORMALISATION bool "PowerPC denormalisation exception handling" depends on PPC_BOOK3S_64 - default "n" + default "y" if PPC_POWERNV ---help--- Add support for handling denormalisation of single precision values. Useful for bare metal only. If unsure say Y here. From 7ab7de87432c7d6544b8c1b45ba6520f4115539f Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:27 +1000 Subject: [PATCH 0484/1368] powerpc: Fix hypervisor facility unavaliable vector number commit 88f094120bd2f012ff494ae50a8d4e0d8af8f69e upstream. Currently if we take hypervisor facility unavaliable (from 0xf80/0x4f80) we mark it as an OS facility unavaliable (0xf60) as the two share the same code path. The becomes a problem in facility_unavailable_exception() as we aren't able to see the hypervisor facility unavailable exceptions. Below fixes this by duplication the required macros. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4e00d223b2e3..902ca3c6b4b6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -848,7 +848,7 @@ hv_facility_unavailable_relon_trampoline: . = 0x4f80 SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b facility_unavailable_relon_hv + b hv_facility_unavailable_relon_hv STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION @@ -1175,6 +1175,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) b .ret_from_except STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception) .align 7 .globl __end_handlers @@ -1188,7 +1189,7 @@ __end_handlers: STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) - STD_RELON_EXCEPTION_HV_OOL(0xf80, facility_unavailable) + STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* From f60232beaf08ab9567fb40b55379acba0c649aed Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:28 +1000 Subject: [PATCH 0485/1368] powerpc: Rework setting up H/FSCR bit definitions commit 74e400cee6c0266ba2d940ed78d981f1e24a8167 upstream. This reworks the Facility Status and Control Regsiter (FSCR) config bit definitions so that we can access the bit numbers. This is needed for a subsequent patch to fix the userspace DSCR handling. HFSCR and FSCR bit definitions are the same, so reuse them. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/reg.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 362142b69d5b..e1fb161252ef 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -254,19 +254,28 @@ #define SPRN_HRMOR 0x139 /* Real mode offset register */ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ +/* HFSCR and FSCR bit numbers are the same */ +#define FSCR_TAR_LG 8 /* Enable Target Address Register */ +#define FSCR_EBB_LG 7 /* Enable Event Based Branching */ +#define FSCR_TM_LG 5 /* Enable Transactional Memory */ +#define FSCR_PM_LG 4 /* Enable prob/priv access to PMU SPRs */ +#define FSCR_BHRB_LG 3 /* Enable Branch History Rolling Buffer*/ +#define FSCR_DSCR_LG 2 /* Enable Data Stream Control Register */ +#define FSCR_VECVSX_LG 1 /* Enable VMX/VSX */ +#define FSCR_FP_LG 0 /* Enable Floating Point */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ -#define FSCR_TAR (1 << (63-55)) /* Enable Target Address Register */ -#define FSCR_EBB (1 << (63-56)) /* Enable Event Based Branching */ -#define FSCR_DSCR (1 << (63-61)) /* Enable Data Stream Control Register */ +#define FSCR_TAR __MASK(FSCR_TAR_LG) +#define FSCR_EBB __MASK(FSCR_EBB_LG) +#define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ -#define HFSCR_TAR (1 << (63-55)) /* Enable Target Address Register */ -#define HFSCR_EBB (1 << (63-56)) /* Enable Event Based Branching */ -#define HFSCR_TM (1 << (63-58)) /* Enable Transactional Memory */ -#define HFSCR_PM (1 << (63-60)) /* Enable prob/priv access to PMU SPRs */ -#define HFSCR_BHRB (1 << (63-59)) /* Enable Branch History Rolling Buffer*/ -#define HFSCR_DSCR (1 << (63-61)) /* Enable Data Stream Control Register */ -#define HFSCR_VECVSX (1 << (63-62)) /* Enable VMX/VSX */ -#define HFSCR_FP (1 << (63-63)) /* Enable Floating Point */ +#define HFSCR_TAR __MASK(FSCR_TAR_LG) +#define HFSCR_EBB __MASK(FSCR_EBB_LG) +#define HFSCR_TM __MASK(FSCR_TM_LG) +#define HFSCR_PM __MASK(FSCR_PM_LG) +#define HFSCR_BHRB __MASK(FSCR_BHRB_LG) +#define HFSCR_DSCR __MASK(FSCR_DSCR_LG) +#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG) +#define HFSCR_FP __MASK(FSCR_FP_LG) #define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ #define LPCR_VPM0 (1ul << (63-0)) From 734ce4ea808b69a9896677fd1ebdf272a29665d9 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:29 +1000 Subject: [PATCH 0486/1368] powerpc: Fix context switch DSCR on POWER8 commit 2517617e0de65f8f7cfe75cae745d06b1fa98586 upstream. POWER8 allows the DSCR to be accessed directly from userspace via a new SPR number 0x3 (Rather than 0x11. DSCR SPR number 0x11 is still used on POWER8 but like POWER7, is only accessible in HV and OS modes). Currently, we allow this by setting H/FSCR DSCR bit on boot. Unfortunately this doesn't work, as the kernel needs to see the DSCR change so that it knows to no longer restore the system wide version of DSCR on context switch (ie. to set thread.dscr_inherit). This clears the H/FSCR DSCR bit initially. If a process then accesses the DSCR (via SPR 0x3), it'll trap into the kernel where we set thread.dscr_inherit in facility_unavailable_exception(). We also change _switch() so that we set or clear the H/FSCR DSCR bit based on the thread.dscr_inherit. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 27 +++++++++++++++- arch/powerpc/kernel/traps.c | 58 ++++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 8741c854e03d..48816ed66cf5 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -584,9 +584,34 @@ BEGIN_FTR_SECTION ld r7,DSCR_DEFAULT@toc(2) ld r0,THREAD_DSCR(r4) cmpwi r6,0 + li r8, FSCR_DSCR bne 1f ld r0,0(r7) -1: cmpd r0,r25 + b 3f +1: + BEGIN_FTR_SECTION_NESTED(70) + mfspr r6, SPRN_FSCR + or r6, r6, r8 + mtspr SPRN_FSCR, r6 + BEGIN_FTR_SECTION_NESTED(69) + mfspr r6, SPRN_HFSCR + or r6, r6, r8 + mtspr SPRN_HFSCR, r6 + END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69) + b 4f + END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70) +3: + BEGIN_FTR_SECTION_NESTED(70) + mfspr r6, SPRN_FSCR + andc r6, r6, r8 + mtspr SPRN_FSCR, r6 + BEGIN_FTR_SECTION_NESTED(69) + mfspr r6, SPRN_HFSCR + andc r6, r6, r8 + mtspr SPRN_HFSCR, r6 + END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69) + END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70) +4: cmpd r0,r25 beq 2f mtspr SPRN_DSCR,r0 2: diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e4f205a209d2..88929b1f4f77 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -44,9 +44,7 @@ #include #include #include -#ifdef CONFIG_PPC32 #include -#endif #ifdef CONFIG_PMAC_BACKLIGHT #include #endif @@ -1282,43 +1280,54 @@ void vsx_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } +#ifdef CONFIG_PPC64 void facility_unavailable_exception(struct pt_regs *regs) { static char *facility_strings[] = { - "FPU", - "VMX/VSX", - "DSCR", - "PMU SPRs", - "BHRB", - "TM", - "AT", - "EBB", - "TAR", + [FSCR_FP_LG] = "FPU", + [FSCR_VECVSX_LG] = "VMX/VSX", + [FSCR_DSCR_LG] = "DSCR", + [FSCR_PM_LG] = "PMU SPRs", + [FSCR_BHRB_LG] = "BHRB", + [FSCR_TM_LG] = "TM", + [FSCR_EBB_LG] = "EBB", + [FSCR_TAR_LG] = "TAR", }; - char *facility, *prefix; + char *facility = "unknown"; u64 value; + u8 status; + bool hv; - if (regs->trap == 0xf60) { - value = mfspr(SPRN_FSCR); - prefix = ""; - } else { + hv = (regs->trap == 0xf80); + if (hv) value = mfspr(SPRN_HFSCR); - prefix = "Hypervisor "; + else + value = mfspr(SPRN_FSCR); + + status = value >> 56; + if (status == FSCR_DSCR_LG) { + /* User is acessing the DSCR. Set the inherit bit and allow + * the user to set it directly in future by setting via the + * H/FSCR DSCR bit. + */ + current->thread.dscr_inherit = 1; + if (hv) + mtspr(SPRN_HFSCR, value | HFSCR_DSCR); + else + mtspr(SPRN_FSCR, value | FSCR_DSCR); + return; } - value = value >> 56; + if ((status < ARRAY_SIZE(facility_strings)) && + facility_strings[status]) + facility = facility_strings[status]; /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (value < ARRAY_SIZE(facility_strings)) - facility = facility_strings[value]; - else - facility = "unknown"; - pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", - prefix, facility, regs->nip, regs->msr); + hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); @@ -1327,6 +1336,7 @@ void facility_unavailable_exception(struct pt_regs *regs) die("Unexpected facility unavailable exception", regs, SIGABRT); } +#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM From 80737512591c7b56b6e66e17d0b373218b1428b0 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:30 +1000 Subject: [PATCH 0487/1368] powerpc: Save the TAR register earlier commit c2d52644e2da8a07ecab5ca62dd0bc563089e8dc upstream. This moves us to save the Target Address Register (TAR) a earlier in __switch_to. It introduces a new function save_tar() to do this. We need to save the TAR earlier as we will overwrite it in the transactional memory reclaim/recheckpoint path. We are going to do this in a subsequent patch which will fix saving the TAR register when it's modified inside a transaction. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/switch_to.h | 9 +++++++++ arch/powerpc/kernel/entry_64.S | 9 --------- arch/powerpc/kernel/process.c | 10 ++++++++++ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 200d763a0a67..685ecc86aa8b 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -15,6 +15,15 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_struct; extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); +#ifdef CONFIG_PPC_BOOK3S_64 +static inline void save_tar(struct thread_struct *prev) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + prev->tar = mfspr(SPRN_TAR); +} +#else +static inline void save_tar(struct thread_struct *prev) {} +#endif extern void giveup_fpu(struct task_struct *); extern void load_up_fpu(void); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 48816ed66cf5..38847767012d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -449,15 +449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR) #ifdef CONFIG_PPC_BOOK3S_64 BEGIN_FTR_SECTION - /* - * Back up the TAR across context switches. Note that the TAR is not - * available for use in the kernel. (To provide this, the TAR should - * be backed up/restored on exception entry/exit instead, and be in - * pt_regs. FIXME, this should be in pt_regs anyway (for debug).) - */ - mfspr r0,SPRN_TAR - std r0,THREAD_TAR(r3) - /* Event based branch registers */ mfspr r0, SPRN_BESCR std r0, THREAD_BESCR(r3) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 076d1242507a..7baa27b7abbe 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -600,6 +600,16 @@ struct task_struct *__switch_to(struct task_struct *prev, struct ppc64_tlb_batch *batch; #endif + /* Back up the TAR across context switches. + * Note that the TAR is not available for use in the kernel. (To + * provide this, the TAR should be backed up/restored on exception + * entry/exit instead, and be in pt_regs. FIXME, this should be in + * pt_regs anyway (for debug).) + * Save the TAR here before we do treclaim/trecheckpoint as these + * will change the TAR. + */ + save_tar(&prev->thread); + __switch_to_tm(prev); #ifdef CONFIG_SMP From ab9530578e03d7fc00f41f4c406d213d81579de0 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:31 +1000 Subject: [PATCH 0488/1368] powerpc/tm: Fix context switching TAR, PPR and DSCR SPRs commit 28e61cc466d8daace4b0f04ba2b83e0bd68f5832 upstream. If a transaction is rolled back, the Target Address Register (TAR), Processor Priority Register (PPR) and Data Stream Control Register (DSCR) should be restored to the checkpointed values before the transaction began. Any changes to these SPRs inside the transaction should not be visible in the abort handler. Currently Linux doesn't save or restore the checkpointed TAR, PPR or DSCR. If we preempt a processes inside a transaction which has modified any of these, on process restore, that same transaction may be aborted we but we won't see the checkpointed versions of these SPRs. This adds checkpointed versions of these SPRs to the thread_struct and adds the save/restore of these three SPRs to the treclaim/trechkpt code. Without this if any of these SPRs are modified during a transaction, users may incorrectly see a speculated SPR value even if the transaction is aborted. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/processor.h | 4 ++++ arch/powerpc/kernel/asm-offsets.c | 3 +++ arch/powerpc/kernel/tm.S | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 14a658363698..419e7125cce2 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -247,6 +247,10 @@ struct thread_struct { unsigned long tm_orig_msr; /* Thread's MSR on ctx switch */ struct pt_regs ckpt_regs; /* Checkpointed registers */ + unsigned long tm_tar; + unsigned long tm_ppr; + unsigned long tm_dscr; + /* * Transactional FP and VSX 0-31 register set. * NOTE: the sense of these is the opposite of the integer ckpt_regs! diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6f16ffafa6f0..302886b77de2 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -139,6 +139,9 @@ int main(void) DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); + DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar)); + DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); + DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, transact_vr[0])); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2da67e7a16d5..1edd6c2a168d 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -224,6 +224,16 @@ dont_backup_fp: std r5, _CCR(r7) std r6, _XER(r7) + + /* ******************** TAR, PPR, DSCR ********** */ + mfspr r3, SPRN_TAR + mfspr r4, SPRN_PPR + mfspr r5, SPRN_DSCR + + std r3, THREAD_TM_TAR(r12) + std r4, THREAD_TM_PPR(r12) + std r5, THREAD_TM_DSCR(r12) + /* MSR and flags: We don't change CRs, and we don't need to alter * MSR. */ @@ -338,6 +348,16 @@ dont_restore_fp: mtmsr r6 /* FP/Vec off again! */ restore_gprs: + + /* ******************** TAR, PPR, DSCR ********** */ + ld r4, THREAD_TM_TAR(r3) + ld r5, THREAD_TM_PPR(r3) + ld r6, THREAD_TM_DSCR(r3) + + mtspr SPRN_TAR, r4 + mtspr SPRN_PPR, r5 + mtspr SPRN_DSCR, r6 + /* ******************** CR,LR,CCR,MSR ********** */ ld r3, _CTR(r7) ld r4, _LINK(r7) From 9aff080cc65e4eb6af8909d2efc1dc0b72fbbc9c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 26 Jul 2013 15:21:11 -0400 Subject: [PATCH 0489/1368] ext4: destroy ext4_es_cachep on module unload commit dd12ed144e9797094c04736f97aa27d5fe401476 upstream. Without this, module can't be reloaded. [ 500.521980] kmem_cache_sanity_check (ext4_extent_status): Cache name already exists. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6681c037b99d..2bf2f33107d0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5400,6 +5400,7 @@ static void __exit ext4_exit_fs(void) kset_unregister(ext4_kset); ext4_exit_system_zone(); ext4_exit_pageio(); + ext4_exit_es(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); From dc953e8985b02cf8952eca4214604f6bd20f1d86 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 26 Jul 2013 15:15:46 -0400 Subject: [PATCH 0490/1368] ext4: make sure group number is bumped after a inode allocation race commit a34eb503742fd25155fd6cff6163daacead9fbc3 upstream. When we try to allocate an inode, and there is a race between two CPU's trying to grab the same inode, _and_ this inode is the last free inode in the block group, make sure the group number is bumped before we continue searching the rest of the block groups. Otherwise, we end up searching the current block group twice, and we end up skipping searching the last block group. So in the unlikely situation where almost all of the inodes are allocated, it's possible that we will return ENOSPC even though there might be free inodes in that last block group. Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ialloc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 00a818d67b54..3da3bf1b2cd0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -734,11 +734,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, ino = ext4_find_next_zero_bit((unsigned long *) inode_bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); - if (ino >= EXT4_INODES_PER_GROUP(sb)) { - if (++group == ngroups) - group = 0; - continue; - } + if (ino >= EXT4_INODES_PER_GROUP(sb)) + goto next_group; if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { ext4_error(sb, "reserved inode found cleared - " "inode=%lu", ino + 1); @@ -768,6 +765,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, goto got; /* we grabbed the inode! */ if (ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; +next_group: + if (++group == ngroups) + group = 0; } err = -ENOSPC; goto out; From c7039e94cecff031efbdc8ccf7e316c1c6130971 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 29 Jul 2013 12:12:56 -0400 Subject: [PATCH 0491/1368] ext4: fix retry handling in ext4_ext_truncate() commit 94eec0fc3520c759831763d866421b4d60b599b4 upstream. We tested for ENOMEM instead of -ENOMEM. Oops. Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index fddf3d957004..dc1e03047226 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4389,7 +4389,7 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); - if (err == ENOMEM) { + if (err == -ENOMEM) { cond_resched(); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; From 83b83227df02fbabf583c902fc6d55ebab47139a Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 5 Aug 2013 11:21:29 +0200 Subject: [PATCH 0492/1368] regmap: cache: Make sure to sync the last register in a block commit 2d49b5987561e480bdbd8692b27fc5f49a1e2f0b upstream. regcache_sync_block_raw_flush() expects the address of the register after last register that needs to be synced as its parameter. But the last call to regcache_sync_block_raw_flush() in regcache_sync_block_raw() passes the address of the last register in the block. This effectively always skips over the last register in a block, even if it needs to be synced. In order to fix it increase the address by one register. The issue was introduced in commit 75a5f89 ("regmap: cache: Write consecutive registers in a single block write"). Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regcache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index 507ee2da0f6e..46283fd3c4c0 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -644,7 +644,8 @@ static int regcache_sync_block_raw(struct regmap *map, void *block, } } - return regcache_sync_block_raw_flush(map, &data, base, regtmp); + return regcache_sync_block_raw_flush(map, &data, base, regtmp + + map->reg_stride); } int regcache_sync_block(struct regmap *map, void *block, From 9300766810ffff36c2db8efc2fe7c7a6c0469129 Mon Sep 17 00:00:00 2001 From: Mateusz Krawczuk Date: Tue, 6 Aug 2013 18:34:40 +0200 Subject: [PATCH 0493/1368] regmap: Add missing header for !CONFIG_REGMAP stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 49ccc142f9cbc33fdda18e8fa90c1c5b4a79c0ad upstream. regmap.h requires linux/err.h if CONFIG_REGMAP is not defined. Without it I get error. CC drivers/media/platform/exynos4-is/fimc-reg.o In file included from drivers/media/platform/exynos4-is/fimc-reg.c:14:0: include/linux/regmap.h: In function ‘regmap_write’: include/linux/regmap.h:525:10: error: ‘EINVAL’ undeclared (first use in this function) include/linux/regmap.h:525:10: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Mateusz Krawczuk Signed-off-by: Kyungmin Park Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 02d84e24b7c2..f91bb416122d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -15,6 +15,7 @@ #include #include +#include struct module; struct device; From 334428c3b3ad6e9007e370dc59dc1889abaa2222 Mon Sep 17 00:00:00 2001 From: Curt Brune Date: Thu, 8 Aug 2013 12:11:03 -0700 Subject: [PATCH 0494/1368] hwmon: (adt7470) Fix incorrect return code check commit 93d783bcca69bfacc8dc739d8a050498402587b5 upstream. In adt7470_write_word_data(), which writes two bytes using i2c_smbus_write_byte_data(), the return codes are incorrectly AND-ed together when they should be OR-ed together. The return code of i2c_smbus_write_byte_data() is zero for success. The upshot is only the first byte was ever written to the hardware. The 2nd byte was never written out. I noticed that trying to set the fan speed limits was not working correctly on my system. Setting the fan speed limits is the only code that uses adt7470_write_word_data(). After making the change the limit settings work and the alarms work also. Signed-off-by: Curt Brune Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/adt7470.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index b83bf4bb95eb..58637355c1f6 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -215,7 +215,7 @@ static inline int adt7470_write_word_data(struct i2c_client *client, u8 reg, u16 value) { return i2c_smbus_write_byte_data(client, reg, value & 0xFF) - && i2c_smbus_write_byte_data(client, reg + 1, value >> 8); + || i2c_smbus_write_byte_data(client, reg + 1, value >> 8); } static void adt7470_init_client(struct i2c_client *client) From 65967f15aa05ea23cd4fcd58db6ef4ef690982af Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Mon, 29 Jul 2013 12:25:20 +0200 Subject: [PATCH 0495/1368] staging: zcache: fix "zcache=" kernel parameter commit 02073798a6b081bf74e6c10d6f7e7a693c067ecd upstream. Commit 835f2f5 ("staging: zcache: enable zcache to be built/loaded as a module") introduced an incorrect handling of "zcache=" parameter. Inside zcache_comp_init() function, zcache_comp_name variable is checked for being empty. If not empty, the above variable is tested for being compatible with Crypto API. Unfortunately, after that function ends unconditionally (by the "goto out" directive) and returns: - non-zero value if verification succeeded, wrongly indicating an error - zero value if verification failed, falsely informing that function zcache_comp_init() ended properly. A solution to this problem is as following: 1. Move the "goto out" directive inside the "if (!ret)" statement 2. In case that crypto_has_comp() returned 0, change the value of ret to non-zero before "goto out" to indicate an error. This patch replaces an earlier one from Michal Hocko (based on report from Cristian Rodriguez): http://permalink.gmane.org/gmane.linux.kernel.mm/102484 It also addressed the same issue but didn't fix the zcache_comp_init() for case when the compressor data passed to "zcache=" option was invalid or unsupported. Signed-off-by: Piotr Sarna [bzolnier: updated patch description] Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Acked-by: Konrad Rzeszutek Wilk Acked-by: Michal Hocko Cc: Cristian Rodriguez Cc: Bob Liu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zcache/zcache-main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index dcceed29d31a..81972fa47beb 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1811,10 +1811,12 @@ static int zcache_comp_init(void) #else if (*zcache_comp_name != '\0') { ret = crypto_has_comp(zcache_comp_name, 0, 0); - if (!ret) + if (!ret) { pr_info("zcache: %s not supported\n", zcache_comp_name); - goto out; + ret = 1; + goto out; + } } if (!ret) strcpy(zcache_comp_name, "lzo"); From 630d2243751d762ea9e1baabecd6536e9db8d0be Mon Sep 17 00:00:00 2001 From: Alban Browaeys Date: Tue, 16 Jul 2013 18:57:53 -0300 Subject: [PATCH 0496/1368] media: em28xx: fix assignment of the eeprom data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f813b5775b471b656382ae8f087bb34dc894261f upstream. Set the config structure pointer to the eeprom data pointer (data, here eedata dereferenced) not the pointer to the pointer to the eeprom data (eedata itself). Signed-off-by: Alban Browaeys Signed-off-by: Frank Schäfer Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/em28xx/em28xx-i2c.c b/drivers/media/usb/em28xx/em28xx-i2c.c index 4851cc2e4a4d..c4ff9739a7ae 100644 --- a/drivers/media/usb/em28xx/em28xx-i2c.c +++ b/drivers/media/usb/em28xx/em28xx-i2c.c @@ -726,7 +726,7 @@ static int em28xx_i2c_eeprom(struct em28xx *dev, unsigned bus, *eedata = data; *eedata_len = len; - dev_config = (void *)eedata; + dev_config = (void *)*eedata; switch (le16_to_cpu(dev_config->chip_conf) >> 4 & 0x3) { case 0: From 060a7c36d7ba82d7f520d7f328db5d38e99c20a4 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 1 Jul 2013 18:14:21 -0300 Subject: [PATCH 0497/1368] i2c: i2c-mxs: Use DMA mode even for small transfers commit d6e102f498cbcc8dd2e36721a01213f036397112 upstream. Recently we have been seing some reports about PIO mode not working properly. - http://www.spinics.net/lists/linux-i2c/msg11985.html - http://marc.info/?l=linux-i2c&m=137235593101385&w=2 - https://lkml.org/lkml/2013/6/24/430 Let's use DMA mode even for small transfers. Without this patch, i2c reads the incorrect sgtl5000 version on a mx28evk when touchscreen is enabled: [ 5.856270] sgtl5000 0-000a: Device with ID register 0 is not a sgtl5000 [ 9.877307] sgtl5000 0-000a: ASoC: failed to probe CODEC -19 [ 9.883528] mxs-sgtl5000 sound.12: ASoC: failed to instantiate card -19 [ 9.892955] mxs-sgtl5000 sound.12: snd_soc_register_card failed (-19) [wsa: we have a proper solution for -next, so this non intrusive solution is OK for now] Signed-off-by: Fabio Estevam Acked-by: Shawn Guo Acked-by: Lucas Stach Acked-by: Marek Vasut Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-mxs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index 2039f230482d..6d8094d44987 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -494,7 +494,7 @@ static int mxs_i2c_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, * based on this empirical measurement and a lot of previous frobbing. */ i2c->cmd_err = 0; - if (msg->len < 8) { + if (0) { /* disable PIO mode until a proper fix is made */ ret = mxs_i2c_pio_setup_xfer(adap, msg, flags); if (ret) mxs_i2c_reset(i2c); From 4b0be00599c98bb20e0396f41f47a593e1c9d4ea Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 5 Aug 2013 21:27:12 +0300 Subject: [PATCH 0498/1368] cpufreq: loongson2: fix regression related to clock management commit f54fe64d14dff3df6d45a48115d248a82557811f upstream. Commit 42913c799 (MIPS: Loongson2: Use clk API instead of direct dereferences) broke the cpufreq functionality on Loongson2 boards: clk_set_rate() is called before the CPU frequency table is initialized, and therefore will always fail. Fix by moving the clk_set_rate() after the table initialization. Tested on Lemote FuLoong mini-PC. Signed-off-by: Aaro Koskinen Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/loongson2_cpufreq.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index d53912768946..f92b02ae20be 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -118,11 +118,6 @@ static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy) clk_put(cpuclk); return -EINVAL; } - ret = clk_set_rate(cpuclk, rate); - if (ret) { - clk_put(cpuclk); - return ret; - } /* clock table init */ for (i = 2; @@ -130,6 +125,12 @@ static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy) i++) loongson2_clockmod_table[i].frequency = (rate * i) / 8; + ret = clk_set_rate(cpuclk, rate); + if (ret) { + clk_put(cpuclk); + return ret; + } + policy->cur = loongson2_cpufreq_get(policy->cpu); cpufreq_frequency_table_get_attr(&loongson2_clockmod_table[0], From da712f3a8c493986b19fd0863b45607c34061ba6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 5 Aug 2013 12:28:02 +0530 Subject: [PATCH 0499/1368] cpufreq: rename ignore_nice as ignore_nice_load commit 6c4640c3adfd97ce10efed7c07405f52d002b9a8 upstream. This sysfs file was called ignore_nice_load earlier and commit 4d5dcc4 (cpufreq: governor: Implement per policy instances of governors) changed its name to ignore_nice by mistake. Lets get it renamed back to its original name. Reported-by: Martin von Gagern Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_conservative.c | 20 ++++++++++---------- drivers/cpufreq/cpufreq_governor.c | 8 ++++---- drivers/cpufreq/cpufreq_governor.h | 4 ++-- drivers/cpufreq/cpufreq_ondemand.c | 20 ++++++++++---------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 0ceb2eff5a7e..f97cb3d8c5a2 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -221,8 +221,8 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, return count; } -static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf, - size_t count) +static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, + const char *buf, size_t count) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input, j; @@ -235,10 +235,10 @@ static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf, if (input > 1) input = 1; - if (input == cs_tuners->ignore_nice) /* nothing to do */ + if (input == cs_tuners->ignore_nice_load) /* nothing to do */ return count; - cs_tuners->ignore_nice = input; + cs_tuners->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -246,7 +246,7 @@ static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf, dbs_info = &per_cpu(cs_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall, 0); - if (cs_tuners->ignore_nice) + if (cs_tuners->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } @@ -279,7 +279,7 @@ show_store_one(cs, sampling_rate); show_store_one(cs, sampling_down_factor); show_store_one(cs, up_threshold); show_store_one(cs, down_threshold); -show_store_one(cs, ignore_nice); +show_store_one(cs, ignore_nice_load); show_store_one(cs, freq_step); declare_show_sampling_rate_min(cs); @@ -287,7 +287,7 @@ gov_sys_pol_attr_rw(sampling_rate); gov_sys_pol_attr_rw(sampling_down_factor); gov_sys_pol_attr_rw(up_threshold); gov_sys_pol_attr_rw(down_threshold); -gov_sys_pol_attr_rw(ignore_nice); +gov_sys_pol_attr_rw(ignore_nice_load); gov_sys_pol_attr_rw(freq_step); gov_sys_pol_attr_ro(sampling_rate_min); @@ -297,7 +297,7 @@ static struct attribute *dbs_attributes_gov_sys[] = { &sampling_down_factor_gov_sys.attr, &up_threshold_gov_sys.attr, &down_threshold_gov_sys.attr, - &ignore_nice_gov_sys.attr, + &ignore_nice_load_gov_sys.attr, &freq_step_gov_sys.attr, NULL }; @@ -313,7 +313,7 @@ static struct attribute *dbs_attributes_gov_pol[] = { &sampling_down_factor_gov_pol.attr, &up_threshold_gov_pol.attr, &down_threshold_gov_pol.attr, - &ignore_nice_gov_pol.attr, + &ignore_nice_load_gov_pol.attr, &freq_step_gov_pol.attr, NULL }; @@ -338,7 +338,7 @@ static int cs_init(struct dbs_data *dbs_data) tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice = 0; + tuners->ignore_nice_load = 0; tuners->freq_step = DEF_FREQUENCY_STEP; dbs_data->tuners = tuners; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 5af40ad82d23..a86ff72141f3 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -91,9 +91,9 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) unsigned int j; if (dbs_data->cdata->governor == GOV_ONDEMAND) - ignore_nice = od_tuners->ignore_nice; + ignore_nice = od_tuners->ignore_nice_load; else - ignore_nice = cs_tuners->ignore_nice; + ignore_nice = cs_tuners->ignore_nice_load; policy = cdbs->cur_policy; @@ -336,12 +336,12 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, cs_tuners = dbs_data->tuners; cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice; + ignore_nice = cs_tuners->ignore_nice_load; } else { od_tuners = dbs_data->tuners; od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); sampling_rate = od_tuners->sampling_rate; - ignore_nice = od_tuners->ignore_nice; + ignore_nice = od_tuners->ignore_nice_load; od_ops = dbs_data->cdata->gov_ops; io_busy = od_tuners->io_is_busy; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index e16a96130cb3..0d9e6befe1d5 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -165,7 +165,7 @@ struct cs_cpu_dbs_info_s { /* Per policy Governers sysfs tunables */ struct od_dbs_tuners { - unsigned int ignore_nice; + unsigned int ignore_nice_load; unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; @@ -175,7 +175,7 @@ struct od_dbs_tuners { }; struct cs_dbs_tuners { - unsigned int ignore_nice; + unsigned int ignore_nice_load; unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 93eb5cbcc1f6..c087347d6688 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -403,8 +403,8 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, return count; } -static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf, - size_t count) +static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, + const char *buf, size_t count) { struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; @@ -419,10 +419,10 @@ static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf, if (input > 1) input = 1; - if (input == od_tuners->ignore_nice) { /* nothing to do */ + if (input == od_tuners->ignore_nice_load) { /* nothing to do */ return count; } - od_tuners->ignore_nice = input; + od_tuners->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { @@ -430,7 +430,7 @@ static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf, dbs_info = &per_cpu(od_cpu_dbs_info, j); dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); - if (od_tuners->ignore_nice) + if (od_tuners->ignore_nice_load) dbs_info->cdbs.prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -461,7 +461,7 @@ show_store_one(od, sampling_rate); show_store_one(od, io_is_busy); show_store_one(od, up_threshold); show_store_one(od, sampling_down_factor); -show_store_one(od, ignore_nice); +show_store_one(od, ignore_nice_load); show_store_one(od, powersave_bias); declare_show_sampling_rate_min(od); @@ -469,7 +469,7 @@ gov_sys_pol_attr_rw(sampling_rate); gov_sys_pol_attr_rw(io_is_busy); gov_sys_pol_attr_rw(up_threshold); gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(ignore_nice); +gov_sys_pol_attr_rw(ignore_nice_load); gov_sys_pol_attr_rw(powersave_bias); gov_sys_pol_attr_ro(sampling_rate_min); @@ -478,7 +478,7 @@ static struct attribute *dbs_attributes_gov_sys[] = { &sampling_rate_gov_sys.attr, &up_threshold_gov_sys.attr, &sampling_down_factor_gov_sys.attr, - &ignore_nice_gov_sys.attr, + &ignore_nice_load_gov_sys.attr, &powersave_bias_gov_sys.attr, &io_is_busy_gov_sys.attr, NULL @@ -494,7 +494,7 @@ static struct attribute *dbs_attributes_gov_pol[] = { &sampling_rate_gov_pol.attr, &up_threshold_gov_pol.attr, &sampling_down_factor_gov_pol.attr, - &ignore_nice_gov_pol.attr, + &ignore_nice_load_gov_pol.attr, &powersave_bias_gov_pol.attr, &io_is_busy_gov_pol.attr, NULL @@ -544,7 +544,7 @@ static int od_init(struct dbs_data *dbs_data) } tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice = 0; + tuners->ignore_nice_load = 0; tuners->powersave_bias = default_powersave_bias; tuners->io_is_busy = should_io_be_busy(); From b97f921b94b1e70d5de999fd820ccf8504dc6528 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Aug 2013 02:26:22 +0200 Subject: [PATCH 0500/1368] ACPI / PM: Walk physical_node_list under physical_node_lock commit 623cf33cb055b1e81fa47e4fc16789b2c129e31e upstream. The list of physical devices corresponding to an ACPI device object is walked by acpi_system_wakeup_device_seq_show() and physical_device_enable_wakeup() without taking that object's physical_node_lock mutex. Since each of those functions may be run at any time as a result of a user space action, the lack of appropriate locking in them may lead to a kernel crash if that happens during device hot-add or hot-remove involving the device object in question. Fix the issue by modifying acpi_system_wakeup_device_seq_show() and physical_device_enable_wakeup() to use physical_node_lock as appropriate. Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/proc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c index aa1227a7e3f2..04a13784dd20 100644 --- a/drivers/acpi/proc.c +++ b/drivers/acpi/proc.c @@ -311,6 +311,8 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) dev->pnp.bus_id, (u32) dev->wakeup.sleep_state); + mutex_lock(&dev->physical_node_lock); + if (!dev->physical_node_count) { seq_printf(seq, "%c%-8s\n", dev->wakeup.flags.run_wake ? '*' : ' ', @@ -338,6 +340,8 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) put_device(ldev); } } + + mutex_unlock(&dev->physical_node_lock); } mutex_unlock(&acpi_device_lock); return 0; @@ -347,12 +351,16 @@ static void physical_device_enable_wakeup(struct acpi_device *adev) { struct acpi_device_physical_node *entry; + mutex_lock(&adev->physical_node_lock); + list_for_each_entry(entry, &adev->physical_node_list, node) if (entry->dev && device_can_wakeup(entry->dev)) { bool enable = !device_may_wakeup(entry->dev); device_set_wakeup_enable(entry->dev, enable); } + + mutex_unlock(&adev->physical_node_lock); } static ssize_t From 2d7ddf0a8fa2cffded6b8785f16e0c08c66f5007 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Fri, 2 Aug 2013 21:16:43 +0400 Subject: [PATCH 0501/1368] tracing: Fix fields of struct trace_iterator that are zeroed by mistake commit ed5467da0e369e65b247b99eb6403cb79172bcda upstream. tracing_read_pipe zeros all fields bellow "seq". The declaration contains a comment about that, but it doesn't help. The first field is "snapshot", it's true when current open file is snapshot. Looks obvious, that it should not be zeroed. The second field is "started". It was converted from cpumask_t to cpumask_var_t (v2.6.28-4983-g4462344), in other words it was converted from cpumask to pointer on cpumask. Currently the reference on "started" memory is lost after the first read from tracing_read_pipe and a proper object will never be freed. The "started" is never dereferenced for trace_pipe, because trace_pipe can't have the TRACE_FILE_ANNOTATE options. Link: http://lkml.kernel.org/r/1375463803-3085183-1-git-send-email-avagin@openvz.org Signed-off-by: Andrew Vagin Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- include/linux/ftrace_event.h | 10 ++++++---- kernel/trace/trace.c | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4372658c73ae..44cdc11b15c2 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -78,6 +78,11 @@ struct trace_iterator { /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; + cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; + /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; @@ -90,10 +95,7 @@ struct trace_iterator { loff_t pos; long idx; - cpumask_var_t started; - - /* it's true when current open file is snapshot */ - bool snapshot; + /* All new field here will be zeroed out in pipe_read */ }; enum trace_iter_flags { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f7bc3ce6eaf6..2a250a2aa028 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4121,6 +4121,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, memset(&iter->seq, 0, sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); + cpumask_clear(iter->started); iter->pos = -1; trace_event_read_lock(); From e25d45868ca209a7d13772ed313a99ea0f9194d6 Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Fri, 2 Aug 2013 18:36:15 -0700 Subject: [PATCH 0502/1368] tracing: Make TRACE_ITER_STOP_ON_FREE stop the correct buffer commit 711e124379e0f889e40e2f01d7f5d61936d3cd23 upstream. Releasing the free_buffer file in an instance causes the global buffer to be stopped when TRACE_ITER_STOP_ON_FREE is enabled. Operate on the correct buffer. Link: http://lkml.kernel.org/r/1375493777-17261-1-git-send-email-azl@google.com Signed-off-by: Alexander Z Lam Cc: Vaibhav Nagarnaik Cc: David Sharp Cc: Alexander Z Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a250a2aa028..1226a98c9bef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4438,7 +4438,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) /* disable tracing ? */ if (trace_flags & TRACE_ITER_STOP_ON_FREE) - tracing_off(); + tracer_tracing_off(tr); /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); From 2fd821ee4dd784a940d6840ebc85ecc8d7b80744 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Jul 2013 15:58:24 -0400 Subject: [PATCH 0503/1368] tracing: Use flag buffer_disabled for irqsoff tracer commit 10246fa35d4ffdfe472185d4cbf9c2dfd9a9f023 upstream. If the ring buffer is disabled and the irqsoff tracer records a trace it will clear out its buffer and lose the data it had previously recorded. Currently there's a callback when writing to the tracing_of file, but if tracing is disabled via the function tracer trigger, it will not inform the irqsoff tracer to stop recording. By using the "mirror" flag (buffer_disabled) in the trace_array, that keeps track of the status of the trace_array's buffer, it gives the irqsoff tracer a fast way to know if it should record a new trace or not. The flag may be a little behind the real state of the buffer, but it should not affect the trace too much. It's more important for the irqsoff tracer to be fast. Reported-by: Dave Jones Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 101 ++++++++++++++++++++++++----------- kernel/trace/trace_irqsoff.c | 4 +- 2 files changed, 72 insertions(+), 33 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1226a98c9bef..b2aebd62fd33 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -246,9 +246,24 @@ cycle_t ftrace_now(int cpu) return ts; } +/** + * tracing_is_enabled - Show if global_trace has been disabled + * + * Shows if the global trace has been enabled or not. It uses the + * mirror flag "buffer_disabled" to be used in fast paths such as for + * the irqsoff tracer. But it may be inaccurate due to races. If you + * need to know the accurate state, use tracing_is_on() which is a little + * slower, but accurate. + */ int tracing_is_enabled(void) { - return tracing_is_on(); + /* + * For quick access (irqsoff uses this in fast path), just + * return the mirror variable of the state of the ring buffer. + * It's a little racy, but we don't really care. + */ + smp_rmb(); + return !global_trace.buffer_disabled; } /* @@ -361,6 +376,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; +void tracer_tracing_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_on(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 0; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_on - enable tracing buffers * @@ -369,15 +401,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | */ void tracing_on(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_on(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 0; + tracer_tracing_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_on); @@ -571,6 +595,23 @@ void tracing_snapshot_alloc(void) EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); #endif /* CONFIG_TRACER_SNAPSHOT */ +void tracer_tracing_off(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_off(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 1; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_off - turn off tracing buffers * @@ -581,26 +622,29 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); */ void tracing_off(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_off(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 1; + tracer_tracing_off(&global_trace); } EXPORT_SYMBOL_GPL(tracing_off); +/** + * tracer_tracing_is_on - show real state of ring buffer enabled + * @tr : the trace array to know if ring buffer is enabled + * + * Shows real state of the ring buffer if it is enabled or not. + */ +int tracer_tracing_is_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + return ring_buffer_record_is_on(tr->trace_buffer.buffer); + return !tr->buffer_disabled; +} + /** * tracing_is_on - show state of ring buffers enabled */ int tracing_is_on(void) { - if (global_trace.trace_buffer.buffer) - return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); - return !global_trace.buffer_disabled; + return tracer_tracing_is_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_is_on); @@ -4060,7 +4104,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_enabled() && iter->pos) + if (!tracing_is_on() && iter->pos) break; } @@ -5772,15 +5816,10 @@ rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->trace_buffer.buffer; char buf[64]; int r; - if (buffer) - r = ring_buffer_record_is_on(buffer); - else - r = 0; - + r = tracer_tracing_is_on(tr); r = sprintf(buf, "%d\n", r); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -5802,11 +5841,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (buffer) { mutex_lock(&trace_types_lock); if (val) { - ring_buffer_record_on(buffer); + tracer_tracing_on(tr); if (tr->current_trace->start) tr->current_trace->start(tr); } else { - ring_buffer_record_off(buffer); + tracer_tracing_off(tr); if (tr->current_trace->stop) tr->current_trace->stop(tr); } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b19d065a28cb..2aefbee93a6d 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; - if (likely(!tracer_enabled)) + if (!tracer_enabled || !tracing_is_enabled()) return; cpu = raw_smp_processor_id(); @@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) else return; - if (!tracer_enabled) + if (!tracer_enabled || !tracing_is_enabled()) return; data = per_cpu_ptr(tr->trace_buffer.data, cpu); From e88512e77c0a22fc860cfb1100920cb1ca3705d3 Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Fri, 2 Aug 2013 18:36:16 -0700 Subject: [PATCH 0504/1368] tracing: Fix reset of time stamps during trace_clock changes commit 9457158bbc0ee04ecef76862d73eecd8076e9c7b upstream. Fixed two issues with changing the timestamp clock with trace_clock: - The global buffer was reset on instance clock changes. Change this to pass the correct per-instance buffer - ftrace_now() is used to set buf->time_start in tracing_reset_online_cpus(). This was incorrect because ftrace_now() used the global buffer's clock to return the current time. Change this to use buffer_ftrace_now() which returns the current time for the correct per-instance buffer. Also removed tracing_reset_current() because it is not used anywhere Link: http://lkml.kernel.org/r/1375493777-17261-2-git-send-email-azl@google.com Signed-off-by: Alexander Z Lam Cc: Vaibhav Nagarnaik Cc: David Sharp Cc: Alexander Z Lam Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b2aebd62fd33..06a5bcef373f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -232,20 +232,25 @@ int filter_current_check_discard(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(filter_current_check_discard); -cycle_t ftrace_now(int cpu) +cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu) { u64 ts; /* Early boot up does not have a buffer yet */ - if (!global_trace.trace_buffer.buffer) + if (!buf->buffer) return trace_clock_local(); - ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu); - ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts); + ts = ring_buffer_time_stamp(buf->buffer, cpu); + ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); return ts; } +cycle_t ftrace_now(int cpu) +{ + return buffer_ftrace_now(&global_trace.trace_buffer, cpu); +} + /** * tracing_is_enabled - Show if global_trace has been disabled * @@ -1194,7 +1199,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) /* Make sure all commits have finished */ synchronize_sched(); - buf->time_start = ftrace_now(buf->cpu); + buf->time_start = buffer_ftrace_now(buf, buf->cpu); for_each_online_cpu(cpu) ring_buffer_reset_cpu(buffer, cpu); @@ -1202,11 +1207,6 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) ring_buffer_record_enable(buffer); } -void tracing_reset_current(int cpu) -{ - tracing_reset(&global_trace.trace_buffer, cpu); -} - /* Must have trace_types_lock held */ void tracing_reset_all_online_cpus(void) { @@ -4647,12 +4647,12 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, * New clock may not be consistent with the previous clock. * Reset the buffer so that it doesn't have incomparable timestamps. */ - tracing_reset_online_cpus(&global_trace.trace_buffer); + tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); - tracing_reset_online_cpus(&global_trace.max_buffer); + tracing_reset_online_cpus(&tr->max_buffer); #endif mutex_unlock(&trace_types_lock); From f004da5326d74b43d90ebf06699674378d4f0bb3 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 8 Aug 2013 11:24:55 +0200 Subject: [PATCH 0505/1368] ALSA: usb-audio: do not trust too-big wMaxPacketSize values commit 57e6dae1087bbaa6b33d3dd8a8e90b63888939a3 upstream. The driver used to assume that the streaming endpoint's wMaxPacketSize value would be an indication of how much data the endpoint expects or sends, and compute the number of packets per URB using this value. However, the Focusrite Scarlett 2i4 declares a value of 1024 bytes, while only about 88 or 44 bytes are be actually used. This discrepancy would result in URBs with far too few packets, which would not work correctly on the EHCI driver. To get correct URBs, use wMaxPacketSize only as an upper limit on the packet size. Reported-by: James Stone Tested-by: James Stone Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/endpoint.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 7a444b5501d9..659950e5b94f 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -591,17 +591,16 @@ static int data_ep_set_params(struct snd_usb_endpoint *ep, ep->stride = frame_bits >> 3; ep->silence_value = pcm_format == SNDRV_PCM_FORMAT_U8 ? 0x80 : 0; - /* calculate max. frequency */ - if (ep->maxpacksize) { + /* assume max. frequency is 25% higher than nominal */ + ep->freqmax = ep->freqn + (ep->freqn >> 2); + maxsize = ((ep->freqmax + 0xffff) * (frame_bits >> 3)) + >> (16 - ep->datainterval); + /* but wMaxPacketSize might reduce this */ + if (ep->maxpacksize && ep->maxpacksize < maxsize) { /* whatever fits into a max. size packet */ maxsize = ep->maxpacksize; ep->freqmax = (maxsize / (frame_bits >> 3)) << (16 - ep->datainterval); - } else { - /* no max. packet size: just take 25% higher than nominal */ - ep->freqmax = ep->freqn + (ep->freqn >> 2); - maxsize = ((ep->freqmax + 0xffff) * (frame_bits >> 3)) - >> (16 - ep->datainterval); } if (ep->fill_max) From 34a24b3fe7ee2234682e3abcad906c8c9c6de9aa Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 6 Aug 2013 14:53:24 +0300 Subject: [PATCH 0506/1368] ALSA: 6fire: fix DMA issues with URB transfer_buffer usage commit ddb6b5a964371e8e52e696b2b258bda144c8bd3f upstream. Patch fixes 6fire not to use stack as URB transfer_buffer. URB buffers need to be DMA-able, which stack is not. Furthermore, transfer_buffer should not be allocated as part of larger device structure because DMA coherency issues and patch fixes this issue too. Signed-off-by: Jussi Kivilinna Tested-by: Torsten Schenk Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/6fire/comm.c | 38 +++++++++++++++++++++++++++++++++----- sound/usb/6fire/comm.h | 2 +- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/sound/usb/6fire/comm.c b/sound/usb/6fire/comm.c index 9e6e3ffd86bb..23452ee617e1 100644 --- a/sound/usb/6fire/comm.c +++ b/sound/usb/6fire/comm.c @@ -110,19 +110,37 @@ static int usb6fire_comm_send_buffer(u8 *buffer, struct usb_device *dev) static int usb6fire_comm_write8(struct comm_runtime *rt, u8 request, u8 reg, u8 value) { - u8 buffer[13]; /* 13: maximum length of message */ + u8 *buffer; + int ret; + + /* 13: maximum length of message */ + buffer = kmalloc(13, GFP_KERNEL); + if (!buffer) + return -ENOMEM; usb6fire_comm_init_buffer(buffer, 0x00, request, reg, value, 0x00); - return usb6fire_comm_send_buffer(buffer, rt->chip->dev); + ret = usb6fire_comm_send_buffer(buffer, rt->chip->dev); + + kfree(buffer); + return ret; } static int usb6fire_comm_write16(struct comm_runtime *rt, u8 request, u8 reg, u8 vl, u8 vh) { - u8 buffer[13]; /* 13: maximum length of message */ + u8 *buffer; + int ret; + + /* 13: maximum length of message */ + buffer = kmalloc(13, GFP_KERNEL); + if (!buffer) + return -ENOMEM; usb6fire_comm_init_buffer(buffer, 0x00, request, reg, vl, vh); - return usb6fire_comm_send_buffer(buffer, rt->chip->dev); + ret = usb6fire_comm_send_buffer(buffer, rt->chip->dev); + + kfree(buffer); + return ret; } int usb6fire_comm_init(struct sfire_chip *chip) @@ -135,6 +153,12 @@ int usb6fire_comm_init(struct sfire_chip *chip) if (!rt) return -ENOMEM; + rt->receiver_buffer = kzalloc(COMM_RECEIVER_BUFSIZE, GFP_KERNEL); + if (!rt->receiver_buffer) { + kfree(rt); + return -ENOMEM; + } + urb = &rt->receiver; rt->serial = 1; rt->chip = chip; @@ -153,6 +177,7 @@ int usb6fire_comm_init(struct sfire_chip *chip) urb->interval = 1; ret = usb_submit_urb(urb, GFP_KERNEL); if (ret < 0) { + kfree(rt->receiver_buffer); kfree(rt); snd_printk(KERN_ERR PREFIX "cannot create comm data receiver."); return ret; @@ -171,6 +196,9 @@ void usb6fire_comm_abort(struct sfire_chip *chip) void usb6fire_comm_destroy(struct sfire_chip *chip) { - kfree(chip->comm); + struct comm_runtime *rt = chip->comm; + + kfree(rt->receiver_buffer); + kfree(rt); chip->comm = NULL; } diff --git a/sound/usb/6fire/comm.h b/sound/usb/6fire/comm.h index 6a0840b0dcff..780d5ed8e5d8 100644 --- a/sound/usb/6fire/comm.h +++ b/sound/usb/6fire/comm.h @@ -24,7 +24,7 @@ struct comm_runtime { struct sfire_chip *chip; struct urb receiver; - u8 receiver_buffer[COMM_RECEIVER_BUFSIZE]; + u8 *receiver_buffer; u8 serial; /* urb serial */ From 31a9bd79d76026a8c8cff63c6887483e2e766ee4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 22 Jul 2013 12:54:30 -0400 Subject: [PATCH 0507/1368] Btrfs: release both paths before logging dir/changed extents commit f3b15ccdbb9a79781578249a63318805e55a6c34 upstream. The ceph guys tripped over this bug where we were still holding onto the original path that we used to copy the inode with when logging. This is based on Chris's fix which was reported to fix the problem. We need to drop the paths in two cases anyway so just move the drop up so that we don't have duplicate code. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c276ac9a0ec3..cf68596b51fb 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3728,8 +3728,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } log_extents: + btrfs_release_path(path); + btrfs_release_path(dst_path); if (fast_search) { - btrfs_release_path(dst_path); ret = btrfs_log_changed_extents(trans, root, inode, dst_path); if (ret) { err = ret; @@ -3746,8 +3747,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { - btrfs_release_path(path); - btrfs_release_path(dst_path); ret = log_directory_changes(trans, root, inode, path, dst_path); if (ret) { err = ret; From 870bfc6b47ecf64845dbf8e5d7a09877998e1b69 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 12:06:12 -0400 Subject: [PATCH 0508/1368] LOCKD: Don't call utsname()->nodename from nlmclnt_setlockargs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9a1b6bf818e74bb7aabaecb59492b739f2f4d742 upstream. Firstly, nlmclnt_setlockargs can be called from a reclaimer thread, in which case we're in entirely the wrong namespace. Secondly, commit 8aac62706adaaf0fab02c4327761561c8bda9448 (move exit_task_namespaces() outside of exit_notify()) now means that exit_task_work() is called after exit_task_namespaces(), which triggers an Oops when we're freeing up the locks. Fix this by ensuring that we initialise the nlm_host's rpc_client at mount time, so that the cl_nodename field is initialised to the value of utsname()->nodename that the net namespace uses. Then replace the lockd callers of utsname()->nodename. Signed-off-by: Trond Myklebust Cc: Toralf Förster Cc: Oleg Nesterov Cc: Nix Cc: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/lockd/clntlock.c | 13 +++++++++---- fs/lockd/clntproc.c | 5 +++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 01bfe7662751..41e491b8e5d7 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) nlm_init->protocol, nlm_version, nlm_init->hostname, nlm_init->noresvport, nlm_init->net); - if (host == NULL) { - lockd_down(nlm_init->net); - return ERR_PTR(-ENOLCK); - } + if (host == NULL) + goto out_nohost; + if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL) + goto out_nobind; return host; +out_nobind: + nlmclnt_release_host(host); +out_nohost: + lockd_down(nlm_init->net); + return ERR_PTR(-ENOLCK); } EXPORT_SYMBOL_GPL(nlmclnt_init); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 9760ecb9b60f..acd394716349 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_args *argp = &req->a_args; struct nlm_lock *lock = &argp->lock; + char *nodename = req->a_host->h_rpcclnt->cl_nodename; nlmclnt_next_cookie(&argp->cookie); memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); - lock->caller = utsname()->nodename; + lock->caller = nodename; lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", (unsigned int)fl->fl_u.nfs_fl.owner->pid, - utsname()->nodename); + nodename); lock->svid = fl->fl_u.nfs_fl.owner->pid; lock->fl.fl_start = fl->fl_start; lock->fl.fl_end = fl->fl_end; From 533a54ffb012864cccd6aad6917741624c666dc1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 14:10:43 -0400 Subject: [PATCH 0509/1368] SUNRPC: Don't auto-disconnect from the local rpcbind socket commit 00326ed6442c66021cd4b5e19e80f3e2027d5d42 upstream. There is no need for the kernel to time out the AF_LOCAL connection to the rpcbind socket, and doing so is problematic because when it is time to reconnect, our process may no longer be using the same mount namespace. Reported-by: Nix Signed-off-by: Trond Myklebust Cc: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/rpcb_clnt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3df764dc330c..b0f723227157 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -238,6 +238,14 @@ static int rpcb_create_local_unix(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_NULL, + /* + * We turn off the idle timeout to prevent the kernel + * from automatically disconnecting the socket. + * Otherwise, we'd have to cache the mount namespace + * of the caller and somehow pass that to the socket + * reconnect code. + */ + .flags = RPC_CLNT_CREATE_NO_IDLE_TIMEOUT, }; struct rpc_clnt *clnt, *clnt4; int result = 0; From 4662ffcbe30f95deeae95a7009faa54e1209b466 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 16:04:47 -0400 Subject: [PATCH 0510/1368] SUNRPC: If the rpcbind channel is disconnected, fail the call to unregister commit 786615bc1ce84150ded80daea6bd9f6297f48e73 upstream. If rpcbind causes our connection to the AF_LOCAL socket to close after we've registered a service, then we want to be careful about reconnecting since the mount namespace may have changed. By simply refusing to reconnect the AF_LOCAL socket in the case of unregister, we avoid the need to somehow save the mount namespace. While this may lead to some services not unregistering properly, it should be safe. Signed-off-by: Trond Myklebust Cc: Nix Cc: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 4 ++++ net/sunrpc/netns.h | 1 + net/sunrpc/rpcb_clnt.c | 40 ++++++++++++++++++++++++------------ 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 84ca436b76c2..9faf0f49199f 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -130,6 +130,7 @@ struct rpc_task_setup { #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ +#define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a750b9c3640..426f8fcc4c6c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1644,6 +1644,10 @@ call_connect(struct rpc_task *task) task->tk_action = call_connect_status; if (task->tk_status < 0) return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; + } xprt_connect(task); } } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 74d948f5d5a1..779742cfc1ff 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -23,6 +23,7 @@ struct sunrpc_net { struct rpc_clnt *rpcb_local_clnt4; spinlock_t rpcb_clnt_lock; unsigned int rpcb_users; + unsigned int rpcb_is_af_local : 1; struct mutex gssp_lock; wait_queue_head_t gssp_wq; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b0f723227157..1891a1022c17 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net) } static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, - struct rpc_clnt *clnt4) + struct rpc_clnt *clnt4, + bool is_af_local) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* Protected by rpcb_create_local_mutex */ sn->rpcb_local_clnt = clnt; sn->rpcb_local_clnt4 = clnt4; + sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " @@ -271,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, true); out: return result; @@ -323,7 +325,7 @@ static int rpcb_create_local_net(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, false); out: return result; @@ -384,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, return rpc_create(&args); } -static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) +static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set) { - int result, error = 0; + int flags = RPC_TASK_NOCONNECT; + int error, result = 0; + if (is_set || !sn->rpcb_is_af_local) + flags = RPC_TASK_SOFTCONN; msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); + error = rpc_call_sync(clnt, msg, flags); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -447,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short .rpc_argp = &map, }; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + bool is_set = false; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; + is_set = true; + } - return rpcb_register_call(sn->rpcb_local_clnt, &msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set); } /* @@ -469,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -479,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -497,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -507,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -527,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(sn->rpcb_local_clnt4, msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false); } /** From 81b80fb88ca579363abebb6da2e7023ffe657203 Mon Sep 17 00:00:00 2001 From: Yoshihiro YUNOMAE Date: Tue, 23 Jul 2013 11:30:49 +0930 Subject: [PATCH 0511/1368] virtio/console: Quit from splice_write if pipe->nrbufs is 0 commit 68c034fefe20eaf7d5569aae84584b07987ce50a upstream. Quit from splice_write if pipe->nrbufs is 0 for avoiding oops in virtio-serial. When an application was doing splice from a kernel buffer to virtio-serial on a guest, the application received signal(SIGINT). This situation will normally happen, but the kernel executed a kernel panic by oops as follows: BUG: unable to handle kernel paging request at ffff882071c8ef28 IP: [] sg_init_table+0x2f/0x50 PGD 1fac067 PUD 0 Oops: 0000 [#1] SMP Modules linked in: lockd sunrpc bnep bluetooth rfkill ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_page_alloc snd_timer snd microcode virtio_balloon virtio_net pcspkr soundcore i2c_piix4 i2c_core uinput floppy CPU: 1 PID: 908 Comm: trace-cmd Not tainted 3.10.0+ #49 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 task: ffff880071c64650 ti: ffff88007bf24000 task.ti: ffff88007bf24000 RIP: 0010:[] [] sg_init_table+0x2f/0x50 RSP: 0018:ffff88007bf25dd8 EFLAGS: 00010286 RAX: 0000001fffffffe0 RBX: ffff882071c8ef28 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880071c8ef48 RBP: ffff88007bf25de8 R08: ffff88007fd15d40 R09: ffff880071c8ef48 R10: ffffea0001c71040 R11: ffffffff8139c555 R12: 0000000000000000 R13: ffff88007506a3c0 R14: ffff88007c862500 R15: ffff880071c8ef00 FS: 00007f0a3646c740(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff882071c8ef28 CR3: 000000007acbb000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff880071c8ef48 ffff88007bf25e20 ffff88007bf25e88 ffffffff8139d6fa ffff88007bf25e28 ffffffff8127a3f4 0000000000000000 0000000000000000 ffff880071c8ef48 0000100000000000 0000000000000003 ffff88007bf25e08 Call Trace: [] port_fops_splice_write+0xaa/0x130 [] ? selinux_file_permission+0xc4/0x120 [] ? wait_port_writable+0x1b0/0x1b0 [] do_splice_from+0xa0/0x110 [] SyS_splice+0x5ff/0x6b0 [] system_call_fastpath+0x16/0x1b Code: c1 e2 05 48 89 e5 48 83 ec 10 4c 89 65 f8 41 89 f4 31 f6 48 89 5d f0 48 89 fb e8 8d ce ff ff 41 8d 44 24 ff 48 c1 e0 05 48 01 c3 <48> 8b 03 48 83 e0 fe 48 83 c8 02 48 89 03 48 8b 5d f0 4c 8b 65 RIP [] sg_init_table+0x2f/0x50 RSP CR2: ffff882071c8ef28 ---[ end trace 86323505eb42ea8f ]--- It seems to induce pagefault in sg_init_tabel() when pipe->nrbufs is equal to zero. This may happen in a following situation: (1) The application normally does splice(read) from a kernel buffer, then does splice(write) to virtio-serial. (2) The application receives SIGINT when is doing splice(read), so splice(read) is failed by EINTR. However, the application does not finish the operation. (3) The application tries to do splice(write) without pipe->nrbufs. (4) The virtio-console driver tries to touch scatterlist structure sgl in sg_init_table(), but the region is out of bound. To avoid the case, a kernel should check whether pipe->nrbufs is empty or not when splice_write is executed in the virtio-console driver. V3: Add Reviewed-by lines and stable@ line in sign-off area. Signed-off-by: Yoshihiro YUNOMAE Reviewed-by: Amit Shah Reviewed-by: Masami Hiramatsu Cc: Amit Shah Cc: Arnd Bergmann Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 1b456fe9b87a..8722656cdebf 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -932,6 +932,13 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, if (is_rproc_serial(port->out_vq->vdev)) return -EINVAL; + /* + * pipe->nrbufs == 0 means there are no data to transfer, + * so this returns just 0 for no data. + */ + if (!pipe->nrbufs) + return 0; + ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) return ret; From 98c9710e0aa2b939423605488b5817f27118ce40 Mon Sep 17 00:00:00 2001 From: Yoshihiro YUNOMAE Date: Tue, 23 Jul 2013 11:30:49 +0930 Subject: [PATCH 0512/1368] virtio/console: Add pipe_lock/unlock for splice_write commit 2b4fbf029dff5a28d9bf646346dea891ec43398a upstream. Add pipe_lock/unlock for splice_write to avoid oops by following competition: (1) An application gets fds of a trace buffer, virtio-serial, pipe. (2) The application does fork() (3) The processes execute splice_read(trace buffer) and splice_write(virtio-serial) via same pipe. get fds of a trace buffer, virtio-serial, pipe | fork()----------create--------+ | | splice(read) | ---+ splice(write) | +-- no competition | splice(read) | | splice(write) ---+ | | splice(read) | splice(write) splice(read) ------ competition | splice(write) Two processes share a pipe_inode_info structure. If the child execute splice(read) when the parent tries to execute splice(write), the structure can be broken. Existing virtio-serial driver does not get lock for the structure in splice_write, so this competition will induce oops. BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] splice_from_pipe_feed+0x6f/0x130 PGD 7223e067 PUD 72391067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: lockd bnep bluetooth rfkill sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_page_alloc snd_timer snd soundcore pcspkr virtio_net virtio_balloon i2c_piix4 i2c_core microcode uinput floppy CPU: 0 PID: 1072 Comm: compete-test Not tainted 3.10.0ws+ #55 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 task: ffff880071b98000 ti: ffff88007b55e000 task.ti: ffff88007b55e000 RIP: 0010:[] [] splice_from_pipe_feed+0x6f/0x130 RSP: 0018:ffff88007b55fd78 EFLAGS: 00010287 RAX: 0000000000000000 RBX: ffff88007b55fe20 RCX: 0000000000000000 RDX: 0000000000001000 RSI: ffff88007a95ba30 RDI: ffff880036f9e6c0 RBP: ffff88007b55fda8 R08: 00000000000006ec R09: ffff880077626708 R10: 0000000000000003 R11: ffffffff8139ca59 R12: ffff88007a95ba30 R13: 0000000000000000 R14: ffffffff8139dd00 R15: ffff880036f9e6c0 FS: 00007f2e2e3a0740(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000018 CR3: 0000000071bd1000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffff8139ca59 ffff88007b55fe20 ffff880036f9e6c0 ffffffff8139dd00 ffff8800776266c0 ffff880077626708 ffff88007b55fde8 ffffffff811a6e8e ffff88007b55fde8 ffffffff8139ca59 ffff880036f9e6c0 ffff88007b55fe20 Call Trace: [] ? alloc_buf.isra.13+0x39/0xb0 [] ? virtcons_restore+0x100/0x100 [] __splice_from_pipe+0x7e/0x90 [] ? alloc_buf.isra.13+0x39/0xb0 [] port_fops_splice_write+0xe9/0x140 [] ? selinux_file_permission+0xc4/0x120 [] ? wait_port_writable+0x1b0/0x1b0 [] do_splice_from+0xa0/0x110 [] SyS_splice+0x5ff/0x6b0 [] tracesys+0xdd/0xe2 Code: 49 8b 87 80 00 00 00 4c 8d 24 d0 8b 53 04 41 8b 44 24 0c 4d 8b 6c 24 10 39 d0 89 03 76 02 89 13 49 8b 44 24 10 4c 89 e6 4c 89 ff 50 18 85 c0 0f 85 aa 00 00 00 48 89 da 4c 89 e6 4c 89 ff 41 RIP [] splice_from_pipe_feed+0x6f/0x130 RSP CR2: 0000000000000018 ---[ end trace 24572beb7764de59 ]--- V2: Fix a locking problem for error V3: Add Reviewed-by lines and stable@ line in sign-off area Signed-off-by: Yoshihiro YUNOMAE Reviewed-by: Amit Shah Reviewed-by: Masami Hiramatsu Cc: Amit Shah Cc: Arnd Bergmann Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 8722656cdebf..8a15af3e1a9d 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -936,16 +936,21 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, * pipe->nrbufs == 0 means there are no data to transfer, * so this returns just 0 for no data. */ - if (!pipe->nrbufs) - return 0; + pipe_lock(pipe); + if (!pipe->nrbufs) { + ret = 0; + goto error_out; + } ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) - return ret; + goto error_out; buf = alloc_buf(port->out_vq, 0, pipe->nrbufs); - if (!buf) - return -ENOMEM; + if (!buf) { + ret = -ENOMEM; + goto error_out; + } sgl.n = 0; sgl.len = 0; @@ -953,12 +958,17 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, sgl.sg = buf->sg; sg_init_table(sgl.sg, sgl.size); ret = __splice_from_pipe(pipe, &sd, pipe_to_sg); + pipe_unlock(pipe); if (likely(ret > 0)) ret = __send_to_port(port, buf->sg, sgl.n, sgl.len, buf, true); if (unlikely(ret <= 0)) free_buf(buf, true); return ret; + +error_out: + pipe_unlock(pipe); + return ret; } static unsigned int port_fops_poll(struct file *filp, poll_table *wait) From 7b9f0c23359a5a20dccda4db14cc760625ad64c4 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 29 Jul 2013 14:16:13 +0930 Subject: [PATCH 0513/1368] virtio: console: fix race with port unplug and open/close commit 057b82be3ca3d066478e43b162fc082930a746c9 upstream. There's a window between find_port_by_devt() returning a port and us taking a kref on the port, where the port could get unplugged. Fix it by taking the reference in find_port_by_devt() itself. Problem reported and analyzed by Mateusz Guzik. Reported-by: Mateusz Guzik Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 8a15af3e1a9d..3beea9d478bc 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -272,9 +272,12 @@ static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev, unsigned long flags; spin_lock_irqsave(&portdev->ports_lock, flags); - list_for_each_entry(port, &portdev->ports, list) - if (port->cdev->dev == dev) + list_for_each_entry(port, &portdev->ports, list) { + if (port->cdev->dev == dev) { + kref_get(&port->kref); goto out; + } + } port = NULL; out: spin_unlock_irqrestore(&portdev->ports_lock, flags); @@ -1036,14 +1039,10 @@ static int port_fops_open(struct inode *inode, struct file *filp) struct port *port; int ret; + /* We get the port with a kref here */ port = find_port_by_devt(cdev->dev); filp->private_data = port; - /* Prevent against a port getting hot-unplugged at the same time */ - spin_lock_irq(&port->portdev->ports_lock); - kref_get(&port->kref); - spin_unlock_irq(&port->portdev->ports_lock); - /* * Don't allow opening of console port devices -- that's done * via /dev/hvc From 60391483d53a0a9bb43e78a867173b92cf9da996 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 29 Jul 2013 14:17:13 +0930 Subject: [PATCH 0514/1368] virtio: console: fix race in port_fops_open() and port unplug commit 671bdea2b9f210566610603ecbb6584c8a201c8c upstream. Between open() being called and processed, the port can be unplugged. Check if this happened, and bail out. A simple test script to reproduce this is: while true; do for i in $(seq 1 100); do echo $i > /dev/vport0p3; done; done; This opens and closes the port a lot of times; unplugging the port while this is happening triggers the bug. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 3beea9d478bc..ffa7e46faff9 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1041,6 +1041,10 @@ static int port_fops_open(struct inode *inode, struct file *filp) /* We get the port with a kref here */ port = find_port_by_devt(cdev->dev); + if (!port) { + /* Port was unplugged before we could proceed */ + return -ENXIO; + } filp->private_data = port; /* From 707ea94eb328e81dce48e3cdc457e82afb745cf1 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 29 Jul 2013 14:20:29 +0930 Subject: [PATCH 0515/1368] virtio: console: clean up port data immediately at time of unplug commit ea3768b4386a8d1790f4cc9a35de4f55b92d6442 upstream. We used to keep the port's char device structs and the /sys entries around till the last reference to the port was dropped. This is actually unnecessary, and resulted in buggy behaviour: 1. Open port in guest 2. Hot-unplug port 3. Hot-plug a port with the same 'name' property as the unplugged one This resulted in hot-plug being unsuccessful, as a port with the same name already exists (even though it was unplugged). This behaviour resulted in a warning message like this one: -------------------8<--------------------------------------- WARNING: at fs/sysfs/dir.c:512 sysfs_add_one+0xc9/0x130() (Not tainted) Hardware name: KVM sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:04.0/virtio0/virtio-ports/vport0p1' Call Trace: [] ? warn_slowpath_common+0x87/0xc0 [] ? warn_slowpath_fmt+0x46/0x50 [] ? sysfs_add_one+0xc9/0x130 [] ? create_dir+0x68/0xb0 [] ? sysfs_create_dir+0x39/0x50 [] ? kobject_add_internal+0xb9/0x260 [] ? kobject_add_varg+0x38/0x60 [] ? kobject_add+0x44/0x70 [] ? get_device_parent+0xf4/0x1d0 [] ? device_add+0xc9/0x650 -------------------8<--------------------------------------- Instead of relying on guest applications to release all references to the ports, we should go ahead and unregister the port from all the core layers. Any open/read calls on the port will then just return errors, and an unplug/plug operation on the host will succeed as expected. This also caused buggy behaviour in case of the device removal (not just a port): when the device was removed (which means all ports on that device are removed automatically as well), the ports with active users would clean up only when the last references were dropped -- and it would be too late then to be referencing char device pointers, resulting in oopses: -------------------8<--------------------------------------- PID: 6162 TASK: ffff8801147ad500 CPU: 0 COMMAND: "cat" #0 [ffff88011b9d5a90] machine_kexec at ffffffff8103232b #1 [ffff88011b9d5af0] crash_kexec at ffffffff810b9322 #2 [ffff88011b9d5bc0] oops_end at ffffffff814f4a50 #3 [ffff88011b9d5bf0] die at ffffffff8100f26b #4 [ffff88011b9d5c20] do_general_protection at ffffffff814f45e2 #5 [ffff88011b9d5c50] general_protection at ffffffff814f3db5 [exception RIP: strlen+2] RIP: ffffffff81272ae2 RSP: ffff88011b9d5d00 RFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880118901c18 RCX: 0000000000000000 RDX: ffff88011799982c RSI: 00000000000000d0 RDI: 3a303030302f3030 RBP: ffff88011b9d5d38 R8: 0000000000000006 R9: ffffffffa0134500 R10: 0000000000001000 R11: 0000000000001000 R12: ffff880117a1cc10 R13: 00000000000000d0 R14: 0000000000000017 R15: ffffffff81aff700 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #6 [ffff88011b9d5d00] kobject_get_path at ffffffff8126dc5d #7 [ffff88011b9d5d40] kobject_uevent_env at ffffffff8126e551 #8 [ffff88011b9d5dd0] kobject_uevent at ffffffff8126e9eb #9 [ffff88011b9d5de0] device_del at ffffffff813440c7 -------------------8<--------------------------------------- So clean up when we have all the context, and all that's left to do when the references to the port have dropped is to free up the port struct itself. Reported-by: chayang Reported-by: YOGANANTH SUBRAMANIAN Reported-by: FuXiangChun Reported-by: Qunfang Zhang Reported-by: Sibiao Luo Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index ffa7e46faff9..4e684faee10b 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1518,14 +1518,6 @@ static void remove_port(struct kref *kref) port = container_of(kref, struct port, kref); - sysfs_remove_group(&port->dev->kobj, &port_attribute_group); - device_destroy(pdrvdata.class, port->dev->devt); - cdev_del(port->cdev); - - kfree(port->name); - - debugfs_remove(port->debugfs_file); - kfree(port); } @@ -1583,6 +1575,14 @@ static void unplug_port(struct port *port) */ port->portdev = NULL; + sysfs_remove_group(&port->dev->kobj, &port_attribute_group); + device_destroy(pdrvdata.class, port->dev->devt); + cdev_del(port->cdev); + + kfree(port->name); + + debugfs_remove(port->debugfs_file); + /* * Locks around here are not necessary - a port can't be * opened after we removed the port struct from ports_list From 2817b99f91475b84c23cd353741ea485564f4562 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 29 Jul 2013 14:21:32 +0930 Subject: [PATCH 0516/1368] virtio: console: fix raising SIGIO after port unplug commit 92d3453815fbe74d539c86b60dab39ecdf01bb99 upstream. SIGIO should be sent when a port gets unplugged. It should only be sent to prcesses that have the port opened, and have asked for SIGIO to be delivered. We were clearing out guest_connected before calling send_sigio_to_port(), resulting in a sigio not getting sent to processes. Fix by setting guest_connected to false after invoking the sigio function. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 4e684faee10b..e4845f1c9a0b 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1551,12 +1551,14 @@ static void unplug_port(struct port *port) spin_unlock_irq(&port->portdev->ports_lock); if (port->guest_connected) { - port->guest_connected = false; - port->host_connected = false; - wake_up_interruptible(&port->waitqueue); - /* Let the app know the port is going down. */ send_sigio_to_port(port); + + /* Do this after sigio is actually sent */ + port->guest_connected = false; + port->host_connected = false; + + wake_up_interruptible(&port->waitqueue); } if (is_console_port(port)) { From 61ffac73366844578d7e773cdc33a4f6adca09d3 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 29 Jul 2013 14:23:21 +0930 Subject: [PATCH 0517/1368] virtio: console: return -ENODEV on all read operations after unplug commit 96f97a83910cdb9d89d127c5ee523f8fc040a804 upstream. If a port gets unplugged while a user is blocked on read(), -ENODEV is returned. However, subsequent read()s returned 0, indicating there's no host-side connection (but not indicating the device went away). This also happened when a port was unplugged and the user didn't have any blocking operation pending. If the user didn't monitor the SIGIO signal, they won't have a chance to find out if the port went away. Fix by returning -ENODEV on all read()s after the port gets unplugged. write() already behaves this way. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index e4845f1c9a0b..fc45567ad3ac 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -749,6 +749,10 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, port = filp->private_data; + /* Port is hot-unplugged. */ + if (!port->guest_connected) + return -ENODEV; + if (!port_has_data(port)) { /* * If nothing's connected on the host just return 0 in @@ -765,7 +769,7 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, if (ret < 0) return ret; } - /* Port got hot-unplugged. */ + /* Port got hot-unplugged while we were waiting above. */ if (!port->guest_connected) return -ENODEV; /* From 18da37c30dfb6bd4e46b39fefe06b891524d80a6 Mon Sep 17 00:00:00 2001 From: Michal Srb Date: Tue, 6 Aug 2013 15:26:50 +0200 Subject: [PATCH 0518/1368] drm/cirrus: Invalidate page tables when pinning a BO commit 109a51598869a39fdcec2d49672a9a39b6d89481 upstream. This is a cirrus version of Egbert Eich's patch for mgag200. Without bo.bdev->dev_mapping set, the ttm_bo_unmap_virtual_locked called from ttm_bo_handle_move_mem returns with no effect. If any application accessed the memory before it was moved, it will access wrong memory next time. This causes crashes when changing resolution down. Signed-off-by: Michal Srb Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/cirrus/cirrus_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index 2ed8cfc740c9..c18faff82651 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -353,6 +353,7 @@ int cirrus_bo_create(struct drm_device *dev, int size, int align, cirrusbo->gem.driver_private = NULL; cirrusbo->bo.bdev = &cirrus->ttm.bdev; + cirrusbo->bo.bdev->dev_mapping = dev->dev_mapping; cirrus_ttm_placement(cirrusbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM); From 12fb12f1f103676c2e81bcc392436171349d37db Mon Sep 17 00:00:00 2001 From: Egbert Eich Date: Wed, 17 Jul 2013 17:40:56 +0200 Subject: [PATCH 0519/1368] drm/mgag200: Invalidate page tables when pinning a BO commit ecaac1c866bcda4780a963b3d18cd310d971aea3 upstream. When a BO gets pinned the placement may get changed. If the memory is mapped into user space and user space has already accessed the mapped range the page tables are set up but now point to the wrong memory. Set bo.mdev->dev_mapping in mgag200_bo_create() to make sure that ttm_bo_unmap_virtual() called from ttm_bo_handle_move_mem() will take care of this. v2: Don't call ttm_bo_unmap_virtual() in mgag200_bo_pin(), fix comment. Signed-off-by: Egbert Eich Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/mgag200/mgag200_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index 401c9891d3a8..d2cb32f3c05b 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -347,6 +347,7 @@ int mgag200_bo_create(struct drm_device *dev, int size, int align, mgabo->gem.driver_private = NULL; mgabo->bo.bdev = &mdev->ttm.bdev; + mgabo->bo.bdev->dev_mapping = dev->dev_mapping; mgag200_ttm_placement(mgabo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM); From aabfe27dfe71d29b9ac08790f1a0ca240c1264b6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 7 Aug 2013 10:01:56 +1000 Subject: [PATCH 0520/1368] drm/ast: invalidate page tables when pinning a BO commit 3ac65259328324de323dc006b52ff7c1a5b18d19 upstream. same fix as cirrus and mgag200. Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index 09da3393c527..d5902e21d4a3 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -348,6 +348,7 @@ int ast_bo_create(struct drm_device *dev, int size, int align, astbo->gem.driver_private = NULL; astbo->bo.bdev = &ast->ttm.bdev; + astbo->bo.bdev->dev_mapping = dev->dev_mapping; ast_ttm_placement(astbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM); From ac4ae0b99e3d708e90fe39019601d2a91dff7011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 12 Jun 2013 11:58:44 +0200 Subject: [PATCH 0521/1368] drm: Don't pass negative delta to ktime_sub_ns() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e91abf80a0998f326107874c88d549f94839f13c upstream. It takes an unsigned value. This happens not to blow up on 64-bit architectures, but it does on 32-bit, causing drm_calc_vbltimestamp_from_scanoutpos() to calculate totally bogus timestamps for vblank events. Which in turn causes e.g. gnome-shell to hang after a DPMS off cycle with current xf86-video-ati Git. [airlied: regression introduced in drm: use monotonic time in drm_calc_vbltimestamp_from_scanoutpos] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=59339 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=59836 Tested-by: shui yangwei Signed-off-by: Michel Dänzer Reviewed-by: Imre Deak Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_irq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 8bcce7866d36..f92da0a32f0d 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -708,7 +708,10 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc, /* Subtract time delta from raw timestamp to get final * vblank_time timestamp for end of vblank. */ - etime = ktime_sub_ns(etime, delta_ns); + if (delta_ns < 0) + etime = ktime_add_ns(etime, -delta_ns); + else + etime = ktime_sub_ns(etime, delta_ns); *vblank_time = ktime_to_timeval(etime); DRM_DEBUG("crtc %d : v %d p(%d,%d)@ %ld.%ld -> %ld.%ld [e %d us, %d rep]\n", From a76faeeb472b213c92f6fef54ec3cd7246f87f64 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 29 Jul 2013 18:56:13 -0400 Subject: [PATCH 0522/1368] drm/radeon: select audio dto based on encoder id for DCE3 commit e1accbf0543eecfdb161131208c3dfefee22d61f upstream. There are two audio dtos on radeon asics that you can select between. Normally, dto0 is used for hdmi and dto1 for DP, but it seems that the dto is somehow tied to the encoders on DCE3 asics. fixes: https://bugs.freedesktop.org/show_bug.cgi?id=67435 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/r600_hdmi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index f48240bb8c56..b9b1139da356 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -242,9 +242,15 @@ void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock) /* according to the reg specs, this should DCE3.2 only, but in * practice it seems to cover DCE3.0 as well. */ - WREG32(DCCG_AUDIO_DTO0_PHASE, base_rate * 100); - WREG32(DCCG_AUDIO_DTO0_MODULE, clock * 100); - WREG32(DCCG_AUDIO_DTO_SELECT, 0); /* select DTO0 */ + if (dig->dig_encoder == 0) { + WREG32(DCCG_AUDIO_DTO0_PHASE, base_rate * 100); + WREG32(DCCG_AUDIO_DTO0_MODULE, clock * 100); + WREG32(DCCG_AUDIO_DTO_SELECT, 0); /* select DTO0 */ + } else { + WREG32(DCCG_AUDIO_DTO1_PHASE, base_rate * 100); + WREG32(DCCG_AUDIO_DTO1_MODULE, clock * 100); + WREG32(DCCG_AUDIO_DTO_SELECT, 1); /* select DTO1 */ + } } else { /* according to the reg specs, this should be DCE2.0 and DCE3.0 */ WREG32(AUDIO_DTO, AUDIO_DTO_PHASE(base_rate / 10) | From ce512e58aa200513472a438e72a0fbe2bcaf8524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 5 Aug 2013 14:10:56 +0200 Subject: [PATCH 0523/1368] drm/radeon: stop sending invalid UVD destroy msg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 641a00593f7d07eab778fbabf546fb68fff3d5ce upstream. We also need to check the handle. Signed-off-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_uvd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 1b3a91bf1a9d..e052ff9f9472 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -218,8 +218,8 @@ void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) { int i, r; for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { - if (rdev->uvd.filp[i] == filp) { - uint32_t handle = atomic_read(&rdev->uvd.handles[i]); + uint32_t handle = atomic_read(&rdev->uvd.handles[i]); + if (handle != 0 && rdev->uvd.filp[i] == filp) { struct radeon_fence *fence; r = radeon_uvd_get_destroy_msg(rdev, From 1929c51fc204bf84c5cbbd08861dd0a9da86b921 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 8 Aug 2013 23:01:24 -0400 Subject: [PATCH 0524/1368] ext4: allow the mount options nodelalloc and data=journal commit 59d9fa5c2e9086db11aa287bb4030151d0095a17 upstream. Commit 26092bf ("ext4: use a table-driven handler for mount options") wrongly disallows the specifying the mount options nodelalloc and data=journal simultaneously. This is incorrect; it should have only disallowed the combination of delalloc and data=journal simultaneously. Reported-by: Piotr Sarna Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2bf2f33107d0..3c2bf0c4c1e0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1341,7 +1341,7 @@ static const struct mount_opts { {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, - MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT}, + MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_SET}, {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | From c4fa10098c869072e96cca0f372b37d30fa10be2 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Thu, 8 Aug 2013 23:02:24 -0400 Subject: [PATCH 0525/1368] ext4: fix mount/remount error messages for incompatible mount options commit 6ae6514b33f941d3386da0dfbe2942766eab1577 upstream. Commit 5688978 ("ext4: improve handling of conflicting mount options") introduced incorrect messages shown while choosing wrong mount options. First of all, both cases of incorrect mount options, "data=journal,delalloc" and "data=journal,dioread_nolock" result in the same error message. Secondly, the problem above isn't solved for remount option: the mismatched parameter is simply ignored. Moreover, ext4_msg states that remount with options "data=journal,delalloc" succeeded, which is not true. To fix it up, I added a simple check after parse_options() call to ensure that data=journal and delalloc/dioread_nolock parameters are not present at the same time. Signed-off-by: Piotr Sarna Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3c2bf0c4c1e0..3f7c39e6d097 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3445,7 +3445,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (test_opt(sb, DIOREAD_NOLOCK)) { ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and delalloc"); + "both data=journal and dioread_nolock"); goto failed_mount; } if (test_opt(sb, DELALLOC)) @@ -4646,6 +4646,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + if (test_opt2(sb, EXPLICIT_DELALLOC)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and delalloc"); + err = -EINVAL; + goto restore_opts; + } + if (test_opt(sb, DIOREAD_NOLOCK)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dioread_nolock"); + err = -EINVAL; + goto restore_opts; + } + } + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, "Abort forced by user"); From 72766b0bec55b104c414a22f2e8ecdbb6bb1e19e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 12 Aug 2013 09:29:30 -0400 Subject: [PATCH 0526/1368] ext4: flush the extent status cache during EXT4_IOC_SWAP_BOOT commit cde2d7a796f7e895e25b43471ed658079345636d upstream. Previously we weren't swapping only some of the extent_status LRU fields during the processing of the EXT4_IOC_SWAP_BOOT ioctl. The much safer thing to do is to just completely flush the extent status tree when doing the swap. Signed-off-by: "Theodore Ts'o" Cc: Zheng Liu Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9491ac0590f7..c0427e2f6648 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -77,8 +77,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); - memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree)); - memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr)); + ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); + ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); + ext4_es_lru_del(inode1); + ext4_es_lru_del(inode2); isize = i_size_read(inode1); i_size_write(inode1, i_size_read(inode2)); From 4350018a57c333907db092beeb9fa65a82a258ed Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Fri, 19 Jul 2013 09:01:36 +0800 Subject: [PATCH 0527/1368] cifs: extend the buffer length enought for sprintf() using commit 057d6332b24a4497c55a761c83c823eed9e3f23b upstream. For cifs_set_cifscreds() in "fs/cifs/connect.c", 'desc' buffer length is 'CIFSCREDS_DESC_SIZE' (56 is less than 256), and 'ses->domainName' length may be "255 + '\0'". The related sprintf() may cause memory overflow, so need extend related buffer enough to hold all things. It is also necessary to be sure of 'ses->domainName' must be less than 256, and define the related macro instead of hard code number '256'. Signed-off-by: Chen Gang Reviewed-by: Jeff Layton Reviewed-by: Shirish Pargaonkar Reviewed-by: Scott Lovenberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsencrypt.c | 2 +- fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 7 ++++--- fs/cifs/sess.c | 6 +++--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index f59d0d58258e..5c807b23ca67 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -389,7 +389,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) if (blobptr + attrsize > blobend) break; if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { - if (!attrsize) + if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN) break; if (!ses->domainName) { ses->domainName = diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 4f07f6fbe494..ea3a0b3018a5 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -44,6 +44,7 @@ #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) #define MAX_SERVER_SIZE 15 #define MAX_SHARE_SIZE 80 +#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */ #define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */ #define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e3bc39bb9d12..d6a5c5ac737b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1662,7 +1662,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (strnlen(string, 256) == 256) { + if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN) + == CIFS_MAX_DOMAINNAME_LEN) { printk(KERN_WARNING "CIFS: domain name too" " long\n"); goto cifs_parse_mount_err; @@ -2288,8 +2289,8 @@ cifs_put_smb_ses(struct cifs_ses *ses) #ifdef CONFIG_KEYS -/* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */ -#define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1) +/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */ +#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1) /* Populate username and pw fields from keyring if possible */ static int diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f230571a7ab3..8edc9eb1ef7b 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -198,7 +198,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, bytes_ret = 0; } else bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName, - 256, nls_cp); + CIFS_MAX_DOMAINNAME_LEN, nls_cp); bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null terminator */ @@ -256,8 +256,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* copy domain */ if (ses->domainName != NULL) { - strncpy(bcc_ptr, ses->domainName, 256); - bcc_ptr += strnlen(ses->domainName, 256); + strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); } /* else we will send a null domain name so the server will default to its own domain */ *bcc_ptr = 0; From d1e8adc0865d7091f3c437d984a8527259b80378 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 7 Aug 2013 10:29:08 -0400 Subject: [PATCH 0528/1368] cifs: don't instantiate new dentries in readdir for inodes that need to be revalidated immediately commit 757c4f6260febff982276818bb946df89c1105aa upstream. David reported that commit c2b93e06 (cifs: only set ops for inodes in I_NEW state) caused a regression with mfsymlinks. Prior to that patch, if a mfsymlink dentry was instantiated at readdir time, the inode would get a new set of ops when it was revalidated. After that patch, this did not occur. This patch addresses this by simply skipping instantiating dentries in the readdir codepath when we know that they will need to be immediately revalidated. The next attempt to use that dentry will cause a new lookup to occur (which is basically what we want to happen anyway). Reported-and-Tested-by: David McBride Cc: "Stefan (metze) Metzmacher" Cc: Sachin Prabhu Signed-off-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/readdir.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 770d5a9781c1..036279c064ff 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -111,6 +111,14 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, return; } + /* + * If we know that the inode will need to be revalidated immediately, + * then don't create a new dentry for it. We'll end up doing an on + * the wire call either way and this spares us an invalidation. + */ + if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) + return; + dentry = d_alloc(parent, name); if (!dentry) return; From 94bcc7deb8fed472360ad72ef3717c5409057fca Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 22 Jun 2013 17:21:00 +0300 Subject: [PATCH 0529/1368] zram: allow request end to coincide with disksize commit 75c7caf5a052ffd8db3312fa7864ee2d142890c4 upstream. Pass valid_io_request() checks if request end coincides with disksize (end equals bound), only fail if we attempt to read beyond the bound. mkfs.ext2 produces numerous errors: [ 2164.632747] quiet_error: 1 callbacks suppressed [ 2164.633260] Buffer I/O error on device zram0, logical block 153599 [ 2164.633265] lost page write due to I/O error on zram0 Signed-off-by: Sergey Senozhatsky Cc: Thomas Backlund Cc: Minchan Kim Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 1742ce55b4a9..a333d44d0cff 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -432,7 +432,7 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio) end = start + (bio->bi_size >> SECTOR_SHIFT); bound = zram->disksize >> SECTOR_SHIFT; /* out of range range */ - if (unlikely(start >= bound || end >= bound || start > end)) + if (unlikely(start >= bound || end > bound || start > end)) return 0; /* I/O request is valid */ From 6b9cb24deb2e856b111f77415e9a7da9b09020f6 Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 30 Jul 2013 19:51:20 -0700 Subject: [PATCH 0530/1368] usb: core: don't try to reset_device() a port that got just disconnected commit 481f2d4f89f87a0baa26147f323380e31cfa7c44 upstream. The USB hub driver's event handler contains a check to catch SuperSpeed devices that transitioned into the SS.Inactive state and tries to fix them with a reset. It decides whether to do a plain hub port reset or call the usb_reset_device() function based on whether there was a device attached to the port. However, there are device/hub combinations (found with a JetFlash Transcend mass storage stick (8564:1000) on the root hub of an Intel LynxPoint PCH) which can transition to the SS.Inactive state on disconnect (and stay there long enough for the host to notice). In this case, above-mentioned reset check will call usb_reset_device() on the stale device data structure. The kernel will send pointless LPM control messages to the no longer connected device address and can even cause several 5 second khubd stalls on some (buggy?) host controllers, before finally accepting the device's fate amongst a flurry of error messages. This patch makes the choice of reset dependent on the port status that has just been read from the hub in addition to the existence of an in-kernel data structure for the device, and only proceeds with the more extensive reset if both are valid. Signed-off-by: Julius Werner Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b93fc88c4823..da2905a1a18b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4796,7 +4796,8 @@ static void hub_events(void) hub->ports[i - 1]->child; dev_dbg(hub_dev, "warm reset port %d\n", i); - if (!udev) { + if (!udev || !(portstatus & + USB_PORT_STAT_CONNECTION)) { status = hub_port_reset(hub, i, NULL, HUB_BH_RESET_TIME, true); @@ -4806,8 +4807,8 @@ static void hub_events(void) usb_lock_device(udev); status = usb_reset_device(udev); usb_unlock_device(udev); + connect_change = 0; } - connect_change = 0; } if (connect_change) From a9800654317a89b0224edbe51edcde8c54be2acc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 26 Jul 2013 17:12:56 +0200 Subject: [PATCH 0531/1368] debugfs: debugfs_remove_recursive() must not rely on list_empty(d_subdirs) commit 776164c1faac4966ab14418bb0922e1820da1d19 upstream. debugfs_remove_recursive() is wrong, 1. it wrongly assumes that !list_empty(d_subdirs) means that this dir should be removed. This is not that bad by itself, but: 2. if d_subdirs does not becomes empty after __debugfs_remove() it gives up and silently fails, it doesn't even try to remove other entries. However ->d_subdirs can be non-empty because it still has the already deleted !debugfs_positive() entries. 3. simple_release_fs() is called even if __debugfs_remove() fails. Suppose we have dir1/ dir2/ file2 file1 and someone opens dir1/dir2/file2. Now, debugfs_remove_recursive(dir1/dir2) succeeds, and dir1/dir2 goes away. But debugfs_remove_recursive(dir1) silently fails and doesn't remove this directory. Because it tries to delete (the already deleted) dir1/dir2/file2 again and then fails due to "Avoid infinite loop" logic. Test-case: #!/bin/sh cd /sys/kernel/debug/tracing echo 'p:probe/sigprocmask sigprocmask' >> kprobe_events sleep 1000 < events/probe/sigprocmask/id & echo -n >| kprobe_events [ -d events/probe ] && echo "ERR!! failed to rm probe" And after that it is not possible to create another probe entry. With this patch debugfs_remove_recursive() skips !debugfs_positive() files although this is not strictly needed. The most important change is that it does not try to make ->d_subdirs empty, it simply scans the whole list(s) recursively and removes as much as possible. Link: http://lkml.kernel.org/r/20130726151256.GC19472@redhat.com Acked-by: Greg Kroah-Hartman Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 71 +++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 48 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4888cb3fdef7..c7c83ff0f752 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -533,8 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove); */ void debugfs_remove_recursive(struct dentry *dentry) { - struct dentry *child; - struct dentry *parent; + struct dentry *child, *next, *parent; if (IS_ERR_OR_NULL(dentry)) return; @@ -544,61 +543,37 @@ void debugfs_remove_recursive(struct dentry *dentry) return; parent = dentry; + down: mutex_lock(&parent->d_inode->i_mutex); + list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { + if (!debugfs_positive(child)) + continue; - while (1) { - /* - * When all dentries under "parent" has been removed, - * walk up the tree until we reach our starting point. - */ - if (list_empty(&parent->d_subdirs)) { - mutex_unlock(&parent->d_inode->i_mutex); - if (parent == dentry) - break; - parent = parent->d_parent; - mutex_lock(&parent->d_inode->i_mutex); - } - child = list_entry(parent->d_subdirs.next, struct dentry, - d_u.d_child); - next_sibling: - - /* - * If "child" isn't empty, walk down the tree and - * remove all its descendants first. - */ + /* perhaps simple_empty(child) makes more sense */ if (!list_empty(&child->d_subdirs)) { mutex_unlock(&parent->d_inode->i_mutex); parent = child; - mutex_lock(&parent->d_inode->i_mutex); - continue; + goto down; } - __debugfs_remove(child, parent); - if (parent->d_subdirs.next == &child->d_u.d_child) { - /* - * Try the next sibling. - */ - if (child->d_u.d_child.next != &parent->d_subdirs) { - child = list_entry(child->d_u.d_child.next, - struct dentry, - d_u.d_child); - goto next_sibling; - } - - /* - * Avoid infinite loop if we fail to remove - * one dentry. - */ - mutex_unlock(&parent->d_inode->i_mutex); - break; - } - simple_release_fs(&debugfs_mount, &debugfs_mount_count); + up: + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); } - parent = dentry->d_parent; - mutex_lock(&parent->d_inode->i_mutex); - __debugfs_remove(dentry, parent); mutex_unlock(&parent->d_inode->i_mutex); - simple_release_fs(&debugfs_mount, &debugfs_mount_count); + child = parent; + parent = parent->d_parent; + mutex_lock(&parent->d_inode->i_mutex); + + if (child != dentry) { + next = list_entry(child->d_u.d_child.next, struct dentry, + d_u.d_child); + goto up; + } + + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + mutex_unlock(&parent->d_inode->i_mutex); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); From 71986ee029bada49f4517dbc6b0caf2243a566a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Aug 2013 17:37:37 +0400 Subject: [PATCH 0532/1368] reiserfs: fix deadlock in umount commit 672fe15d091ce76d6fb98e489962e9add7c1ba4c upstream. Since remove_proc_entry() started to wait for IO in progress (i.e. since 2007 or so), the locking in fs/reiserfs/proc.c became wrong; if procfs read happens between the moment when umount() locks the victim superblock and removal of /proc/fs/reiserfs//*, we'll get a deadlock - read will wait for s_umount (in sget(), called by r_start()), while umount will wait in remove_proc_entry() for that read to finish, holding s_umount all along. Fortunately, the same change allows a much simpler race avoidance - all we need to do is remove the procfs entries in the very beginning of reiserfs ->kill_sb(); that'll guarantee that pointer to superblock will remain valid for the duration for procfs IO, so we don't need sget() to keep the sucker alive. As the matter of fact, we can get rid of the home-grown iterator completely, and use single_open() instead. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/procfs.c | 99 +++++++++----------------------------------- fs/reiserfs/super.c | 3 +- 2 files changed, 20 insertions(+), 82 deletions(-) diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 33532f79b4f7..1d48974c25dd 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -19,12 +19,13 @@ /* * LOCKING: * - * We rely on new Alexander Viro's super-block locking. + * These guys are evicted from procfs as the very first step in ->kill_sb(). * */ -static int show_version(struct seq_file *m, struct super_block *sb) +static int show_version(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; char *format; if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { @@ -66,8 +67,9 @@ static int show_version(struct seq_file *m, struct super_block *sb) #define DJP( x ) le32_to_cpu( jp -> x ) #define JF( x ) ( r -> s_journal -> x ) -static int show_super(struct seq_file *m, struct super_block *sb) +static int show_super(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); seq_printf(m, "state: \t%s\n" @@ -128,8 +130,9 @@ static int show_super(struct seq_file *m, struct super_block *sb) return 0; } -static int show_per_level(struct seq_file *m, struct super_block *sb) +static int show_per_level(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); int level; @@ -186,8 +189,9 @@ static int show_per_level(struct seq_file *m, struct super_block *sb) return 0; } -static int show_bitmap(struct seq_file *m, struct super_block *sb) +static int show_bitmap(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); seq_printf(m, "free_block: %lu\n" @@ -218,8 +222,9 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb) return 0; } -static int show_on_disk_super(struct seq_file *m, struct super_block *sb) +static int show_on_disk_super(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); struct reiserfs_super_block *rs = sb_info->s_rs; int hash_code = DFL(s_hash_function_code); @@ -261,8 +266,9 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb) return 0; } -static int show_oidmap(struct seq_file *m, struct super_block *sb) +static int show_oidmap(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); struct reiserfs_super_block *rs = sb_info->s_rs; unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); @@ -291,8 +297,9 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb) return 0; } -static int show_journal(struct seq_file *m, struct super_block *sb) +static int show_journal(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); struct reiserfs_super_block *rs = r->s_rs; struct journal_params *jp = &rs->s_v1.s_journal; @@ -383,92 +390,24 @@ static int show_journal(struct seq_file *m, struct super_block *sb) return 0; } -/* iterator */ -static int test_sb(struct super_block *sb, void *data) -{ - return data == sb; -} - -static int set_sb(struct super_block *sb, void *data) -{ - return -ENOENT; -} - -struct reiserfs_seq_private { - struct super_block *sb; - int (*show) (struct seq_file *, struct super_block *); -}; - -static void *r_start(struct seq_file *m, loff_t * pos) -{ - struct reiserfs_seq_private *priv = m->private; - loff_t l = *pos; - - if (l) - return NULL; - - if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb))) - return NULL; - - up_write(&priv->sb->s_umount); - return priv->sb; -} - -static void *r_next(struct seq_file *m, void *v, loff_t * pos) -{ - ++*pos; - if (v) - deactivate_super(v); - return NULL; -} - -static void r_stop(struct seq_file *m, void *v) -{ - if (v) - deactivate_super(v); -} - -static int r_show(struct seq_file *m, void *v) -{ - struct reiserfs_seq_private *priv = m->private; - return priv->show(m, v); -} - -static const struct seq_operations r_ops = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = r_show, -}; - static int r_open(struct inode *inode, struct file *file) { - struct reiserfs_seq_private *priv; - int ret = seq_open_private(file, &r_ops, - sizeof(struct reiserfs_seq_private)); - - if (!ret) { - struct seq_file *m = file->private_data; - priv = m->private; - priv->sb = proc_get_parent_data(inode); - priv->show = PDE_DATA(inode); - } - return ret; + return single_open(file, PDE_DATA(inode), + proc_get_parent_data(inode)); } static const struct file_operations r_file_operations = { .open = r_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, - .owner = THIS_MODULE, + .release = single_release, }; static struct proc_dir_entry *proc_info_root = NULL; static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct super_block *sb, char *name, - int (*func) (struct seq_file *, struct super_block *)) + int (*func) (struct seq_file *, void *)) { proc_create_data(name, 0, REISERFS_SB(sb)->procdir, &r_file_operations, func); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f8a23c3078f8..e2e202a07b31 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -499,6 +499,7 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { + reiserfs_proc_info_done(s); /* * Force any pending inode evictions to occur now. Any * inodes to be removed that have extended attributes @@ -554,8 +555,6 @@ static void reiserfs_put_super(struct super_block *s) REISERFS_SB(s)->reserved_blocks); } - reiserfs_proc_info_done(s); - reiserfs_write_unlock(s); mutex_destroy(&REISERFS_SB(s)->lock); kfree(s->s_fs_info); From 86813a19f976fe2bcac437e3816f12e7a3700186 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 25 Jul 2013 12:44:34 +0300 Subject: [PATCH 0533/1368] drm/i915: initialize gt_lock early with other spin locks commit 14c5cec5d0cd73e7e9d4fbea2bbfeea8f3ade871 upstream. commit 181d1b9e31c668259d3798c521672afb8edd355c Author: Daniel Vetter Date: Sun Jul 21 13:16:24 2013 +0200 drm/i915: fix up gt init sequence fallout moved dev_priv->gt_lock initialization after use. Do the initialization much earlier with other spin lock initializations. Reported-by: Sedat Dilek Signed-off-by: Jani Nikula Tested-by: Sedat Dilek Signed-off-by: Daniel Vetter Signed-off-by: Zhouping Liu Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/intel_pm.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f9685900bad8..17d9b0b6afc5 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1514,6 +1514,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); spin_lock_init(&dev_priv->rps.lock); + spin_lock_init(&dev_priv->gt_lock); mutex_init(&dev_priv->dpio_lock); mutex_init(&dev_priv->rps.hw_lock); mutex_init(&dev_priv->modeset_restore_lock); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2cfe9f6b0bf2..94ad6bc08260 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4507,8 +4507,6 @@ void intel_gt_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - spin_lock_init(&dev_priv->gt_lock); - if (IS_VALLEYVIEW(dev)) { dev_priv->gt.force_wake_get = vlv_force_wake_get; dev_priv->gt.force_wake_put = vlv_force_wake_put; From 6b061129ce0f86d8ae2ef9e01979041676a47fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 1 Aug 2013 17:34:07 +0200 Subject: [PATCH 0534/1368] drm/radeon: fix halting UVD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2858c00d2823c83acce2a1175dbabb2cebee8678 upstream. Removing the clock/power or resetting the VCPU can cause hangs if that happens in the middle of a register write. Stall the memory and register bus before putting the VCPU into reset. Keep it in reset when unloading the module or suspending. v2: rebased on 3.10-stable tree Signed-off-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen.c | 3 ++- drivers/gpu/drm/radeon/ni.c | 3 ++- drivers/gpu/drm/radeon/r600.c | 28 +++++++++++++++++++++++----- drivers/gpu/drm/radeon/radeon_asic.h | 2 +- drivers/gpu/drm/radeon/rv770.c | 2 ++ drivers/gpu/drm/radeon/si.c | 6 ++++-- 6 files changed, 34 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 0f89ce3d02b9..fdac91956420 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4854,10 +4854,10 @@ int evergreen_resume(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); + r600_uvd_stop(rdev); radeon_uvd_suspend(rdev); r700_cp_stop(rdev); r600_dma_stop(rdev); - r600_uvd_rbc_stop(rdev); evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); evergreen_pcie_gart_disable(rdev); @@ -4988,6 +4988,7 @@ void evergreen_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); + r600_uvd_stop(rdev); radeon_uvd_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 84583302b081..b6956f6a8bb0 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2133,7 +2133,7 @@ int cayman_suspend(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); cayman_cp_enable(rdev, false); cayman_dma_stop(rdev); - r600_uvd_rbc_stop(rdev); + r600_uvd_stop(rdev); radeon_uvd_suspend(rdev); evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); @@ -2265,6 +2265,7 @@ void cayman_fini(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); + r600_uvd_stop(rdev); radeon_uvd_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index b60004e08207..5a6c7fa347e2 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2675,12 +2675,29 @@ int r600_uvd_rbc_start(struct radeon_device *rdev) return 0; } -void r600_uvd_rbc_stop(struct radeon_device *rdev) +void r600_uvd_stop(struct radeon_device *rdev) { struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; /* force RBC into idle state */ WREG32(UVD_RBC_RB_CNTL, 0x11010101); + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); + mdelay(1); + + /* put VCPU into reset */ + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); + mdelay(5); + + /* disable VCPU clock */ + WREG32(UVD_VCPU_CNTL, 0x0); + + /* Unstall UMC and register bus */ + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); + ring->ready = false; } @@ -2700,6 +2717,11 @@ int r600_uvd_init(struct radeon_device *rdev) /* disable interupt */ WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); + mdelay(1); + /* put LMI, VCPU, RBC etc... into reset */ WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | @@ -2729,10 +2751,6 @@ int r600_uvd_init(struct radeon_device *rdev) WREG32(UVD_MPC_SET_ALU, 0); WREG32(UVD_MPC_SET_MUX, 0x88); - /* Stall UMC */ - WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); - WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); - /* take all subblocks out of reset, except VCPU */ WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); mdelay(5); diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index a72759ede753..34223fc3d828 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -399,7 +399,7 @@ uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); /* uvd */ int r600_uvd_init(struct radeon_device *rdev); int r600_uvd_rbc_start(struct radeon_device *rdev); -void r600_uvd_rbc_stop(struct radeon_device *rdev); +void r600_uvd_stop(struct radeon_device *rdev); int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); void r600_uvd_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 30ea14e8854c..f1010131bac0 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1983,6 +1983,7 @@ int rv770_resume(struct radeon_device *rdev) int rv770_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); + r600_uvd_stop(rdev); radeon_uvd_suspend(rdev); r700_cp_stop(rdev); r600_dma_stop(rdev); @@ -2098,6 +2099,7 @@ void rv770_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); + r600_uvd_stop(rdev); radeon_uvd_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index a1b0da6b5808..f56ef18cf1b2 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5473,7 +5473,7 @@ int si_suspend(struct radeon_device *rdev) si_cp_enable(rdev, false); cayman_dma_stop(rdev); if (rdev->has_uvd) { - r600_uvd_rbc_stop(rdev); + r600_uvd_stop(rdev); radeon_uvd_suspend(rdev); } si_irq_suspend(rdev); @@ -5613,8 +5613,10 @@ void si_fini(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); - if (rdev->has_uvd) + if (rdev->has_uvd) { + r600_uvd_stop(rdev); radeon_uvd_fini(rdev); + } si_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); From 23b6ec0bab2444d66ae61f1eb1dff852bfb4f149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 5 Aug 2013 14:10:55 +0200 Subject: [PATCH 0535/1368] drm/radeon: only save UVD bo when we have open handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4ad9c1c774c2af152283f510062094e768876f55 upstream. Otherwise just reinitialize from scratch on resume, and so make it more likely to succeed. v2: rebased for 3.10-stable tree Signed-off-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 46 +++++++++++++++++++-------- drivers/gpu/drm/radeon/rv770.c | 2 +- 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index aad18e676826..bdd9d56812af 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1146,7 +1146,6 @@ struct radeon_uvd { void *cpu_addr; uint64_t gpu_addr; void *saved_bo; - unsigned fw_size; atomic_t handles[RADEON_MAX_UVD_HANDLES]; struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; struct delayed_work idle_work; @@ -1686,6 +1685,7 @@ struct radeon_device { const struct firmware *rlc_fw; /* r6/700 RLC firmware */ const struct firmware *mc_fw; /* NI MC firmware */ const struct firmware *ce_fw; /* SI CE firmware */ + const struct firmware *uvd_fw; /* UVD firmware */ struct r600_blit r600_blit; struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 7ddb0efe2408..ddb8f8e04eb5 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -782,7 +782,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } else { /* put fence directly behind firmware */ - index = ALIGN(rdev->uvd.fw_size, 8); + index = ALIGN(rdev->uvd_fw->size, 8); rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index; rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index; } diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index e052ff9f9472..97002a08211f 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -55,7 +55,6 @@ static void radeon_uvd_idle_work_handler(struct work_struct *work); int radeon_uvd_init(struct radeon_device *rdev) { struct platform_device *pdev; - const struct firmware *fw; unsigned long bo_size; const char *fw_name; int i, r; @@ -105,7 +104,7 @@ int radeon_uvd_init(struct radeon_device *rdev) return -EINVAL; } - r = request_firmware(&fw, fw_name, &pdev->dev); + r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev); if (r) { dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", fw_name); @@ -115,7 +114,7 @@ int radeon_uvd_init(struct radeon_device *rdev) platform_device_unregister(pdev); - bo_size = RADEON_GPU_PAGE_ALIGN(fw->size + 8) + + bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); @@ -148,12 +147,6 @@ int radeon_uvd_init(struct radeon_device *rdev) radeon_bo_unreserve(rdev->uvd.vcpu_bo); - rdev->uvd.fw_size = fw->size; - memset(rdev->uvd.cpu_addr, 0, bo_size); - memcpy(rdev->uvd.cpu_addr, fw->data, fw->size); - - release_firmware(fw); - for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { atomic_set(&rdev->uvd.handles[i], 0); rdev->uvd.filp[i] = NULL; @@ -177,33 +170,60 @@ void radeon_uvd_fini(struct radeon_device *rdev) } radeon_bo_unref(&rdev->uvd.vcpu_bo); + + release_firmware(rdev->uvd_fw); } int radeon_uvd_suspend(struct radeon_device *rdev) { unsigned size; + void *ptr; + int i; if (rdev->uvd.vcpu_bo == NULL) return 0; + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) + if (atomic_read(&rdev->uvd.handles[i])) + break; + + if (i == RADEON_MAX_UVD_HANDLES) + return 0; + size = radeon_bo_size(rdev->uvd.vcpu_bo); + size -= rdev->uvd_fw->size; + + ptr = rdev->uvd.cpu_addr; + ptr += rdev->uvd_fw->size; + rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); - memcpy(rdev->uvd.saved_bo, rdev->uvd.cpu_addr, size); + memcpy(rdev->uvd.saved_bo, ptr, size); return 0; } int radeon_uvd_resume(struct radeon_device *rdev) { + unsigned size; + void *ptr; + if (rdev->uvd.vcpu_bo == NULL) return -EINVAL; + memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); + + size = radeon_bo_size(rdev->uvd.vcpu_bo); + size -= rdev->uvd_fw->size; + + ptr = rdev->uvd.cpu_addr; + ptr += rdev->uvd_fw->size; + if (rdev->uvd.saved_bo != NULL) { - unsigned size = radeon_bo_size(rdev->uvd.vcpu_bo); - memcpy(rdev->uvd.cpu_addr, rdev->uvd.saved_bo, size); + memcpy(ptr, rdev->uvd.saved_bo, size); kfree(rdev->uvd.saved_bo); rdev->uvd.saved_bo = NULL; - } + } else + memset(ptr, 0, size); return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index f1010131bac0..0804d6037ec4 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -813,7 +813,7 @@ int rv770_uvd_resume(struct radeon_device *rdev) /* programm the VCPU memory controller bits 0-27 */ addr = rdev->uvd.gpu_addr >> 3; - size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; WREG32(UVD_VCPU_CACHE_OFFSET0, addr); WREG32(UVD_VCPU_CACHE_SIZE0, size); From 9ff2cb528a3eb18da1d94ac08abd365b8bb9abae Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 4 Aug 2013 12:13:17 -0400 Subject: [PATCH 0536/1368] drm/radeon: always program the MC on startup commit 6fab3febf6d949b0a12b1e4e73db38e4a177a79e upstream. For r6xx+ asics. This mirrors the behavior of pre-r6xx asics. We need to program the MC even if something else in startup() fails. Failure to do so results in an unusable GPU. Based on a fix from: Mark Kettenis Signed-off-by: Alex Deucher [ rebased for 3.10 and dropped the drivers/gpu/drm/radeon/cik.c bit as it's 3.11 specific code / tmb ] Signed-off-by: Thomas Backlund Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen.c | 3 ++- drivers/gpu/drm/radeon/ni.c | 3 ++- drivers/gpu/drm/radeon/r600.c | 3 ++- drivers/gpu/drm/radeon/rv770.c | 3 ++- drivers/gpu/drm/radeon/si.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index fdac91956420..687b421f75c3 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4681,6 +4681,8 @@ static int evergreen_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ evergreen_pcie_gen2_enable(rdev); + evergreen_mc_program(rdev); + if (ASIC_IS_DCE5(rdev)) { if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) { r = ni_init_microcode(rdev); @@ -4708,7 +4710,6 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; - evergreen_mc_program(rdev); if (rdev->flags & RADEON_IS_AGP) { evergreen_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b6956f6a8bb0..3bf43a16adcc 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1929,6 +1929,8 @@ static int cayman_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ evergreen_pcie_gen2_enable(rdev); + evergreen_mc_program(rdev); + if (rdev->flags & RADEON_IS_IGP) { if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { r = ni_init_microcode(rdev); @@ -1957,7 +1959,6 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r; - evergreen_mc_program(rdev); r = cayman_pcie_gart_enable(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 5a6c7fa347e2..f19620b472f5 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3224,6 +3224,8 @@ static int r600_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ r600_pcie_gen2_enable(rdev); + r600_mc_program(rdev); + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { r = r600_init_microcode(rdev); if (r) { @@ -3236,7 +3238,6 @@ static int r600_startup(struct radeon_device *rdev) if (r) return r; - r600_mc_program(rdev); if (rdev->flags & RADEON_IS_AGP) { r600_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 0804d6037ec4..bcc68ec204ad 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1829,6 +1829,8 @@ static int rv770_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ rv770_pcie_gen2_enable(rdev); + rv770_mc_program(rdev); + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { r = r600_init_microcode(rdev); if (r) { @@ -1841,7 +1843,6 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r; - rv770_mc_program(rdev); if (rdev->flags & RADEON_IS_AGP) { rv770_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f56ef18cf1b2..1a96a16b9996 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5270,6 +5270,8 @@ static int si_startup(struct radeon_device *rdev) struct radeon_ring *ring; int r; + si_mc_program(rdev); + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || !rdev->rlc_fw || !rdev->mc_fw) { r = si_init_microcode(rdev); @@ -5289,7 +5291,6 @@ static int si_startup(struct radeon_device *rdev) if (r) return r; - si_mc_program(rdev); r = si_pcie_gart_enable(rdev); if (r) return r; From c7cf2a7505ba6859fa2f5a2055dee3d98b5b3e41 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Mar 2013 15:21:36 +0100 Subject: [PATCH 0537/1368] SCSI: nsp32: use mdelay instead of large udelay constants commit b497ceb964a80ebada3b9b3cea4261409039e25a upstream. ARM cannot handle udelay for more than 2 miliseconds, so we should use mdelay instead for those. Signed-off-by: Arnd Bergmann Acked-by: GOTO Masanori Cc: YOKOTA Hiroshi Cc: "James E.J. Bottomley" Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/nsp32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c index 1e3879dcbdcc..0665f9cfdb02 100644 --- a/drivers/scsi/nsp32.c +++ b/drivers/scsi/nsp32.c @@ -2899,7 +2899,7 @@ static void nsp32_do_bus_reset(nsp32_hw_data *data) * reset SCSI bus */ nsp32_write1(base, SCSI_BUS_CONTROL, BUSCTL_RST); - udelay(RESET_HOLD_TIME); + mdelay(RESET_HOLD_TIME / 1000); nsp32_write1(base, SCSI_BUS_CONTROL, 0); for(i = 0; i < 5; i++) { intrdat = nsp32_read2(base, IRQ_STATUS); /* dummy read */ From ddb2a2d9e0b2c87696d1a81b57edf545e201c143 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Apr 2013 15:47:50 +0200 Subject: [PATCH 0538/1368] mtd: omap2: allow bulding as a module commit 930d800bded771b26d9944c47810829130ff7c8c upstream. The omap2 nand device driver calls into the the elm code, which can be a loadable module, and in that case it cannot be built-in itself. I can see no reason why the omap2 driver cannot also be a module, so let's make the option "tristate" in Kconfig to fix this allmodconfig build error: ERROR: "elm_config" [drivers/mtd/nand/omap2.ko] undefined! ERROR: "elm_decode_bch_error_page" [drivers/mtd/nand/omap2.ko] undefined! Signed-off-by: Arnd Bergmann Acked-by: Tony Lindgren Cc: David Woodhouse Cc: Artem Bityutskiy Cc: Afzal Mohammed Cc: Russell King Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index a60f6c17f57b..50543f166215 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -95,7 +95,7 @@ config MTD_NAND_OMAP2 config MTD_NAND_OMAP_BCH depends on MTD_NAND && MTD_NAND_OMAP2 && ARCH_OMAP3 - bool "Enable support for hardware BCH error correction" + tristate "Enable support for hardware BCH error correction" default n select BCH select BCH_CONST_PARAMS From 5954bd0f29212194f0beb6dab03f9c26a482c560 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 17 Jun 2013 08:09:00 +0000 Subject: [PATCH 0539/1368] MIPS: Expose missing pci_io{map,unmap} declarations commit 78857614104a26cdada4c53eea104752042bf5a1 upstream. The GENERIC_PCI_IOMAP does not depend on CONFIG_PCI so move it to the CONFIG_MIPS symbol so it's always selected for MIPS. This fixes the missing pci_iomap declaration for MIPS. Moreover, the pci_iounmap function was not defined in the io.h header file if the CONFIG_PCI symbol is not set, but it should since MIPS is not using CONFIG_GENERIC_IOMAP. This fixes the following problem on a allyesconfig: drivers/net/ethernet/3com/3c59x.c:1031:2: error: implicit declaration of function 'pci_iomap' [-Werror=implicit-function-declaration] drivers/net/ethernet/3com/3c59x.c:1044:3: error: implicit declaration of function 'pci_iounmap' [-Werror=implicit-function-declaration] Signed-off-by: Markos Chandras Acked-by: Steven J. Hill Patchwork: https://patchwork.linux-mips.org/patch/5478/ Signed-off-by: Ralf Baechle Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 2 +- arch/mips/include/asm/io.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 7a58ab933b20..e53e2b40d695 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -27,6 +27,7 @@ config MIPS select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW + select GENERIC_PCI_IOMAP select HAVE_ARCH_JUMP_LABEL select ARCH_WANT_IPC_PARSE_VERSION select IRQ_FORCED_THREADING @@ -2412,7 +2413,6 @@ config PCI bool "Support for PCI controller" depends on HW_HAS_PCI select PCI_DOMAINS - select GENERIC_PCI_IOMAP select NO_GENERIC_PCI_IOPORT_MAP help Find out whether you have a PCI motherboard. PCI is the name of a diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index b7e59853fd33..b84e1fb3fabf 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -170,6 +170,11 @@ static inline void * isa_bus_to_virt(unsigned long address) extern void __iomem * __ioremap(phys_t offset, phys_t size, unsigned long flags); extern void __iounmap(const volatile void __iomem *addr); +#ifndef CONFIG_PCI +struct pci_dev; +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} +#endif + static inline void __iomem * __ioremap_mode(phys_t offset, unsigned long size, unsigned long flags) { From 519be4566e2e60293d55bcfec71490af8e61b9e7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 14 Aug 2013 22:59:42 -0700 Subject: [PATCH 0540/1368] Linux 3.10.7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fd92ffba51c3..33e36aba5fa2 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 6 +SUBLEVEL = 7 EXTRAVERSION = NAME = TOSSUG Baby Fish From 78de90f9f3cbbd93d467cae8ded64f33a159dfe1 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 2 Aug 2013 10:47:34 -0400 Subject: [PATCH 0541/1368] perf/x86: Fix intel QPI uncore event definitions commit c9601247f8f3fdc18aed7ed7e490e8dfcd07f122 upstream. John McCalpin reports that the "drs_data" and "ncb_data" QPI uncore events are missing the "extra bit" and always return zero values unless the bit is properly set. More details from him: According to the Xeon E5-2600 Product Family Uncore Performance Monitoring Guide, Table 2-94, about 1/2 of the QPI Link Layer events (including the ones that "perf" calls "drs_data" and "ncb_data") require that the "extra bit" be set. This was confusing for a while -- a note at the bottom of page 94 says that the "extra bit" is bit 16 of the control register. Unfortunately, Table 2-86 clearly says that bit 16 is reserved and must be zero. Looking around a bit, I found that bit 21 appears to be the correct "extra bit", and further investigation shows that "perf" actually agrees with me: [root@c560-003.stampede]# cat /sys/bus/event_source/devices/uncore_qpi_0/format/event config:0-7,21 So the command # perf -e "uncore_qpi_0/event=drs_data/" Is the same as # perf -e "uncore_qpi_0/event=0x02,umask=0x08/" While it should be # perf -e "uncore_qpi_0/event=0x102,umask=0x08/" I confirmed that this last version gives results that agree with the amount of data that I expected the STREAM benchmark to move across the QPI link in the second (cross-chip) test of the original script. Reported-by: John McCalpin Signed-off-by: Vince Weaver Cc: zheng.z.yan@intel.com Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1308021037280.26119@vincent-weaver-1.um.maine.edu Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 52441a2af538..8aac56bda7dc 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -314,8 +314,8 @@ static struct uncore_event_desc snbep_uncore_imc_events[] = { static struct uncore_event_desc snbep_uncore_qpi_events[] = { INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), - INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"), - INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"), + INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"), { /* end: all zeroes */ }, }; From b6664dfcf0573495cdea96d2dd37b7d30290ae25 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 7 Aug 2013 16:18:08 -0700 Subject: [PATCH 0542/1368] perf/arm: Fix armpmu_map_hw_event() commit b88a2595b6d8aedbd275c07dfa784657b4f757eb upstream. Fix constraint check in armpmu_map_hw_event(). Reported-and-tested-by: Vince Weaver Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/perf_event.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index d9f5cd4e533f..21f77906602c 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -53,7 +53,12 @@ armpmu_map_cache_event(const unsigned (*cache_map) static int armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) { - int mapping = (*event_map)[config]; + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -ENOENT; + + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; } From 0dcf19b4fb41449de4d1f953f86aa6a90accdff5 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 13 Aug 2013 16:00:47 -0700 Subject: [PATCH 0543/1368] memcg: don't initialize kmem-cache destroying work for root caches commit 3e6b11df245180949938734bc192eaf32f3a06b3 upstream. struct memcg_cache_params has a union. Different parts of this union are used for root and non-root caches. A part with destroying work is used only for non-root caches. I fixed the same problem in another place v3.9-rc1-16204-gf101a94, but didn't notice this one. This patch fixes the kernel panic: [ 46.848187] BUG: unable to handle kernel paging request at 000000fffffffeb8 [ 46.849026] IP: [] kmem_cache_destroy_memcg_children+0x6c/0xc0 [ 46.849092] PGD 0 [ 46.849092] Oops: 0000 [#1] SMP ... Signed-off-by: Andrey Vagin Cc: Glauber Costa Cc: Johannes Weiner Acked-by: Michal Hocko Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 15b040904dc3..82a187aea4c0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3186,11 +3186,11 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, if (!s->memcg_params) return -ENOMEM; - INIT_WORK(&s->memcg_params->destroy, - kmem_cache_destroy_work_func); if (memcg) { s->memcg_params->memcg = memcg; s->memcg_params->root_cache = root_cache; + INIT_WORK(&s->memcg_params->destroy, + kmem_cache_destroy_work_func); } else s->memcg_params->is_root_cache = true; From 4f01c72ef36d3305d6273fe7f1f6670c52745c3d Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 13 Aug 2013 16:00:53 -0700 Subject: [PATCH 0544/1368] microblaze: fix clone syscall commit dfa9771a7c4784bafd0673bc7abcee3813088b77 upstream. Fix inadvertent breakage in the clone syscall ABI for Microblaze that was introduced in commit f3268edbe6fe ("microblaze: switch to generic fork/vfork/clone"). The Microblaze syscall ABI for clone takes the parent tid address in the 4th argument; the third argument slot is used for the stack size. The incorrectly-used CLONE_BACKWARDS type assigned parent tid to the 3rd slot. This commit restores the original ABI so that existing userspace libc code will work correctly. All kernel versions from v3.8-rc1 were affected. Signed-off-by: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 6 ++++++ arch/microblaze/Kconfig | 2 +- include/linux/syscalls.h | 5 +++++ kernel/fork.c | 6 ++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index a4429bcd609e..00e3702ec79b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -404,6 +404,12 @@ config CLONE_BACKWARDS2 help Architecture has the first two arguments of clone(2) swapped. +config CLONE_BACKWARDS3 + bool + help + Architecture has tls passed as the 3rd argument of clone(2), + not the 5th one. + config ODD_RT_SIGACTION bool help diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index d22a4ecffff4..4fab52294d98 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -28,7 +28,7 @@ config MICROBLAZE select GENERIC_CLOCKEVENTS select GENERIC_IDLE_POLL_SETUP select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS + select CLONE_BACKWARDS3 config SWAP def_bool n diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4147d700a293..84662ecc7b51 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -802,9 +802,14 @@ asmlinkage long sys_vfork(void); asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, int __user *); #else +#ifdef CONFIG_CLONE_BACKWARDS3 +asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, + int __user *, int); +#else asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int __user *, int); #endif +#endif asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, diff --git a/kernel/fork.c b/kernel/fork.c index 987b28a1f01b..ffbc0904794e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1675,6 +1675,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val) +#elif defined(CONFIG_CLONE_BACKWARDS3) +SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, + int, stack_size, + int __user *, parent_tidptr, + int __user *, child_tidptr, + int, tls_val) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, From f6c19e2f7d9204bca6576f03a117bec4eaaa9b5b Mon Sep 17 00:00:00 2001 From: Radu Caragea Date: Tue, 13 Aug 2013 16:00:59 -0700 Subject: [PATCH 0545/1368] x86 get_unmapped_area(): use proper mmap base for bottom-up direction commit df54d6fa54275ce59660453e29d1228c2b45a826 upstream. When the stack is set to unlimited, the bottomup direction is used for mmap-ings but the mmap_base is not used and thus effectively renders ASLR for mmapings along with PIE useless. Reviewed-by: Rik van Riel Cc: Michel Lespinasse Cc: Oleg Nesterov Acked-by: Ingo Molnar Cc: Adrian Sendroiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/sys_x86_64.c | 2 +- arch/x86/mm/mmap.c | 2 +- include/linux/sched.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index dbded5aedb81..48f8375e4c6b 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = new_begin; } } else { - *begin = TASK_UNMAPPED_BASE; + *begin = mmap_legacy_base(); *end = TASK_SIZE; } } diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 845df6835f9f..c1af32385ab1 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -98,7 +98,7 @@ static unsigned long mmap_base(void) * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 * does, but not when emulating X86_32 */ -static unsigned long mmap_legacy_base(void) +unsigned long mmap_legacy_base(void) { if (mmap_is_ia32()) return TASK_UNMAPPED_BASE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 178a8d909f14..3aeb14b06242 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -314,6 +314,7 @@ struct nsproxy; struct user_namespace; #ifdef CONFIG_MMU +extern unsigned long mmap_legacy_base(void); extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, From f30d87b004dcb4b260dcb2667d5ef6998f4aac1f Mon Sep 17 00:00:00 2001 From: yonghua zheng Date: Tue, 13 Aug 2013 16:01:03 -0700 Subject: [PATCH 0546/1368] fs/proc/task_mmu.c: fix buffer overflow in add_page_map() commit 8c8296223f3abb142be8fc31711b18a704c0e7d8 upstream. Recently we met quite a lot of random kernel panic issues after enabling CONFIG_PROC_PAGE_MONITOR. After debuggind we found this has something to do with following bug in pagemap: In struct pagemapread: struct pagemapread { int pos, len; pagemap_entry_t *buffer; bool v2; }; pos is number of PM_ENTRY_BYTES in buffer, but len is the size of buffer, it is a mistake to compare pos and len in add_page_map() for checking buffer is full or not, and this can lead to buffer overflow and random kernel panic issue. Correct len to be total number of PM_ENTRY_BYTES in buffer. [akpm@linux-foundation.org: document pagemapread.pos and .len units, fix PM_ENTRY_BYTES definition] Signed-off-by: Yonghua Zheng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d864d56..65fc60a07c47 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -792,14 +792,14 @@ typedef struct { } pagemap_entry_t; struct pagemapread { - int pos, len; + int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ pagemap_entry_t *buffer; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) #define PAGEMAP_WALK_MASK (PMD_MASK) -#define PM_ENTRY_BYTES sizeof(u64) +#define PM_ENTRY_BYTES sizeof(pagemap_entry_t) #define PM_STATUS_BITS 3 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) @@ -1038,8 +1038,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; - pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); - pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); + pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); + pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); ret = -ENOMEM; if (!pm.buffer) goto out_task; From dead45bd0527751cc9e71c0547d8f19f498441ed Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Jul 2013 23:48:42 +0200 Subject: [PATCH 0547/1368] sched: Ensure update_cfs_shares() is called for parents of continuously-running tasks commit bf0bd948d1682e3996adc093b43021ed391983e6 upstream. We typically update a task_group's shares within the dequeue/enqueue path. However, continuously running tasks sharing a CPU are not subject to these updates as they are only put/picked. Unfortunately, when we reverted f269ae046 (in 17bc14b7), we lost the augmenting periodic update that was supposed to account for this; resulting in a potential loss of fairness. To fix this, re-introduce the explicit update in update_cfs_rq_blocked_load() [called via entity_tick()]. Reported-by: Max Hailperin Signed-off-by: Peter Zijlstra Reviewed-by: Paul Turner Link: http://lkml.kernel.org/n/tip-9545m3apw5d93ubyrotrj31y@git.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c61a614465c8..03b73bea33d6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1984,6 +1984,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) */ update_entity_load_avg(curr, 1); update_cfs_rq_blocked_load(cfs_rq, 1); + update_cfs_shares(cfs_rq); #ifdef CONFIG_SCHED_HRTICK /* From a6ad83fce072869921cef7c6f4e86bd91639dc34 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Wed, 3 Jul 2013 13:25:24 +0200 Subject: [PATCH 0548/1368] elevator: Fix a race in elevator switching commit d50235b7bc3ee0a0427984d763ea7534149531b4 upstream. There's a race between elevator switching and normal io operation. Because the allocation of struct elevator_queue and struct elevator_data don't in a atomic operation.So there are have chance to use NULL ->elevator_data. For example: Thread A: Thread B blk_queu_bio elevator_switch spin_lock_irq(q->queue_block) elevator_alloc elv_merge elevator_init_fn Because call elevator_alloc, it can't hold queue_lock and the ->elevator_data is NULL.So at the same time, threadA call elv_merge and nedd some info of elevator_data.So the crash happened. Move the elevator_alloc into func elevator_init_fn, it make the operations in a atomic operation. Using the follow method can easy reproduce this bug 1:dd if=/dev/sdb of=/dev/null 2:while true;do echo noop > scheduler;echo deadline > scheduler;done The test method also use this method. Signed-off-by: Jianpeng Ma Signed-off-by: Jens Axboe Cc: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- block/cfq-iosched.c | 19 +++++++++++++++---- block/deadline-iosched.c | 16 +++++++++++++--- block/elevator.c | 25 +++++-------------------- block/noop-iosched.c | 19 +++++++++++++++---- include/linux/elevator.h | 6 +++++- 5 files changed, 53 insertions(+), 32 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d5cd3131c57a..d5bbdcfd0dab 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4347,18 +4347,28 @@ static void cfq_exit_queue(struct elevator_queue *e) kfree(cfqd); } -static int cfq_init_queue(struct request_queue *q) +static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) { struct cfq_data *cfqd; struct blkcg_gq *blkg __maybe_unused; int i, ret; + struct elevator_queue *eq; - cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); - if (!cfqd) + eq = elevator_alloc(q, e); + if (!eq) return -ENOMEM; + cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); + if (!cfqd) { + kobject_put(&eq->kobj); + return -ENOMEM; + } + eq->elevator_data = cfqd; + cfqd->queue = q; - q->elevator->elevator_data = cfqd; + spin_lock_irq(q->queue_lock); + q->elevator = eq; + spin_unlock_irq(q->queue_lock); /* Init root service tree */ cfqd->grp_service_tree = CFQ_RB_ROOT; @@ -4433,6 +4443,7 @@ static int cfq_init_queue(struct request_queue *q) out_free: kfree(cfqd); + kobject_put(&eq->kobj); return ret; } diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index ba19a3afab79..20614a332362 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -337,13 +337,21 @@ static void deadline_exit_queue(struct elevator_queue *e) /* * initialize elevator private data (deadline_data). */ -static int deadline_init_queue(struct request_queue *q) +static int deadline_init_queue(struct request_queue *q, struct elevator_type *e) { struct deadline_data *dd; + struct elevator_queue *eq; + + eq = elevator_alloc(q, e); + if (!eq) + return -ENOMEM; dd = kmalloc_node(sizeof(*dd), GFP_KERNEL | __GFP_ZERO, q->node); - if (!dd) + if (!dd) { + kobject_put(&eq->kobj); return -ENOMEM; + } + eq->elevator_data = dd; INIT_LIST_HEAD(&dd->fifo_list[READ]); INIT_LIST_HEAD(&dd->fifo_list[WRITE]); @@ -355,7 +363,9 @@ static int deadline_init_queue(struct request_queue *q) dd->front_merges = 1; dd->fifo_batch = fifo_batch; - q->elevator->elevator_data = dd; + spin_lock_irq(q->queue_lock); + q->elevator = eq; + spin_unlock_irq(q->queue_lock); return 0; } diff --git a/block/elevator.c b/block/elevator.c index eba5b04c29b1..668394d18588 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -150,7 +150,7 @@ void __init load_default_elevator_module(void) static struct kobj_type elv_ktype; -static struct elevator_queue *elevator_alloc(struct request_queue *q, +struct elevator_queue *elevator_alloc(struct request_queue *q, struct elevator_type *e) { struct elevator_queue *eq; @@ -170,6 +170,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q, elevator_put(e); return NULL; } +EXPORT_SYMBOL(elevator_alloc); static void elevator_release(struct kobject *kobj) { @@ -221,16 +222,7 @@ int elevator_init(struct request_queue *q, char *name) } } - q->elevator = elevator_alloc(q, e); - if (!q->elevator) - return -ENOMEM; - - err = e->ops.elevator_init_fn(q); - if (err) { - kobject_put(&q->elevator->kobj); - return err; - } - + err = e->ops.elevator_init_fn(q, e); return 0; } EXPORT_SYMBOL(elevator_init); @@ -935,17 +927,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) spin_unlock_irq(q->queue_lock); /* allocate, init and register new elevator */ - err = -ENOMEM; - q->elevator = elevator_alloc(q, new_e); - if (!q->elevator) + err = new_e->ops.elevator_init_fn(q, new_e); + if (err) goto fail_init; - err = new_e->ops.elevator_init_fn(q); - if (err) { - kobject_put(&q->elevator->kobj); - goto fail_init; - } - if (registered) { err = elv_register_queue(q); if (err) diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 5d1bf70e33d5..3de89d4690f3 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -59,16 +59,27 @@ noop_latter_request(struct request_queue *q, struct request *rq) return list_entry(rq->queuelist.next, struct request, queuelist); } -static int noop_init_queue(struct request_queue *q) +static int noop_init_queue(struct request_queue *q, struct elevator_type *e) { struct noop_data *nd; + struct elevator_queue *eq; - nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node); - if (!nd) + eq = elevator_alloc(q, e); + if (!eq) return -ENOMEM; + nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node); + if (!nd) { + kobject_put(&eq->kobj); + return -ENOMEM; + } + eq->elevator_data = nd; + INIT_LIST_HEAD(&nd->queue); - q->elevator->elevator_data = nd; + + spin_lock_irq(q->queue_lock); + q->elevator = eq; + spin_unlock_irq(q->queue_lock); return 0; } diff --git a/include/linux/elevator.h b/include/linux/elevator.h index acd0312d46fb..306dd8cd0b6f 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -7,6 +7,7 @@ #ifdef CONFIG_BLOCK struct io_cq; +struct elevator_type; typedef int (elevator_merge_fn) (struct request_queue *, struct request **, struct bio *); @@ -35,7 +36,8 @@ typedef void (elevator_put_req_fn) (struct request *); typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_init_fn) (struct request_queue *); +typedef int (elevator_init_fn) (struct request_queue *, + struct elevator_type *e); typedef void (elevator_exit_fn) (struct elevator_queue *); struct elevator_ops @@ -155,6 +157,8 @@ extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); extern int elevator_change(struct request_queue *, const char *); extern bool elv_rq_merge_ok(struct request *, struct bio *); +extern struct elevator_queue *elevator_alloc(struct request_queue *, + struct elevator_type *); /* * Helper functions. From 921fa4d670d801e9394f843dd14e2d7faabbba4a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 13:08:46 +0100 Subject: [PATCH 0549/1368] ARM: KVM: perform save/restore of PAR commit 6a077e4ab9cbfbf279fb955bae05b03781c97013 upstream. Not saving PAR is an unfortunate oversight. If the guest performs an AT* operation and gets scheduled out before reading the result of the translation from PAR, it could become corrupted by another guest or the host. Saving this register is made slightly more complicated as KVM also uses it on the permission fault handling path, leading to an ugly "stash and restore" sequence. Fortunately, this is already a slow path so we don't really care. Also, Linux doesn't do any AT* operation, so Linux guests are not impacted by this bug. [ Slightly tweaked to use an even register as first operand to ldrd and strd operations in interrupts_head.S - Christoffer ] Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_asm.h | 22 ++++++++++++---------- arch/arm/kvm/coproc.c | 4 ++++ arch/arm/kvm/interrupts.S | 12 +++++++++++- arch/arm/kvm/interrupts_head.S | 10 ++++++++-- 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 18d50322a9e2..4bb08e3e52bc 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -37,16 +37,18 @@ #define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */ #define c6_DFAR 16 /* Data Fault Address Register */ #define c6_IFAR 17 /* Instruction Fault Address Register */ -#define c9_L2CTLR 18 /* Cortex A15 L2 Control Register */ -#define c10_PRRR 19 /* Primary Region Remap Register */ -#define c10_NMRR 20 /* Normal Memory Remap Register */ -#define c12_VBAR 21 /* Vector Base Address Register */ -#define c13_CID 22 /* Context ID Register */ -#define c13_TID_URW 23 /* Thread ID, User R/W */ -#define c13_TID_URO 24 /* Thread ID, User R/O */ -#define c13_TID_PRIV 25 /* Thread ID, Privileged */ -#define c14_CNTKCTL 26 /* Timer Control Register (PL1) */ -#define NR_CP15_REGS 27 /* Number of regs (incl. invalid) */ +#define c7_PAR 18 /* Physical Address Register */ +#define c7_PAR_high 19 /* PAR top 32 bits */ +#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ +#define c10_PRRR 21 /* Primary Region Remap Register */ +#define c10_NMRR 22 /* Normal Memory Remap Register */ +#define c12_VBAR 23 /* Vector Base Address Register */ +#define c13_CID 24 /* Context ID Register */ +#define c13_TID_URW 25 /* Thread ID, User R/W */ +#define c13_TID_URO 26 /* Thread ID, User R/O */ +#define c13_TID_PRIV 27 /* Thread ID, Privileged */ +#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */ +#define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */ #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 8eea97be1ed5..4a5199070430 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -180,6 +180,10 @@ static const struct coproc_reg cp15_regs[] = { NULL, reset_unknown, c6_DFAR }, { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, NULL, reset_unknown, c6_IFAR }, + + /* PAR swapped by interrupt.S */ + { CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, + /* * DC{C,I,CI}SW operations: */ diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index f7793df62f58..d0a8fa33409a 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -414,6 +414,10 @@ guest_trap: mrcne p15, 4, r2, c6, c0, 4 @ HPFAR bne 3f + /* Preserve PAR */ + mrrc p15, 0, r0, r1, c7 @ PAR + push {r0, r1} + /* Resolve IPA using the xFAR */ mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR isb @@ -424,13 +428,19 @@ guest_trap: lsl r2, r2, #4 orr r2, r2, r1, lsl #24 + /* Restore PAR */ + pop {r0, r1} + mcrr p15, 0, r0, r1, c7 @ PAR + 3: load_vcpu @ Load VCPU pointer to r0 str r2, [r0, #VCPU_HPFAR] 1: mov r1, #ARM_EXCEPTION_HVC b __kvm_vcpu_return -4: pop {r0, r1, r2} @ Failed translation, return to guest +4: pop {r0, r1} @ Failed translation, return to guest + mcrr p15, 0, r0, r1, c7 @ PAR + pop {r0, r1, r2} eret /* diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 3c8f2f0b4c5e..2b44b95a86dd 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -302,11 +302,14 @@ vcpu .req r0 @ vcpu pointer always in r0 .endif mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL + mrrc p15, 0, r4, r5, c7 @ PAR .if \store_to_vcpu == 0 - push {r2} + push {r2,r4-r5} .else str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] + add r12, vcpu, #CP15_OFFSET(c7_PAR) + strd r4, r5, [r12] .endif .endm @@ -319,12 +322,15 @@ vcpu .req r0 @ vcpu pointer always in r0 */ .macro write_cp15_state read_from_vcpu .if \read_from_vcpu == 0 - pop {r2} + pop {r2,r4-r5} .else ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] + add r12, vcpu, #CP15_OFFSET(c7_PAR) + ldrd r4, r5, [r12] .endif mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL + mcrr p15, 0, r4, r5, c7 @ PAR .if \read_from_vcpu == 0 pop {r2-r12} From 285695e4211008e0f06648c3ae7af8ba09a88399 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 13:08:47 +0100 Subject: [PATCH 0550/1368] ARM: KVM: add missing dsb before invalidating Stage-2 TLBs commit 479c5ae2f8a55509b691494cd13691d3dc31d102 upstream. When performing a Stage-2 TLB invalidation, it is necessary to make sure the write to the page tables is observable by all CPUs. For this purpose, add a dsb instruction to __kvm_tlb_flush_vmid_ipa before doing the TLB invalidation itself. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/interrupts.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index d0a8fa33409a..20e03d969558 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -49,6 +49,7 @@ __kvm_hyp_code_start: ENTRY(__kvm_tlb_flush_vmid_ipa) push {r2, r3} + dsb ishst add r0, r0, #KVM_VTTBR ldrd r2, r3, [r0] mcrr p15, 6, r2, r3, c2 @ Write VTTBR From b45835a8365e91c9166e466b92d91675a0d8d1ac Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 13:08:48 +0100 Subject: [PATCH 0551/1368] ARM: KVM: clear exclusive monitor on all exception returns commit 22cfbb6d730ca2fda236b507d9fba17bf002736c upstream. Make sure we clear the exclusive monitor on all exception returns, which otherwise could lead to lock corruptions. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/interrupts.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 20e03d969558..16cd4ba5d7fd 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -292,6 +292,7 @@ THUMB( orr r2, r2, #PSR_T_BIT ) ldr r2, =BSYM(panic) msr ELR_hyp, r2 ldr r0, =\panic_str + clrex @ Clear exclusive monitor eret .endm @@ -441,6 +442,7 @@ guest_trap: 4: pop {r0, r1} @ Failed translation, return to guest mcrr p15, 0, r0, r1, c7 @ PAR + clrex pop {r0, r1, r2} eret @@ -467,6 +469,7 @@ switch_to_guest_vfp: pop {r3-r7} pop {r0-r2} + clrex eret #endif From c2c679243cf6529ec3dda86d6f0124ccc10fbd92 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 1 Aug 2013 12:07:13 +0200 Subject: [PATCH 0552/1368] iwl4965: set power mode early commit eca396d7a5bdcc1fd67b1b12f737c213ac78a6f4 upstream. If device was put into a sleep and system was restarted or module reloaded, we have to wake device up before sending other commands. Otherwise it will fail to start with Microcode error. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlegacy/4965-mac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 9a95045c97b6..8d30fb954240 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -5316,6 +5316,9 @@ il4965_alive_start(struct il_priv *il) il->active_rate = RATES_MASK; + il_power_update_mode(il, true); + D_INFO("Updated power mode\n"); + if (il_is_associated(il)) { struct il_rxon_cmd *active_rxon = (struct il_rxon_cmd *)&il->active; @@ -5346,9 +5349,6 @@ il4965_alive_start(struct il_priv *il) D_INFO("ALIVE processing complete.\n"); wake_up(&il->wait_command_queue); - il_power_update_mode(il, true); - D_INFO("Updated power mode\n"); - return; restart: From 5ae473da2f18b175b7ea794326523e485d6795a5 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 1 Aug 2013 12:07:55 +0200 Subject: [PATCH 0553/1368] iwl4965: reset firmware after rfkill off commit 788f7a56fce1bcb2067b62b851a086fca48a0056 upstream. Using rfkill switch can make firmware unstable, what cause various Microcode errors and kernel warnings. Reseting firmware just after rfkill off (radio on) helped with that. Resolve: https://bugzilla.redhat.com/show_bug.cgi?id=977053 Reported-and-tested-by: Justin Pearce Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlegacy/4965-mac.c | 10 +++++----- drivers/net/wireless/iwlegacy/common.c | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 8d30fb954240..900f5f8c93ee 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -4442,12 +4442,12 @@ il4965_irq_tasklet(struct il_priv *il) * is killed. Hence update the killswitch state here. The * rfkill handler will care about restarting if needed. */ - if (!test_bit(S_ALIVE, &il->status)) { - if (hw_rf_kill) - set_bit(S_RFKILL, &il->status); - else - clear_bit(S_RFKILL, &il->status); + if (hw_rf_kill) { + set_bit(S_RFKILL, &il->status); + } else { + clear_bit(S_RFKILL, &il->status); wiphy_rfkill_set_hw_state(il->hw->wiphy, hw_rf_kill); + il_force_reset(il, true); } handled |= CSR_INT_BIT_RF_KILL; diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index e9a3cbc409ae..9c9ebadc22e2 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4660,6 +4660,7 @@ il_force_reset(struct il_priv *il, bool external) return 0; } +EXPORT_SYMBOL(il_force_reset); int il_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, From 7febdf14e427e1ed642f0a0c97182d50b6ce199e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jul 2013 11:23:06 +0200 Subject: [PATCH 0554/1368] mac80211: ignore HT primary channel while connected commit 5cdaed1e878d723d56d04ae0be1738124acf9f46 upstream. While we're connected, the AP shouldn't change the primary channel in the HT information. We checked this, and dropped the connection if it did change it. Unfortunately, this is causing problems on some APs, e.g. on the Netgear WRT610NL: the beacons seem to always contain a bad channel and if we made a connection using a probe response (correct data) we drop the connection immediately and can basically not connect properly at all. Work around this by ignoring the HT primary channel information in beacons if we're already connected. Also print out more verbose messages in the other situations to help diagnose similar bugs quicker in the future. Acked-by: Andy Isaacson Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 741448b30825..87499f8da560 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -237,8 +237,9 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, - struct cfg80211_chan_def *chandef, bool verbose) + struct cfg80211_chan_def *chandef, bool tracking) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_chan_def vht_chandef; u32 ht_cfreq, ret; @@ -257,7 +258,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, channel->band); /* check that channel matches the right operating channel */ - if (channel->center_freq != ht_cfreq) { + if (!tracking && channel->center_freq != ht_cfreq) { /* * It's possible that some APs are confused here; * Netgear WNDR3700 sometimes reports 4 higher than @@ -265,11 +266,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, * since we look at probe response/beacon data here * it should be OK. */ - if (verbose) - sdata_info(sdata, - "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - channel->center_freq, ht_cfreq, - ht_oper->primary_chan, channel->band); + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + channel->center_freq, ht_cfreq, + ht_oper->primary_chan, channel->band); ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } @@ -323,7 +323,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, channel->band); break; default: - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT operation IE has invalid channel width (%d), disable VHT\n", vht_oper->chan_width); @@ -332,7 +332,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!cfg80211_chandef_valid(&vht_chandef)) { - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information is invalid, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; @@ -345,7 +345,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information doesn't match HT, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; @@ -372,7 +372,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret |= chandef_downgrade(chandef); } - if (chandef->width != vht_chandef.width && verbose) + if (chandef->width != vht_chandef.width && !tracking) sdata_info(sdata, "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); @@ -412,7 +412,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, /* calculate new channel (type) based on HT/VHT operation IEs */ flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper, - vht_oper, &chandef, false); + vht_oper, &chandef, true); /* * Downgrade the new channel if we associated with restricted @@ -3906,7 +3906,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, ht_oper, vht_oper, - &chandef, true); + &chandef, false); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), local->rx_chains); From 42c8df37807471eac800b06891329e70b789cb44 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 31 Jul 2013 12:12:24 -0700 Subject: [PATCH 0555/1368] mac80211: fix infinite loop in ieee80211_determine_chantype commit b56e4b857c5210e848bfb80e074e5756a36cd523 upstream. Commit "3d9646d mac80211: fix channel selection bug" introduced a possible infinite loop by moving the out target above the chandef_downgrade while loop. When we downgrade to NL80211_CHAN_WIDTH_20_NOHT, we jump back up to re-run the while loop...indefinitely. Replace goto with break and carry on. This may not be sufficient to connect to the AP, but will at least keep the cpu from livelocking. Thanks to Derek Atkins as an extra pair of debugging eyes. Signed-off-by: Chris Wright Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 87499f8da560..1f57de7b7c0f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -366,7 +366,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; + break; } ret |= chandef_downgrade(chandef); From 775f521cd8ebf7edf47e88e6d1424513dc5e70e9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jul 2013 20:52:03 +0200 Subject: [PATCH 0556/1368] mac80211: continue using disabled channels while connected commit ddfe49b42d8ad4bfdf92d63d4a74f162660d878d upstream. In case the AP has different regulatory information than we do, it can happen that we connect to an AP based on e.g. the world roaming regulatory data, and then update our database with the AP's country information disables the channel the AP is using. If this happens on an HT AP, the bandwidth tracking code will hit the WARN_ON() and disconnect. Since that's not very useful, ignore the channel-disable flag in bandwidth tracking. Reported-by: Chris Wright Tested-by: Chris Wright Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1f57de7b7c0f..55a42f9c4f39 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -361,8 +361,17 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (ret & IEEE80211_STA_DISABLE_VHT) vht_chandef = *chandef; + /* + * Ignore the DISABLED flag when we're already connected and only + * tracking the APs beacon for bandwidth changes - otherwise we + * might get disconnected here if we connect to an AP, update our + * regulatory information based on the AP's country IE and the + * information we have is wrong/outdated and disables the channel + * that we're actually using for the connection to the AP. + */ while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) { + tracking ? 0 : + IEEE80211_CHAN_DISABLED)) { if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; From d00ff4f2e5340b4a1fae711c46d804500e4ad7f9 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Fri, 9 Aug 2013 11:44:06 +0200 Subject: [PATCH 0557/1368] can: pcan_usb: fix wrong memcpy() bytes length commit 3c322a56b01695df15c70bfdc2d02e0ccd80654e upstream. Fix possibly wrong memcpy() bytes length since some CAN records received from PCAN-USB could define a DLC field in range [9..15]. In that case, the real DLC value MUST be used to move forward the record pointer but, only 8 bytes max. MUST be copied into the data field of the struct can_frame object of the skb given to the network core. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 25723d8ee201..925ab8ec9329 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -649,7 +649,7 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) if ((mc->ptr + rec_len) > mc->end) goto decode_failed; - memcpy(cf->data, mc->ptr, rec_len); + memcpy(cf->data, mc->ptr, cf->can_dlc); mc->ptr += rec_len; } From aab4f8d490ef8c184d854d5f630438c10406765c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Aug 2013 09:04:05 +0200 Subject: [PATCH 0558/1368] genetlink: fix family dump race commit 58ad436fcf49810aa006016107f494c9ac9013db upstream. When dumping generic netlink families, only the first dump call is locked with genl_lock(), which protects the list of families, and thus subsequent calls can access the data without locking, racing against family addition/removal. This can cause a crash. Fix it - the locking needs to be conditional because the first time around it's already locked. A similar bug was reported to me on an old kernel (3.4.47) but the exact scenario that happened there is no longer possible, on those kernels the first round wasn't locked either. Looking at the current code I found the race described above, which had also existed on the old kernel. Reported-by: Andrei Otcheretianski Signed-off-by: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/genetlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1076fe16b122..ba6e55d1ca44 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -789,6 +789,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; + bool need_locking = chains_to_skip || fams_to_skip; + + if (need_locking) + genl_lock(); for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { n = 0; @@ -810,6 +814,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = i; cb->args[1] = n; + if (need_locking) + genl_unlock(); + return skb->len; } From 20fd3d46726e60f936e8901745a7d00bae32958f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jul 2013 10:11:25 +0200 Subject: [PATCH 0559/1368] cfg80211: fix P2P GO interface teardown commit 74418edec915d0f446debebde08d170c7b8ba0ee upstream. When a P2P GO interface goes down, cfg80211 doesn't properly tear it down, leading to warnings later. Add the GO interface type to the enumeration to tear it down like AP interfaces. Otherwise, we leave it pending and mac80211's state can get very confused, leading to warnings later. Reported-by: Ilan Peer Tested-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 73405e00c800..64fcbae020d2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -876,6 +876,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, dev); break; default: From 4bd3f15b77578866fda9b0da4ef027f06ed3b746 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 1 Aug 2013 18:30:38 +0200 Subject: [PATCH 0560/1368] ASoC: dapm: Fix empty list check in dapm_new_mux() commit fe581391147cb3d738d961d0f1233d91a9e1113c upstream. list_first_entry() will always return a valid pointer, even if the list is empty. So the check whether path is NULL will always be false. So we end up calling dapm_create_or_share_mixmux_kcontrol() with a path struct that points right in the middle of the widget struct and by trying to modify the path the widgets memory will become corrupted. Fix this by using list_emtpy() to check if the widget doesn't have any paths. Signed-off-by: Lars-Peter Clausen Tested-by: Stephen Warren Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index c7051c457b75..360638362e98 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -682,13 +682,14 @@ static int dapm_new_mux(struct snd_soc_dapm_widget *w) return -EINVAL; } - path = list_first_entry(&w->sources, struct snd_soc_dapm_path, - list_sink); - if (!path) { + if (list_empty(&w->sources)) { dev_err(dapm->dev, "ASoC: mux %s has no paths\n", w->name); return -EINVAL; } + path = list_first_entry(&w->sources, struct snd_soc_dapm_path, + list_sink); + ret = dapm_create_or_share_mixmux_kcontrol(w, 0, path); if (ret < 0) return ret; From 6b907d955284264b28692338e9a10fa3586704c6 Mon Sep 17 00:00:00 2001 From: Brian Austin Date: Tue, 6 Aug 2013 12:57:21 -0500 Subject: [PATCH 0561/1368] ASoC: cs42l52: Reorder Min/Max and update to SX_TLV for Beep Volume commit e2c98a8bba958045bde861fe1d66be54315c7790 upstream. Beep Volume Min/Max was backwards. Change to SOC_SONGLE_SX_TLV for correct volume representation Signed-off-by: Brian Austin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/cs42l52.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index 987f728718c5..ee25f325d65c 100644 --- a/sound/soc/codecs/cs42l52.c +++ b/sound/soc/codecs/cs42l52.c @@ -451,7 +451,7 @@ static const struct snd_kcontrol_new cs42l52_snd_controls[] = { SOC_ENUM("Beep Pitch", beep_pitch_enum), SOC_ENUM("Beep on Time", beep_ontime_enum), SOC_ENUM("Beep off Time", beep_offtime_enum), - SOC_SINGLE_TLV("Beep Volume", CS42L52_BEEP_VOL, 0, 0x1f, 0x07, hl_tlv), + SOC_SINGLE_SX_TLV("Beep Volume", CS42L52_BEEP_VOL, 0, 0x07, 0x1f, hl_tlv), SOC_SINGLE("Beep Mixer Switch", CS42L52_BEEP_TONE_CTL, 5, 1, 1), SOC_ENUM("Beep Treble Corner Freq", beep_treble_enum), SOC_ENUM("Beep Bass Corner Freq", beep_bass_enum), From e54f0e6c230d511f3159a6e773ab5f790c548390 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Wed, 14 Aug 2013 14:24:16 -0600 Subject: [PATCH 0562/1368] ASoC: tegra: fix Tegra30 I2S capture parameter setup commit c90c0d7a96e634a73ef1580f1d20993606545647 upstream. The Tegra30 I2S driver was writing the AHUB interface parameters to the playback path register rather than the capture path register. This caused the capture parameters not to be configured at all, so if capturing using non-HW-default parameters (e.g. 16-bit stereo rather than 8-bit mono) the audio would be corrupted. With this fixed, audio capture from an analog microphone works correctly on the Cardhu board. Signed-off-by: Stephen Warren Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/tegra/tegra30_i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/tegra/tegra30_i2s.c b/sound/soc/tegra/tegra30_i2s.c index 31d092d83c71..a5432b10eaca 100644 --- a/sound/soc/tegra/tegra30_i2s.c +++ b/sound/soc/tegra/tegra30_i2s.c @@ -228,7 +228,7 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream, reg = TEGRA30_I2S_CIF_RX_CTRL; } else { val |= TEGRA30_AUDIOCIF_CTRL_DIRECTION_TX; - reg = TEGRA30_I2S_CIF_RX_CTRL; + reg = TEGRA30_I2S_CIF_TX_CTRL; } regmap_write(i2s->regmap, reg, val); From 5c75cd55f7fff95ddc45cfb32cca1b65bb2b1caf Mon Sep 17 00:00:00 2001 From: "Maksim A. Boyko" Date: Sat, 10 Aug 2013 12:20:02 +0400 Subject: [PATCH 0563/1368] ALSA: usb-audio: Fix invalid volume resolution for Logitech HD Webcam C525 commit 140d37de62ffe8405282a1d6498f3b4099006384 upstream. Add the volume control quirk for avoiding the kernel warning for the Logitech HD Webcam C525 as in the similar commit 36691e1be6ec551eef4a5225f126a281f8c051c2 for the Logitech HD Webcam C310. Reported-by: Maksim Boyko Tested-by: Maksim Boyko Signed-off-by: Maksim Boyko Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index d5438083fd6a..95558ef4a7a0 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -888,6 +888,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */ case USB_ID(0x046d, 0x081d): /* HD Webcam c510 */ case USB_ID(0x046d, 0x0825): /* HD Webcam c270 */ + case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */ case USB_ID(0x046d, 0x0991): /* Most audio usb devices lie about volume resolution. * Most Logitech webcams have res = 384. From e6c1227df6c1fc67cd70d808429a7ad924d3b154 Mon Sep 17 00:00:00 2001 From: Torsten Schenk Date: Sun, 11 Aug 2013 11:11:19 +0200 Subject: [PATCH 0564/1368] ALSA: 6fire: make buffers DMA-able (pcm) commit 5ece263f1d93fba8d992e67e3ab8a71acf674db9 upstream. Patch makes pcm buffers DMA-able by allocating each one separately. Signed-off-by: Torsten Schenk Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/6fire/pcm.c | 41 ++++++++++++++++++++++++++++++++++++++++- sound/usb/6fire/pcm.h | 2 +- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c index 074aaf7a36db..25f9e61ad883 100644 --- a/sound/usb/6fire/pcm.c +++ b/sound/usb/6fire/pcm.c @@ -580,6 +580,33 @@ static void usb6fire_pcm_init_urb(struct pcm_urb *urb, urb->instance.number_of_packets = PCM_N_PACKETS_PER_URB; } +static int usb6fire_pcm_buffers_init(struct pcm_runtime *rt) +{ + int i; + + for (i = 0; i < PCM_N_URBS; i++) { + rt->out_urbs[i].buffer = kzalloc(PCM_N_PACKETS_PER_URB + * PCM_MAX_PACKET_SIZE, GFP_KERNEL); + if (!rt->out_urbs[i].buffer) + return -ENOMEM; + rt->in_urbs[i].buffer = kzalloc(PCM_N_PACKETS_PER_URB + * PCM_MAX_PACKET_SIZE, GFP_KERNEL); + if (!rt->in_urbs[i].buffer) + return -ENOMEM; + } + return 0; +} + +static void usb6fire_pcm_buffers_destroy(struct pcm_runtime *rt) +{ + int i; + + for (i = 0; i < PCM_N_URBS; i++) { + kfree(rt->out_urbs[i].buffer); + kfree(rt->in_urbs[i].buffer); + } +} + int usb6fire_pcm_init(struct sfire_chip *chip) { int i; @@ -591,6 +618,13 @@ int usb6fire_pcm_init(struct sfire_chip *chip) if (!rt) return -ENOMEM; + ret = usb6fire_pcm_buffers_init(rt); + if (ret) { + usb6fire_pcm_buffers_destroy(rt); + kfree(rt); + return ret; + } + rt->chip = chip; rt->stream_state = STREAM_DISABLED; rt->rate = ARRAY_SIZE(rates); @@ -612,6 +646,7 @@ int usb6fire_pcm_init(struct sfire_chip *chip) ret = snd_pcm_new(chip->card, "DMX6FireUSB", 0, 1, 1, &pcm); if (ret < 0) { + usb6fire_pcm_buffers_destroy(rt); kfree(rt); snd_printk(KERN_ERR PREFIX "cannot create pcm instance.\n"); return ret; @@ -627,6 +662,7 @@ int usb6fire_pcm_init(struct sfire_chip *chip) snd_dma_continuous_data(GFP_KERNEL), MAX_BUFSIZE, MAX_BUFSIZE); if (ret) { + usb6fire_pcm_buffers_destroy(rt); kfree(rt); snd_printk(KERN_ERR PREFIX "error preallocating pcm buffers.\n"); @@ -671,6 +707,9 @@ void usb6fire_pcm_abort(struct sfire_chip *chip) void usb6fire_pcm_destroy(struct sfire_chip *chip) { - kfree(chip->pcm); + struct pcm_runtime *rt = chip->pcm; + + usb6fire_pcm_buffers_destroy(rt); + kfree(rt); chip->pcm = NULL; } diff --git a/sound/usb/6fire/pcm.h b/sound/usb/6fire/pcm.h index 9b01133ee3fe..f5779d6182c6 100644 --- a/sound/usb/6fire/pcm.h +++ b/sound/usb/6fire/pcm.h @@ -32,7 +32,7 @@ struct pcm_urb { struct urb instance; struct usb_iso_packet_descriptor packets[PCM_N_PACKETS_PER_URB]; /* END DO NOT SEPARATE */ - u8 buffer[PCM_N_PACKETS_PER_URB * PCM_MAX_PACKET_SIZE]; + u8 *buffer; struct pcm_urb *peer; }; From 39d165aba321f11d3c6653acb33b4d2d2d68065e Mon Sep 17 00:00:00 2001 From: Torsten Schenk Date: Sun, 11 Aug 2013 11:11:35 +0200 Subject: [PATCH 0565/1368] ALSA: 6fire: make buffers DMA-able (midi) commit 4c2aee0032b70083dafebd733ed9c774633b2fa3 upstream. Patch makes midi output buffer DMA-able by allocating it separately. Signed-off-by: Torsten Schenk Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/6fire/midi.c | 16 +++++++++++++++- sound/usb/6fire/midi.h | 6 +----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/sound/usb/6fire/midi.c b/sound/usb/6fire/midi.c index 26722423330d..f3dd7266c391 100644 --- a/sound/usb/6fire/midi.c +++ b/sound/usb/6fire/midi.c @@ -19,6 +19,10 @@ #include "chip.h" #include "comm.h" +enum { + MIDI_BUFSIZE = 64 +}; + static void usb6fire_midi_out_handler(struct urb *urb) { struct midi_runtime *rt = urb->context; @@ -156,6 +160,12 @@ int usb6fire_midi_init(struct sfire_chip *chip) if (!rt) return -ENOMEM; + rt->out_buffer = kzalloc(MIDI_BUFSIZE, GFP_KERNEL); + if (!rt->out_buffer) { + kfree(rt); + return -ENOMEM; + } + rt->chip = chip; rt->in_received = usb6fire_midi_in_received; rt->out_buffer[0] = 0x80; /* 'send midi' command */ @@ -169,6 +179,7 @@ int usb6fire_midi_init(struct sfire_chip *chip) ret = snd_rawmidi_new(chip->card, "6FireUSB", 0, 1, 1, &rt->instance); if (ret < 0) { + kfree(rt->out_buffer); kfree(rt); snd_printk(KERN_ERR PREFIX "unable to create midi.\n"); return ret; @@ -197,6 +208,9 @@ void usb6fire_midi_abort(struct sfire_chip *chip) void usb6fire_midi_destroy(struct sfire_chip *chip) { - kfree(chip->midi); + struct midi_runtime *rt = chip->midi; + + kfree(rt->out_buffer); + kfree(rt); chip->midi = NULL; } diff --git a/sound/usb/6fire/midi.h b/sound/usb/6fire/midi.h index c321006e5430..84851b9f5559 100644 --- a/sound/usb/6fire/midi.h +++ b/sound/usb/6fire/midi.h @@ -16,10 +16,6 @@ #include "common.h" -enum { - MIDI_BUFSIZE = 64 -}; - struct midi_runtime { struct sfire_chip *chip; struct snd_rawmidi *instance; @@ -32,7 +28,7 @@ struct midi_runtime { struct snd_rawmidi_substream *out; struct urb out_urb; u8 out_serial; /* serial number of out packet */ - u8 out_buffer[MIDI_BUFSIZE]; + u8 *out_buffer; int buffer_offset; void (*in_received)(struct midi_runtime *rt, u8 *data, int length); From b32bd480ca3f42a49174b0e4e960bc4a40a28741 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 8 Aug 2013 09:32:37 +0200 Subject: [PATCH 0566/1368] ALSA: hda - Fix missing mute controls for CX5051 commit f69910ddbd8c29391958cf82b598dd78fe5c8640 upstream. We've added a fake mute control (setting the amp volume to zero) for CX5051 at commit [3868137e: ALSA: hda - Add a fake mute feature], but this feature was overlooked in the generic parser implementation. Now the driver lacks of mute controls on these codecs. The fix is just to check both AC_AMPCAP_MUTE and AC_AMPCAP_MIN_MUTE bits in each place checking the amp capabilities. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59001 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 24400cffb8f3..ad22decad02b 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -519,7 +519,7 @@ static bool same_amp_caps(struct hda_codec *codec, hda_nid_t nid1, } #define nid_has_mute(codec, nid, dir) \ - check_amp_caps(codec, nid, dir, AC_AMPCAP_MUTE) + check_amp_caps(codec, nid, dir, (AC_AMPCAP_MUTE | AC_AMPCAP_MIN_MUTE)) #define nid_has_volume(codec, nid, dir) \ check_amp_caps(codec, nid, dir, AC_AMPCAP_NUM_STEPS) @@ -621,7 +621,7 @@ static int get_amp_val_to_activate(struct hda_codec *codec, hda_nid_t nid, if (enable) val = (caps & AC_AMPCAP_OFFSET) >> AC_AMPCAP_OFFSET_SHIFT; } - if (caps & AC_AMPCAP_MUTE) { + if (caps & (AC_AMPCAP_MUTE | AC_AMPCAP_MIN_MUTE)) { if (!enable) val |= HDA_AMP_MUTE; } @@ -645,7 +645,7 @@ static unsigned int get_amp_mask_to_modify(struct hda_codec *codec, { unsigned int mask = 0xff; - if (caps & AC_AMPCAP_MUTE) { + if (caps & (AC_AMPCAP_MUTE | AC_AMPCAP_MIN_MUTE)) { if (is_ctl_associated(codec, nid, dir, idx, NID_PATH_MUTE_CTL)) mask &= ~0x80; } From db3175e1e8bb0701e2a96a5f5e6bee6038da5fd2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Aug 2013 12:34:42 +0200 Subject: [PATCH 0567/1368] ALSA: hda - Add pinfix for LG LW25 laptop commit db8a38e5063a4daf61252e65d47ab3495c705f4c upstream. Correct the pins for a line-in and a headphone on LG LW25 laptop with ALC880 codec. Other pins seem fine. Reported-and-tested-by: Joonas Saarinen Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 051c03d5337d..8a44d4a11d1d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1027,6 +1027,7 @@ enum { ALC880_FIXUP_GPIO2, ALC880_FIXUP_MEDION_RIM, ALC880_FIXUP_LG, + ALC880_FIXUP_LG_LW25, ALC880_FIXUP_W810, ALC880_FIXUP_EAPD_COEF, ALC880_FIXUP_TCL_S700, @@ -1085,6 +1086,14 @@ static const struct hda_fixup alc880_fixups[] = { { } } }, + [ALC880_FIXUP_LG_LW25] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x0181344f }, /* line-in */ + { 0x1b, 0x0321403f }, /* headphone */ + { } + } + }, [ALC880_FIXUP_W810] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -1337,6 +1346,7 @@ static const struct snd_pci_quirk alc880_fixup_tbl[] = { SND_PCI_QUIRK(0x1854, 0x003b, "LG", ALC880_FIXUP_LG), SND_PCI_QUIRK(0x1854, 0x005f, "LG P1 Express", ALC880_FIXUP_LG), SND_PCI_QUIRK(0x1854, 0x0068, "LG w1", ALC880_FIXUP_LG), + SND_PCI_QUIRK(0x1854, 0x0077, "LG LW25", ALC880_FIXUP_LG_LW25), SND_PCI_QUIRK(0x19db, 0x4188, "TCL S700", ALC880_FIXUP_TCL_S700), /* Below is the copied entries from alc880_quirks.c. From fa392433df90eec8059dae7323ed2b398c92ecb2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 16 Aug 2013 08:17:05 +0200 Subject: [PATCH 0568/1368] ALSA: hda - Add a fixup for Gateway LT27 commit 1801928e0f99d94c55e33c584c5eb2ff5e246ee6 upstream. Gateway LT27 needs a fixup for the inverted digital mic. Reported-by: "Nathanael D. Noblet" Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8a44d4a11d1d..57f9f2a12e85 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4210,6 +4210,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x1025, 0x034a, "Gateway LT27", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), From 289813a71e600f652d995d1e94a50112fb1dcfd7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jul 2013 22:34:28 +0200 Subject: [PATCH 0569/1368] nl80211: fix another nl80211_fam.attrbuf race commit c319d50bfcf678c2857038276d9fab3c6646f3bf upstream. This is similar to the race Linus had reported, but in this case it's an older bug: nl80211_prepare_wdev_dump() uses the wiphy index in cb->args[0] as it is and thus parses the message over and over again instead of just once because 0 is the first valid wiphy index. Similar code in nl80211_testmode_dump() correctly offsets the wiphy_index by 1, do that here as well. Reported-by: Ben Hutchings Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index db8ead94ff7a..448c034184e2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -471,10 +471,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, goto out_unlock; } *rdev = wiphy_to_dev((*wdev)->wiphy); - cb->args[0] = (*rdev)->wiphy_idx; + /* 0 is the first index - add 1 to parse only once */ + cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; } else { - struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]); + /* subtract the 1 again here */ + struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; if (!wiphy) { From 7048e6925f52536c37143518644a4beabdc4846f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 14 Aug 2013 11:01:46 +0200 Subject: [PATCH 0570/1368] usb: add two quirky touchscreen commit 304ab4ab079a8ed03ce39f1d274964a532db036b upstream. These devices tend to become unresponsive after S3 Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index a63598895077..5b44cd47da5b 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -78,6 +78,12 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x04d8, 0x000c), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* CarrolTouch 4000U */ + { USB_DEVICE(0x04e7, 0x0009), .driver_info = USB_QUIRK_RESET_RESUME }, + + /* CarrolTouch 4500U */ + { USB_DEVICE(0x04e7, 0x0030), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Samsung Android phone modem - ID conflict with SPH-I500 */ { USB_DEVICE(0x04e8, 0x6601), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, From 0179a37034fd74b573e63e7175dc6e65249b5389 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 11 Aug 2013 16:49:23 +0200 Subject: [PATCH 0571/1368] USB: ti_usb_3410_5052: fix big-endian firmware handling commit e877dd2f2581628b7119df707d4cf03d940cff49 upstream. Fix endianess bugs in firmware handling introduced by commits cb7a7c6a ("ti_usb_3410_5052: add Multi-Tech modem support") and 05a3d905 ("ti_usb_3410_5052: support alternate firmware") which made the driver use the wrong firmware for certain devices on big-endian machines. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 01f79f11e5ad..32bdd5eac59b 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -1536,14 +1536,15 @@ static int ti_download_firmware(struct ti_device *tdev) char buf[32]; /* try ID specific firmware first, then try generic firmware */ - sprintf(buf, "ti_usb-v%04x-p%04x.fw", dev->descriptor.idVendor, - dev->descriptor.idProduct); + sprintf(buf, "ti_usb-v%04x-p%04x.fw", + le16_to_cpu(dev->descriptor.idVendor), + le16_to_cpu(dev->descriptor.idProduct)); status = request_firmware(&fw_p, buf, &dev->dev); if (status != 0) { buf[0] = '\0'; - if (dev->descriptor.idVendor == MTS_VENDOR_ID) { - switch (dev->descriptor.idProduct) { + if (le16_to_cpu(dev->descriptor.idVendor) == MTS_VENDOR_ID) { + switch (le16_to_cpu(dev->descriptor.idProduct)) { case MTS_CDMA_PRODUCT_ID: strcpy(buf, "mts_cdma.fw"); break; From d802afca86ce011832ec9e8d8120f0ada9dd1835 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 11 Aug 2013 16:49:20 +0200 Subject: [PATCH 0572/1368] USB: mos7840: fix big-endian probe commit d551ec9b690f3de65b0091a2e767f1382adc792d upstream. Fix bug in device-type detection on big-endian machines originally introduced by commit 0eafe4de ("USB: serial: mos7840: add support for MCS7810 devices") which always matched on little-endian product ids. Reported-by: kbuild test robot Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index b92d3338d055..2c1749da1f7e 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -2208,7 +2208,7 @@ static int mos7810_check(struct usb_serial *serial) static int mos7840_probe(struct usb_serial *serial, const struct usb_device_id *id) { - u16 product = serial->dev->descriptor.idProduct; + u16 product = le16_to_cpu(serial->dev->descriptor.idProduct); u8 *buf; int device_type; From f56852aeaaa9a199ffa0a4c9ba2ef360122021c0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 13 Aug 2013 13:27:34 +0200 Subject: [PATCH 0573/1368] USB: mos7720: fix broken control requests commit ef6c8c1d733e244f0499035be0dabe1f4ed98c6f upstream. The parallel-port code of the drivers used a stack allocated control-request buffer for asynchronous (and possibly deferred) control requests. This not only violates the no-DMA-from-stack requirement but could also lead to corrupt control requests being submitted. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index f27c621a9297..5050cc8584ba 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -90,6 +90,7 @@ struct urbtracker { struct list_head urblist_entry; struct kref ref_count; struct urb *urb; + struct usb_ctrlrequest *setup; }; enum mos7715_pp_modes { @@ -271,6 +272,7 @@ static void destroy_urbtracker(struct kref *kref) struct mos7715_parport *mos_parport = urbtrack->mos_parport; usb_free_urb(urbtrack->urb); + kfree(urbtrack->setup); kfree(urbtrack); kref_put(&mos_parport->ref_count, destroy_mos_parport); } @@ -355,7 +357,6 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, struct urbtracker *urbtrack; int ret_val; unsigned long flags; - struct usb_ctrlrequest setup; struct usb_serial *serial = mos_parport->serial; struct usb_device *usbdev = serial->dev; @@ -373,14 +374,20 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, kfree(urbtrack); return -ENOMEM; } - setup.bRequestType = (__u8)0x40; - setup.bRequest = (__u8)0x0e; - setup.wValue = get_reg_value(reg, dummy); - setup.wIndex = get_reg_index(reg); - setup.wLength = 0; + urbtrack->setup = kmalloc(sizeof(*urbtrack->setup), GFP_KERNEL); + if (!urbtrack->setup) { + usb_free_urb(urbtrack->urb); + kfree(urbtrack); + return -ENOMEM; + } + urbtrack->setup->bRequestType = (__u8)0x40; + urbtrack->setup->bRequest = (__u8)0x0e; + urbtrack->setup->wValue = get_reg_value(reg, dummy); + urbtrack->setup->wIndex = get_reg_index(reg); + urbtrack->setup->wLength = 0; usb_fill_control_urb(urbtrack->urb, usbdev, usb_sndctrlpipe(usbdev, 0), - (unsigned char *)&setup, + (unsigned char *)urbtrack->setup, NULL, 0, async_complete, urbtrack); kref_init(&urbtrack->ref_count); INIT_LIST_HEAD(&urbtrack->urblist_entry); From 91508a9ae0005f05adbf45f116dda26b803b4f5f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 13 Aug 2013 13:27:35 +0200 Subject: [PATCH 0574/1368] USB: keyspan: fix null-deref at disconnect and release commit ff8a43c10f1440f07a5faca0c1556921259f7f76 upstream. Make sure to fail properly if the device is not accepted during attach in order to avoid null-pointer derefs (of missing interface private data) at disconnect or release. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/keyspan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 3549d073df22..07fbdf0e7ab6 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -2315,7 +2315,7 @@ static int keyspan_startup(struct usb_serial *serial) if (d_details == NULL) { dev_err(&serial->dev->dev, "%s - unknown product id %x\n", __func__, le16_to_cpu(serial->dev->descriptor.idProduct)); - return 1; + return -ENODEV; } /* Setup private data for serial driver */ From c72a0e036f9d80c609e608a723751343f1f5e9fc Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 7 Aug 2013 10:58:05 -0400 Subject: [PATCH 0575/1368] USB: EHCI: accept very late isochronous URBs commit 24f531371de17010f2b1b57d90e42240032e7733 upstream. Since commits 4005ad4390bf (EHCI: implement new semantics for URB_ISO_ASAP) and c75c5ab575af (ALSA: USB: adjust for changed 3.8 USB API) became widely distributed, people have been experiencing problems with audio transfers. The slightest underrun causes complete failure, requiring the audio stream to be restarted. It turns out that the current isochronous API doesn't handle underruns in the best way. The ALSA developers would much rather have transfers that are submitted too late be accepted and complete in the normal fashion, rather than being refused outright. This patch implements the requested approach. When an isochronous URB submission is so late that all its scheduled slots have already expired, a debugging message will be printed in the log and the URB will be accepted as usual. Assuming it was submitted by a completion handler (which is normally the case), it will complete shortly thereafter with all the usb_iso_packet_descriptor status fields marked -EXDEV. This fixes (for ehci-hcd) https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1191603 It should be applied to all kernels that include commit 4005ad4390bf. Signed-off-by: Alan Stern Tested-by: Maksim Boyko CC: Clemens Ladisch Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-sched.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index f80d0330d548..8e3c878f38cf 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -1391,21 +1391,20 @@ iso_stream_schedule ( /* Behind the scheduling threshold? */ if (unlikely(start < next)) { + unsigned now2 = (now - base) & (mod - 1); /* USB_ISO_ASAP: Round up to the first available slot */ if (urb->transfer_flags & URB_ISO_ASAP) start += (next - start + period - 1) & -period; /* - * Not ASAP: Use the next slot in the stream. If - * the entire URB falls before the threshold, fail. + * Not ASAP: Use the next slot in the stream, + * no matter what. */ - else if (start + span - period < next) { - ehci_dbg(ehci, "iso urb late %p (%u+%u < %u)\n", + else if (start + span - period < now2) { + ehci_dbg(ehci, "iso underrun %p (%u+%u < %u)\n", urb, start + base, - span - period, next + base); - status = -EXDEV; - goto fail; + span - period, now2 + base); } } From 237e0153190c92a4deb80782973f60557a6b2cdb Mon Sep 17 00:00:00 2001 From: Matt Burtch Date: Mon, 12 Aug 2013 10:11:39 -0700 Subject: [PATCH 0576/1368] USB-Serial: Fix error handling of usb_wwan commit 6c1ee66a0b2bdbd64c078fba684d640cf2fd38a9 upstream. This fixes an issue where the bulk-in urb used for incoming data transfer is not resubmitted if the packet recieved contains an error status. This results in the driver locking until the port is closed and re-opened. Tested on a custom board with a Cinterion GSM module. Signed-off-by: Matt Burtch Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb_wwan.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c index ece326ef63a0..db0cf536de18 100644 --- a/drivers/usb/serial/usb_wwan.c +++ b/drivers/usb/serial/usb_wwan.c @@ -291,18 +291,18 @@ static void usb_wwan_indat_callback(struct urb *urb) tty_flip_buffer_push(&port->port); } else dev_dbg(dev, "%s: empty read urb received\n", __func__); - - /* Resubmit urb so we continue receiving */ - err = usb_submit_urb(urb, GFP_ATOMIC); - if (err) { - if (err != -EPERM) { - dev_err(dev, "%s: resubmit read urb failed. (%d)\n", __func__, err); - /* busy also in error unless we are killed */ - usb_mark_last_busy(port->serial->dev); - } - } else { + } + /* Resubmit urb so we continue receiving */ + err = usb_submit_urb(urb, GFP_ATOMIC); + if (err) { + if (err != -EPERM) { + dev_err(dev, "%s: resubmit read urb failed. (%d)\n", + __func__, err); + /* busy also in error unless we are killed */ usb_mark_last_busy(port->serial->dev); } + } else { + usb_mark_last_busy(port->serial->dev); } } From 1dba303727f52ea062580b0a9b3f0c3b462769cf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 13 Aug 2013 14:12:40 -0700 Subject: [PATCH 0577/1368] PM / QoS: Fix workqueue deadlock when using pm_qos_update_request_timeout() commit 40fea92ffb5fa0ef26d10ae0fe5688bc8e61c791 upstream. pm_qos_update_request_timeout() updates a qos and then schedules a delayed work item to bring the qos back down to the default after the timeout. When the work item runs, pm_qos_work_fn() will call pm_qos_update_request() and deadlock because it tries to cancel itself via cancel_delayed_work_sync(). Future callers of that qos will also hang waiting to cancel the work that is canceling itself. Let's extract the little bit of code that does the real work of pm_qos_update_request() and call it from the work function so that we don't deadlock. Before ed1ac6e (PM: don't use [delayed_]work_pending()) this didn't happen because the work function wouldn't try to cancel itself. [backport to 3.10 - gregkh] Signed-off-by: Stephen Boyd Reviewed-by: Tejun Heo Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/qos.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 587dddeebf15..25cf89bc659e 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -293,6 +293,15 @@ int pm_qos_request_active(struct pm_qos_request *req) } EXPORT_SYMBOL_GPL(pm_qos_request_active); +static void __pm_qos_update_request(struct pm_qos_request *req, + s32 new_value) +{ + if (new_value != req->node.prio) + pm_qos_update_target( + pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_UPDATE_REQ, new_value); +} + /** * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout * @work: work struct for the delayed work (timeout) @@ -305,7 +314,7 @@ static void pm_qos_work_fn(struct work_struct *work) struct pm_qos_request, work); - pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); + __pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); } /** @@ -365,6 +374,8 @@ void pm_qos_update_request(struct pm_qos_request *req, pm_qos_update_target( pm_qos_array[req->pm_qos_class]->constraints, &req->node, PM_QOS_UPDATE_REQ, new_value); + + __pm_qos_update_request(req, new_value); } EXPORT_SYMBOL_GPL(pm_qos_update_request); From f34a4837cd0b28abe7449e591a23cb32ab128520 Mon Sep 17 00:00:00 2001 From: Thomas Pugliese Date: Fri, 9 Aug 2013 09:52:13 -0500 Subject: [PATCH 0578/1368] wusbcore: fix kernel panic when disconnecting a wireless USB->serial device commit ec58fad1feb76c323ef47efff1d1e8660ed4644c upstream. This patch fixes a kernel panic that can occur when disconnecting a wireless USB->serial device. When the serial device disconnects, the device cleanup procedure ends up calling usb_hcd_disable_endpoint on the serial device's endpoints. The wusbcore uses the ABORT_RPIPE command to abort all transfers on the given endpoint but it does not properly give back the URBs when the transfer results return from the HWA. This patch prevents the transfer result processing code from bailing out when it sees a WA_XFER_STATUS_ABORTED result code so that these urbs are flushed properly by usb_hcd_disable_endpoint. It also updates wa_urb_dequeue to handle the case where the endpoint has already been cleaned up when usb_kill_urb is called which is where the panic originally occurred. Signed-off-by: Thomas Pugliese Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/wa-xfer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c index 6ef94bce8c0d..028fc8337435 100644 --- a/drivers/usb/wusbcore/wa-xfer.c +++ b/drivers/usb/wusbcore/wa-xfer.c @@ -1110,6 +1110,12 @@ int wa_urb_dequeue(struct wahc *wa, struct urb *urb) } spin_lock_irqsave(&xfer->lock, flags); rpipe = xfer->ep->hcpriv; + if (rpipe == NULL) { + pr_debug("%s: xfer id 0x%08X has no RPIPE. %s", + __func__, wa_xfer_id(xfer), + "Probably already aborted.\n" ); + goto out_unlock; + } /* Check the delayed list -> if there, release and complete */ spin_lock_irqsave(&wa->xfer_list_lock, flags2); if (!list_empty(&xfer->list_node) && xfer->seg == NULL) @@ -1493,8 +1499,7 @@ static void wa_xfer_result_cb(struct urb *urb) break; } usb_status = xfer_result->bTransferStatus & 0x3f; - if (usb_status == WA_XFER_STATUS_ABORTED - || usb_status == WA_XFER_STATUS_NOT_FOUND) + if (usb_status == WA_XFER_STATUS_NOT_FOUND) /* taken care of already */ break; xfer_id = xfer_result->dwTransferID; From 8e220cfd1a9f6c763a108a3b964a888fe341dabd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Aug 2013 11:42:25 -0700 Subject: [PATCH 0579/1368] Fix TLB gather virtual address range invalidation corner cases commit 2b047252d087be7f2ba088b4933cd904f92e6fce upstream. Ben Tebulin reported: "Since v3.7.2 on two independent machines a very specific Git repository fails in 9/10 cases on git-fsck due to an SHA1/memory failures. This only occurs on a very specific repository and can be reproduced stably on two independent laptops. Git mailing list ran out of ideas and for me this looks like some very exotic kernel issue" and bisected the failure to the backport of commit 53a59fc67f97 ("mm: limit mmu_gather batching to fix soft lockups on !CONFIG_PREEMPT"). That commit itself is not actually buggy, but what it does is to make it much more likely to hit the partial TLB invalidation case, since it introduces a new case in tlb_next_batch() that previously only ever happened when running out of memory. The real bug is that the TLB gather virtual memory range setup is subtly buggered. It was introduced in commit 597e1c3580b7 ("mm/mmu_gather: enable tlb flush range in generic mmu_gather"), and the range handling was already fixed at least once in commit e6c495a96ce0 ("mm: fix the TLB range flushed when __tlb_remove_page() runs out of slots"), but that fix was not complete. The problem with the TLB gather virtual address range is that it isn't set up by the initial tlb_gather_mmu() initialization (which didn't get the TLB range information), but it is set up ad-hoc later by the functions that actually flush the TLB. And so any such case that forgot to update the TLB range entries would potentially miss TLB invalidates. Rather than try to figure out exactly which particular ad-hoc range setup was missing (I personally suspect it's the hugetlb case in zap_huge_pmd(), which didn't have the same logic as zap_pte_range() did), this patch just gets rid of the problem at the source: make the TLB range information available to tlb_gather_mmu(), and initialize it when initializing all the other tlb gather fields. This makes the patch larger, but conceptually much simpler. And the end result is much more understandable; even if you want to play games with partial ranges when invalidating the TLB contents in chunks, now the range information is always there, and anybody who doesn't want to bother with it won't introduce subtle bugs. Ben verified that this fixes his problem. Reported-bisected-and-tested-by: Ben Tebulin Build-testing-by: Stephen Rothwell Build-testing-by: Richard Weinberger Reviewed-by: Michal Hocko Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/tlb.h | 7 +++++-- arch/arm64/include/asm/tlb.h | 7 +++++-- arch/ia64/include/asm/tlb.h | 9 ++++++--- arch/s390/include/asm/tlb.h | 8 ++++++-- arch/sh/include/asm/tlb.h | 6 ++++-- arch/um/include/asm/tlb.h | 6 ++++-- fs/exec.c | 4 ++-- include/asm-generic/tlb.h | 2 +- mm/hugetlb.c | 2 +- mm/memory.c | 36 +++++++++++++++++++++--------------- mm/mmap.c | 4 ++-- 11 files changed, 57 insertions(+), 34 deletions(-) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index bdf2b8458ec1..aa9b4ac3fdf6 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -43,6 +43,7 @@ struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; struct vm_area_struct *vma; + unsigned long start, end; unsigned long range_start; unsigned long range_end; unsigned int nr; @@ -107,10 +108,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->vma = NULL; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 654f0968030b..5546653e5cc8 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -35,6 +35,7 @@ struct mmu_gather { struct mm_struct *mm; unsigned int fullmm; struct vm_area_struct *vma; + unsigned long start, end; unsigned long range_start; unsigned long range_end; unsigned int nr; @@ -97,10 +98,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->vma = NULL; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index ef3a9de01954..bc5efc7c3f3f 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -22,7 +22,7 @@ * unmapping a portion of the virtual address space, these hooks are called according to * the following template: * - * tlb <- tlb_gather_mmu(mm, full_mm_flush); // start unmap for address space MM + * tlb <- tlb_gather_mmu(mm, start, end); // start unmap for address space MM * { * for each vma that needs a shootdown do { * tlb_start_vma(tlb, vma); @@ -58,6 +58,7 @@ struct mmu_gather { unsigned int max; unsigned char fullmm; /* non-zero means full mm flush */ unsigned char need_flush; /* really unmapped some PTEs? */ + unsigned long start, end; unsigned long start_addr; unsigned long end_addr; struct page **pages; @@ -155,13 +156,15 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); tlb->pages = tlb->local; tlb->nr = 0; - tlb->fullmm = full_mm_flush; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; tlb->start_addr = ~0UL; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index b75d7d686684..23a64d25f2b1 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -32,6 +32,7 @@ struct mmu_gather { struct mm_struct *mm; struct mmu_table_batch *batch; unsigned int fullmm; + unsigned long start, unsigned long end; }; struct mmu_table_batch { @@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned int full_mm_flush) + unsigned long start, + unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); tlb->batch = NULL; if (tlb->fullmm) __tlb_flush_mm(mm); diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index e61d43d9f689..362192ed12fe 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -36,10 +36,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); init_tlb_gather(tlb); } diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 4febacd1a8a1..29b0301c18aa 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -45,10 +45,12 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); init_tlb_gather(tlb); } diff --git a/fs/exec.c b/fs/exec.c index ffd7a813ad3d..1f446705636b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -607,7 +607,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, old_start, old_end); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. @@ -624,7 +624,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } - tlb_finish_mmu(&tlb, new_end, old_end); + tlb_finish_mmu(&tlb, old_start, old_end); /* * Shrink the vma to just the new range. Always succeeds. diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 13821c339a41..5672d7ea1fa0 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -112,7 +112,7 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm); +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5cf99bf8cce2..7c5eb85ec645 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2490,7 +2490,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, mm = vma->vm_mm; - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); __unmap_hugepage_range(&tlb, vma, start, end, ref_page); tlb_finish_mmu(&tlb, start, end); } diff --git a/mm/memory.c b/mm/memory.c index 5e5080005bc4..5a35443c01ad 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -211,14 +211,15 @@ static int tlb_next_batch(struct mmu_gather *tlb) * tear-down from @mm. The @fullmm argument is used when @mm is without * users and we're going to destroy the full address space (exit/execve). */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { tlb->mm = mm; - tlb->fullmm = fullmm; + /* Is it from 0 to ~0? */ + tlb->fullmm = !(start | (end+1)); tlb->need_flush_all = 0; - tlb->start = -1UL; - tlb->end = 0; + tlb->start = start; + tlb->end = end; tlb->need_flush = 0; tlb->local.next = NULL; tlb->local.nr = 0; @@ -258,8 +259,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e { struct mmu_gather_batch *batch, *next; - tlb->start = start; - tlb->end = end; tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -1101,7 +1100,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, spinlock_t *ptl; pte_t *start_pte; pte_t *pte; - unsigned long range_start = addr; again: init_rss_vec(rss); @@ -1204,17 +1202,25 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, * and page-free while holding it. */ if (force_flush) { + unsigned long old_end; + force_flush = 0; -#ifdef HAVE_GENERIC_MMU_GATHER - tlb->start = range_start; + /* + * Flush the TLB just for the previous segment, + * then update the range to be the remaining + * TLB range. + */ + old_end = tlb->end; tlb->end = addr; -#endif + tlb_flush_mmu(tlb); - if (addr != end) { - range_start = addr; + + tlb->start = addr; + tlb->end = old_end; + + if (addr != end) goto again; - } } return addr; @@ -1399,7 +1405,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end = start + size; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, start, end); for ( ; vma && vma->vm_start < end; vma = vma->vm_next) @@ -1425,7 +1431,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr unsigned long end = address + size; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, address, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, address, end); unmap_single_vma(&tlb, vma, address, end, details); diff --git a/mm/mmap.c b/mm/mmap.c index 7dbe39745be9..8d25fdc653be 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2356,7 +2356,7 @@ static void unmap_region(struct mm_struct *mm, struct mmu_gather tlb; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end); free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, @@ -2735,7 +2735,7 @@ void exit_mmap(struct mm_struct *mm) lru_add_drain(); flush_cache_mm(mm); - tlb_gather_mmu(&tlb, mm, 1); + tlb_gather_mmu(&tlb, mm, 0, -1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); From 859325460d2b15ef9b78b55eff72d766e1b8ea29 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 7 Aug 2013 23:39:41 +0100 Subject: [PATCH 0580/1368] ARM: 7809/1: perf: fix event validation for software group leaders commit c95eb3184ea1a3a2551df57190c81da695e2144b upstream. It is possible to construct an event group with a software event as a group leader and then subsequently add a hardware event to the group. This results in the event group being validated by adding all members of the group to a fake PMU and attempting to allocate each event on their respective PMU. Unfortunately, for software events wthout a corresponding arm_pmu, this results in a kernel crash attempting to dereference the ->get_event_idx function pointer. This patch fixes the problem by checking explicitly for software events and ignoring those in event validation (since they can always be scheduled). We will probably want to revisit this for 3.12, since the validation checks don't appear to work correctly when dealing with multiple hardware PMUs anyway. Reported-by: Vince Weaver Tested-by: Vince Weaver Tested-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 21f77906602c..e19edc6f2d15 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -258,6 +258,9 @@ validate_event(struct pmu_hw_events *hw_events, struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct pmu *leader_pmu = event->group_leader->pmu; + if (is_software_event(event)) + return 1; + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) return 1; From e5a16a446ef5bdb37214b100b93e59ac75e8a445 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Fri, 9 Aug 2013 15:14:08 +0200 Subject: [PATCH 0581/1368] m68k: Truncate base in do_div() commit ea077b1b96e073eac5c3c5590529e964767fc5f7 upstream. Explicitly truncate the second operand of do_div() to 32 bits to guard against bogus code calling it with a 64-bit divisor. [Thorsten] After upgrading from 3.2 to 3.10, mounting a btrfs volume fails with: btrfs: setting nodatacow, compression disabled btrfs: enabling auto recovery btrfs: disk space caching is enabled *** ZERO DIVIDE *** FORMAT=2 Current process id is 722 BAD KERNEL TRAP: 00000000 Modules linked in: evdev mac_hid ext4 crc16 jbd2 mbcache btrfs xor lzo_compress zlib_deflate raid6_pq crc32c libcrc32c PC: [<319535b2>] __btrfs_map_block+0x11c/0x119a [btrfs] SR: 2000 SP: 30c1fab4 a2: 30f0faf0 d0: 00000000 d1: 00001000 d2: 00000000 d3: 00000000 d4: 00010000 d5: 00000000 a0: 3085c72c a1: 3085c72c Process mount (pid: 722, task=30f0faf0) Frame format=2 instr addr=319535ae Stack from 30c1faec: 00000000 00000020 00000000 00001000 00000000 01401000 30253928 300ffc00 00a843ac 3026f640 00000000 00010000 0009e250 00d106c0 00011220 00000000 00001000 301c6830 0009e32a 000000ff 00000009 3085c72c 00000000 00000000 30c1fd14 00000000 00000020 00000000 30c1fd14 0009e26c 00000020 00000003 00000000 0009dd8a 300b0b6c 30253928 00a843ac 00001000 00000000 00000000 0000a008 3194e76a 30253928 00a843ac 00001000 00000000 00000000 00000002 Call Trace: [<00001000>] kernel_pg_dir+0x0/0x1000 [...] Code: 222e ff74 2a2e ff5c 2c2e ff60 4c45 1402 <2d40> ff64 2d41 ff68 2205 4c2e 1800 ff68 4c04 0800 2041 d1c0 2206 4c2e 1400 ff68 [Geert] As diagnosed by Andreas, fs/btrfs/volumes.c:__btrfs_map_block() calls do_div(stripe_nr, stripe_len); with stripe_len u64, while do_div() assumes the divisor is a 32-bit number. Due to the lack of truncation in the m68k-specific implementation of do_div(), the division is performed using the upper 32-bit word of stripe_len, which is zero. This was introduced by commit 53b381b3abeb86f12787a6c40fee9b2f71edc23b ("Btrfs: RAID5 and RAID6"), which changed the divisor from map->stripe_len (struct map_lookup.stripe_len is int) to a 64-bit temporary. Reported-by: Thorsten Glaser Signed-off-by: Andreas Schwab Tested-by: Thorsten Glaser Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- arch/m68k/include/asm/div64.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/m68k/include/asm/div64.h b/arch/m68k/include/asm/div64.h index 444ea8a09e9f..ef881cfbbca9 100644 --- a/arch/m68k/include/asm/div64.h +++ b/arch/m68k/include/asm/div64.h @@ -15,16 +15,17 @@ unsigned long long n64; \ } __n; \ unsigned long __rem, __upper; \ + unsigned long __base = (base); \ \ __n.n64 = (n); \ if ((__upper = __n.n32[0])) { \ asm ("divul.l %2,%1:%0" \ - : "=d" (__n.n32[0]), "=d" (__upper) \ - : "d" (base), "0" (__n.n32[0])); \ + : "=d" (__n.n32[0]), "=d" (__upper) \ + : "d" (__base), "0" (__n.n32[0])); \ } \ asm ("divu.l %2,%1:%0" \ - : "=d" (__n.n32[1]), "=d" (__rem) \ - : "d" (base), "1" (__upper), "0" (__n.n32[1])); \ + : "=d" (__n.n32[1]), "=d" (__rem) \ + : "d" (__base), "1" (__upper), "0" (__n.n32[1])); \ (n) = __n.n64; \ __rem; \ }) From 571b78a1d829263003a8560cca2c6d109bb19f0a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jul 2013 00:08:25 +0200 Subject: [PATCH 0582/1368] m68k/atari: ARAnyM - Fix NatFeat module support commit e8184e10f89736a23ea6eea8e24cd524c5c513d2 upstream. As pointed out by Andreas Schwab, pointers passed to ARAnyM NatFeat calls should be physical addresses, not virtual addresses. Fortunately on Atari, physical and virtual kernel addresses are the same, as long as normal kernel memory is concerned, so this usually worked fine without conversion. But for modules, pointers to literal strings are located in vmalloc()ed memory. Depending on the version of ARAnyM, this causes the nf_get_id() call to just fail, or worse, crash ARAnyM itself with e.g. Gotcha! Illegal memory access. Atari PC = $968c This is a big issue for distro kernels, who want to have all drivers as loadable modules in an initrd. Add a wrapper for nf_get_id() that copies the literal to the stack to work around this issue. Reported-by: Thorsten Glaser Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- arch/m68k/emu/natfeat.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/m68k/emu/natfeat.c b/arch/m68k/emu/natfeat.c index 2291a7d69d49..fa277aecfb78 100644 --- a/arch/m68k/emu/natfeat.c +++ b/arch/m68k/emu/natfeat.c @@ -18,9 +18,11 @@ #include #include +extern long nf_get_id2(const char *feature_name); + asm("\n" -" .global nf_get_id,nf_call\n" -"nf_get_id:\n" +" .global nf_get_id2,nf_call\n" +"nf_get_id2:\n" " .short 0x7300\n" " rts\n" "nf_call:\n" @@ -29,12 +31,25 @@ asm("\n" "1: moveq.l #0,%d0\n" " rts\n" " .section __ex_table,\"a\"\n" -" .long nf_get_id,1b\n" +" .long nf_get_id2,1b\n" " .long nf_call,1b\n" " .previous"); -EXPORT_SYMBOL_GPL(nf_get_id); EXPORT_SYMBOL_GPL(nf_call); +long nf_get_id(const char *feature_name) +{ + /* feature_name may be in vmalloc()ed memory, so make a copy */ + char name_copy[32]; + size_t n; + + n = strlcpy(name_copy, feature_name, sizeof(name_copy)); + if (n >= sizeof(name_copy)) + return 0; + + return nf_get_id2(name_copy); +} +EXPORT_SYMBOL_GPL(nf_get_id); + void nfprint(const char *fmt, ...) { static char buf[256]; From 9234930d6e89a7671042e6e35e318480d6b82e5f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 16 Aug 2013 20:50:55 -0700 Subject: [PATCH 0583/1368] s390: Fix broken build commit 215b28a5308f3d332df2ee09ef11fda45d7e4a92 upstream. Fix this build error: In file included from fs/exec.c:61:0: arch/s390/include/asm/tlb.h:35:23: error: expected identifier or '(' before 'unsigned' arch/s390/include/asm/tlb.h:36:1: warning: no semicolon at end of struct or union [enabled by default] arch/s390/include/asm/tlb.h: In function 'tlb_gather_mmu': arch/s390/include/asm/tlb.h:57:5: error: 'struct mmu_gather' has no member named 'end' Broken due to commit 2b047252d0 ("Fix TLB gather virtual address range invalidation corner cases"). Cc: Greg Kroah-Hartman Signed-off-by: Guenter Roeck [ Oh well. We had build testing for ppc amd um, but no s390 - Linus ] Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/tlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 23a64d25f2b1..6d6d92b4ea11 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -32,7 +32,7 @@ struct mmu_gather { struct mm_struct *mm; struct mmu_table_batch *batch; unsigned int fullmm; - unsigned long start, unsigned long end; + unsigned long start, end; }; struct mmu_table_batch { From 94aa327e13898c4c495a27303ce96cc52280fb25 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Aug 2013 09:53:28 -0400 Subject: [PATCH 0584/1368] jbd2: Fix use after free after error in jbd2_journal_dirty_metadata() commit 91aa11fae1cf8c2fd67be0609692ea9741cdcc43 upstream. When jbd2_journal_dirty_metadata() returns error, __ext4_handle_dirty_metadata() stops the handle. However callers of this function do not count with that fact and still happily used now freed handle. This use after free can result in various issues but very likely we oops soon. The motivation of adding __ext4_journal_stop() into __ext4_handle_dirty_metadata() in commit 9ea7a0df seems to be only to improve error reporting. So replace __ext4_journal_stop() with ext4_journal_abort_handle() which was there before that commit and add WARN_ON_ONCE() to dump stack to provide useful information. Reported-by: Sage Weil Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4_jbd2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 451eb4045330..1c88061da526 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -219,10 +219,10 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, set_buffer_prio(bh); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); - if (err) { - /* Errors can only happen if there is a bug */ - handle->h_err = err; - __ext4_journal_stop(where, line, handle); + /* Errors can only happen if there is a bug */ + if (WARN_ON_ONCE(err)) { + ext4_journal_abort_handle(where, line, __func__, bh, + handle, err); } } else { if (inode) From 9fa6018620ff121a3d892394021d4285636b7cd5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 13 Aug 2013 10:05:59 +0800 Subject: [PATCH 0585/1368] cpuset: fix the return value of cpuset_write_u64() commit a903f0865a190f8778c73df1a810ea6e25e5d7cf upstream. Writing to this file always returns -ENODEV: # echo 1 > cpuset.memory_pressure_enabled -bash: echo: write error: No such device Signed-off-by: Li Zefan Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cpuset.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 64b3f791bbe5..6948e9476b42 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1502,11 +1502,13 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) { struct cpuset *cs = cgroup_cs(cgrp); cpuset_filetype_t type = cft->private; - int retval = -ENODEV; + int retval = 0; mutex_lock(&cpuset_mutex); - if (!is_cpuset_online(cs)) + if (!is_cpuset_online(cs)) { + retval = -ENODEV; goto out_unlock; + } switch (type) { case FILE_CPU_EXCLUSIVE: From 6f5405942321c322f1ba83960837b63a8ebb039e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 20 Aug 2013 08:43:19 -0700 Subject: [PATCH 0586/1368] Linux 3.10.8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 33e36aba5fa2..1a216126c709 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 7 +SUBLEVEL = 8 EXTRAVERSION = NAME = TOSSUG Baby Fish From 8e7430857af5b242c950d3d2bb00289374f1436a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 20 Aug 2013 15:32:57 -0700 Subject: [PATCH 0587/1368] Revert "genetlink: fix family dump race" This reverts commit aab4f8d490ef8c184d854d5f630438c10406765c, commit 58ad436fcf49810aa006016107f494c9ac9013db upstream, as it causes problems. Cc: Johannes Berg Cc: Andrei Otcheretianski Cc: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/genetlink.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index ba6e55d1ca44..1076fe16b122 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -789,10 +789,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; - bool need_locking = chains_to_skip || fams_to_skip; - - if (need_locking) - genl_lock(); for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { n = 0; @@ -814,9 +810,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = i; cb->args[1] = n; - if (need_locking) - genl_unlock(); - return skb->len; } From 0a4b6d4ff200a553951f77f765971cb3e4c91ec0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 20 Aug 2013 15:40:47 -0700 Subject: [PATCH 0588/1368] Linux 3.10.9 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1a216126c709..4b31d6238f7d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 8 +SUBLEVEL = 9 EXTRAVERSION = NAME = TOSSUG Baby Fish From 6904756d54d4f98a9f835ed9cbc3f149ef7ac678 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 26 Jul 2013 15:04:00 +0200 Subject: [PATCH 0589/1368] KVM: s390: move kvm_guest_enter,exit closer to sie commit 2b29a9fdcb92bfc6b6f4c412d71505869de61a56 upstream. Any uaccess between guest_enter and guest_exit could trigger a page fault, the page fault handler would handle it as a guest fault and translate a user address as guest address. Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/kvm-s390.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c1c7c683fa26..698fb826e149 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -622,14 +622,25 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) kvm_s390_deliver_pending_interrupts(vcpu); vcpu->arch.sie_block->icptcode = 0; - preempt_disable(); - kvm_guest_enter(); - preempt_enable(); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); trace_kvm_s390_sie_enter(vcpu, atomic_read(&vcpu->arch.sie_block->cpuflags)); + + /* + * As PF_VCPU will be used in fault handler, between guest_enter + * and guest_exit should be no uaccess. + */ + preempt_disable(); + kvm_guest_enter(); + preempt_enable(); rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); + kvm_guest_exit(); + + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", + vcpu->arch.sie_block->icptcode); + trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); + if (rc) { if (kvm_is_ucontrol(vcpu->kvm)) { rc = SIE_INTERCEPT_UCONTROL; @@ -639,10 +650,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } } - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", - vcpu->arch.sie_block->icptcode); - trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); - kvm_guest_exit(); memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); return rc; From e0f23666f0d04cf80c18fdc7ad762d1a55187d9c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jul 2013 23:07:43 +0200 Subject: [PATCH 0590/1368] mac80211: don't wait for TX status forever commit cb236d2d713cff83d024a82b836757d9e2b50715 upstream. TX status notification can get lost, or the frames could get stuck on the queue, so don't wait for the callback from the driver forever and instead time out after half a second. Signed-off-by: Johannes Berg Signed-off-by: Luciano Coelho Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 55a42f9c4f39..5b4328dcbe4e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -31,10 +31,12 @@ #include "led.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) +#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) #define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) +#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2) #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 @@ -3470,10 +3472,13 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - ifmgd->auth_data->timeout_started = true; + auth_data->timeout_started = true; run_again(ifmgd, auth_data->timeout); } else { - auth_data->timeout_started = false; + auth_data->timeout = + round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG); + auth_data->timeout_started = true; + run_again(ifmgd, auth_data->timeout); } return 0; @@ -3510,7 +3515,11 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) assoc_data->timeout_started = true; run_again(&sdata->u.mgd, assoc_data->timeout); } else { - assoc_data->timeout_started = false; + assoc_data->timeout = + round_jiffies_up(jiffies + + IEEE80211_ASSOC_TIMEOUT_LONG); + assoc_data->timeout_started = true; + run_again(&sdata->u.mgd, assoc_data->timeout); } return 0; From 2ac3b5e4a97cd3c677ea27ce1136c21018e75cbf Mon Sep 17 00:00:00 2001 From: Jeff Wu Date: Wed, 29 May 2013 06:31:30 +0000 Subject: [PATCH 0591/1368] ACPI: add _STA evaluation at do_acpi_find_child() commit c7d9ca90aa9497f0b6e301ec67c52dd4b57a7852 upstream. Once do_acpi_find_child() has found the first matching handle, it makes the acpi_get_child() loop stop and return that handle. On some platforms, though, there are multiple devices with the same value of "_ADR" in the same namespace scope, and if one of them is enabled, the others will be disabled. For example: Address : 0x1FFFF ; path : SB_PCI0.SATA.DEV0 Address : 0x1FFFF ; path : SB_PCI0.SATA.DEV1 Address : 0x1FFFF ; path : SB_PCI0.SATA.DEV2 If DEV0 and DEV1 are disabled and DEV2 is enabled, the handle of DEV2 should be returned, but actually the function always returns the handle of DEV0. To address that issue, make do_acpi_find_child() evaluate _STA to check the device status. If a matching device object exists, but is disabled, acpi_get_child() will continue to walk the namespace in the hope of finding an enabled one. If one is found, its handle will be returned, but otherwise the function will return the handle of the disabled object found before (in case it is enabled going forward). [rjw: Changelog] Signed-off-by: Jeff Wu Signed-off-by: Rafael J. Wysocki Cc: Peter Wu Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 40a84cc6740c..51ca764cffea 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -81,13 +81,15 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) static acpi_status do_acpi_find_child(acpi_handle handle, u32 lvl_not_used, void *addr_p, void **ret_p) { - unsigned long long addr; + unsigned long long addr, sta; acpi_status status; status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr); if (ACPI_SUCCESS(status) && addr == *((u64 *)addr_p)) { *ret_p = handle; - return AE_CTRL_TERMINATE; + status = acpi_bus_get_status_handle(handle, &sta); + if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_ENABLED)) + return AE_CTRL_TERMINATE; } return AE_OK; } From 6e4fdb803584635587bd4dc00d1f8c0883a02d3b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Aug 2013 22:55:00 +0200 Subject: [PATCH 0592/1368] ACPI: Try harder to resolve _ADR collisions for bridges commit 60f75b8e97daf4a39790a20d962cb861b9220af5 upstream. In theory, under a given ACPI namespace node there should be only one child device object with _ADR whose value matches a given bus address exactly. In practice, however, there are systems in which multiple child device objects under a given parent have _ADR matching exactly the same address. In those cases we use _STA to determine which of the multiple matching devices is enabled, since some systems are known to indicate which ACPI device object to associate with the given physical (usually PCI) device this way. Unfortunately, as it turns out, there are systems in which many device objects under the same parent have _ADR matching exactly the same bus address and none of them has _STA, in which case they all should be regarded as enabled according to the spec. Still, if those device objects are supposed to represent bridges (e.g. this is the case for device objects corresponding to PCIe ports), we can try harder and skip the ones that have no child device objects in the ACPI namespace. With luck, we can avoid using device objects that we are not expected to use this way. Although this only works for bridges whose children also have ACPI namespace representation, it is sufficient to address graphics adapter detection issues on some systems, so rework the code finding a matching device ACPI handle for a given bus address to implement this idea. Introduce a new function, acpi_find_child(), taking three arguments: the ACPI handle of the device's parent, a bus address suitable for the device's bus type and a bool indicating if the device is a bridge and make it work as outlined above. Reimplement the function currently used for this purpose, acpi_get_child(), as a call to acpi_find_child() with the last argument set to 'false' and make the PCI subsystem use acpi_find_child() with the bridge information passed as the last argument to it. [Lan Tianyu notices that it is not sufficient to use pci_is_bridge() for that, because the device's subordinate pointer hasn't been set yet at this point, so use hdr_type instead.] This change fixes a regression introduced inadvertently by commit 33f767d (ACPI: Rework acpi_get_child() to be more efficient) which overlooked the fact that for acpi_walk_namespace() "post-order" means "after all children have been visited" rather than "on the way back", so for device objects without children and for namespace walks of depth 1, as in the acpi_get_child() case, the "post-order" callbacks ordering is actually the same as the ordering of "pre-order" ones. Since that commit changed the namespace walk in acpi_get_child() to terminate after finding the first matching object instead of going through all of them and returning the last one, it effectively changed the result returned by that function in some rare cases and that led to problems (the switch from a "pre-order" to a "post-order" callback was supposed to prevent that from happening, but it was ineffective). As it turns out, the systems where the change made by commit 33f767d actually matters are those where there are multiple ACPI device objects representing the same PCIe port (which effectively is a bridge). Moreover, only one of them, and the one we are expected to use, has child device objects in the ACPI namespace, so the regression can be addressed as described above. References: https://bugzilla.kernel.org/show_bug.cgi?id=60561 Reported-by: Peter Wu Tested-by: Vladimir Lalov Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Cc: Peter Wu Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 99 ++++++++++++++++++++++++++++++++++------- drivers/pci/pci-acpi.c | 15 +++++-- include/acpi/acpi_bus.h | 6 ++- 3 files changed, 98 insertions(+), 22 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 51ca764cffea..238412077c83 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -78,34 +78,99 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) return ret; } -static acpi_status do_acpi_find_child(acpi_handle handle, u32 lvl_not_used, - void *addr_p, void **ret_p) +static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used, + void *not_used, void **ret_p) { - unsigned long long addr, sta; - acpi_status status; + struct acpi_device *adev = NULL; - status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr); - if (ACPI_SUCCESS(status) && addr == *((u64 *)addr_p)) { + acpi_bus_get_device(handle, &adev); + if (adev) { *ret_p = handle; - status = acpi_bus_get_status_handle(handle, &sta); - if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_ENABLED)) - return AE_CTRL_TERMINATE; + return AE_CTRL_TERMINATE; } return AE_OK; } -acpi_handle acpi_get_child(acpi_handle parent, u64 address) +static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge) { - void *ret = NULL; + unsigned long long sta; + acpi_status status; - if (!parent) - return NULL; + status = acpi_bus_get_status_handle(handle, &sta); + if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) + return false; - acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, 1, NULL, - do_acpi_find_child, &address, &ret); - return (acpi_handle)ret; + if (is_bridge) { + void *test = NULL; + + /* Check if this object has at least one child device. */ + acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, + acpi_dev_present, NULL, NULL, &test); + return !!test; + } + return true; } -EXPORT_SYMBOL(acpi_get_child); + +struct find_child_context { + u64 addr; + bool is_bridge; + acpi_handle ret; + bool ret_checked; +}; + +static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used, + void *data, void **not_used) +{ + struct find_child_context *context = data; + unsigned long long addr; + acpi_status status; + + status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr); + if (ACPI_FAILURE(status) || addr != context->addr) + return AE_OK; + + if (!context->ret) { + /* This is the first matching object. Save its handle. */ + context->ret = handle; + return AE_OK; + } + /* + * There is more than one matching object with the same _ADR value. + * That really is unexpected, so we are kind of beyond the scope of the + * spec here. We have to choose which one to return, though. + * + * First, check if the previously found object is good enough and return + * its handle if so. Second, check the same for the object that we've + * just found. + */ + if (!context->ret_checked) { + if (acpi_extra_checks_passed(context->ret, context->is_bridge)) + return AE_CTRL_TERMINATE; + else + context->ret_checked = true; + } + if (acpi_extra_checks_passed(handle, context->is_bridge)) { + context->ret = handle; + return AE_CTRL_TERMINATE; + } + return AE_OK; +} + +acpi_handle acpi_find_child(acpi_handle parent, u64 addr, bool is_bridge) +{ + if (parent) { + struct find_child_context context = { + .addr = addr, + .is_bridge = is_bridge, + }; + + acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, 1, do_find_child, + NULL, &context, NULL); + return context.ret; + } + return NULL; +} +EXPORT_SYMBOL_GPL(acpi_find_child); static int acpi_bind_one(struct device *dev, acpi_handle handle) { diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index e4b1fb2c0f5d..336b3f94a19a 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -317,13 +317,20 @@ void acpi_pci_remove_bus(struct pci_bus *bus) /* ACPI bus type */ static int acpi_pci_find_device(struct device *dev, acpi_handle *handle) { - struct pci_dev * pci_dev; - u64 addr; + struct pci_dev *pci_dev = to_pci_dev(dev); + bool is_bridge; + u64 addr; - pci_dev = to_pci_dev(dev); + /* + * pci_is_bridge() is not suitable here, because pci_dev->subordinate + * is set only after acpi_pci_find_device() has been called for the + * given device. + */ + is_bridge = pci_dev->hdr_type == PCI_HEADER_TYPE_BRIDGE + || pci_dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; /* Please ref to ACPI spec for the syntax of _ADR */ addr = (PCI_SLOT(pci_dev->devfn) << 16) | PCI_FUNC(pci_dev->devfn); - *handle = acpi_get_child(DEVICE_ACPI_HANDLE(dev->parent), addr); + *handle = acpi_find_child(ACPI_HANDLE(dev->parent), addr, is_bridge); if (!*handle) return -ENODEV; return 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c13c919ab99e..f45b2a7800c4 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -455,7 +455,11 @@ struct acpi_pci_root { }; /* helper */ -acpi_handle acpi_get_child(acpi_handle, u64); +acpi_handle acpi_find_child(acpi_handle, u64, bool); +static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr) +{ + return acpi_find_child(handle, addr, false); +} int acpi_is_root_bridge(acpi_handle); struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev)) From 9b2c750d8e81da33cdf6e16db911faa2008652cd Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 20 Aug 2013 13:38:10 +0530 Subject: [PATCH 0593/1368] ARC: gdbserver breakage in Big-Endian configuration #1 [Based on mainline commit 502a0c775c7f0a: "ARC: pt_regs update #5"] gdbserver needs @stop_pc, served by ptrace, but fetched from pt_regs differently, based on in_brkpt_traps(), which in turn relies on additional machine state in pt_regs->event bitfield. unsigned long orig_r8:16, event:16; For big endian config, this macro was returning false, despite being in breakpoint Trap exception, causing wrong @stop_pc to be returned to gdb. Issue #1: In BE, @event above is at offset 2 in word, while a STW insn at offset 0 was used to update it. Resort to using ST insn which updates the half-word at right location. Issue #2: The union involving bitfields causes all the members to be laid out at offset 0. So with fix #1 above, ASM was now updating at offset 2, "C" code was still referencing at offset 0. Fixed by wrapping bitfield in a struct. Reported-by: Noam Camus Tested-by: Anton Kolesov Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/ptrace.h | 2 ++ arch/arc/kernel/entry.S | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 6179de7e07c2..2046a89a57cf 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -52,12 +52,14 @@ struct pt_regs { /*to distinguish bet excp, syscall, irq */ union { + struct { #ifdef CONFIG_CPU_BIG_ENDIAN /* so that assembly code is same for LE/BE */ unsigned long orig_r8:16, event:16; #else unsigned long event:16, orig_r8:16; #endif + }; long orig_r8_word; }; }; diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 0c6d664d4a83..6dbe359c760d 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -498,7 +498,7 @@ tracesys_exit: trap_with_param: ; stop_pc info by gdb needs this info - stw orig_r8_IS_BRKPT, [sp, PT_orig_r8] + st orig_r8_IS_BRKPT, [sp, PT_orig_r8] mov r0, r12 lr r1, [efa] @@ -723,7 +723,7 @@ not_exception: ; things to what they were, before returning from L2 context ;---------------------------------------------------------------- - ldw r9, [sp, PT_orig_r8] ; get orig_r8 to make sure it is + ld r9, [sp, PT_orig_r8] ; get orig_r8 to make sure it is brne r9, orig_r8_IS_IRQ2, 149f ; infact a L2 ISR ret path ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) From 16a06df257641d466895454f0320b8ccb1583b21 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 20 Aug 2013 13:38:11 +0530 Subject: [PATCH 0594/1368] ARC: gdbserver breakage in Big-Endian configuration #2 [Based on mainline commit 352c1d95e3220d0: "ARC: stop using pt_regs->orig_r8"] Stop using orig_r8 as it could get clobbered by ST in trap_with_param, and further it is semantically not needed either. Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/syscall.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h index 33ab3048e9b2..29de09804306 100644 --- a/arch/arc/include/asm/syscall.h +++ b/arch/arc/include/asm/syscall.h @@ -18,7 +18,7 @@ static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { if (user_mode(regs) && in_syscall(regs)) - return regs->orig_r8; + return regs->r8; else return -1; } @@ -26,8 +26,7 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs) static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - /* XXX: I can't fathom how pt_regs->r8 will be clobbered ? */ - regs->r8 = regs->orig_r8; + regs->r0 = regs->orig_r0; } static inline long From dfc855921b899a61b9cef8e48d74441a2876e75f Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 18 Apr 2013 10:13:21 +0200 Subject: [PATCH 0595/1368] ARM: at91: at91sam9x5 RTC is not compatible with at91rm9200 one commit 23fb05c688a8dcb0cf6a4d8d819cffeca82e5c54 upstream. Due to a bug with RTC IMR, we cannot consider at91sam9x5 RTC compatible with the previous one. Modify DT compatibility string, even if the driver is not yet modified to take it into account. Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91sam9x5.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 1145ac330fb7..b5833d1f7cff 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -643,7 +643,7 @@ spi1: spi@f0004000 { }; rtc@fffffeb0 { - compatible = "atmel,at91rm9200-rtc"; + compatible = "atmel,at91sam9x5-rtc"; reg = <0xfffffeb0 0x40>; interrupts = <1 4 7>; status = "disabled"; From da989ee1c7702050741a551df6866cc1588f277b Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 3 May 2013 18:29:30 +0200 Subject: [PATCH 0596/1368] NFC: llcp: Fix non blocking sockets connections commit b4011239a08e7e6c2c6e970dfa9e8ecb73139261 upstream. Without the new LLCP_CONNECTING state, non blocking sockets will be woken up with a POLLHUP right after calling connect() because their state is stuck at LLCP_CLOSED. That prevents userspace from implementing any proper non blocking socket based NFC p2p client. Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/llcp.h | 1 + net/nfc/llcp_sock.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index ff8c434f7df8..f924dd209b31 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -19,6 +19,7 @@ enum llcp_state { LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ + LLCP_CONNECTING, LLCP_CLOSED, LLCP_BOUND, LLCP_LISTEN, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 380253eccb74..7522c3708723 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -571,7 +571,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if (sock_writeable(sk)) + if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -722,14 +722,16 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, if (ret) goto sock_unlink; + sk->sk_state = LLCP_CONNECTING; + ret = sock_wait_state(sk, LLCP_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); - if (ret) + if (ret && ret != -EINPROGRESS) goto sock_unlink; release_sock(sk); - return 0; + return ret; sock_unlink: nfc_llcp_put_ssap(local, llcp_sock->ssap); From 446b977ec00db5f4bfb4187bca300dd38bfe7e7d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 26 May 2013 20:47:53 +0300 Subject: [PATCH 0597/1368] iwlwifi: mvm: correctly configure MCAST in AP mode commit 86a91ec757338edbce51de5dabd7afb0366f485c upstream. The AP mode needs to use the MCAST fifo for the MCAST frames sent after the DTIM. This fifo needs to be configured with the same parameters as the VOICE FIFO. A separate SCD queue is mapped to this fifo - the cab_queue (cab stands for Content After Beacon). This queue isn't connected to any station, but rather to the MAC context. This queue should (and is already) be set as the MCAST queue - this is part of the of MAC context command. Signed-off-by: Emmanuel Grumbach Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/fw-api-mac.h | 8 +++++++- drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c | 6 +++++- drivers/net/wireless/iwlwifi/mvm/mvm.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-mac.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-mac.h index d68640ea41d4..98b1feb43d38 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api-mac.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-mac.h @@ -71,7 +71,13 @@ #define MAC_INDEX_MIN_DRIVER 0 #define NUM_MAC_INDEX_DRIVER MAC_INDEX_AUX -#define AC_NUM 4 /* Number of access categories */ +enum iwl_ac { + AC_BK, + AC_BE, + AC_VI, + AC_VO, + AC_NUM, +}; /** * enum iwl_mac_protection_flags - MAC context flags diff --git a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c index b2cc3d98e0f7..3b69045e3c8d 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c @@ -362,7 +362,7 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif) break; case NL80211_IFTYPE_AP: iwl_trans_ac_txq_enable(mvm->trans, vif->cab_queue, - IWL_MVM_TX_FIFO_VO); + IWL_MVM_TX_FIFO_MCAST); /* fall through */ default: for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) @@ -550,6 +550,10 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, cmd->ac[i].fifos_mask = BIT(iwl_mvm_ac_to_tx_fifo[i]); } + /* in AP mode, the MCAST FIFO takes the EDCA params from VO */ + if (vif->type == NL80211_IFTYPE_AP) + cmd->ac[AC_VO].fifos_mask |= BIT(IWL_MVM_TX_FIFO_MCAST); + if (vif->bss_conf.qos) cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA); diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index 9f46b23801bc..80862319c42a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -88,6 +88,7 @@ enum iwl_mvm_tx_fifo { IWL_MVM_TX_FIFO_BE, IWL_MVM_TX_FIFO_VI, IWL_MVM_TX_FIFO_VO, + IWL_MVM_TX_FIFO_MCAST = 5, }; extern struct ieee80211_ops iwl_mvm_hw_ops; From 21e0f1b26a6ef640c02f09c4390057ee726de389 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 2 Jun 2013 19:54:01 +0300 Subject: [PATCH 0598/1368] iwlwifi: mvm: fix MCAST in AP mode commit 9116a3683902583a302ac5dcb283416d504d9bb4 upstream. In multicast, there is no retries nor RTS since there is no specific recipient that can ACK or send CTS. This means that we must not use the rate scale table for multicast frames. This true for any frame that doesn't have a valid ieee80211_sta pointer. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index 48c1891e3df6..b9ba4e71ea4a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -175,7 +175,7 @@ static void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, * table is controlled by LINK_QUALITY commands */ - if (ieee80211_is_data(fc)) { + if (ieee80211_is_data(fc) && sta) { tx_cmd->initial_rate_index = 0; tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_STA_RATE); return; From 06a80c46e85d004a18a51529937049c533e492cd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 9 Jun 2013 12:59:24 +0300 Subject: [PATCH 0599/1368] iwlwifi: mvm: properly tell the fw that a STA is awake commit 5af01772ee1d6e96849adf728ff837bd71b119c0 upstream. The firmware API wasn't being used correctly, fix that. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/sta.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 736b50bc962c..35c8ed5e58a4 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -1296,17 +1296,11 @@ void iwl_mvm_sta_modify_ps_wake(struct iwl_mvm *mvm, struct iwl_mvm_add_sta_cmd cmd = { .add_modify = STA_MODE_MODIFY, .sta_id = mvmsta->sta_id, - .modify_mask = STA_MODIFY_SLEEPING_STA_TX_COUNT, - .sleep_state_flags = cpu_to_le16(STA_SLEEP_STATE_AWAKE), + .station_flags_msk = cpu_to_le32(STA_FLG_PS), .mac_id_n_color = cpu_to_le32(mvmsta->mac_id_n_color), }; int ret; - /* - * Same modify mask for sleep_tx_count and sleep_state_flags but this - * should be fine since if we set the STA as "awake", then - * sleep_tx_count is not relevant. - */ ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, sizeof(cmd), &cmd); if (ret) IWL_ERR(mvm, "Failed to send ADD_STA command (%d)\n", ret); From 0710345e2d2eca0c98b75dfe7c1a8a50c04d4829 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 9 Jun 2013 13:00:12 +0300 Subject: [PATCH 0600/1368] iwlwifi: mvm: don't set the MCAST queue in STA's queue list commit 837fb69f10588caafc883c4473a864660e1403ce upstream. The MCAST queue should be enabled after DTIM only. According to fw API, the MCAST must not be attached to any station, but should appear in the mcast_qid of the AP's mac context only. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c | 5 +---- drivers/net/wireless/iwlwifi/mvm/sta.c | 3 --- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c index 3b69045e3c8d..d8e858cc8dd9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c @@ -193,14 +193,11 @@ static void iwl_mvm_mac_iface_iterator(void *_data, u8 *mac, u32 iwl_mvm_mac_get_queues_mask(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { - u32 qmask, ac; + u32 qmask = 0, ac; if (vif->type == NL80211_IFTYPE_P2P_DEVICE) return BIT(IWL_MVM_OFFCHANNEL_QUEUE); - qmask = (vif->cab_queue != IEEE80211_INVAL_HW_QUEUE) ? - BIT(vif->cab_queue) : 0; - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) if (vif->hw_queue[ac] != IEEE80211_INVAL_HW_QUEUE) qmask |= BIT(vif->hw_queue[ac]); diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 35c8ed5e58a4..68f0bbe1f381 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -226,9 +226,6 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, if (vif->hw_queue[i] != IEEE80211_INVAL_HW_QUEUE) mvm_sta->tfd_queue_msk |= BIT(vif->hw_queue[i]); - if (vif->cab_queue != IEEE80211_INVAL_HW_QUEUE) - mvm_sta->tfd_queue_msk |= BIT(vif->cab_queue); - /* for HW restart - need to reset the seq_number etc... */ memset(mvm_sta->tid_data, 0, sizeof(mvm_sta->tid_data)); From 07e915c91b08d06db1e38241bc517e9b2e4fa3bd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 13 Jun 2013 10:07:47 +0300 Subject: [PATCH 0601/1368] iwlwifi: mvm: take the seqno from packet if transmit failed commit ebea2f32e814445f94f9e087b646f1cf4d55fa5a upstream. The fw is unreliable in all the cases in which the packet wasn't sent. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index b9ba4e71ea4a..a2e6112e91e9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -610,8 +610,8 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, !(info->flags & IEEE80211_TX_STAT_ACK)) info->flags |= IEEE80211_TX_STAT_AMPDU_NO_BACK; - /* W/A FW bug: seq_ctl is wrong when the queue is flushed */ - if (status == TX_STATUS_FAIL_FIFO_FLUSHED) { + /* W/A FW bug: seq_ctl is wrong when the status isn't success */ + if (status != TX_STATUS_SUCCESS) { struct ieee80211_hdr *hdr = (void *)skb->data; seq_ctl = le16_to_cpu(hdr->seq_ctrl); } From 8351cafff00be058ce4db3753010632e29d104f3 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 24 Jun 2013 15:44:03 +0300 Subject: [PATCH 0602/1368] iwlwifi: mvm: unregister leds when registration failed commit b7327d89ae694a89f9934d428bde520b77b3131c upstream. This was missing and prevented any further attempts to load the module. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index b7e95b0a42b7..f7545e06ce2a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -243,7 +243,11 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) if (ret) return ret; - return ieee80211_register_hw(mvm->hw); + ret = ieee80211_register_hw(mvm->hw); + if (ret) + iwl_mvm_leds_exit(mvm); + + return ret; } static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, From a76139c02f366c601a6895c1a1c1c6d26ec7d469 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Jun 2013 16:06:08 +0200 Subject: [PATCH 0603/1368] iwlwifi: bump required firmware API version for 3160/7260 commit a2d0909a687b4d250cc2b7481072e361678745ba upstream. As the firmware API has changed significantly and we don't have support code for the old APIs, bump the version to be able to release the version 7 API firmware. Unfortunately this means that the driver in 3.9 and 3.10 can't work, but that's still better than crashing the device/driver there. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/iwl-7000.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index 50263e87fe15..dc94d44d95cd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -67,16 +67,16 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL7260_UCODE_API_MAX 6 -#define IWL3160_UCODE_API_MAX 6 +#define IWL7260_UCODE_API_MAX 7 +#define IWL3160_UCODE_API_MAX 7 /* Oldest version we won't warn about */ -#define IWL7260_UCODE_API_OK 6 -#define IWL3160_UCODE_API_OK 6 +#define IWL7260_UCODE_API_OK 7 +#define IWL3160_UCODE_API_OK 7 /* Lowest firmware API version supported */ -#define IWL7260_UCODE_API_MIN 6 -#define IWL3160_UCODE_API_MIN 6 +#define IWL7260_UCODE_API_MIN 7 +#define IWL3160_UCODE_API_MIN 7 /* NVM versions */ #define IWL7260_NVM_VERSION 0x0a1d From b460440f81f11175bdc657ee28a2421f8a02d209 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2013 11:44:49 +0200 Subject: [PATCH 0604/1368] iwlwifi: mvm: adjust firmware D3 configuration API commit dfcb4c3aacedee6838e436fb575b31e138505203 upstream. The D3 firmware API changed to include a new field, adjust the driver to it to avoid getting an NMI when configuring. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h index 51e015d1dfb2..6f8b2c16ae17 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-d3.h @@ -75,13 +75,15 @@ enum iwl_d3_wakeup_flags { * struct iwl_d3_manager_config - D3 manager configuration command * @min_sleep_time: minimum sleep time (in usec) * @wakeup_flags: wakeup flags, see &enum iwl_d3_wakeup_flags + * @wakeup_host_timer: force wakeup after this many seconds * * The structure is used for the D3_CONFIG_CMD command. */ struct iwl_d3_manager_config { __le32 min_sleep_time; __le32 wakeup_flags; -} __packed; /* D3_MANAGER_CONFIG_CMD_S_VER_3 */ + __le32 wakeup_host_timer; +} __packed; /* D3_MANAGER_CONFIG_CMD_S_VER_4 */ /* TODO: OFFLOADS_QUERY_API_S_VER_1 */ From b0cfaffa7e183719436a7a2b1b814afb5748c05c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 7 Jun 2013 15:07:48 +0900 Subject: [PATCH 0605/1368] tracing: Do not call kmem_cache_free() on allocation failure commit aaf6ac0f0871cb7fc0f28f3a00edf329bc7adc29 upstream. There's no point calling it when _alloc() failed. Link: http://lkml.kernel.org/r/1370585268-29169-1-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 69532630a2d6..5f9a002d17d1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -114,7 +114,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field = kmem_cache_alloc(field_cachep, GFP_TRACE); if (!field) - goto err; + return -ENOMEM; field->name = name; field->type = type; @@ -131,11 +131,6 @@ static int __trace_define_field(struct list_head *head, const char *type, list_add(&field->link, head); return 0; - -err: - kmem_cache_free(field_cachep, field); - - return -ENOMEM; } int trace_define_field(struct ftrace_event_call *call, const char *type, From e899e7e4f83ecd606e85d355410d724ec76542cb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 9 Jul 2013 18:35:26 +0900 Subject: [PATCH 0606/1368] tracing/kprobe: Wait for disabling all running kprobe handlers commit a232e270dcb55a70ad3241bc6fc160fd9b5c9e6c upstream. Wait for disabling all running kprobe handlers when a kprobe event is disabled, since the caller, trace_remove_event_call() supposes that a removing event is disabled completely by disabling the event. With this change, ftrace can ensure that there is no running event handlers after disabling it. Link: http://lkml.kernel.org/r/20130709093526.20138.93100.stgit@mhiramat-M0-7522 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9f46e98ba8f2..c209c6ec34ca 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -281,6 +281,8 @@ trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) static int disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { + struct ftrace_event_file **old = NULL; + int wait = 0; int ret = 0; mutex_lock(&probe_enable_lock); @@ -314,10 +316,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) } rcu_assign_pointer(tp->files, new); - - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); + wait = 1; } else tp->flags &= ~TP_FLAG_PROFILE; @@ -326,11 +325,25 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) disable_kretprobe(&tp->rp); else disable_kprobe(&tp->rp.kp); + wait = 1; } out_unlock: mutex_unlock(&probe_enable_lock); + if (wait) { + /* + * Synchronize with kprobe_trace_func/kretprobe_trace_func + * to ensure disabled (all running handlers are finished). + * This is not only for kfree(), but also the caller, + * trace_remove_event_call() supposes it for releasing + * event_call related objects, which will be accessed in + * the kprobe_trace_func/kretprobe_trace_func. + */ + synchronize_sched(); + kfree(old); /* Ignored if link == NULL */ + } + return ret; } From e2d4dbe02175e54616fe52c4e9c367199f8ecdaa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jul 2013 17:25:54 +0200 Subject: [PATCH 0607/1368] tracing: Introduce trace_create_cpu_file() and tracing_get_cpu() commit 649e9c70da6bfbeb563193a35d3424a5aa7c0d38 upstream. Every "file_operations" used by tracing_init_debugfs_percpu is buggy. f_op->open/etc does: 1. struct trace_cpu *tc = inode->i_private; struct trace_array *tr = tc->tr; 2. trace_array_get(tr) or fail; 3. do_something(tc); But tc (and tr) can be already freed before trace_array_get() is called. And it doesn't matter whether this file is per-cpu or it was created by init_tracer_debugfs(), free_percpu() or kfree() are equally bad. Note that even 1. is not safe, the freed memory can be unmapped. But even if it was safe trace_array_get() can wrongly succeed if we also race with the next new_instance_create() which can re-allocate the same tr, or tc was overwritten and ->tr points to the valid tr. In this case 3. uses the freed/reused memory. Add the new trivial helper, trace_create_cpu_file() which simply calls trace_create_file() and encodes "cpu" in "struct inode". Another helper, tracing_get_cpu() will be used to read cpu_nr-or-RING_BUFFER_ALL_CPUS. The patch abuses ->i_cdev to encode the number, it is never used unless the file is S_ISCHR(). But we could use something else, say, i_bytes or even ->d_fsdata. In any case this hack is hidden inside these 2 helpers, it would be trivial to change them if needed. This patch only changes tracing_init_debugfs_percpu() to use the new trace_create_cpu_file(), the next patches will change file_operations. Note: tracing_get_cpu(inode) is always safe but you can't trust the result unless trace_array_get() was called, without trace_types_lock which acts as a barrier it can wrongly return RING_BUFFER_ALL_CPUS. Link: http://lkml.kernel.org/r/20130723152554.GA23710@redhat.com Cc: Al Viro Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 50 +++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06a5bcef373f..f064dfb973fe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2834,6 +2834,17 @@ static int s_show(struct seq_file *m, void *v) return 0; } +/* + * Should be used after trace_array_get(), trace_types_lock + * ensures that i_cdev was already initialized. + */ +static inline int tracing_get_cpu(struct inode *inode) +{ + if (inode->i_cdev) /* See trace_create_cpu_file() */ + return (long)inode->i_cdev - 1; + return RING_BUFFER_ALL_CPUS; +} + static const struct seq_operations tracer_seq_ops = { .start = s_start, .next = s_next, @@ -5521,6 +5532,17 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) return tr->percpu_dir; } +static struct dentry * +trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, + void *data, long cpu, const struct file_operations *fops) +{ + struct dentry *ret = trace_create_file(name, mode, parent, data, fops); + + if (ret) /* See tracing_get_cpu() */ + ret->d_inode->i_cdev = (void *)(cpu + 1); + return ret; +} + static void tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) { @@ -5540,28 +5562,28 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) } /* per cpu trace_pipe */ - trace_create_file("trace_pipe", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_pipe_fops); + trace_create_cpu_file("trace_pipe", 0444, d_cpu, + &data->trace_cpu, cpu, &tracing_pipe_fops); /* per cpu trace */ - trace_create_file("trace", 0644, d_cpu, - (void *)&data->trace_cpu, &tracing_fops); + trace_create_cpu_file("trace", 0644, d_cpu, + &data->trace_cpu, cpu, &tracing_fops); - trace_create_file("trace_pipe_raw", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_buffers_fops); + trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu, + &data->trace_cpu, cpu, &tracing_buffers_fops); - trace_create_file("stats", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_stats_fops); + trace_create_cpu_file("stats", 0444, d_cpu, + &data->trace_cpu, cpu, &tracing_stats_fops); - trace_create_file("buffer_size_kb", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_entries_fops); + trace_create_cpu_file("buffer_size_kb", 0444, d_cpu, + &data->trace_cpu, cpu, &tracing_entries_fops); #ifdef CONFIG_TRACER_SNAPSHOT - trace_create_file("snapshot", 0644, d_cpu, - (void *)&data->trace_cpu, &snapshot_fops); + trace_create_cpu_file("snapshot", 0644, d_cpu, + &data->trace_cpu, cpu, &snapshot_fops); - trace_create_file("snapshot_raw", 0444, d_cpu, - (void *)&data->trace_cpu, &snapshot_raw_fops); + trace_create_cpu_file("snapshot_raw", 0444, d_cpu, + &data->trace_cpu, cpu, &snapshot_raw_fops); #endif } From 23b625fa111e7358ce84672c4444c068b2b1e204 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jul 2013 17:25:57 +0200 Subject: [PATCH 0608/1368] tracing: Change tracing_pipe_fops() to rely on tracing_get_cpu() commit 15544209cb0b5312e5220a9337a1fe61d1a1f2d9 upstream. tracing_open_pipe() is racy, the memory inode->i_private points to can be already freed. Change debugfs_create_file("trace_pipe", data) callers to to pass "data = tr", tracing_open_pipe() can use tracing_get_cpu(). Link: http://lkml.kernel.org/r/20130723152557.GA23717@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f064dfb973fe..8e8bb0e25489 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3950,8 +3950,7 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, static int tracing_open_pipe(struct inode *inode, struct file *filp) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int ret = 0; @@ -3997,9 +3996,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (trace_clocks[tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; - iter->cpu_file = tc->cpu; - iter->tr = tc->tr; - iter->trace_buffer = &tc->tr->trace_buffer; + iter->tr = tr; + iter->trace_buffer = &tr->trace_buffer; + iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); filp->private_data = iter; @@ -4022,8 +4021,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; mutex_lock(&trace_types_lock); @@ -5563,7 +5561,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) /* per cpu trace_pipe */ trace_create_cpu_file("trace_pipe", 0444, d_cpu, - &data->trace_cpu, cpu, &tracing_pipe_fops); + tr, cpu, &tracing_pipe_fops); /* per cpu trace */ trace_create_cpu_file("trace", 0644, d_cpu, @@ -6149,7 +6147,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) (void *)&tr->trace_cpu, &tracing_fops); trace_create_file("trace_pipe", 0444, d_tracer, - (void *)&tr->trace_cpu, &tracing_pipe_fops); + tr, &tracing_pipe_fops); trace_create_file("buffer_size_kb", 0644, d_tracer, (void *)&tr->trace_cpu, &tracing_entries_fops); From 92b6279e284257fb7908bf4d24956f5af13aaa2d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jul 2013 17:26:00 +0200 Subject: [PATCH 0609/1368] tracing: Change tracing_buffers_fops to rely on tracing_get_cpu() commit 46ef2be0d1d5ccea0c41bb606143586daadd537c upstream. tracing_buffers_open() is racy, the memory inode->i_private points to can be already freed. Change debugfs_create_file("trace_pipe_raw", data) caller to pass "data = tr", tracing_buffers_open() can use tracing_get_cpu(). Change debugfs_create_file("snapshot_raw_fops", data) caller too, this file uses tracing_buffers_open/release. Link: http://lkml.kernel.org/r/20130723152600.GA23720@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8e8bb0e25489..7dcd67332f57 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4941,8 +4941,7 @@ static const struct file_operations snapshot_raw_fops = { static int tracing_buffers_open(struct inode *inode, struct file *filp) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct ftrace_buffer_info *info; int ret; @@ -4961,7 +4960,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_lock(&trace_types_lock); info->iter.tr = tr; - info->iter.cpu_file = tc->cpu; + info->iter.cpu_file = tracing_get_cpu(inode); info->iter.trace = tr->current_trace; info->iter.trace_buffer = &tr->trace_buffer; info->spare = NULL; @@ -5568,7 +5567,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) &data->trace_cpu, cpu, &tracing_fops); trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu, - &data->trace_cpu, cpu, &tracing_buffers_fops); + tr, cpu, &tracing_buffers_fops); trace_create_cpu_file("stats", 0444, d_cpu, &data->trace_cpu, cpu, &tracing_stats_fops); @@ -5581,7 +5580,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) &data->trace_cpu, cpu, &snapshot_fops); trace_create_cpu_file("snapshot_raw", 0444, d_cpu, - &data->trace_cpu, cpu, &snapshot_raw_fops); + tr, cpu, &snapshot_raw_fops); #endif } From 6dafb0f13798c998ec344bf910b2d89c51961481 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jul 2013 17:26:03 +0200 Subject: [PATCH 0610/1368] tracing: Change tracing_stats_fops to rely on tracing_get_cpu() commit 4d3435b8a4c3357695e09c5e7a3bf73a19fca5b0 upstream. tracing_open_generic_tc() is racy, the memory inode->i_private points to can be already freed. 1. Change one of its users, tracing_stats_fops, to use tracing_*_generic_tr() instead. 2. Change trace_create_cpu_file("stats", data) to pass "data = tr". 3. Change tracing_stats_read() to use tracing_get_cpu(). Link: http://lkml.kernel.org/r/20130723152603.GA23727@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7dcd67332f57..801d72b84a2a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2973,7 +2973,6 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) filp->private_data = inode->i_private; return 0; - } int tracing_open_generic_tc(struct inode *inode, struct file *filp) @@ -5277,14 +5276,14 @@ static ssize_t tracing_stats_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; - struct trace_array *tr = tc->tr; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; struct trace_buffer *trace_buf = &tr->trace_buffer; + int cpu = tracing_get_cpu(inode); struct trace_seq *s; unsigned long cnt; unsigned long long t; unsigned long usec_rem; - int cpu = tc->cpu; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) @@ -5337,10 +5336,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf, } static const struct file_operations tracing_stats_fops = { - .open = tracing_open_generic_tc, + .open = tracing_open_generic_tr, .read = tracing_stats_read, .llseek = generic_file_llseek, - .release = tracing_release_generic_tc, + .release = tracing_release_generic_tr, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -5570,7 +5569,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) tr, cpu, &tracing_buffers_fops); trace_create_cpu_file("stats", 0444, d_cpu, - &data->trace_cpu, cpu, &tracing_stats_fops); + tr, cpu, &tracing_stats_fops); trace_create_cpu_file("buffer_size_kb", 0444, d_cpu, &data->trace_cpu, cpu, &tracing_entries_fops); From e5dd03c411713c066b94e1d448359e5216115c1c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jul 2013 17:26:06 +0200 Subject: [PATCH 0611/1368] tracing: Change tracing_entries_fops to rely on tracing_get_cpu() commit 0bc392ee46d0fd8e6b678457ef71f074f19a03c5 upstream. tracing_open_generic_tc() is racy, the memory inode->i_private points to can be already freed. 1. Change its last user, tracing_entries_fops, to use tracing_*_generic_tr() instead. 2. Change debugfs_create_file("buffer_size_kb", data) callers to pass "data = tr". 3. Change tracing_entries_read() and tracing_entries_write() to use tracing_get_cpu(). 4. Kill the no longer used tracing_open_generic_tc() and tracing_release_generic_tc(). Link: http://lkml.kernel.org/r/20130723152606.GA23730@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 49 +++++++++++--------------------------------- 1 file changed, 12 insertions(+), 37 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 801d72b84a2a..83fba3c576e8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2975,23 +2975,6 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) return 0; } -int tracing_open_generic_tc(struct inode *inode, struct file *filp) -{ - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - - if (tracing_disabled) - return -ENODEV; - - if (trace_array_get(tr) < 0) - return -ENODEV; - - filp->private_data = inode->i_private; - - return 0; - -} - static int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; @@ -3045,15 +3028,6 @@ static int tracing_release_generic_tr(struct inode *inode, struct file *file) return 0; } -static int tracing_release_generic_tc(struct inode *inode, struct file *file) -{ - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - - trace_array_put(tr); - return 0; -} - static int tracing_single_release_tr(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -4374,15 +4348,16 @@ static ssize_t tracing_entries_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; - struct trace_array *tr = tc->tr; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; + int cpu = tracing_get_cpu(inode); char buf[64]; int r = 0; ssize_t ret; mutex_lock(&trace_types_lock); - if (tc->cpu == RING_BUFFER_ALL_CPUS) { + if (cpu == RING_BUFFER_ALL_CPUS) { int cpu, buf_size_same; unsigned long size; @@ -4409,7 +4384,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, } else r = sprintf(buf, "X\n"); } else - r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10); + r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10); mutex_unlock(&trace_types_lock); @@ -4421,7 +4396,8 @@ static ssize_t tracing_entries_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; unsigned long val; int ret; @@ -4435,8 +4411,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, /* value is in KB */ val <<= 10; - - ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu); + ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); if (ret < 0) return ret; @@ -4884,11 +4859,11 @@ static const struct file_operations tracing_pipe_fops = { }; static const struct file_operations tracing_entries_fops = { - .open = tracing_open_generic_tc, + .open = tracing_open_generic_tr, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, - .release = tracing_release_generic_tc, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_total_entries_fops = { @@ -5572,7 +5547,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) tr, cpu, &tracing_stats_fops); trace_create_cpu_file("buffer_size_kb", 0444, d_cpu, - &data->trace_cpu, cpu, &tracing_entries_fops); + tr, cpu, &tracing_entries_fops); #ifdef CONFIG_TRACER_SNAPSHOT trace_create_cpu_file("snapshot", 0644, d_cpu, @@ -6148,7 +6123,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) tr, &tracing_pipe_fops); trace_create_file("buffer_size_kb", 0644, d_tracer, - (void *)&tr->trace_cpu, &tracing_entries_fops); + tr, &tracing_entries_fops); trace_create_file("buffer_total_size_kb", 0444, d_tracer, tr, &tracing_total_entries_fops); From 7272711a2fc64dd6ef980630e129d8731897ee56 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jul 2013 17:26:10 +0200 Subject: [PATCH 0612/1368] tracing: Change tracing_fops/snapshot_fops to rely on tracing_get_cpu() commit 6484c71cbc170634fa131b6d022d86d61686b88b upstream. tracing_open() and tracing_snapshot_open() are racy, the memory inode->i_private points to can be already freed. Convert these last users of "inode->i_private == trace_cpu" to use "i_private = trace_array" and rely on tracing_get_cpu(). v2: incorporate the fix from Steven, tracing_release() must not blindly dereference file->private_data unless we know that the file was opened for reading. Link: http://lkml.kernel.org/r/20130723152610.GA23737@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 50 +++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 83fba3c576e8..0582a01a81e3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2853,9 +2853,9 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct trace_array *tr, struct trace_cpu *tc, - struct inode *inode, struct file *file, bool snapshot) +__tracing_open(struct inode *inode, struct file *file, bool snapshot) { + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int cpu; @@ -2896,8 +2896,8 @@ __tracing_open(struct trace_array *tr, struct trace_cpu *tc, iter->trace_buffer = &tr->trace_buffer; iter->snapshot = snapshot; iter->pos = -1; + iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); - iter->cpu_file = tc->cpu; /* Notify the tracer early; before we stop tracing. */ if (iter->trace && iter->trace->open) @@ -2977,22 +2977,18 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) static int tracing_release(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; struct seq_file *m = file->private_data; struct trace_iterator *iter; - struct trace_array *tr; int cpu; - /* Writes do not use seq_file, need to grab tr from inode */ if (!(file->f_mode & FMODE_READ)) { - struct trace_cpu *tc = inode->i_private; - - trace_array_put(tc->tr); + trace_array_put(tr); return 0; } + /* Writes do not use seq_file */ iter = m->private; - tr = iter->tr; - mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { @@ -3039,8 +3035,7 @@ static int tracing_single_release_tr(struct inode *inode, struct file *file) static int tracing_open(struct inode *inode, struct file *file) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int ret = 0; @@ -3048,16 +3043,17 @@ static int tracing_open(struct inode *inode, struct file *file) return -ENODEV; /* If this file was open for write, then erase contents */ - if ((file->f_mode & FMODE_WRITE) && - (file->f_flags & O_TRUNC)) { - if (tc->cpu == RING_BUFFER_ALL_CPUS) + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { + int cpu = tracing_get_cpu(inode); + + if (cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->trace_buffer); else - tracing_reset(&tr->trace_buffer, tc->cpu); + tracing_reset(&tr->trace_buffer, cpu); } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(tr, tc, inode, file, false); + iter = __tracing_open(inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) @@ -4672,8 +4668,7 @@ struct ftrace_buffer_info { #ifdef CONFIG_TRACER_SNAPSHOT static int tracing_snapshot_open(struct inode *inode, struct file *file) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; struct seq_file *m; int ret = 0; @@ -4682,7 +4677,7 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) return -ENODEV; if (file->f_mode & FMODE_READ) { - iter = __tracing_open(tr, tc, inode, file, true); + iter = __tracing_open(inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { @@ -4699,8 +4694,8 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) ret = 0; iter->tr = tr; - iter->trace_buffer = &tc->tr->max_buffer; - iter->cpu_file = tc->cpu; + iter->trace_buffer = &tr->max_buffer; + iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; } @@ -5517,7 +5512,6 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, static void tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) { - struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu); struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); struct dentry *d_cpu; char cpu_dir[30]; /* 30 characters should be more than enough */ @@ -5538,7 +5532,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) /* per cpu trace */ trace_create_cpu_file("trace", 0644, d_cpu, - &data->trace_cpu, cpu, &tracing_fops); + tr, cpu, &tracing_fops); trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu, tr, cpu, &tracing_buffers_fops); @@ -5551,7 +5545,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) #ifdef CONFIG_TRACER_SNAPSHOT trace_create_cpu_file("snapshot", 0644, d_cpu, - &data->trace_cpu, cpu, &snapshot_fops); + tr, cpu, &snapshot_fops); trace_create_cpu_file("snapshot_raw", 0444, d_cpu, tr, cpu, &snapshot_raw_fops); @@ -6117,7 +6111,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) tr, &tracing_iter_fops); trace_create_file("trace", 0644, d_tracer, - (void *)&tr->trace_cpu, &tracing_fops); + tr, &tracing_fops); trace_create_file("trace_pipe", 0444, d_tracer, tr, &tracing_pipe_fops); @@ -6138,11 +6132,11 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) &trace_clock_fops); trace_create_file("tracing_on", 0644, d_tracer, - tr, &rb_simple_fops); + tr, &rb_simple_fops); #ifdef CONFIG_TRACER_SNAPSHOT trace_create_file("snapshot", 0644, d_tracer, - (void *)&tr->trace_cpu, &snapshot_fops); + tr, &snapshot_fops); #endif for_each_tracing_cpu(cpu) From 29632b10ef02a31dec2918177f6d4244e66d635d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 23 Jul 2013 22:06:15 -0400 Subject: [PATCH 0613/1368] ftrace: Add check for NULL regs if ops has SAVE_REGS set commit 195a8afc7ac962f8da795549fe38e825f1372b0d upstream. If a ftrace ops is registered with the SAVE_REGS flag set, and there's already a ops registered to one of its functions but without the SAVE_REGS flag, there's a small race window where the SAVE_REGS ops gets added to the list of callbacks to call for that function before the callback trampoline gets set to save the regs. The problem is, the function is not currently saving regs, which opens a small race window where the ops that is expecting regs to be passed to it, wont. This can cause a crash if the callback were to reference the regs, as the SAVE_REGS guarantees that regs will be set. To fix this, we add a check in the loop case where it checks if the ops has the SAVE_REGS flag set, and if so, it will ignore it if regs is not set. Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c508ff33c62..13f12351b350 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1416,12 +1416,22 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, * the hashes are freed with call_rcu_sched(). */ static int -ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { struct ftrace_hash *filter_hash; struct ftrace_hash *notrace_hash; int ret; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + /* + * There's a small race when adding ops that the ftrace handler + * that wants regs, may be called without them. We can not + * allow that handler to be called if regs is NULL. + */ + if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS)) + return 0; +#endif + filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); @@ -4188,7 +4198,7 @@ static inline void ftrace_startup_enable(int command) { } # define ftrace_shutdown_sysctl() do { } while (0) static inline int -ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { return 1; } @@ -4211,7 +4221,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, do_for_each_ftrace_op(op, ftrace_control_list) { if (!(op->flags & FTRACE_OPS_FL_STUB) && !ftrace_function_local_disabled(op) && - ftrace_ops_test(op, ip)) + ftrace_ops_test(op, ip, regs)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); trace_recursion_clear(TRACE_CONTROL_BIT); @@ -4244,7 +4254,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, */ preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { - if (ftrace_ops_test(op, ip)) + if (ftrace_ops_test(op, ip, regs)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); preempt_enable_notrace(); From fdb65fe265a389144db73cca023266dd6d5ff8d9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 26 Jul 2013 19:25:32 +0200 Subject: [PATCH 0614/1368] tracing: Turn event/id->i_private into call->event.type commit 1a11126bcb7c93c289bf3218fa546fd3b0c0df8b upstream. event_id_read() is racy, ftrace_event_call can be already freed by trace_remove_event_call() callers. Change event_create_dir() to pass "data = call->event.type", this is all event_id_read() needs. ftrace_event_id_fops no longer needs tracing_open_generic(). We add the new helper, event_file_data(), to read ->i_private, it will have more users. Note: currently ACCESS_ONCE() and "id != 0" check are not needed, but we are going to change event_remove/rmdir to clear ->i_private. Link: http://lkml.kernel.org/r/20130726172532.GA3605@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5f9a002d17d1..fd16a254173c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -407,6 +407,11 @@ static void put_system(struct ftrace_subsystem_dir *dir) mutex_unlock(&event_mutex); } +static void *event_file_data(struct file *filp) +{ + return ACCESS_ONCE(file_inode(filp)->i_private); +} + /* * Open and update trace_array ref count. * Must have the current trace_array passed to it. @@ -960,19 +965,22 @@ static int trace_format_open(struct inode *inode, struct file *file) static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + int id = (long)event_file_data(filp); struct trace_seq *s; int r; if (*ppos) return 0; + if (unlikely(!id)) + return -ENODEV; + s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); - trace_seq_printf(s, "%d\n", call->event.type); + trace_seq_printf(s, "%d\n", id); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); @@ -1263,7 +1271,6 @@ static const struct file_operations ftrace_event_format_fops = { }; static const struct file_operations ftrace_event_id_fops = { - .open = tracing_open_generic, .read = event_id_read, .llseek = default_llseek, }; @@ -1511,8 +1518,8 @@ event_create_dir(struct dentry *parent, #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg) - trace_create_file("id", 0444, file->dir, call, - id); + trace_create_file("id", 0444, file->dir, + (void *)(long)call->event.type, id); #endif /* From df89bf77ca930f69ba07fd459f735ec3cac69f8f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 26 Jul 2013 19:25:36 +0200 Subject: [PATCH 0615/1368] tracing: Change event_enable/disable_read() to verify i_private != NULL commit bc6f6b08dee5645770efb4b76186ded313f23752 upstream. tracing_open_generic_file() is racy, ftrace_event_file can be already freed by rmdir or trace_remove_event_call(). Change event_enable_read() and event_disable_read() to read and verify "file = i_private" under event_mutex. This fixes nothing, but now we can change debugfs_remove("enable") callers to nullify ->i_private and fix the the problem. Link: http://lkml.kernel.org/r/20130726172536.GA3612@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index fd16a254173c..e8c445d1f190 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -682,13 +682,23 @@ static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file = filp->private_data; + struct ftrace_event_file *file; + unsigned long flags; char *buf; - if (file->flags & FTRACE_EVENT_FL_ENABLED) { - if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) + mutex_lock(&event_mutex); + file = event_file_data(filp); + if (likely(file)) + flags = file->flags; + mutex_unlock(&event_mutex); + + if (!file) + return -ENODEV; + + if (flags & FTRACE_EVENT_FL_ENABLED) { + if (flags & FTRACE_EVENT_FL_SOFT_DISABLED) buf = "0*\n"; - else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) + else if (flags & FTRACE_EVENT_FL_SOFT_MODE) buf = "1*\n"; else buf = "1\n"; @@ -702,13 +712,10 @@ static ssize_t event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file = filp->private_data; + struct ftrace_event_file *file; unsigned long val; int ret; - if (!file) - return -EINVAL; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; @@ -720,8 +727,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, switch (val) { case 0: case 1: + ret = -ENODEV; mutex_lock(&event_mutex); - ret = ftrace_event_enable_disable(file, val); + file = event_file_data(filp); + if (likely(file)) + ret = ftrace_event_enable_disable(file, val); mutex_unlock(&event_mutex); break; From 70c91fb9a74e7937ef76a542a86c1551d5df4281 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 26 Jul 2013 19:25:40 +0200 Subject: [PATCH 0616/1368] tracing: Change event_filter_read/write to verify i_private != NULL commit e2912b091c26b8ea95e5e00a43a7ac620f6c94a6 upstream. event_filter_read/write() are racy, ftrace_event_call can be already freed by trace_remove_event_call() callers. 1. Shift mutex_lock(event_mutex) from print/apply_event_filter to the callers. 2. Change the callers, event_filter_read() and event_filter_write() to read i_private under this mutex and abort if it is NULL. This fixes nothing, but now we can change debugfs_remove("filter") callers to nullify ->i_private and fix the the problem. Link: http://lkml.kernel.org/r/20130726172540.GA3619@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 26 +++++++++++++++++++------- kernel/trace/trace_events_filter.c | 17 ++++++----------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e8c445d1f190..285589354cb7 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1002,21 +1002,28 @@ static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_call *call; struct trace_seq *s; - int r; + int r = -ENODEV; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) return -ENOMEM; trace_seq_init(s); - print_event_filter(call, s); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); + mutex_lock(&event_mutex); + call = event_file_data(filp); + if (call) + print_event_filter(call, s); + mutex_unlock(&event_mutex); + + if (call) + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -1027,9 +1034,9 @@ static ssize_t event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_call *call; char *buf; - int err; + int err = -ENODEV; if (cnt >= PAGE_SIZE) return -EINVAL; @@ -1044,7 +1051,12 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, } buf[cnt] = '\0'; - err = apply_event_filter(call, buf); + mutex_lock(&event_mutex); + call = event_file_data(filp); + if (call) + err = apply_event_filter(call, buf); + mutex_unlock(&event_mutex); + free_page((unsigned long) buf); if (err < 0) return err; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1b653f7e1ca..0a1edc694d67 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -631,17 +631,15 @@ static void append_filter_err(struct filter_parse_state *ps, free_page((unsigned long) buf); } +/* caller must hold event_mutex */ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) { - struct event_filter *filter; + struct event_filter *filter = call->filter; - mutex_lock(&event_mutex); - filter = call->filter; if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else trace_seq_printf(s, "none\n"); - mutex_unlock(&event_mutex); } void print_subsystem_event_filter(struct event_subsystem *system, @@ -1835,23 +1833,22 @@ static int create_system_filter(struct event_subsystem *system, return err; } +/* caller must hold event_mutex */ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) { struct event_filter *filter; - int err = 0; - - mutex_lock(&event_mutex); + int err; if (!strcmp(strstrip(filter_string), "0")) { filter_disable(call); filter = call->filter; if (!filter) - goto out_unlock; + return 0; RCU_INIT_POINTER(call->filter, NULL); /* Make sure the filter is not being used */ synchronize_sched(); __free_filter(filter); - goto out_unlock; + return 0; } err = create_filter(call, filter_string, true, &filter); @@ -1878,8 +1875,6 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) __free_filter(tmp); } } -out_unlock: - mutex_unlock(&event_mutex); return err; } From b86d0ba62decb830aed2fa525e7557857d3199f2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 26 Jul 2013 19:25:43 +0200 Subject: [PATCH 0617/1368] tracing: Change f_start() to take event_mutex and verify i_private != NULL commit c5a44a1200c6eda2202434f25325e8ad19533fca upstream. trace_format_open() and trace_format_seq_ops are racy, nothing protects ftrace_event_call from trace_remove_event_call(). Change f_start() to take event_mutex and verify i_private != NULL, change f_stop() to drop this lock. This fixes nothing, but now we can change debugfs_remove("format") callers to nullify ->i_private and fix the the problem. Note: the usage of event_mutex is sub-optimal but simple, we can change this later. Link: http://lkml.kernel.org/r/20130726172543.GA3622@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 285589354cb7..09782d623c93 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -838,7 +838,7 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; + struct ftrace_event_call *call = event_file_data(m->private); struct ftrace_event_field *field; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); @@ -882,6 +882,11 @@ static void *f_start(struct seq_file *m, loff_t *pos) loff_t l = 0; void *p; + /* ->stop() is called even if ->start() fails */ + mutex_lock(&event_mutex); + if (!event_file_data(m->private)) + return ERR_PTR(-ENODEV); + /* Start by showing the header */ if (!*pos) return (void *)FORMAT_HEADER; @@ -896,7 +901,7 @@ static void *f_start(struct seq_file *m, loff_t *pos) static int f_show(struct seq_file *m, void *v) { - struct ftrace_event_call *call = m->private; + struct ftrace_event_call *call = event_file_data(m->private); struct ftrace_event_field *field; const char *array_descriptor; @@ -947,6 +952,7 @@ static int f_show(struct seq_file *m, void *v) static void f_stop(struct seq_file *m, void *p) { + mutex_unlock(&event_mutex); } static const struct seq_operations trace_format_seq_ops = { @@ -958,7 +964,6 @@ static const struct seq_operations trace_format_seq_ops = { static int trace_format_open(struct inode *inode, struct file *file) { - struct ftrace_event_call *call = inode->i_private; struct seq_file *m; int ret; @@ -967,7 +972,7 @@ static int trace_format_open(struct inode *inode, struct file *file) return ret; m = file->private_data; - m->private = call; + m->private = file; return 0; } From c6febdf258313a120acd1298dd1dd41442131a37 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 26 Jul 2013 19:25:47 +0200 Subject: [PATCH 0618/1368] tracing: Introduce remove_event_file_dir() commit f6a84bdc75b5c11621dec58db73fe102cbaf40cc upstream. Preparation for the next patch. Extract the common code from remove_event_from_tracers() and __trace_remove_event_dirs() into the new helper, remove_event_file_dir(). The patch looks more complicated than it actually is, it also moves remove_subsystem() up to avoid the forward declaration. Link: http://lkml.kernel.org/r/20130726172547.GA3629@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 47 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 09782d623c93..422f99dfe699 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -407,11 +407,31 @@ static void put_system(struct ftrace_subsystem_dir *dir) mutex_unlock(&event_mutex); } +static void remove_subsystem(struct ftrace_subsystem_dir *dir) +{ + if (!dir) + return; + + if (!--dir->nr_events) { + debugfs_remove_recursive(dir->entry); + list_del(&dir->list); + __put_system_dir(dir); + } +} + static void *event_file_data(struct file *filp) { return ACCESS_ONCE(file_inode(filp)->i_private); } +static void remove_event_file_dir(struct ftrace_event_file *file) +{ + list_del(&file->list); + debugfs_remove_recursive(file->dir); + remove_subsystem(file->system); + kmem_cache_free(file_cachep, file); +} + /* * Open and update trace_array ref count. * Must have the current trace_array passed to it. @@ -1571,33 +1591,16 @@ event_create_dir(struct dentry *parent, return 0; } -static void remove_subsystem(struct ftrace_subsystem_dir *dir) -{ - if (!dir) - return; - - if (!--dir->nr_events) { - debugfs_remove_recursive(dir->entry); - list_del(&dir->list); - __put_system_dir(dir); - } -} - static void remove_event_from_tracers(struct ftrace_event_call *call) { struct ftrace_event_file *file; struct trace_array *tr; do_for_each_event_file_safe(tr, file) { - if (file->event_call != call) continue; - list_del(&file->list); - debugfs_remove_recursive(file->dir); - remove_subsystem(file->system); - kmem_cache_free(file_cachep, file); - + remove_event_file_dir(file); /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -2330,12 +2333,8 @@ __trace_remove_event_dirs(struct trace_array *tr) { struct ftrace_event_file *file, *next; - list_for_each_entry_safe(file, next, &tr->events, list) { - list_del(&file->list); - debugfs_remove_recursive(file->dir); - remove_subsystem(file->system); - kmem_cache_free(file_cachep, file); - } + list_for_each_entry_safe(file, next, &tr->events, list) + remove_event_file_dir(file); } static void From 012dc156d6af49e02a30fcba7d688e251608d97c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 28 Jul 2013 20:35:27 +0200 Subject: [PATCH 0619/1368] tracing: Change remove_event_file_dir() to clear "d_subdirs"->i_private commit bf682c3159c4d298d1126a56793ed3f5e80395f7 upstream. Change remove_event_file_dir() to clear ->i_private for every file we are going to remove. We need to check file->dir != NULL because event_create_dir() can fail. debugfs_remove_recursive(NULL) is fine but the patch moves it under the same check anyway for readability. spin_lock(d_lock) and "d_inode != NULL" check are not needed afaics, but I do not understand this code enough. tracing_open_generic_file() and tracing_release_generic_file() can go away, ftrace_enable_fops and ftrace_event_filter_fops() use tracing_open_generic() but only to check tracing_disabled. This fixes all races with event_remove() or instance_delete(). f_op->read/write/whatever can never use the freed file/call, all event/* files were changed to check and use ->i_private under event_mutex. Note: this doesn't not fix other problems, event_remove() can destroy the active ftrace_event_call, we need more changes but those changes are completely orthogonal. Link: http://lkml.kernel.org/r/20130728183527.GB16723@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 47 ++++++++++++------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 422f99dfe699..0bff8aaf581b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -426,41 +426,25 @@ static void *event_file_data(struct file *filp) static void remove_event_file_dir(struct ftrace_event_file *file) { + struct dentry *dir = file->dir; + struct dentry *child; + + if (dir) { + spin_lock(&dir->d_lock); /* probably unneeded */ + list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) { + if (child->d_inode) /* probably unneeded */ + child->d_inode->i_private = NULL; + } + spin_unlock(&dir->d_lock); + + debugfs_remove_recursive(dir); + } + list_del(&file->list); - debugfs_remove_recursive(file->dir); remove_subsystem(file->system); kmem_cache_free(file_cachep, file); } -/* - * Open and update trace_array ref count. - * Must have the current trace_array passed to it. - */ -static int tracing_open_generic_file(struct inode *inode, struct file *filp) -{ - struct ftrace_event_file *file = inode->i_private; - struct trace_array *tr = file->tr; - int ret; - - if (trace_array_get(tr) < 0) - return -ENODEV; - - ret = tracing_open_generic(inode, filp); - if (ret < 0) - trace_array_put(tr); - return ret; -} - -static int tracing_release_generic_file(struct inode *inode, struct file *filp) -{ - struct ftrace_event_file *file = inode->i_private; - struct trace_array *tr = file->tr; - - trace_array_put(tr); - - return 0; -} - /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ @@ -1303,10 +1287,9 @@ static const struct file_operations ftrace_set_event_fops = { }; static const struct file_operations ftrace_enable_fops = { - .open = tracing_open_generic_file, + .open = tracing_open_generic, .read = event_enable_read, .write = event_enable_write, - .release = tracing_release_generic_file, .llseek = default_llseek, }; From 8169887b694aa3d9632fdd0308aaa1db7c422e75 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 29 Jul 2013 19:50:33 +0200 Subject: [PATCH 0620/1368] tracing: trace_remove_event_call() should fail if call/file is in use commit 2816c551c796ec14620325b2c9ed75b9979d3125 upstream. Change trace_remove_event_call(call) to return the error if this call is active. This is what the callers assume but can't verify outside of the tracing locks. Both trace_kprobe.c/trace_uprobe.c need the additional changes, unregister_trace_probe() should abort if trace_remove_event_call() fails. The caller is going to free this call/file so we must ensure that nobody can use them after trace_remove_event_call() succeeds. debugfs should be fine after the previous changes and event_remove() does TRACE_REG_UNREGISTER, but still there are 2 reasons why we need the additional checks: - There could be a perf_event(s) attached to this tp_event, so the patch checks ->perf_refcount. - TRACE_REG_UNREGISTER can be suppressed by FTRACE_EVENT_FL_SOFT_MODE, so we simply check FTRACE_EVENT_FL_ENABLED protected by event_mutex. Link: http://lkml.kernel.org/r/20130729175033.GB26284@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace_events.c | 37 +++++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 44cdc11b15c2..120d57a1c3a5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -334,7 +334,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); extern int trace_add_event_call(struct ftrace_event_call *call); -extern void trace_remove_event_call(struct ftrace_event_call *call); +extern int trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0bff8aaf581b..3d18aadef493 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1735,16 +1735,47 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) destroy_preds(call); } -/* Remove an event_call */ -void trace_remove_event_call(struct ftrace_event_call *call) +static int probe_remove_event_call(struct ftrace_event_call *call) { + struct trace_array *tr; + struct ftrace_event_file *file; + +#ifdef CONFIG_PERF_EVENTS + if (call->perf_refcount) + return -EBUSY; +#endif + do_for_each_event_file(tr, file) { + if (file->event_call != call) + continue; + /* + * We can't rely on ftrace_event_enable_disable(enable => 0) + * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress + * TRACE_REG_UNREGISTER. + */ + if (file->flags & FTRACE_EVENT_FL_ENABLED) + return -EBUSY; + break; + } while_for_each_event_file(); + + __trace_remove_event_call(call); + + return 0; +} + +/* Remove an event_call */ +int trace_remove_event_call(struct ftrace_event_call *call) +{ + int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); - __trace_remove_event_call(call); + ret = probe_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + + return ret; } #define for_each_event(event, start, end) \ From 428fbcf9d5d654526a73229d97e76d5d7ac47190 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 3 Jul 2013 23:33:50 -0400 Subject: [PATCH 0621/1368] tracing/kprobes: Fail to unregister if probe event files are in use commit 40c32592668b727cbfcf7b1c0567f581bd62a5e4 upstream. When a probe is being removed, it cleans up the event files that correspond to the probe. But there is a race between writing to one of these files and deleting the probe. This is especially true for the "enable" file. CPU 0 CPU 1 ----- ----- fd = open("enable",O_WRONLY); probes_open() release_all_trace_probes() unregister_trace_probe() if (trace_probe_is_enabled(tp)) return -EBUSY write(fd, "1", 1) __ftrace_set_clr_event() call->class->reg() (kprobe_register) enable_trace_probe(tp) __unregister_trace_probe(tp); list_del(&tp->list) unregister_probe_event(tp) <-- fails! free_trace_probe(tp) write(fd, "0", 1) __ftrace_set_clr_event() call->class->unreg (kprobe_register) disable_trace_probe(tp) <-- BOOM! A test program was written that used two threads to simulate the above scenario adding a nanosleep() interval to change the timings and after several thousand runs, it was able to trigger this bug and crash: BUG: unable to handle kernel paging request at 00000005000000f9 IP: [] probes_open+0x3b/0xa7 PGD 7808a067 PUD 0 Oops: 0000 [#1] PREEMPT SMP Dumping ftrace buffer: --------------------------------- Modules linked in: ipt_MASQUERADE sunrpc ip6t_REJECT nf_conntrack_ipv6 CPU: 1 PID: 2070 Comm: test-kprobe-rem Not tainted 3.11.0-rc3-test+ #47 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 task: ffff880077756440 ti: ffff880076e52000 task.ti: ffff880076e52000 RIP: 0010:[] [] probes_open+0x3b/0xa7 RSP: 0018:ffff880076e53c38 EFLAGS: 00010203 RAX: 0000000500000001 RBX: ffff88007844f440 RCX: 0000000000000003 RDX: 0000000000000003 RSI: 0000000000000003 RDI: ffff880076e52000 RBP: ffff880076e53c58 R08: ffff880076e53bd8 R09: 0000000000000000 R10: ffff880077756440 R11: 0000000000000006 R12: ffffffff810dee35 R13: ffff880079250418 R14: 0000000000000000 R15: ffff88007844f450 FS: 00007f87a276f700(0000) GS:ffff88007d480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000005000000f9 CR3: 0000000077262000 CR4: 00000000000007e0 Stack: ffff880076e53c58 ffffffff81219ea0 ffff88007844f440 ffffffff810dee35 ffff880076e53ca8 ffffffff81130f78 ffff8800772986c0 ffff8800796f93a0 ffffffff81d1b5d8 ffff880076e53e04 0000000000000000 ffff88007844f440 Call Trace: [] ? security_file_open+0x2c/0x30 [] ? unregister_trace_probe+0x4b/0x4b [] do_dentry_open+0x162/0x226 [] finish_open+0x46/0x54 [] do_last+0x7f6/0x996 [] ? inode_permission+0x42/0x44 [] path_openat+0x232/0x496 [] do_filp_open+0x3a/0x8a [] ? __alloc_fd+0x168/0x17a [] do_sys_open+0x70/0x102 [] ? trace_hardirqs_on_caller+0x160/0x197 [] SyS_open+0x1e/0x20 [] system_call_fastpath+0x16/0x1b Code: e5 41 54 53 48 89 f3 48 83 ec 10 48 23 56 78 48 39 c2 75 6c 31 f6 48 c7 RIP [] probes_open+0x3b/0xa7 RSP CR2: 00000005000000f9 ---[ end trace 35f17d68fc569897 ]--- The unregister_trace_probe() must be done first, and if it fails it must fail the removal of the kprobe. Several changes have already been made by Oleg Nesterov and Masami Hiramatsu to allow moving the unregister_probe_event() before the removal of the probe and exit the function if it fails. This prevents the tp structure from being used after it is freed. Link: http://lkml.kernel.org/r/20130704034038.819592356@goodmis.org Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c209c6ec34ca..64abc8ca928b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -90,7 +90,7 @@ static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) } static int register_probe_event(struct trace_probe *tp); -static void unregister_probe_event(struct trace_probe *tp); +static int unregister_probe_event(struct trace_probe *tp); static DEFINE_MUTEX(probe_lock); static LIST_HEAD(probe_list); @@ -411,9 +411,12 @@ static int unregister_trace_probe(struct trace_probe *tp) if (trace_probe_is_enabled(tp)) return -EBUSY; + /* Will fail if probe is being used by ftrace or perf */ + if (unregister_probe_event(tp)) + return -EBUSY; + __unregister_trace_probe(tp); list_del(&tp->list); - unregister_probe_event(tp); return 0; } @@ -692,7 +695,9 @@ static int release_all_trace_probes(void) /* TODO: Use batch unregistration */ while (!list_empty(&probe_list)) { tp = list_entry(probe_list.next, struct trace_probe, list); - unregister_trace_probe(tp); + ret = unregister_trace_probe(tp); + if (ret) + goto end; free_trace_probe(tp); } @@ -1325,11 +1330,15 @@ static int register_probe_event(struct trace_probe *tp) return ret; } -static void unregister_probe_event(struct trace_probe *tp) +static int unregister_probe_event(struct trace_probe *tp) { + int ret; + /* tp->event is unregistered in trace_remove_event_call() */ - trace_remove_event_call(&tp->call); - kfree(tp->call.print_fmt); + ret = trace_remove_event_call(&tp->call); + if (!ret) + kfree(tp->call.print_fmt); + return ret; } /* Make a debugfs interface for controlling probe points */ From 396dd500521287215d836b84b2ae61cd9163bba5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 3 Jul 2013 23:33:51 -0400 Subject: [PATCH 0622/1368] tracing/uprobes: Fail to unregister if probe event files are in use commit c6c2401d8bbaf9edc189b4c35a8cb2780b8b988e upstream. Uprobes suffer the same problem that kprobes have. There's a race between writing to the "enable" file and removing the probe. The probe checks for it being in use and if it is not, goes about deleting the probe and the event that represents it. But the problem with that is, after it checks if it is in use it can be enabled, and the deletion of the event (access to the probe) will fail, as it is in use. But the uprobe will still be deleted. This is a problem as the event can reference the uprobe that was deleted. The fix is to remove the event first, and check to make sure the event removal succeeds. Then it is safe to remove the probe. When the event exists, either ftrace or perf can enable the probe and prevent the event from being removed. Link: http://lkml.kernel.org/r/20130704034038.991525256@goodmis.org Acked-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 51 +++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d5d0cd368a56..6fd72b768522 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -70,7 +70,7 @@ struct trace_uprobe { (sizeof(struct probe_arg) * (n))) static int register_uprobe_event(struct trace_uprobe *tu); -static void unregister_uprobe_event(struct trace_uprobe *tu); +static int unregister_uprobe_event(struct trace_uprobe *tu); static DEFINE_MUTEX(uprobe_lock); static LIST_HEAD(uprobe_list); @@ -164,11 +164,17 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou } /* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */ -static void unregister_trace_uprobe(struct trace_uprobe *tu) +static int unregister_trace_uprobe(struct trace_uprobe *tu) { + int ret; + + ret = unregister_uprobe_event(tu); + if (ret) + return ret; + list_del(&tu->list); - unregister_uprobe_event(tu); free_trace_uprobe(tu); + return 0; } /* Register a trace_uprobe and probe_event */ @@ -181,9 +187,12 @@ static int register_trace_uprobe(struct trace_uprobe *tu) /* register as an event */ old_tp = find_probe_event(tu->call.name, tu->call.class->system); - if (old_tp) + if (old_tp) { /* delete old event */ - unregister_trace_uprobe(old_tp); + ret = unregister_trace_uprobe(old_tp); + if (ret) + goto end; + } ret = register_uprobe_event(tu); if (ret) { @@ -256,6 +265,8 @@ static int create_trace_uprobe(int argc, char **argv) group = UPROBE_EVENT_SYSTEM; if (is_delete) { + int ret; + if (!event) { pr_info("Delete command needs an event name.\n"); return -EINVAL; @@ -269,9 +280,9 @@ static int create_trace_uprobe(int argc, char **argv) return -ENOENT; } /* delete an event */ - unregister_trace_uprobe(tu); + ret = unregister_trace_uprobe(tu); mutex_unlock(&uprobe_lock); - return 0; + return ret; } if (argc < 2) { @@ -408,16 +419,20 @@ static int create_trace_uprobe(int argc, char **argv) return ret; } -static void cleanup_all_probes(void) +static int cleanup_all_probes(void) { struct trace_uprobe *tu; + int ret = 0; mutex_lock(&uprobe_lock); while (!list_empty(&uprobe_list)) { tu = list_entry(uprobe_list.next, struct trace_uprobe, list); - unregister_trace_uprobe(tu); + ret = unregister_trace_uprobe(tu); + if (ret) + break; } mutex_unlock(&uprobe_lock); + return ret; } /* Probes listing interfaces */ @@ -462,8 +477,13 @@ static const struct seq_operations probes_seq_op = { static int probes_open(struct inode *inode, struct file *file) { - if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - cleanup_all_probes(); + int ret; + + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { + ret = cleanup_all_probes(); + if (ret) + return ret; + } return seq_open(file, &probes_seq_op); } @@ -970,12 +990,17 @@ static int register_uprobe_event(struct trace_uprobe *tu) return ret; } -static void unregister_uprobe_event(struct trace_uprobe *tu) +static int unregister_uprobe_event(struct trace_uprobe *tu) { + int ret; + /* tu->event is unregistered in trace_remove_event_call() */ - trace_remove_event_call(&tu->call); + ret = trace_remove_event_call(&tu->call); + if (ret) + return ret; kfree(tu->call.print_fmt); tu->call.print_fmt = NULL; + return 0; } /* Make a trace interface for controling probe points */ From f05e999f16aed044c0b8b0837bdb084cbeb8cee6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 30 Jul 2013 00:04:32 -0400 Subject: [PATCH 0623/1368] ftrace: Check module functions being traced on reload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8c4f3c3fa9681dc549cd35419b259496082fef8b upstream. There's been a nasty bug that would show up and not give much info. The bug displayed the following warning: WARNING: at kernel/trace/ftrace.c:1529 __ftrace_hash_rec_update+0x1e3/0x230() Pid: 20903, comm: bash Tainted: G O 3.6.11+ #38405.trunk Call Trace: [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] __ftrace_hash_rec_update+0x1e3/0x230 [] ftrace_hash_move+0x28/0x1d0 [] ? kfree+0x2c/0x110 [] ftrace_regex_release+0x8e/0x150 [] __fput+0xae/0x220 [] ____fput+0xe/0x10 [] task_work_run+0x72/0x90 [] do_notify_resume+0x6c/0xc0 [] ? trace_hardirqs_on_thunk+0x3a/0x3c [] int_signal+0x12/0x17 ---[ end trace 793179526ee09b2c ]--- It was finally narrowed down to unloading a module that was being traced. It was actually more than that. When functions are being traced, there's a table of all functions that have a ref count of the number of active tracers attached to that function. When a function trace callback is registered to a function, the function's record ref count is incremented. When it is unregistered, the function's record ref count is decremented. If an inconsistency is detected (ref count goes below zero) the above warning is shown and the function tracing is permanently disabled until reboot. The ftrace callback ops holds a hash of functions that it filters on (and/or filters off). If the hash is empty, the default means to filter all functions (for the filter_hash) or to disable no functions (for the notrace_hash). When a module is unloaded, it frees the function records that represent the module functions. These records exist on their own pages, that is function records for one module will not exist on the same page as function records for other modules or even the core kernel. Now when a module unloads, the records that represents its functions are freed. When the module is loaded again, the records are recreated with a default ref count of zero (unless there's a callback that traces all functions, then they will also be traced, and the ref count will be incremented). The problem is that if an ftrace callback hash includes functions of the module being unloaded, those hash entries will not be removed. If the module is reloaded in the same location, the hash entries still point to the functions of the module but the module's ref counts do not reflect that. With the help of Steve and Joern, we found a reproducer: Using uinput module and uinput_release function. cd /sys/kernel/debug/tracing modprobe uinput echo uinput_release > set_ftrace_filter echo function > current_tracer rmmod uinput modprobe uinput # check /proc/modules to see if loaded in same addr, otherwise try again echo nop > current_tracer [BOOM] The above loads the uinput module, which creates a table of functions that can be traced within the module. We add uinput_release to the filter_hash to trace just that function. Enable function tracincg, which increments the ref count of the record associated to uinput_release. Remove uinput, which frees the records including the one that represents uinput_release. Load the uinput module again (and make sure it's at the same address). This recreates the function records all with a ref count of zero, including uinput_release. Disable function tracing, which will decrement the ref count for uinput_release which is now zero because of the module removal and reload, and we have a mismatch (below zero ref count). The solution is to check all currently tracing ftrace callbacks to see if any are tracing any of the module's functions when a module is loaded (it already does that with callbacks that trace all functions). If a callback happens to have a module function being traced, it increments that records ref count and starts tracing that function. There may be a strange side effect with this, where tracing module functions on unload and then reloading a new module may have that new module's functions being traced. This may be something that confuses the user, but it's not a big deal. Another approach is to disable all callback hashes on module unload, but this leaves some ftrace callbacks that may not be registered, but can still have hashes tracing the module's function where ftrace doesn't know about it. That situation can cause the same bug. This solution solves that case too. Another benefit of this solution, is it is possible to trace a module's function on unload and load. Link: http://lkml.kernel.org/r/20130705142629.GA325@redhat.com Reported-by: Jörn Engel Reported-by: Dave Jones Reported-by: Steve Hodgson Tested-by: Steve Hodgson Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 71 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 13f12351b350..f23449d0650e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2144,12 +2144,57 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ops_traces_mod(struct ftrace_ops *ops) +static inline int ops_traces_mod(struct ftrace_ops *ops) { - struct ftrace_hash *hash; + /* + * Filter_hash being empty will default to trace module. + * But notrace hash requires a test of individual module functions. + */ + return ftrace_hash_empty(ops->filter_hash) && + ftrace_hash_empty(ops->notrace_hash); +} - hash = ops->filter_hash; - return ftrace_hash_empty(hash); +/* + * Check if the current ops references the record. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. + */ +static inline bool +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + /* If ops isn't enabled, ignore it */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return 0; + + /* If ops traces all mods, we already accounted for it */ + if (ops_traces_mod(ops)) + return 0; + + /* The function must be in the filter */ + if (!ftrace_hash_empty(ops->filter_hash) && + !ftrace_lookup_ip(ops->filter_hash, rec->ip)) + return 0; + + /* If in notrace hash, we ignore it too */ + if (ftrace_lookup_ip(ops->notrace_hash, rec->ip)) + return 0; + + return 1; +} + +static int referenced_filters(struct dyn_ftrace *rec) +{ + struct ftrace_ops *ops; + int cnt = 0; + + for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { + if (ops_references_rec(ops, rec)) + cnt++; + } + + return cnt; } static int ftrace_update_code(struct module *mod) @@ -2158,6 +2203,7 @@ static int ftrace_update_code(struct module *mod) struct dyn_ftrace *p; cycle_t start, stop; unsigned long ref = 0; + bool test = false; int i; /* @@ -2171,9 +2217,12 @@ static int ftrace_update_code(struct module *mod) for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops->flags & FTRACE_OPS_FL_ENABLED && - ops_traces_mod(ops)) - ref++; + if (ops->flags & FTRACE_OPS_FL_ENABLED) { + if (ops_traces_mod(ops)) + ref++; + else + test = true; + } } } @@ -2183,12 +2232,16 @@ static int ftrace_update_code(struct module *mod) for (pg = ftrace_new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { + int cnt = ref; + /* If something went wrong, bail without enabling anything */ if (unlikely(ftrace_disabled)) return -1; p = &pg->records[i]; - p->flags = ref; + if (test) + cnt += referenced_filters(p); + p->flags = cnt; /* * Do the initial record conversion from mcount jump @@ -2208,7 +2261,7 @@ static int ftrace_update_code(struct module *mod) * conversion puts the module to the correct state, thus * passing the ftrace_make_call check. */ - if (ftrace_start_up && ref) { + if (ftrace_start_up && cnt) { int failed = __ftrace_replace_code(p, 1); if (failed) ftrace_bug(failed, p->ip); From 51f508721b778bd48d723bed1c4ca4af0855764c Mon Sep 17 00:00:00 2001 From: Chuck Anderson Date: Tue, 6 Aug 2013 15:12:19 -0700 Subject: [PATCH 0624/1368] xen/smp: initialize IPI vectors before marking CPU online commit fc78d343fa74514f6fd117b5ef4cd27e4ac30236 upstream. An older PVHVM guest (v3.0 based) crashed during vCPU hot-plug with: kernel BUG at drivers/xen/events.c:1328! RCU has detected that a CPU has not entered a quiescent state within the grace period. It needs to send the CPU a reschedule IPI if it is not offline. rcu_implicit_offline_qs() does this check: /* * If the CPU is offline, it is in a quiescent state. We can * trust its state not to change because interrupts are disabled. */ if (cpu_is_offline(rdp->cpu)) { rdp->offline_fqs++; return 1; } Else the CPU is online. Send it a reschedule IPI. The CPU is in the middle of being hot-plugged and has been marked online (!cpu_is_offline()). See start_secondary(): set_cpu_online(smp_processor_id(), true); ... per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; start_secondary() then waits for the CPU bringing up the hot-plugged CPU to mark it as active: /* * Wait until the cpu which brought this one up marked it * online before enabling interrupts. If we don't do that then * we can end up waking up the softirq thread before this cpu * reached the active state, which makes the scheduler unhappy * and schedule the softirq thread on the wrong cpu. This is * only observable with forced threaded interrupts, but in * theory it could also happen w/o them. It's just way harder * to achieve. */ while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask)) cpu_relax(); /* enable local interrupts */ local_irq_enable(); The CPU being hot-plugged will be marked active after it has been fully initialized by the CPU managing the hot-plug. In the Xen PVHVM case xen_smp_intr_init() is called to set up the hot-plugged vCPU's XEN_RESCHEDULE_VECTOR. The hot-plugging CPU is marked online, not marked active and does not have its IPI vectors set up. rcu_implicit_offline_qs() sees the hot-plugging cpu is !cpu_is_offline() and tries to send it a reschedule IPI: This will lead to: kernel BUG at drivers/xen/events.c:1328! xen_send_IPI_one() xen_smp_send_reschedule() rcu_implicit_offline_qs() rcu_implicit_dynticks_qs() force_qs_rnp() force_quiescent_state() __rcu_process_callbacks() rcu_process_callbacks() __do_softirq() call_softirq() do_softirq() irq_exit() xen_evtchn_do_upcall() because xen_send_IPI_one() will attempt to use an uninitialized IRQ for the XEN_RESCHEDULE_VECTOR. There is at least one other place that has caused the same crash: xen_smp_send_reschedule() wake_up_idle_cpu() add_timer_on() clocksource_watchdog() call_timer_fn() run_timer_softirq() __do_softirq() call_softirq() do_softirq() irq_exit() xen_evtchn_do_upcall() xen_hvm_callback_vector() clocksource_watchdog() uses cpu_online_mask to pick the next CPU to handle a watchdog timer: /* * Cycle through CPUs to check if the CPUs stay synchronized * to each other. */ next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = cpumask_first(cpu_online_mask); watchdog_timer.expires += WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, next_cpu); This resulted in an attempt to send an IPI to a hot-plugging CPU that had not initialized its reschedule vector. One option would be to make the RCU code check to not check for CPU offline but for CPU active. As becoming active is done after a CPU is online (in older kernels). But Srivatsa pointed out that "the cpu_active vs cpu_online ordering has been completely reworked - in the online path, cpu_active is set *before* cpu_online, and also, in the cpu offline path, the cpu_active bit is reset in the CPU_DYING notification instead of CPU_DOWN_PREPARE." Drilling in this the bring-up path: "[brought up CPU].. send out a CPU_STARTING notification, and in response to that, the scheduler sets the CPU in the cpu_active_mask. Again, this mask is better left to the scheduler alone, since it has the intelligence to use it judiciously." The conclusion was that: " 1. At the IPI sender side: It is incorrect to send an IPI to an offline CPU (cpu not present in the cpu_online_mask). There are numerous places where we check this and warn/complain. 2. At the IPI receiver side: It is incorrect to let the world know of our presence (by setting ourselves in global bitmasks) until our initialization steps are complete to such an extent that we can handle the consequences (such as receiving interrupts without crashing the sender etc.) " (from Srivatsa) As the native code enables the interrupts at some point we need to be able to service them. In other words a CPU must have valid IPI vectors if it has been marked online. It doesn't need to handle the IPI (interrupts may be disabled) but needs to have valid IPI vectors because another CPU may find it in cpu_online_mask and attempt to send it an IPI. This patch will change the order of the Xen vCPU bring-up functions so that Xen vectors have been set up before start_secondary() is called. It also will not continue to bring up a Xen vCPU if xen_smp_intr_init() fails to initialize it. Orabug 13823853 Signed-off-by Chuck Anderson Acked-by: Srivatsa S. Bhat Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/smp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d99cae8147d1..a1e58e19d0cc 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -667,8 +667,15 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) { int rc; - rc = native_cpu_up(cpu, tidle); - WARN_ON (xen_smp_intr_init(cpu)); + /* + * xen_smp_intr_init() needs to run before native_cpu_up() + * so that IPI vectors are set up on the booting CPU before + * it is marked online in native_cpu_up(). + */ + rc = xen_smp_intr_init(cpu); + WARN_ON(rc); + if (!rc) + rc = native_cpu_up(cpu, tidle); return rc; } From 0cbf39727e08f58246e763f8af2f8dcb8a9f648f Mon Sep 17 00:00:00 2001 From: Joern Rennecke Date: Sat, 24 Aug 2013 12:03:06 +0530 Subject: [PATCH 0625/1368] ARC: [lib] strchr breakage in Big-endian configuration commit b0f55f2a1a295c364be012e82dbab079a2454006 upstream. For a search buffer, 2 byte aligned, strchr() was returning pointer outside of buffer (buf - 1) ------------->8---------------- // Input buffer (default 4 byte aigned) char *buffer = "1AA_"; // actual search start (to mimick 2 byte alignment) char *current_line = &(buffer[2]); // Character to search for char c = 'A'; char *c_pos = strchr(current_line, c); printf("%s\n", c_pos) --> 'AA_' as oppose to 'A_' ------------->8---------------- Reported-by: Anton Kolesov Debugged-by: Anton Kolesov Cc: Noam Camus Signed-off-by: Joern Rennecke Signed-off-by: Vineet Gupta Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arc/lib/strchr-700.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S index 99c10475d477..9c548c7cf001 100644 --- a/arch/arc/lib/strchr-700.S +++ b/arch/arc/lib/strchr-700.S @@ -39,9 +39,18 @@ ARC_ENTRY strchr ld.a r2,[r0,4] sub r12,r6,r7 bic r12,r12,r6 +#ifdef __LITTLE_ENDIAN__ and r7,r12,r4 breq r7,0,.Loop ; For speed, we want this branch to be unaligned. b .Lfound_char ; Likewise this one. +#else + and r12,r12,r4 + breq r12,0,.Loop ; For speed, we want this branch to be unaligned. + lsr_s r12,r12,7 + bic r2,r7,r6 + b.d .Lfound_char_b + and_s r2,r2,r12 +#endif ; /* We require this code address to be unaligned for speed... */ .Laligned: ld_s r2,[r0] @@ -95,6 +104,7 @@ ARC_ENTRY strchr lsr r7,r7,7 bic r2,r7,r6 +.Lfound_char_b: norm r2,r2 sub_s r0,r0,4 asr_s r2,r2,3 From b169395145ce013ad1ed3ee25878112f647ffd56 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 6 Aug 2013 14:28:42 +0300 Subject: [PATCH 0626/1368] zd1201: do not use stack as URB transfer_buffer commit 1206ff4ff9d2ef7468a355328bc58ac6ebf5be44 upstream. Patch fixes zd1201 not to use stack as URB transfer_buffer. URB buffers need to be DMA-able, which stack is not. Patch is only compile tested. Signed-off-by: Jussi Kivilinna Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/zd1201.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c index 4941f201d6c8..b8ba1f925e75 100644 --- a/drivers/net/wireless/zd1201.c +++ b/drivers/net/wireless/zd1201.c @@ -98,10 +98,12 @@ static int zd1201_fw_upload(struct usb_device *dev, int apfw) goto exit; err = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0x4, - USB_DIR_IN | 0x40, 0,0, &ret, sizeof(ret), ZD1201_FW_TIMEOUT); + USB_DIR_IN | 0x40, 0, 0, buf, sizeof(ret), ZD1201_FW_TIMEOUT); if (err < 0) goto exit; + memcpy(&ret, buf, sizeof(ret)); + if (ret & 0x80) { err = -EIO; goto exit; From 2bee1e0f280b159ce8f9f01c2f5c9de410231a3b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Aug 2013 12:44:39 +0300 Subject: [PATCH 0627/1368] VFS: collect_mounts() should return an ERR_PTR commit 52e220d357a38cb29fa2e29f34ed94c1d66357f4 upstream. This should actually be returning an ERR_PTR on error instead of NULL. That was how it was designed and all the callers expect it. [AV: actually, that's what "VFS: Make clone_mnt()/copy_tree()/collect_mounts() return errors" missed - originally collect_mounts() was expected to return NULL on failure] Signed-off-by: Dan Carpenter Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 7b1ca9ba0b0a..a45ba4f267fe 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1429,7 +1429,7 @@ struct vfsmount *collect_mounts(struct path *path) CL_COPY_ALL | CL_PRIVATE); namespace_unlock(); if (IS_ERR(tree)) - return NULL; + return ERR_CAST(tree); return &tree->mnt; } From 8a34139a0c0ab7ed616b3e50f061cac8bc1e0531 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Fri, 9 Aug 2013 18:14:20 -0400 Subject: [PATCH 0628/1368] x86: Don't clear olpc_ofw_header when sentinel is detected commit d55e37bb0f51316e552376ddc0a3fff34ca7108b upstream. OpenFirmware wasn't quite following the protocol described in boot.txt and the kernel has detected this through use of the sentinel value in boot_params. OFW does zero out almost all of the stuff that it should do, but not the sentinel. This causes the kernel to clear olpc_ofw_header, which breaks x86 OLPC support. OpenFirmware has now been fixed. However, it would be nice if we could maintain Linux compatibility with old firmware versions. To do that, we just have to avoid zeroing out olpc_ofw_header. OFW does not write to any other parts of the header that are being zapped by the sentinel-detection code, and all users of olpc_ofw_header are somewhat protected through checking for the OLPC_OFW_SIG magic value before using it. So this should not cause any problems for anyone. Signed-off-by: Daniel Drake Link: http://lkml.kernel.org/r/20130809221420.618E6FAB03@dev.laptop.org Acked-by: Yinghai Lu Signed-off-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/bootparam_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 653668d140f9..4a8cb8d7cbd5 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -35,9 +35,9 @@ static void sanitize_boot_params(struct boot_params *boot_params) */ if (boot_params->sentinel) { /* fields in boot_params are left uninitialized, clear them */ - memset(&boot_params->olpc_ofw_header, 0, + memset(&boot_params->ext_ramdisk_image, 0, (char *)&boot_params->efi_info - - (char *)&boot_params->olpc_ofw_header); + (char *)&boot_params->ext_ramdisk_image); memset(&boot_params->kbd_status, 0, (char *)&boot_params->hdr - (char *)&boot_params->kbd_status); From d8251a942ff0318fd8fde8e69fac5480b60e26b3 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 15 Aug 2013 13:21:06 +0100 Subject: [PATCH 0629/1368] xen/events: initialize local per-cpu mask for all possible events commit 84ca7a8e45dafb49cd5ca90a343ba033e2885c17 upstream. The sizeof() argument in init_evtchn_cpu_bindings() is incorrect resulting in only the first 64 (or 32 in 32-bit guests) ports having their bindings being initialized to VCPU 0. In most cases this does not cause a problem as request_irq() will set the irq affinity which will set the correct local per-cpu mask. However, if the request_irq() is called on a VCPU other than 0, there is a window between the unmasking of the event and the affinity being set were an event may be lost because it is not locally unmasked on any VCPU. If request_irq() is called on VCPU 0 then local irqs are disabled during the window and the race does not occur. Fix this by initializing all NR_EVENT_CHANNEL bits in the local per-cpu masks. Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 6a6bbe4ede92..c7338868449a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -346,7 +346,7 @@ static void init_evtchn_cpu_bindings(void) for_each_possible_cpu(i) memset(per_cpu(cpu_evtchn_mask, i), - (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i))); + (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8); } static inline void clear_evtchn(int port) From 131cb95fdf390f414163605c997b48cad591a273 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 15 Aug 2013 13:21:07 +0100 Subject: [PATCH 0630/1368] xen/events: mask events when changing their VCPU binding commit 4704fe4f03a5ab27e3c36184af85d5000e0f8a48 upstream. When a event is being bound to a VCPU there is a window between the EVTCHNOP_bind_vpcu call and the adjustment of the local per-cpu masks where an event may be lost. The hypervisor upcalls the new VCPU but the kernel thinks that event is still bound to the old VCPU and ignores it. There is even a problem when the event is being bound to the same VCPU as there is a small window beween the clear_bit() and set_bit() calls in bind_evtchn_to_cpu(). When scanning for pending events, the kernel may read the bit when it is momentarily clear and ignore the event. Avoid this by masking the event during the whole bind operation. Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index c7338868449a..1faa1305c043 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1492,8 +1492,10 @@ void rebind_evtchn_irq(int evtchn, int irq) /* Rebind an evtchn so that it gets delivered to a specific cpu */ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { + struct shared_info *s = HYPERVISOR_shared_info; struct evtchn_bind_vcpu bind_vcpu; int evtchn = evtchn_from_irq(irq); + int masked; if (!VALID_EVTCHN(evtchn)) return -1; @@ -1509,6 +1511,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) bind_vcpu.port = evtchn; bind_vcpu.vcpu = tcpu; + /* + * Mask the event while changing the VCPU binding to prevent + * it being delivered on an unexpected VCPU. + */ + masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); + /* * If this fails, it usually just indicates that we're dealing with a * virq or IPI channel, which don't actually need to be rebound. Ignore @@ -1517,6 +1525,9 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu); + if (!masked) + unmask_evtchn(evtchn); + return 0; } From 66ed6f3d16a224dfffda420d7b047a7346c63821 Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Fri, 16 Aug 2013 14:43:48 +0530 Subject: [PATCH 0631/1368] ARM: davinci: nand: specify ecc strength commit acd36357edc08649e85ff15dc4ed62353c912eff upstream. Starting with kernel v3.5, it is mandatory to specify ECC strength when using hardware ECC. Without this, kernel panics with a warning of the sort: Driver must set ecc.strength when using hardware ECC ------------[ cut here ]------------ kernel BUG at drivers/mtd/nand/nand_base.c:3519! Fix this by specifying ECC strength for the boards which were missing this. Reported-by: Holger Freyther Signed-off-by: Sekhar Nori Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-davinci/board-dm355-leopard.c | 1 + arch/arm/mach-davinci/board-dm644x-evm.c | 1 + arch/arm/mach-davinci/board-dm646x-evm.c | 1 + arch/arm/mach-davinci/board-neuros-osd2.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c index dff4ddc5ef81..139e42da25f0 100644 --- a/arch/arm/mach-davinci/board-dm355-leopard.c +++ b/arch/arm/mach-davinci/board-dm355-leopard.c @@ -75,6 +75,7 @@ static struct davinci_nand_pdata davinci_nand_data = { .parts = davinci_nand_partitions, .nr_parts = ARRAY_SIZE(davinci_nand_partitions), .ecc_mode = NAND_ECC_HW_SYNDROME, + .ecc_bits = 4, .bbt_options = NAND_BBT_USE_FLASH, }; diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index a33686a6fbb2..fa4bfaf952d8 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -153,6 +153,7 @@ static struct davinci_nand_pdata davinci_evm_nandflash_data = { .parts = davinci_evm_nandflash_partition, .nr_parts = ARRAY_SIZE(davinci_evm_nandflash_partition), .ecc_mode = NAND_ECC_HW, + .ecc_bits = 1, .bbt_options = NAND_BBT_USE_FLASH, .timing = &davinci_evm_nandflash_timing, }; diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c index fbb8e5ab1dc1..0c005e876cac 100644 --- a/arch/arm/mach-davinci/board-dm646x-evm.c +++ b/arch/arm/mach-davinci/board-dm646x-evm.c @@ -90,6 +90,7 @@ static struct davinci_nand_pdata davinci_nand_data = { .parts = davinci_nand_partitions, .nr_parts = ARRAY_SIZE(davinci_nand_partitions), .ecc_mode = NAND_ECC_HW, + .ecc_bits = 1, .options = 0, }; diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c index 2bc112adf565..808233b60e3d 100644 --- a/arch/arm/mach-davinci/board-neuros-osd2.c +++ b/arch/arm/mach-davinci/board-neuros-osd2.c @@ -88,6 +88,7 @@ static struct davinci_nand_pdata davinci_ntosd2_nandflash_data = { .parts = davinci_ntosd2_nandflash_partition, .nr_parts = ARRAY_SIZE(davinci_ntosd2_nandflash_partition), .ecc_mode = NAND_ECC_HW, + .ecc_bits = 1, .bbt_options = NAND_BBT_USE_FLASH, }; From 805eea0bb13dfded98c2383139b64d0576838b65 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 28 Jun 2013 10:39:15 +0200 Subject: [PATCH 0632/1368] ARM: at91/DT: fix at91sam9n12ek memory node commit a57603ca2871ee0773b00839c1ea35c4a2d3eeb0 upstream. Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91sam9n12ek.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts index d30e48bd1e9d..28ba79893877 100644 --- a/arch/arm/boot/dts/at91sam9n12ek.dts +++ b/arch/arm/boot/dts/at91sam9n12ek.dts @@ -14,11 +14,11 @@ / { compatible = "atmel,at91sam9n12ek", "atmel,at91sam9n12", "atmel,at91sam9"; chosen { - bootargs = "mem=128M console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2"; + bootargs = "console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2"; }; memory { - reg = <0x20000000 0x10000000>; + reg = <0x20000000 0x8000000>; }; clocks { From 975a3cd4cc0dff06a635cb16b8fb25fd3ae88234 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:39 +0100 Subject: [PATCH 0633/1368] arm64: perf: fix array out of bounds access in armpmu_map_hw_event() commit 868f6fea8fa63f09acbfa93256d0d2abdcabff79 upstream. This is a port of d9f966357b14 ("ARM: 7810/1: perf: Fix array out of bounds access in armpmu_map_hw_event()") to arm64, which fixes an oops in the arm64 perf backend found as a result of Vince's fuzzing tool. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/perf_event.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9ba33c40cdf8..2012646fb46f 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -107,7 +107,12 @@ armpmu_map_cache_event(const unsigned (*cache_map) static int armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) { - int mapping = (*event_map)[config]; + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; } From 6c6f6c7166e9ee45e545eab850a2862a92c135b2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:40 +0100 Subject: [PATCH 0634/1368] arm64: perf: fix event validation for software group leaders commit ee7538a008a45050c8f706d38b600f55953169f9 upstream. This is a port of c95eb3184ea1 ("ARM: 7809/1: perf: fix event validation for software group leaders") to arm64, which fixes a panic in the arm64 perf backend found as a result of Vince's fuzzing tool. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2012646fb46f..12e6ccb88691 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -322,6 +322,9 @@ validate_event(struct pmu_hw_events *hw_events, struct hw_perf_event fake_event = event->hw; struct pmu *leader_pmu = event->group_leader->pmu; + if (is_software_event(event)) + return 1; + if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) return 1; From 90dbc54a171ca7fd96c35d2858d30baf5d7c6376 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 14 Aug 2013 22:36:32 +0100 Subject: [PATCH 0635/1368] ARM: 7816/1: CONFIG_KUSER_HELPERS: fix help text commit ac124504ecf6b20a2457d873d0728a8b991a5b0c upstream. Commit f6f91b0d9fd9 ("ARM: allow kuser helpers to be removed from the vector page") introduced some help text for the CONFIG_KUSER_HELPERS option which is rather contradictory. Let's fix that, and improve it a little. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/Kconfig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 2950082a531c..08c9fe917d1f 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -789,15 +789,18 @@ config KUSER_HELPERS the CPU type fitted to the system. This permits binaries to be run on ARMv4 through to ARMv7 without modification. + See Documentation/arm/kernel_user_helpers.txt for details. + However, the fixed address nature of these helpers can be used by ROP (return orientated programming) authors when creating exploits. If all of the binaries and libraries which run on your platform are built specifically for your platform, and make no use of - these helpers, then you can turn this option off. However, - when such an binary or library is run, it will receive a SIGILL - signal, which will terminate the program. + these helpers, then you can turn this option off to hinder + such exploits. However, in that case, if a binary or library + relying on those helpers is run, it will receive a SIGILL signal, + which will terminate the program. Say N here only if you are absolutely certain that you do not need these helpers; otherwise, the safe option is to say Y. From 6f123e952c4144ddda16ebdac765677ebdf6f37c Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 23 Aug 2013 12:37:17 +0100 Subject: [PATCH 0636/1368] staging: comedi: bug-fix NULL pointer dereference on failed attach commit 3955dfa8216f712bc204a5ad2f4e51efff252fde upstream. Commit dcd7b8bd63cb81c5b973bf86510ca3c80bbbd162 ("staging: comedi: put module _after_ detach" by myself) reversed a couple of calls in `comedi_device_attach()` when recovering from an error returned by the low-level driver's 'attach' handler. Unfortunately, that introduced a NULL pointer dereference bug as `dev->driver` is NULL after the call to `comedi_device_detach()`. We still have a pointer to the low-level comedi driver structure in the `driv` variable, so use that instead. Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c index 06d190f8fd34..4a2b04277304 100644 --- a/drivers/staging/comedi/drivers.c +++ b/drivers/staging/comedi/drivers.c @@ -464,7 +464,7 @@ int comedi_device_attach(struct comedi_device *dev, struct comedi_devconfig *it) ret = comedi_device_postconfig(dev); if (ret < 0) { comedi_device_detach(dev); - module_put(dev->driver->module); + module_put(driv->module); } /* On success, the driver module count has been incremented. */ return ret; From 6a33cbd049c293b0c274661d63c8918eb8557086 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 Aug 2013 15:57:32 -0400 Subject: [PATCH 0637/1368] drm/radeon/r7xx: fix copy paste typo in golden register setup commit 022374c02e357ac82e98dd2689fb2efe05723d69 upstream. Uses the wrong array size for some asics which can lead to garbage getting written to registers. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=60674 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/rv770.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index bcc68ec204ad..f5e92cfcc140 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -744,10 +744,10 @@ static void rv770_init_golden_registers(struct radeon_device *rdev) (const u32)ARRAY_SIZE(r7xx_golden_dyn_gpr_registers)); radeon_program_register_sequence(rdev, rv730_golden_registers, - (const u32)ARRAY_SIZE(rv770_golden_registers)); + (const u32)ARRAY_SIZE(rv730_golden_registers)); radeon_program_register_sequence(rdev, rv730_mgcg_init, - (const u32)ARRAY_SIZE(rv770_mgcg_init)); + (const u32)ARRAY_SIZE(rv730_mgcg_init)); break; case CHIP_RV710: radeon_program_register_sequence(rdev, @@ -758,18 +758,18 @@ static void rv770_init_golden_registers(struct radeon_device *rdev) (const u32)ARRAY_SIZE(r7xx_golden_dyn_gpr_registers)); radeon_program_register_sequence(rdev, rv710_golden_registers, - (const u32)ARRAY_SIZE(rv770_golden_registers)); + (const u32)ARRAY_SIZE(rv710_golden_registers)); radeon_program_register_sequence(rdev, rv710_mgcg_init, - (const u32)ARRAY_SIZE(rv770_mgcg_init)); + (const u32)ARRAY_SIZE(rv710_mgcg_init)); break; case CHIP_RV740: radeon_program_register_sequence(rdev, rv740_golden_registers, - (const u32)ARRAY_SIZE(rv770_golden_registers)); + (const u32)ARRAY_SIZE(rv740_golden_registers)); radeon_program_register_sequence(rdev, rv740_mgcg_init, - (const u32)ARRAY_SIZE(rv770_mgcg_init)); + (const u32)ARRAY_SIZE(rv740_mgcg_init)); break; default: break; From 21779249f05d0ebdd2eaf464673d224f1883b57a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 11 Aug 2013 21:27:56 +0200 Subject: [PATCH 0638/1368] drm/radeon: fix UVD message buffer validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 112a6d0c071808f6d48354fc8834a574e5dcefc0 upstream. When the message buffer is currently moving block until it is idle again. Signed-off-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_uvd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 97002a08211f..f3ccf6d4addb 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -359,6 +359,14 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, return -EINVAL; } + if (bo->tbo.sync_obj) { + r = radeon_fence_wait(bo->tbo.sync_obj, false); + if (r) { + DRM_ERROR("Failed waiting for UVD message (%d)!\n", r); + return r; + } + } + r = radeon_bo_kmap(bo, &ptr); if (r) return r; From 749e7bffd3f71d4fadc0fb54eebce12a28d045e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 15 Aug 2013 18:55:22 +0200 Subject: [PATCH 0639/1368] drm/radeon: fix WREG32_OR macro setting bits in a register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d43a93c8d9bc4e0dc0293b6458c077c3c797594f upstream. This bug (introduced in 3.10) in WREG32_OR made commit d3418eacad403033e95e49dc14afa37c2112c134 "drm/radeon/evergreen: setup HDMI before enabling it" cause a regression. Sometimes audio over HDMI wasn't working, sometimes display was corrupted. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=60687 https://bugzilla.kernel.org/show_bug.cgi?id=60709 https://bugs.freedesktop.org/show_bug.cgi?id=67767 Signed-off-by: Rafał Miłecki Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index bdd9d56812af..d4ff48ce1d8b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1764,7 +1764,7 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); WREG32(reg, tmp_); \ } while (0) #define WREG32_AND(reg, and) WREG32_P(reg, 0, and) -#define WREG32_OR(reg, or) WREG32_P(reg, or, ~or) +#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or)) #define WREG32_PLL_P(reg, val, mask) \ do { \ uint32_t tmp_ = RREG32_PLL(reg); \ From ce04434c6e596b264f92f39d3228883c60262b69 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 6 Aug 2013 19:01:14 +0100 Subject: [PATCH 0640/1368] drm/i915: Invalidate TLBs for the rings after a reset commit 884020bf3d2a3787a1cc6df902e98e0eec60330b upstream. After any "soft gfx reset" we must manually invalidate the TLBs associated with each ring. Empirically, it seems that a suspend/resume or D3-D0 cycle count as a "soft reset". The symptom is that the hardware would fail to note the new address for its status page, and so it would continue to write the shadow registers and breadcrumbs into the old physical address (now used by something completely different, scary). Whereas the driver would read the new status page and never see any progress, it would appear that the GPU hung immediately upon resume. Based on a patch by naresh kumar kachhi Reported-by: Thiago Macieira Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64725 Signed-off-by: Chris Wilson Tested-by: Thiago Macieira Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 80b0a6626a23..01f6c2cf471f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -617,6 +617,8 @@ will not assert AGPBUSY# and will only be delivered when out of C3. */ #define INSTPM_FORCE_ORDERING (1<<7) /* GEN6+ */ +#define INSTPM_TLB_INVALIDATE (1<<9) +#define INSTPM_SYNC_FLUSH (1<<5) #define ACTHD 0x020c8 #define FW_BLC 0x020d8 #define FW_BLC2 0x020dc diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1424f2042838..48fe23e8d180 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -907,6 +907,18 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) I915_WRITE(mmio, (u32)ring->status_page.gfx_addr); POSTING_READ(mmio); + + /* Flush the TLB for this page */ + if (INTEL_INFO(dev)->gen >= 6) { + u32 reg = RING_INSTPM(ring->mmio_base); + I915_WRITE(reg, + _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | + INSTPM_SYNC_FLUSH)); + if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0, + 1000)) + DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", + ring->name); + } } static int From abcdf87c25278b38972e41ca13fffc42b9d1f6f8 Mon Sep 17 00:00:00 2001 From: Wladislav Wiebe Date: Mon, 12 Aug 2013 13:06:53 +0200 Subject: [PATCH 0641/1368] of: fdt: fix memory initialization for expanded DT commit 9e40127526e857fa3f29d51e83277204fbdfc6ba upstream. Already existing property flags are filled wrong for properties created from initial FDT. This could cause problems if this DYNAMIC device-tree functions are used later, i.e. properties are attached/detached/replaced. Simply dumping flags from the running system show, that some initial static (not allocated via kzmalloc()) nodes are marked as dynamic. I putted some debug extensions to property_proc_show(..) : .. + if (OF_IS_DYNAMIC(pp)) + pr_err("DEBUG: xxx : OF_IS_DYNAMIC\n"); + if (OF_IS_DETACHED(pp)) + pr_err("DEBUG: xxx : OF_IS_DETACHED\n"); when you operate on the nodes (e.g.: ~$ cat /proc/device-tree/*some_node*) you will see that those flags are filled wrong, basically in most cases it will dump a DYNAMIC or DETACHED status, which is in not true. (BTW. this OF_IS_DETACHED is a own define for debug purposes which which just make a test_bit(OF_DETACHED, &x->_flags) If nodes are dynamic kernel is allowed to kfree() them. But it will crash attempting to do so on the nodes from FDT -- they are not allocated via kzmalloc(). Signed-off-by: Wladislav Wiebe Acked-by: Alexander Sverdlin Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 808be06bb67e..118773751ea4 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -392,6 +392,8 @@ static void __unflatten_device_tree(struct boot_param_header *blob, mem = (unsigned long) dt_alloc(size + 4, __alignof__(struct device_node)); + memset((void *)mem, 0, size); + ((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef); pr_debug(" unflattening %lx...\n", mem); From b0e01ab2f3f31384dd9e1c660e0c9831e717e73c Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Thu, 22 Aug 2013 16:35:44 -0700 Subject: [PATCH 0642/1368] nilfs2: remove double bio_put() in nilfs_end_bio_write() for BIO_EOPNOTSUPP error commit 2df37a19c686c2d7c4e9b4ce1505b5141e3e5552 upstream. Remove double call of bio_put() in nilfs_end_bio_write() for the case of BIO_EOPNOTSUPP error detection. The issue was found by Dan Carpenter and he suggests first version of the fix too. Signed-off-by: Vyacheslav Dubeyko Reported-by: Dan Carpenter Acked-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segbuf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index dc9a913784ab..5bacf46dc4b3 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -345,8 +345,7 @@ static void nilfs_end_bio_write(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - bio_put(bio); - /* to be detected by submit_seg_bio() */ + /* to be detected by nilfs_segbuf_submit_bio() */ } if (!uptodate) From 020269d2c629a12856a5413528328fbd4ff71ca9 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Thu, 22 Aug 2013 16:35:45 -0700 Subject: [PATCH 0643/1368] nilfs2: fix issue with counting number of bio requests for BIO_EOPNOTSUPP error detection commit 4bf93b50fd04118ac7f33a3c2b8a0a1f9fa80bc9 upstream. Fix the issue with improper counting number of flying bio requests for BIO_EOPNOTSUPP error detection case. The sb_nbio must be incremented exactly the same number of times as complete() function was called (or will be called) because nilfs_segbuf_wait() will call wail_for_completion() for the number of times set to sb_nbio: do { wait_for_completion(&segbuf->sb_bio_event); } while (--segbuf->sb_nbio > 0); Two functions complete() and wait_for_completion() must be called the same number of times for the same sb_bio_event. Otherwise, wait_for_completion() will hang or leak. Signed-off-by: Vyacheslav Dubeyko Cc: Dan Carpenter Acked-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 5bacf46dc4b3..2d8be51f90dc 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -376,12 +376,12 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_private = segbuf; bio_get(bio); submit_bio(mode, bio); + segbuf->sb_nbio++; if (bio_flagged(bio, BIO_EOPNOTSUPP)) { bio_put(bio); err = -EOPNOTSUPP; goto failed; } - segbuf->sb_nbio++; bio_put(bio); wi->bio = NULL; From 6797361e99b0103055b26c0a3d1079bf2402a3a0 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 22 Aug 2013 16:35:43 -0700 Subject: [PATCH 0644/1368] drivers/platform/olpc/olpc-ec.c: initialise earlier commit 93dbc1b3b506e16c1f6d5b5dcfe756a85cb1dc58 upstream. Being a low-level component, various drivers (e.g. olpc-battery) assume that it is ok to communicate with the OLPC Embedded Controller during probe. Therefore the OLPC EC driver must be initialised before other drivers try to use it. This was the case until it was recently moved out of arch/x86 and restructured around commits ac2504151f5a ("Platform: OLPC: turn EC driver into a platform_driver") and 85f90cf6ca56 ("x86: OLPC: switch over to using new EC driver on x86"). Use arch_initcall so that olpc-ec is readied earlier, matching the previous behaviour. Fixes a regression introduced in Linux-3.6 where various drivers such as olpc-battery and olpc-xo1-sci failed to load due to an inability to communicate with the EC. The user-visible effect was a lack of battery monitoring, missing ebook/lid switch input devices, etc. Signed-off-by: Daniel Drake Cc: Andres Salomon Cc: Paul Fox Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/platform/olpc/olpc-ec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c index 0f9f8596b300..f9119525f557 100644 --- a/drivers/platform/olpc/olpc-ec.c +++ b/drivers/platform/olpc/olpc-ec.c @@ -330,7 +330,7 @@ static int __init olpc_ec_init_module(void) return platform_driver_register(&olpc_ec_plat_driver); } -module_init(olpc_ec_init_module); +arch_initcall(olpc_ec_init_module); MODULE_AUTHOR("Andres Salomon "); MODULE_LICENSE("GPL"); From 2356788f31dee0b3c03b3ef594fd113a861b29c1 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Wed, 21 Aug 2013 17:43:31 +0200 Subject: [PATCH 0645/1368] usb: phy: fix build breakage commit 52d5b9aba1f5790ca3231c262979c2c3e26dd99b upstream. Commit 94ae9843 (usb: phy: rename all phy drivers to phy-$name-usb.c) renamed drivers/usb/phy/otg_fsm.h to drivers/usb/phy/phy-fsm-usb.h but changed drivers/usb/phy/phy-fsm-usb.c to include not existing "phy-otg-fsm.h" instead of new "phy-fsm-usb.h". This breaks building: ... drivers/usb/phy/phy-fsm-usb.c:32:25: fatal error: phy-otg-fsm.h: No such file or directory compilation terminated. make[3]: *** [drivers/usb/phy/phy-fsm-usb.o] Error 1 This commit also missed to modify drivers/usb/phy/phy-fsl-usb.h to include new "phy-fsm-usb.h" instead of "otg_fsm.h" resulting in another build breakage: ... In file included from drivers/usb/phy/phy-fsl-usb.c:46:0: drivers/usb/phy/phy-fsl-usb.h:18:21: fatal error: otg_fsm.h: No such file or directory compilation terminated. make[3]: *** [drivers/usb/phy/phy-fsl-usb.o] Error 1 Fix both issues. Signed-off-by: Anatolij Gustschin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-fsl-usb.h | 2 +- drivers/usb/phy/phy-fsm-usb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/phy/phy-fsl-usb.h b/drivers/usb/phy/phy-fsl-usb.h index ca266280895d..e1859b8ef567 100644 --- a/drivers/usb/phy/phy-fsl-usb.h +++ b/drivers/usb/phy/phy-fsl-usb.h @@ -15,7 +15,7 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include "otg_fsm.h" +#include "phy-fsm-usb.h" #include #include diff --git a/drivers/usb/phy/phy-fsm-usb.c b/drivers/usb/phy/phy-fsm-usb.c index c520b3548e7c..7f4596606e18 100644 --- a/drivers/usb/phy/phy-fsm-usb.c +++ b/drivers/usb/phy/phy-fsm-usb.c @@ -29,7 +29,7 @@ #include #include -#include "phy-otg-fsm.h" +#include "phy-fsm-usb.h" /* Change USB protocol when there is a protocol change */ static int otg_set_protocol(struct otg_fsm *fsm, int protocol) From 2650a684ca1c26831ecc552c269ffd7a8def3694 Mon Sep 17 00:00:00 2001 From: Anthony Foiani Date: Mon, 19 Aug 2013 19:20:30 -0600 Subject: [PATCH 0646/1368] sata_fsl: save irqs while coalescing commit 99bbdfa6bdcb4bdf5be914a48e9b46941bf30819 upstream. Before this patch, I was seeing the following lockdep splat on my MPC8315 (PPC32) target: [ 9.086051] ================================= [ 9.090393] [ INFO: inconsistent lock state ] [ 9.094744] 3.9.7-ajf-gc39503d #1 Not tainted [ 9.099087] --------------------------------- [ 9.103432] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 9.109431] scsi_eh_1/39 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 9.114642] (&(&host->lock)->rlock){?.+...}, at: [] sata_fsl_interrupt+0x50/0x250 [ 9.123137] {HARDIRQ-ON-W} state was registered at: [ 9.128004] [] lock_acquire+0x90/0xf4 [ 9.132737] [] _raw_spin_lock+0x34/0x4c [ 9.137645] [] fsl_sata_set_irq_coalescing+0x68/0x100 [ 9.143750] [] sata_fsl_init_controller+0xa8/0xc0 [ 9.149505] [] sata_fsl_probe+0x17c/0x2e8 [ 9.154568] [] driver_probe_device+0x90/0x248 [ 9.159987] [] __driver_attach+0xc4/0xc8 [ 9.164964] [] bus_for_each_dev+0x5c/0xa8 [ 9.170028] [] bus_add_driver+0x100/0x26c [ 9.175091] [] driver_register+0x88/0x198 [ 9.180155] [] do_one_initcall+0x58/0x1b4 [ 9.185226] [] kernel_init_freeable+0x118/0x1c0 [ 9.190823] [] kernel_init+0x18/0x108 [ 9.195542] [] ret_from_kernel_thread+0x64/0x6c [ 9.201142] irq event stamp: 160 [ 9.204366] hardirqs last enabled at (159): [] _raw_spin_unlock_irq+0x30/0x50 [ 9.212469] hardirqs last disabled at (160): [] reenable_mmu+0x30/0x88 [ 9.219867] softirqs last enabled at (144): [] __do_softirq+0x168/0x218 [ 9.227435] softirqs last disabled at (137): [] irq_exit+0xa8/0xb4 [ 9.234481] [ 9.234481] other info that might help us debug this: [ 9.240995] Possible unsafe locking scenario: [ 9.240995] [ 9.246898] CPU0 [ 9.249337] ---- [ 9.251776] lock(&(&host->lock)->rlock); [ 9.255878] [ 9.258492] lock(&(&host->lock)->rlock); [ 9.262765] [ 9.262765] *** DEADLOCK *** [ 9.262765] [ 9.268684] no locks held by scsi_eh_1/39. [ 9.272767] [ 9.272767] stack backtrace: [ 9.277117] Call Trace: [ 9.279589] [cfff9da0] [c0008504] show_stack+0x48/0x150 (unreliable) [ 9.285972] [cfff9de0] [c0447d5c] print_usage_bug.part.35+0x268/0x27c [ 9.292425] [cfff9e10] [c006ace4] mark_lock+0x2ac/0x658 [ 9.297660] [cfff9e40] [c006b7e4] __lock_acquire+0x754/0x1840 [ 9.303414] [cfff9ee0] [c006cdb8] lock_acquire+0x90/0xf4 [ 9.308745] [cfff9f20] [c043ef04] _raw_spin_lock+0x34/0x4c [ 9.314250] [cfff9f30] [c02f4168] sata_fsl_interrupt+0x50/0x250 [ 9.320187] [cfff9f70] [c0079ff0] handle_irq_event_percpu+0x90/0x254 [ 9.326547] [cfff9fc0] [c007a1fc] handle_irq_event+0x48/0x78 [ 9.332220] [cfff9fe0] [c007c95c] handle_level_irq+0x9c/0x104 [ 9.337981] [cfff9ff0] [c000d978] call_handle_irq+0x18/0x28 [ 9.343568] [cc7139f0] [c000608c] do_IRQ+0xf0/0x1a8 [ 9.348464] [cc713a20] [c000fc8c] ret_from_except+0x0/0x14 [ 9.353983] --- Exception: 501 at _raw_spin_unlock_irq+0x40/0x50 [ 9.353983] LR = _raw_spin_unlock_irq+0x30/0x50 [ 9.364839] [cc713af0] [c043db10] wait_for_common+0xac/0x188 [ 9.370513] [cc713b30] [c02ddee4] ata_exec_internal_sg+0x2b0/0x4f0 [ 9.376699] [cc713be0] [c02de18c] ata_exec_internal+0x68/0xa8 [ 9.382454] [cc713c20] [c02de4b8] ata_dev_read_id+0x158/0x594 [ 9.388205] [cc713ca0] [c02ec244] ata_eh_recover+0xd88/0x13d0 [ 9.393962] [cc713d20] [c02f2520] sata_pmp_error_handler+0xc0/0x8ac [ 9.400234] [cc713dd0] [c02ecdc8] ata_scsi_port_error_handler+0x464/0x5e8 [ 9.407023] [cc713e10] [c02ecfd0] ata_scsi_error+0x84/0xb8 [ 9.412528] [cc713e40] [c02c4974] scsi_error_handler+0xd8/0x47c [ 9.418457] [cc713eb0] [c004737c] kthread+0xa8/0xac [ 9.423355] [cc713f40] [c000f6b8] ret_from_kernel_thread+0x64/0x6c This fix was suggested by Bhushan Bharat , and was discussed in email at: http://linuxppc.10917.n7.nabble.com/MPC8315-reboot-failure-lockdep-splat-possibly-related-tp75162.html Same patch successfully tested with 3.9.7. linux-next compiled but not tested on hardware. This patch is based off linux-next tag next-20130819 (which is commit 66a01bae29d11916c09f9f5a937cafe7d402e4a5 ) Signed-off-by: Anthony Foiani Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/sata_fsl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index d40e403e82dd..8401061b4040 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -293,6 +293,7 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host, { struct sata_fsl_host_priv *host_priv = host->private_data; void __iomem *hcr_base = host_priv->hcr_base; + unsigned long flags; if (count > ICC_MAX_INT_COUNT_THRESHOLD) count = ICC_MAX_INT_COUNT_THRESHOLD; @@ -305,12 +306,12 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host, (count > ICC_MIN_INT_COUNT_THRESHOLD)) ticks = ICC_SAFE_INT_TICKS; - spin_lock(&host->lock); + spin_lock_irqsave(&host->lock, flags); iowrite32((count << 24 | ticks), hcr_base + ICC); intr_coalescing_count = count; intr_coalescing_ticks = ticks; - spin_unlock(&host->lock); + spin_unlock_irqrestore(&host->lock, flags); DPRINTK("interrupt coalescing, count = 0x%x, ticks = %x\n", intr_coalescing_count, intr_coalescing_ticks); From 9e50b0dd45dd3a26edf83b3f78117a6765fbe8a2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Aug 2013 12:52:31 +0300 Subject: [PATCH 0647/1368] Hostap: copying wrong data prism2_ioctl_giwaplist() commit 909bd5926d474e275599094acad986af79671ac9 upstream. We want the data stored in "addr" and "qual", but the extra ampersands mean we are copying stack data instead. Signed-off-by: Dan Carpenter Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/hostap/hostap_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c index ac074731335a..e5090309824e 100644 --- a/drivers/net/wireless/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/hostap/hostap_ioctl.c @@ -523,9 +523,9 @@ static int prism2_ioctl_giwaplist(struct net_device *dev, data->length = prism2_ap_get_sta_qual(local, addr, qual, IW_MAX_AP, 1); - memcpy(extra, &addr, sizeof(struct sockaddr) * data->length); + memcpy(extra, addr, sizeof(struct sockaddr) * data->length); data->flags = 1; /* has quality information */ - memcpy(extra + sizeof(struct sockaddr) * data->length, &qual, + memcpy(extra + sizeof(struct sockaddr) * data->length, qual, sizeof(struct iw_quality) * data->length); kfree(addr); From 323d5a7b5762c2fac8d1a5bb6d24d6d4e0efbfb3 Mon Sep 17 00:00:00 2001 From: Terry Suereth Date: Sat, 17 Aug 2013 15:53:12 -0400 Subject: [PATCH 0648/1368] libata: apply behavioral quirks to sil3826 PMP commit 8ffff94d20b7eb446e848e0046107d51b17a20a8 upstream. Fixing support for the Silicon Image 3826 port multiplier, by applying to it the same quirks applied to the Silicon Image 3726. Specifically fixes the repeated timeout/reset process which previously afflicted the 3726, as described from line 290. Slightly based on notes from: https://bugzilla.redhat.com/show_bug.cgi?id=890237 Signed-off-by: Terry Suereth Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-pmp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 1c41722bb7e2..20fd337a5731 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -289,24 +289,24 @@ static int sata_pmp_configure(struct ata_device *dev, int print_info) /* Disable sending Early R_OK. * With "cached read" HDD testing and multiple ports busy on a SATA - * host controller, 3726 PMP will very rarely drop a deferred + * host controller, 3x26 PMP will very rarely drop a deferred * R_OK that was intended for the host. Symptom will be all * 5 drives under test will timeout, get reset, and recover. */ - if (vendor == 0x1095 && devid == 0x3726) { + if (vendor == 0x1095 && (devid == 0x3726 || devid == 0x3826)) { u32 reg; err_mask = sata_pmp_read(&ap->link, PMP_GSCR_SII_POL, ®); if (err_mask) { rc = -EIO; - reason = "failed to read Sil3726 Private Register"; + reason = "failed to read Sil3x26 Private Register"; goto fail; } reg &= ~0x1; err_mask = sata_pmp_write(&ap->link, PMP_GSCR_SII_POL, reg); if (err_mask) { rc = -EIO; - reason = "failed to write Sil3726 Private Register"; + reason = "failed to write Sil3x26 Private Register"; goto fail; } } @@ -383,8 +383,8 @@ static void sata_pmp_quirks(struct ata_port *ap) u16 devid = sata_pmp_gscr_devid(gscr); struct ata_link *link; - if (vendor == 0x1095 && devid == 0x3726) { - /* sil3726 quirks */ + if (vendor == 0x1095 && (devid == 0x3726 || devid == 0x3826)) { + /* sil3x26 quirks */ ata_for_each_link(link, ap, EDGE) { /* link reports offline after LPM */ link->flags |= ATA_LFLAG_NO_LPM; From 753fb619d10ed87c9ab5b75e83dd464244986f82 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 26 Jul 2013 15:29:09 +0200 Subject: [PATCH 0649/1368] iwlwifi: dvm: fix calling ieee80211_chswitch_done() with NULL commit 9186a1fd9ed190739423db84bc344d258ef3e3d7 upstream. If channel switch is pending and we remove interface we can crash like showed below due to passing NULL vif to mac80211: BUG: unable to handle kernel paging request at fffffffffffff8cc IP: [] strnlen+0xd/0x40 Call Trace: [] string.isra.3+0x3e/0xd0 [] vsnprintf+0x219/0x640 [] vscnprintf+0x11/0x30 [] vprintk_emit+0x115/0x4f0 [] printk+0x61/0x63 [] ieee80211_chswitch_done+0xaf/0xd0 [mac80211] [] iwl_chswitch_done+0x34/0x40 [iwldvm] [] iwlagn_commit_rxon+0x2a3/0xdc0 [iwldvm] [] ? iwlagn_set_rxon_chain+0x180/0x2c0 [iwldvm] [] iwl_set_mode+0x36/0x40 [iwldvm] [] iwlagn_mac_remove_interface+0x8d/0x1b0 [iwldvm] [] ieee80211_do_stop+0x29d/0x7f0 [mac80211] This is because we nulify ctx->vif in iwlagn_mac_remove_interface() before calling some other functions that teardown interface. To fix just check ctx->vif on iwl_chswitch_done(). We should not call ieee80211_chswitch_done() as channel switch works were already canceled by mac80211 in ieee80211_do_stop() -> ieee80211_mgd_stop(). Resolve: https://bugzilla.redhat.com/show_bug.cgi?id=979581 Reported-by: Lukasz Jagiello Signed-off-by: Stanislaw Gruszka Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index cab23af0be9e..e04f3da1ccb3 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1059,7 +1059,10 @@ void iwl_chswitch_done(struct iwl_priv *priv, bool is_success) if (test_bit(STATUS_EXIT_PENDING, &priv->status)) return; - if (test_and_clear_bit(STATUS_CHANNEL_SWITCH_PENDING, &priv->status)) + if (!test_and_clear_bit(STATUS_CHANNEL_SWITCH_PENDING, &priv->status)) + return; + + if (ctx->vif) ieee80211_chswitch_done(ctx->vif, is_success); } From 67bdd3c0dc4dc00fc1e708fc9f326304d88cc8be Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 29 Jul 2013 23:05:18 +0300 Subject: [PATCH 0650/1368] iwlwifi: pcie: disable L1 Active after pci_enable_device commit eabc4ac5d7606a57ee2b7308cb7323ea8f60183b upstream. As Arjan pointed out, we mustn't do anything related to PCI configuration until the device is properly enabled with pci_enable_device(). Reported-by: Arjan van de Ven Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/trans.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 50ba0a468f94..aeb70e13137a 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1481,16 +1481,16 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->reg_lock); init_waitqueue_head(&trans_pcie->ucode_write_waitq); - /* W/A - seems to solve weird behavior. We need to remove this if we - * don't want to stay in L1 all the time. This wastes a lot of power */ - pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 | - PCIE_LINK_STATE_CLKPM); - if (pci_enable_device(pdev)) { err = -ENODEV; goto out_no_pci; } + /* W/A - seems to solve weird behavior. We need to remove this if we + * don't want to stay in L1 all the time. This wastes a lot of power */ + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 | + PCIE_LINK_STATE_CLKPM); + pci_set_master(pdev); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); From bda5d1efa09527e443a6990f81459779a313e24d Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Thu, 22 Aug 2013 17:45:36 +0200 Subject: [PATCH 0651/1368] SCSI: zfcp: fix lock imbalance by reworking request queue locking commit d79ff142624e1be080ad8d09101f7004d79c36e1 upstream. This patch adds wait_event_interruptible_lock_irq_timeout(), which is a straight-forward descendant of wait_event_interruptible_timeout() and wait_event_interruptible_lock_irq(). The zfcp driver used to call wait_event_interruptible_timeout() in combination with some intricate and error-prone locking. Using wait_event_interruptible_lock_irq_timeout() as a replacement nicely cleans up that locking. This rework removes a situation that resulted in a locking imbalance in zfcp_qdio_sbal_get(): BUG: workqueue leaked lock or atomic: events/1/0xffffff00/10 last function: zfcp_fc_wka_port_offline+0x0/0xa0 [zfcp] It was introduced by commit c2af7545aaff3495d9bf9a7608c52f0af86fb194 "[SCSI] zfcp: Do not wait for SBALs on stopped queue", which had a new code path related to ZFCP_STATUS_ADAPTER_QDIOUP that took an early exit without a required lock being held. The problem occured when a special, non-SCSI I/O request was being submitted in process context, when the adapter's queues had been torn down. In this case the bug surfaced when the Fibre Channel port connection for a well-known address was closed during a concurrent adapter shut-down procedure, which is a rare constellation. This patch also fixes these warnings from the sparse tool (make C=1): drivers/s390/scsi/zfcp_qdio.c:224:12: warning: context imbalance in 'zfcp_qdio_sbal_check' - wrong count at exit drivers/s390/scsi/zfcp_qdio.c:244:5: warning: context imbalance in 'zfcp_qdio_sbal_get' - unexpected unlock Last but not least, we get rid of that crappy lock-unlock-lock sequence at the beginning of the critical section. It is okay to call zfcp_erp_adapter_reopen() with req_q_lock held. Reported-by: Mikulas Patocka Reported-by: Heiko Carstens Signed-off-by: Martin Peschke Signed-off-by: Steffen Maier Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_qdio.c | 8 ++--- include/linux/wait.h | 57 +++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 665e3cfaaf85..de0598eaacd2 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -224,11 +224,9 @@ int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req, static int zfcp_qdio_sbal_check(struct zfcp_qdio *qdio) { - spin_lock_irq(&qdio->req_q_lock); if (atomic_read(&qdio->req_q_free) || !(atomic_read(&qdio->adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)) return 1; - spin_unlock_irq(&qdio->req_q_lock); return 0; } @@ -246,9 +244,8 @@ int zfcp_qdio_sbal_get(struct zfcp_qdio *qdio) { long ret; - spin_unlock_irq(&qdio->req_q_lock); - ret = wait_event_interruptible_timeout(qdio->req_q_wq, - zfcp_qdio_sbal_check(qdio), 5 * HZ); + ret = wait_event_interruptible_lock_irq_timeout(qdio->req_q_wq, + zfcp_qdio_sbal_check(qdio), qdio->req_q_lock, 5 * HZ); if (!(atomic_read(&qdio->adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)) return -EIO; @@ -262,7 +259,6 @@ int zfcp_qdio_sbal_get(struct zfcp_qdio *qdio) zfcp_erp_adapter_reopen(qdio->adapter, 0, "qdsbg_1"); } - spin_lock_irq(&qdio->req_q_lock); return -EIO; } diff --git a/include/linux/wait.h b/include/linux/wait.h index 1133695eb067..c8e576022234 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -805,6 +805,63 @@ do { \ __ret; \ }) +#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \ + lock, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + ret = -ERESTARTSYS; \ + break; \ + } \ + spin_unlock_irq(&lock); \ + ret = schedule_timeout(ret); \ + spin_lock_irq(&lock); \ + if (!ret) \ + break; \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses. + * The condition is checked under the lock. This is expected + * to be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + * + * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it + * was interrupted by a signal, and the remaining jiffies otherwise + * if the condition evaluated to true before the timeout elapsed. + */ +#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \ + timeout) \ +({ \ + int __ret = timeout; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq_timeout( \ + wq, condition, lock, __ret); \ + __ret; \ +}) + /* * These are the old interfaces to sleep waiting for an event. From e1a289ee6734dd5f9a81a260f3027ad8010f530a Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Thu, 22 Aug 2013 17:45:37 +0200 Subject: [PATCH 0652/1368] SCSI: zfcp: fix schedule-inside-lock in scsi_device list loops commit 924dd584b198a58aa7cb3efefd8a03326550ce8f upstream. BUG: sleeping function called from invalid context at kernel/workqueue.c:2752 in_atomic(): 1, irqs_disabled(): 1, pid: 360, name: zfcperp0.0.1700 CPU: 1 Not tainted 3.9.3+ #69 Process zfcperp0.0.1700 (pid: 360, task: 0000000075b7e080, ksp: 000000007476bc30) Call Trace: ([<00000000001165de>] show_trace+0x106/0x154) [<00000000001166a0>] show_stack+0x74/0xf4 [<00000000006ff646>] dump_stack+0xc6/0xd4 [<000000000017f3a0>] __might_sleep+0x128/0x148 [<000000000015ece8>] flush_work+0x54/0x1f8 [<00000000001630de>] __cancel_work_timer+0xc6/0x128 [<00000000005067ac>] scsi_device_dev_release_usercontext+0x164/0x23c [<0000000000161816>] execute_in_process_context+0x96/0xa8 [<00000000004d33d8>] device_release+0x60/0xc0 [<000000000048af48>] kobject_release+0xa8/0x1c4 [<00000000004f4bf2>] __scsi_iterate_devices+0xfa/0x130 [<000003ff801b307a>] zfcp_erp_strategy+0x4da/0x1014 [zfcp] [<000003ff801b3caa>] zfcp_erp_thread+0xf6/0x2b0 [zfcp] [<000000000016b75a>] kthread+0xf2/0xfc [<000000000070c9de>] kernel_thread_starter+0x6/0xc [<000000000070c9d8>] kernel_thread_starter+0x0/0xc Apparently, the ref_count for some scsi_device drops down to zero, triggering device removal through execute_in_process_context(), while the lldd error recovery thread iterates through a scsi device list. Unfortunately, execute_in_process_context() decides to immediately execute that device removal function, instead of scheduling asynchronous execution, since it detects process context and thinks it is safe to do so. But almost all calls to shost_for_each_device() in our lldd are inside spin_lock_irq, even in thread context. Obviously, schedule() inside spin_lock_irq sections is a bad idea. Change the lldd to use the proper iterator function, __shost_for_each_device(), in combination with required locking. Occurences that need to be changed include all calls in zfcp_erp.c, since those might be executed in zfcp error recovery thread context with a lock held. Other occurences of shost_for_each_device() in zfcp_fsf.c do not need to be changed (no process context, no surrounding locking). The problem was introduced in Linux 2.6.37 by commit b62a8d9b45b971a67a0f8413338c230e3117dff5 "[SCSI] zfcp: Use SCSI device data zfcp_scsi_dev instead of zfcp_unit". Reported-by: Christian Borntraeger Signed-off-by: Martin Peschke Signed-off-by: Steffen Maier Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_erp.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 4133ab6e20f1..8e8f3533d2a1 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -102,10 +102,13 @@ static void zfcp_erp_action_dismiss_port(struct zfcp_port *port) if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE) zfcp_erp_action_dismiss(&port->erp_action); - else - shost_for_each_device(sdev, port->adapter->scsi_host) + else { + spin_lock(port->adapter->scsi_host->host_lock); + __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) zfcp_erp_action_dismiss_lun(sdev); + spin_unlock(port->adapter->scsi_host->host_lock); + } } static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter) @@ -592,9 +595,11 @@ static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear, { struct scsi_device *sdev; - shost_for_each_device(sdev, port->adapter->scsi_host) + spin_lock(port->adapter->scsi_host->host_lock); + __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) _zfcp_erp_lun_reopen(sdev, clear, id, 0); + spin_unlock(port->adapter->scsi_host->host_lock); } static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) @@ -1435,8 +1440,10 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask) atomic_set_mask(common_mask, &port->status); read_unlock_irqrestore(&adapter->port_list_lock, flags); - shost_for_each_device(sdev, adapter->scsi_host) + spin_lock_irqsave(adapter->scsi_host->host_lock, flags); + __shost_for_each_device(sdev, adapter->scsi_host) atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status); + spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags); } /** @@ -1470,11 +1477,13 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask) } read_unlock_irqrestore(&adapter->port_list_lock, flags); - shost_for_each_device(sdev, adapter->scsi_host) { + spin_lock_irqsave(adapter->scsi_host->host_lock, flags); + __shost_for_each_device(sdev, adapter->scsi_host) { atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status); if (clear_counter) atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); } + spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags); } /** @@ -1488,16 +1497,19 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask) { struct scsi_device *sdev; u32 common_mask = mask & ZFCP_COMMON_FLAGS; + unsigned long flags; atomic_set_mask(mask, &port->status); if (!common_mask) return; - shost_for_each_device(sdev, port->adapter->scsi_host) + spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags); + __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status); + spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags); } /** @@ -1512,6 +1524,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask) struct scsi_device *sdev; u32 common_mask = mask & ZFCP_COMMON_FLAGS; u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED; + unsigned long flags; atomic_clear_mask(mask, &port->status); @@ -1521,13 +1534,15 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask) if (clear_counter) atomic_set(&port->erp_counter, 0); - shost_for_each_device(sdev, port->adapter->scsi_host) + spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags); + __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) { atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status); if (clear_counter) atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); } + spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags); } /** From a271397a97869112f0b77ef7e162f64cf3c55113 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 8 Aug 2013 17:47:34 +1000 Subject: [PATCH 0653/1368] SCSI: lpfc: Don't force CONFIG_GENERIC_CSUM on commit f5944daa0a72316077435c18a6571e73ed338332 upstream. We want ppc64 to be able to select between optimised assembly checksum routines in big endian and the generic lib/checksum.c routines in little endian. The lpfc driver is forcing CONFIG_GENERIC_CSUM on which means we are unable to make the decision to enable it in the arch Kconfig. If the option exists it is always forced on. This got introduced in 3.10 via commit 6a7252fdb0c3 ([SCSI] lpfc: fix up Kconfig dependencies). I spoke to Randy about it and the original issue was with CRC_T10DIF not being defined. As such, remove the select of CONFIG_GENERIC_CSUM. Signed-off-by: Anton Blanchard Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 86af29f53bbe..1348fa47d127 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1353,7 +1353,6 @@ config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" depends on PCI && SCSI select SCSI_FC_ATTRS - select GENERIC_CSUM select CRC_T10DIF help This lpfc driver supports the Emulex LightPulse From 32b8d5f874e1c6ca88ec4f2d10cd885b3d70cf17 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 5 Aug 2013 17:55:01 -0700 Subject: [PATCH 0654/1368] SCSI: sg: Fix user memory corruption when SG_IO is interrupted by a signal commit 35dc248383bbab0a7203fca4d722875bc81ef091 upstream. There is a nasty bug in the SCSI SG_IO ioctl that in some circumstances leads to one process writing data into the address space of some other random unrelated process if the ioctl is interrupted by a signal. What happens is the following: - A process issues an SG_IO ioctl with direction DXFER_FROM_DEV (ie the underlying SCSI command will transfer data from the SCSI device to the buffer provided in the ioctl) - Before the command finishes, a signal is sent to the process waiting in the ioctl. This will end up waking up the sg_ioctl() code: result = wait_event_interruptible(sfp->read_wait, (srp_done(sfp, srp) || sdp->detached)); but neither srp_done() nor sdp->detached is true, so we end up just setting srp->orphan and returning to userspace: srp->orphan = 1; write_unlock_irq(&sfp->rq_list_lock); return result; /* -ERESTARTSYS because signal hit process */ At this point the original process is done with the ioctl and blithely goes ahead handling the signal, reissuing the ioctl, etc. - Eventually, the SCSI command issued by the first ioctl finishes and ends up in sg_rq_end_io(). At the end of that function, we run through: write_lock_irqsave(&sfp->rq_list_lock, iflags); if (unlikely(srp->orphan)) { if (sfp->keep_orphan) srp->sg_io_owned = 0; else done = 0; } srp->done = done; write_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (likely(done)) { /* Now wake up any sg_read() that is waiting for this * packet. */ wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); kref_put(&sfp->f_ref, sg_remove_sfp); } else { INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext); schedule_work(&srp->ew.work); } Since srp->orphan *is* set, we set done to 0 (assuming the userspace app has not set keep_orphan via an SG_SET_KEEP_ORPHAN ioctl), and therefore we end up scheduling sg_rq_end_io_usercontext() to run in a workqueue. - In workqueue context we go through sg_rq_end_io_usercontext() -> sg_finish_rem_req() -> blk_rq_unmap_user() -> ... -> bio_uncopy_user() -> __bio_copy_iov() -> copy_to_user(). The key point here is that we are doing copy_to_user() on a workqueue -- that is, we're on a kernel thread with current->mm equal to whatever random previous user process was scheduled before this kernel thread. So we end up copying whatever data the SCSI command returned to the virtual address of the buffer passed into the original ioctl, but it's quite likely we do this copying into a different address space! As suggested by James Bottomley , add a check for current->mm (which is NULL if we're on a kernel thread without a real userspace address space) in bio_uncopy_user(), and skip the copy if we're on a kernel thread. There's no reason that I can think of for any caller of bio_uncopy_user() to want to do copying on a kernel thread with a random active userspace address space. Huge thanks to Costa Sapuntzakis for the original pointer to this bug in the sg code. Signed-off-by: Roland Dreier Tested-by: David Milburn Cc: Jens Axboe Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- fs/bio.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index 94bbc04dba77..c5eae7251490 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1045,12 +1045,22 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - int ret = 0; + struct bio_vec *bvec; + int ret = 0, i; - if (!bio_flagged(bio, BIO_NULL_MAPPED)) - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, bio_data_dir(bio) == READ, - 0, bmd->is_our_pages); + if (!bio_flagged(bio, BIO_NULL_MAPPED)) { + /* + * if we're in a workqueue, the request is orphaned, so + * don't copy into a random user address space, just free. + */ + if (current->mm) + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); + else if (bmd->is_our_pages) + bio_for_each_segment_all(bvec, bio, i) + __free_page(bvec->bv_page); + } bio_free_map_data(bmd); bio_put(bio); return ret; From bd4b69c1f7f58fc28fb636a3be006770edf58d68 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 22 Aug 2013 09:13:06 -0700 Subject: [PATCH 0655/1368] Revert "x86 get_unmapped_area(): use proper mmap base for bottom-up direction" commit 5ea80f76a56605a190a7ea16846c82aa63dbd0aa upstream. This reverts commit df54d6fa54275ce59660453e29d1228c2b45a826. The commit isn't necessarily wrong, but because it recalculates the random mmap_base every time, it seems to confuse user memory allocators that expect contiguous mmap allocations even when the mmap address isn't specified. In particular, the MATLAB Java runtime seems to be unhappy. See https://bugzilla.kernel.org/show_bug.cgi?id=60774 So we'll want to apply the random offset only once, and Radu has a patch for that. Revert this older commit in order to apply the other one. Reported-by: Jeff Shorey Cc: Radu Caragea Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/sys_x86_64.c | 2 +- arch/x86/mm/mmap.c | 2 +- include/linux/sched.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 48f8375e4c6b..dbded5aedb81 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = new_begin; } } else { - *begin = mmap_legacy_base(); + *begin = TASK_UNMAPPED_BASE; *end = TASK_SIZE; } } diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c1af32385ab1..845df6835f9f 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -98,7 +98,7 @@ static unsigned long mmap_base(void) * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 * does, but not when emulating X86_32 */ -unsigned long mmap_legacy_base(void) +static unsigned long mmap_legacy_base(void) { if (mmap_is_ia32()) return TASK_UNMAPPED_BASE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3aeb14b06242..178a8d909f14 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -314,7 +314,6 @@ struct nsproxy; struct user_namespace; #ifdef CONFIG_MMU -extern unsigned long mmap_legacy_base(void); extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, From ff1a668bc01c811ffcf19a3d43af58d7bd658986 Mon Sep 17 00:00:00 2001 From: Radu Caragea Date: Wed, 21 Aug 2013 20:55:59 +0300 Subject: [PATCH 0656/1368] x86 get_unmapped_area: Access mmap_legacy_base through mm_struct member commit 41aacc1eea645c99edbe8fbcf78a97dc9b862adc upstream. This is the updated version of df54d6fa5427 ("x86 get_unmapped_area(): use proper mmap base for bottom-up direction") that only randomizes the mmap base address once. Signed-off-by: Radu Caragea Reported-and-tested-by: Jeff Shorey Cc: Andrew Morton Cc: Michel Lespinasse Cc: Oleg Nesterov Cc: Rik van Riel Cc: Ingo Molnar Cc: Adrian Sendroiu Cc: Kamal Mostafa Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/sys_x86_64.c | 2 +- arch/x86/mm/mmap.c | 6 ++++-- include/linux/mm_types.h | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index dbded5aedb81..30277e27431a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = new_begin; } } else { - *begin = TASK_UNMAPPED_BASE; + *begin = current->mm->mmap_legacy_base; *end = TASK_SIZE; } } diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 845df6835f9f..5c1ae28825cd 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -112,12 +112,14 @@ static unsigned long mmap_legacy_base(void) */ void arch_pick_mmap_layout(struct mm_struct *mm) { + mm->mmap_legacy_base = mmap_legacy_base(); + mm->mmap_base = mmap_base(); + if (mmap_is_legacy()) { - mm->mmap_base = mmap_legacy_base(); + mm->mmap_base = mm->mmap_legacy_base; mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { - mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ace9a5f01c64..4a189ba6b128 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -333,6 +333,7 @@ struct mm_struct { void (*unmap_area) (struct mm_struct *mm, unsigned long addr); #endif unsigned long mmap_base; /* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ unsigned long task_size; /* size of task vm space */ unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ From 061cc2478cdacc2de2e3dd0cdfdd9bfcc5be5d93 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 16 Aug 2013 15:42:55 +0100 Subject: [PATCH 0657/1368] x86/xen: do not identity map UNUSABLE regions in the machine E820 commit 3bc38cbceb85881a8eb789ee1aa56678038b1909 upstream. If there are UNUSABLE regions in the machine memory map, dom0 will attempt to map them 1:1 which is not permitted by Xen and the kernel will crash. There isn't anything interesting in the UNUSABLE region that the dom0 kernel needs access to so we can avoid making the 1:1 mapping and treat it as RAM. We only do this for dom0, as that is where tboot case shows up. A PV domU could have an UNUSABLE region in its pseudo-physical map and would need to be handled in another patch. This fixes a boot failure on hosts with tboot. tboot marks a region in the e820 map as unusable and the dom0 kernel would attempt to map this region and Xen does not permit unusable regions to be mapped by guests. (XEN) 0000000000000000 - 0000000000060000 (usable) (XEN) 0000000000060000 - 0000000000068000 (reserved) (XEN) 0000000000068000 - 000000000009e000 (usable) (XEN) 0000000000100000 - 0000000000800000 (usable) (XEN) 0000000000800000 - 0000000000972000 (unusable) tboot marked this region as unusable. (XEN) 0000000000972000 - 00000000cf200000 (usable) (XEN) 00000000cf200000 - 00000000cf38f000 (reserved) (XEN) 00000000cf38f000 - 00000000cf3ce000 (ACPI data) (XEN) 00000000cf3ce000 - 00000000d0000000 (reserved) (XEN) 00000000e0000000 - 00000000f0000000 (reserved) (XEN) 00000000fe000000 - 0000000100000000 (reserved) (XEN) 0000000100000000 - 0000000630000000 (usable) Signed-off-by: David Vrabel [v1: Altered the patch and description with domU's with UNUSABLE regions] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/setup.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 94eac5c85cdc..0a9fb7a0b452 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -313,6 +313,17 @@ static void xen_align_and_add_e820_region(u64 start, u64 size, int type) e820_add_region(start, end - start, type); } +void xen_ignore_unusable(struct e820entry *list, size_t map_size) +{ + struct e820entry *entry; + unsigned int i; + + for (i = 0, entry = list; i < map_size; i++, entry++) { + if (entry->type == E820_UNUSABLE) + entry->type = E820_RAM; + } +} + /** * machine_specific_memory_setup - Hook for machine specific memory setup. **/ @@ -353,6 +364,17 @@ char * __init xen_memory_setup(void) } BUG_ON(rc); + /* + * Xen won't allow a 1:1 mapping to be created to UNUSABLE + * regions, so if we're using the machine memory map leave the + * region as RAM as it is in the pseudo-physical map. + * + * UNUSABLE regions in domUs are not handled and will need + * a patch in the future. + */ + if (xen_initial_domain()) + xen_ignore_unusable(map, memmap.nr_entries); + /* Make sure the Xen-supplied memory map is well-ordered. */ sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries); From 7dae89cb15ebdc7e286b884cd79f3a4b94fcdff4 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 17 Jul 2013 15:13:15 +0300 Subject: [PATCH 0658/1368] mei: me: fix reset state machine commit 315a383ad7dbd484fafb93ef08038e3dbafbb7a8 upstream. ME HW ready bit is down after hw reset was asserted or on error. Only on error we need to enter the reset flow, additional reset need to be prevented when reset was triggered during initialization , power up/down or a reset is already in progress Tested-by: Shuah Khan Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 822170f00348..03108591c139 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -482,7 +482,9 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* check if ME wants a reset */ if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING && - dev->dev_state != MEI_DEV_INITIALIZING) { + dev->dev_state != MEI_DEV_INITIALIZING && + dev->dev_state != MEI_DEV_POWER_DOWN && + dev->dev_state != MEI_DEV_POWER_UP) { dev_dbg(&dev->pdev->dev, "FW not ready.\n"); mei_reset(dev, 1); mutex_unlock(&dev->device_lock); From 47e1cf336bc708bb59ebfcc01540dffecd16862e Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 17 Jul 2013 15:13:16 +0300 Subject: [PATCH 0659/1368] mei: don't have to clean the state on power up commit 99f22c4ef24cf87b0dae6aabe6b5e620b62961d9 upstream. When powering up, we don't have to clean up the device state nothing is connected. Tested-by: Shuah Khan Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index f580d30bb784..6eec689ba97e 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -143,7 +143,8 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled) dev->hbm_state = MEI_HBM_IDLE; - if (dev->dev_state != MEI_DEV_INITIALIZING) { + if (dev->dev_state != MEI_DEV_INITIALIZING && + dev->dev_state != MEI_DEV_POWER_UP) { if (dev->dev_state != MEI_DEV_DISABLED && dev->dev_state != MEI_DEV_POWER_DOWN) dev->dev_state = MEI_DEV_RESETTING; From 644f5d570740c90439fdf69e8b47647584c6dc2f Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 17 Jul 2013 15:13:17 +0300 Subject: [PATCH 0660/1368] mei: me: fix waiting for hw ready commit dab9bf41b23fe700c4a74133e41eb6a21706031e upstream. 1. MEI_INTEROP_TIMEOUT is in seconds not in jiffies so we use mei_secs_to_jiffies macro While cold boot is fast this is relevant in resume 2. wait_event_interruptible_timeout can return with -ERESTARTSYS so do not override it with -ETIMEDOUT 3.Adjust error message Tested-by: Shuah Khan Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 03108591c139..700fe55095ba 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -238,14 +238,18 @@ static int mei_me_hw_ready_wait(struct mei_device *dev) if (mei_me_hw_is_ready(dev)) return 0; + dev->recvd_hw_ready = false; mutex_unlock(&dev->device_lock); err = wait_event_interruptible_timeout(dev->wait_hw_ready, - dev->recvd_hw_ready, MEI_INTEROP_TIMEOUT); + dev->recvd_hw_ready, + mei_secs_to_jiffies(MEI_INTEROP_TIMEOUT)); mutex_lock(&dev->device_lock); if (!err && !dev->recvd_hw_ready) { + if (!err) + err = -ETIMEDOUT; dev_err(&dev->pdev->dev, - "wait hw ready failed. status = 0x%x\n", err); - return -ETIMEDOUT; + "wait hw ready failed. status = %d\n", err); + return err; } dev->recvd_hw_ready = false; From ba5c60fc8f5f574c7cec70740ca19d358b780c57 Mon Sep 17 00:00:00 2001 From: Kumar Amit Mehta Date: Tue, 28 May 2013 00:31:15 -0700 Subject: [PATCH 0661/1368] md: bcache: io.c: fix a potential NULL pointer dereference commit 5c694129c8db6d89c9be109049a16510b2f70f6d upstream. bio_alloc_bioset returns NULL on failure. This fix adds a missing check for potential NULL pointer dereferencing. Signed-off-by: Kumar Amit Mehta Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 48efd4dea645..d285cd49104c 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -97,6 +97,8 @@ struct bio *bch_bio_split(struct bio *bio, int sectors, if (bio->bi_rw & REQ_DISCARD) { ret = bio_alloc_bioset(gfp, 1, bs); + if (!ret) + return NULL; idx = 0; goto out; } From ae61fd4496f9d9290b9e84fc373818b2ee780137 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Jun 2013 17:25:38 -0700 Subject: [PATCH 0662/1368] bcache: FUA fixes commit e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 upstream. Journal writes need to be marked FUA, not just REQ_FLUSH. And btree node writes have... weird ordering requirements. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 23 +++++++++++++++++++++-- drivers/md/bcache/journal.c | 2 +- drivers/md/bcache/request.c | 13 ++++++++++++- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7b687a6f3dec..833c590806ba 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -326,10 +326,25 @@ static void do_btree_write(struct btree *b) i->csum = btree_csum_set(b, i); btree_bio_init(b); - b->bio->bi_rw = REQ_META|WRITE_SYNC; + b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); bch_bio_map(b->bio, i); + /* + * If we're appending to a leaf node, we don't technically need FUA - + * this write just needs to be persisted before the next journal write, + * which will be marked FLUSH|FUA. + * + * Similarly if we're writing a new btree root - the pointer is going to + * be in the next journal entry. + * + * But if we're writing a new btree node (that isn't a root) or + * appending to a non leaf btree node, we need either FUA or a flush + * when we write the parent with the new pointer. FUA is cheaper than a + * flush, and writes appending to leaf nodes aren't blocking anything so + * just make all btree node writes FUA to keep things sane. + */ + bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); @@ -2142,6 +2157,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c) void bch_btree_set_root(struct btree *b) { unsigned i; + struct closure cl; + + closure_init_stack(&cl); BUG_ON(!b->written); @@ -2155,8 +2173,9 @@ void bch_btree_set_root(struct btree *b) b->c->root = b; __bkey_put(b->c, &b->key); - bch_journal_meta(b->c, NULL); + bch_journal_meta(b->c, &cl); pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); + closure_sync(&cl); } /* Cache lookup */ diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8a54d3b4f517..b49abb246bb6 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -622,7 +622,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_sector = PTR_OFFSET(k, i); bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; + bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; bio->bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 2f36743ce708..afb9a998a737 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1053,9 +1053,20 @@ static void request_write(struct cached_dev *dc, struct search *s) trace_bcache_writethrough(s->orig_bio); closure_bio_submit(bio, cl, s->d); } else { - s->op.cache_bio = bio; trace_bcache_writeback(s->orig_bio); bch_writeback_add(dc, bio_sectors(bio)); + + if (s->op.flush_journal) { + /* Also need to send a flush to the backing device */ + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + bio->bi_size = 0; + bio->bi_vcnt = 0; + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + } } out: closure_call(&s->op.cl, bch_insert_data, NULL, cl); From 8bf3379a74bc9132751bfa685bad2da318fd59d7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 29 Aug 2013 09:47:51 -0700 Subject: [PATCH 0663/1368] Linux 3.10.10 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4b31d6238f7d..b119684ddb2d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 9 +SUBLEVEL = 10 EXTRAVERSION = NAME = TOSSUG Baby Fish From 3508b9b01638a5e73fcf913213e4692680a86507 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 21 Aug 2013 10:13:30 +1000 Subject: [PATCH 0664/1368] drm/nouveau/mc: fix race condition between constructor and request_irq() commit 6ff8c76a566f823d796359a6c1d76b7668f1e34d upstream. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/core/include/subdev/mc.h | 7 ++++--- drivers/gpu/drm/nouveau/core/subdev/mc/base.c | 6 +++++- drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c | 3 +-- drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c | 3 +-- drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c | 3 +-- drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c | 3 +-- drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c | 3 +-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h index d5502267c30f..9d2cd2006250 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h @@ -20,8 +20,8 @@ nouveau_mc(void *obj) return (void *)nv_device(obj)->subdev[NVDEV_SUBDEV_MC]; } -#define nouveau_mc_create(p,e,o,d) \ - nouveau_mc_create_((p), (e), (o), sizeof(**d), (void **)d) +#define nouveau_mc_create(p,e,o,m,d) \ + nouveau_mc_create_((p), (e), (o), (m), sizeof(**d), (void **)d) #define nouveau_mc_destroy(p) ({ \ struct nouveau_mc *pmc = (p); _nouveau_mc_dtor(nv_object(pmc)); \ }) @@ -33,7 +33,8 @@ nouveau_mc(void *obj) }) int nouveau_mc_create_(struct nouveau_object *, struct nouveau_object *, - struct nouveau_oclass *, int, void **); + struct nouveau_oclass *, const struct nouveau_mc_intr *, + int, void **); void _nouveau_mc_dtor(struct nouveau_object *); int _nouveau_mc_init(struct nouveau_object *); int _nouveau_mc_fini(struct nouveau_object *, bool); diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c index 1c0330b8c9a4..ec9cd6f10f91 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c @@ -80,7 +80,9 @@ _nouveau_mc_dtor(struct nouveau_object *object) int nouveau_mc_create_(struct nouveau_object *parent, struct nouveau_object *engine, - struct nouveau_oclass *oclass, int length, void **pobject) + struct nouveau_oclass *oclass, + const struct nouveau_mc_intr *intr_map, + int length, void **pobject) { struct nouveau_device *device = nv_device(parent); struct nouveau_mc *pmc; @@ -92,6 +94,8 @@ nouveau_mc_create_(struct nouveau_object *parent, struct nouveau_object *engine, if (ret) return ret; + pmc->intr_map = intr_map; + ret = request_irq(device->pdev->irq, nouveau_mc_intr, IRQF_SHARED, "nouveau", pmc); if (ret < 0) diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c index 8c769715227b..64aa4edb0d9d 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c @@ -50,12 +50,11 @@ nv04_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nv04_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nv04_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nv04_mc_intr; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c index 51919371810f..d9891782bf28 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c @@ -36,12 +36,11 @@ nv44_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nv44_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nv04_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nv04_mc_intr; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c index d796924f9930..732d8100344b 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c @@ -52,12 +52,11 @@ nv50_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nv50_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nv50_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nv50_mc_intr; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c index e82fd21b5041..0d57b4d3e001 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c @@ -54,12 +54,11 @@ nv98_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nv98_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nv98_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nv98_mc_intr; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c index 737bd4b682e1..4c97cd2e7b56 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c @@ -56,12 +56,11 @@ nvc0_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nvc0_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nvc0_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nvc0_mc_intr; return 0; } From 192caed2bfb7072b7f364dc909b43fd7604b991d Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 15 Aug 2013 15:36:49 -0500 Subject: [PATCH 0665/1368] jfs: fix readdir cookie incompatibility with NFSv4 commit 44512449c0ab368889dd13ae0031fba74ee7e1d2 upstream. NFSv4 reserves readdir cookie values 0-2 for special entries (. and ..), but jfs allows a value of 2 for a non-special entry. This incompatibility can result in the nfs client reporting a readdir loop. This patch doesn't change the value stored internally, but adds one to the value exposed to the iterate method. Signed-off-by: Dave Kleikamp [bwh: Backported to 3.2: - Adjust context - s/ctx->pos/filp->f_pos/] Tested-by: Christian Kujau Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/jfs/jfs_dtree.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 0ddbeceafc62..c450fdb3d78d 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3047,6 +3047,14 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) dir_index = (u32) filp->f_pos; + /* + * NFSv4 reserves cookies 1 and 2 for . and .. so we add + * the value we return to the vfs is one greater than the + * one we use internally. + */ + if (dir_index) + dir_index--; + if (dir_index > 1) { struct dir_table_slot dirtab_slot; @@ -3086,7 +3094,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (p->header.flag & BT_INTERNAL) { jfs_err("jfs_readdir: bad index table"); DT_PUTPAGE(mp); - filp->f_pos = -1; + filp->f_pos = DIREND; return 0; } } else { @@ -3094,7 +3102,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * self "." */ - filp->f_pos = 0; + filp->f_pos = 1; if (filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR)) return 0; @@ -3102,7 +3110,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * parent ".." */ - filp->f_pos = 1; + filp->f_pos = 2; if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) return 0; @@ -3123,24 +3131,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." + * pn = 0; index = 1: First entry "." + * pn = 0; index = 2: Second entry ".." * pn > 0: Real entries, pn=1 -> leftmost page * pn = index = -1: No more entries */ dtpos = filp->f_pos; - if (dtpos == 0) { + if (dtpos < 2) { /* build "." entry */ + filp->f_pos = 1; if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, DT_DIR)) return 0; - dtoffset->index = 1; + dtoffset->index = 2; filp->f_pos = dtpos; } if (dtoffset->pn == 0) { - if (dtoffset->index == 1) { + if (dtoffset->index == 2) { /* build ".." entry */ if (filldir(dirent, "..", 2, filp->f_pos, @@ -3233,6 +3242,12 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } jfs_dirent->position = unique_pos++; } + /* + * We add 1 to the index because we may + * use a value of 2 internally, and NFSv4 + * doesn't like that. + */ + jfs_dirent->position++; } else { jfs_dirent->position = dtpos; len = min(d_namleft, DTLHDRDATALEN_LEGACY); From da958137df4bcd9b2b5ea83db75f66a46c3ef46f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Aug 2013 09:55:36 +0200 Subject: [PATCH 0666/1368] ALSA: hda - Fix NULL dereference with CONFIG_SND_DYNAMIC_MINORS=n commit 2ca320e294a738c9134a71b5029de05edbfc7aad upstream. Without the dynamic minor assignment, HDMI codec may have less PCM instances than the number of pins, which eventually leads to Oops. Reported-by: Stratos Karafotis Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 496d7f21d3e5..5bc419452198 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1715,6 +1715,9 @@ static int generic_hdmi_build_controls(struct hda_codec *codec) struct snd_pcm_chmap *chmap; struct snd_kcontrol *kctl; int i; + + if (!codec->pcm_info[pin_idx].pcm) + break; err = snd_pcm_add_chmap_ctls(codec->pcm_info[pin_idx].pcm, SNDRV_PCM_STREAM_PLAYBACK, NULL, 0, pin_idx, &chmap); From 35133cc34699a77d66a26f6a115b275a38868127 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 19 Aug 2013 20:05:50 +0200 Subject: [PATCH 0667/1368] ALSA: hda - Add inverted digital mic fixup for Acer Aspire One commit d3d3835ce919438c00c5d1270d6f9d6ffea59d03 upstream. Yet another entry, just use the existing fixup for this machine, too. Reported-by: "Nathanael D. Noblet" Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 57f9f2a12e85..458cf89b1643 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4207,6 +4207,7 @@ static const struct hda_fixup alc662_fixups[] = { static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1019, 0x9087, "ECS", ALC662_FIXUP_ASUS_MODE2), + SND_PCI_QUIRK(0x1025, 0x022f, "Acer Aspire One", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), From c990ab45cb4fda0767b196f4303372bdad4f9f7a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 Aug 2013 12:03:01 +0200 Subject: [PATCH 0668/1368] ALSA: opti9xx: Fix conflicting driver object name commit fb615499f0ad28ed74201c1cdfddf9e64e205424 upstream. The recent commit to delay the release of kobject triggered NULL dereferences of opti9xx drivers. The cause is that all snd-opti92x-ad1848, snd-opti92x-cs4231 and snd-opti93x drivers register the PnP card driver with the very same name, and also snd-opti92x-ad1848 and -cs4231 drivers register the ISA driver with the same name, too. When these drivers are built in, quick "register-release-and-re-register" actions occur, and this results in Oops because of the same name is assigned to the kobject. The fix is simply to assign individual names. As a bonus, by using KBUILD_MODNAME, the patch reduces more lines than it adds. The fix is based on the suggestion by Russell King. Reported-and-tested-by: Fengguang Wu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/opti9xx/opti92x-ad1848.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index b41ed8661b23..e427dbf76368 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -173,11 +173,7 @@ MODULE_DEVICE_TABLE(pnp_card, snd_opti9xx_pnpids); #endif /* CONFIG_PNP */ -#ifdef OPTi93X -#define DEV_NAME "opti93x" -#else -#define DEV_NAME "opti92x" -#endif +#define DEV_NAME KBUILD_MODNAME static char * snd_opti9xx_names[] = { "unknown", @@ -1168,7 +1164,7 @@ static int snd_opti9xx_pnp_resume(struct pnp_card_link *pcard) static struct pnp_card_driver opti9xx_pnpc_driver = { .flags = PNP_DRIVER_RES_DISABLE, - .name = "opti9xx", + .name = DEV_NAME, .id_table = snd_opti9xx_pnpids, .probe = snd_opti9xx_pnp_probe, .remove = snd_opti9xx_pnp_remove, From 45fe50ea3dadb11c5094d71a4f3e3ccc6f41f1c5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 27 Aug 2013 16:07:49 +1000 Subject: [PATCH 0669/1368] powerpc: Work around gcc miscompilation of __pa() on 64-bit commit bdbc29c19b2633b1d9c52638fb732bcde7a2031a upstream. On 64-bit, __pa(&static_var) gets miscompiled by recent versions of gcc as something like: addis 3,2,.LANCHOR1+4611686018427387904@toc@ha addi 3,3,.LANCHOR1+4611686018427387904@toc@l This ends up effectively ignoring the offset, since its bottom 32 bits are zero, and means that the result of __pa() still has 0xC in the top nibble. This happens with gcc 4.8.1, at least. To work around this, for 64-bit we make __pa() use an AND operator, and for symmetry, we make __va() use an OR operator. Using an AND operator rather than a subtraction ends up with slightly shorter code since it can be done with a single clrldi instruction, whereas it takes three instructions to form the constant (-PAGE_OFFSET) and add it on. (Note that MEMORY_START is always 0 on 64-bit.) Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/page.h | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 74991fee2e62..fe404e77246e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -986,6 +986,7 @@ config RELOCATABLE must live at a different physical address than the primary kernel. +# This value must have zeroes in the bottom 60 bits otherwise lots will break config PAGE_OFFSET hex default "0xc000000000000000" diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 988c812aab5b..b9f426212d3a 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -211,9 +211,19 @@ extern long long virt_phys_offset; #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET)) #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) #else +#ifdef CONFIG_PPC64 +/* + * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET + * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit. + */ +#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET)) +#define __pa(x) ((unsigned long)(x) & 0x0fffffffffffffffUL) + +#else /* 32-bit, non book E */ #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START)) #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START) #endif +#endif /* * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, From 501a297b2351addf61311f9ed49dedb10a4fee9e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 27 Aug 2013 16:38:33 +1000 Subject: [PATCH 0670/1368] powerpc: Don't Oops when accessing /proc/powerpc/lparcfg without hypervisor commit f5f6cbb61610b7bf9d9d96db9c3979d62a424bab upstream. /proc/powerpc/lparcfg is an ancient facility (though still actively used) which allows access to some informations relative to the partition when running underneath a PAPR compliant hypervisor. It makes no sense on non-pseries machines. However, currently, not only can it be created on these if the kernel has pseries support, but accessing it on such a machine will crash due to trying to do hypervisor calls. In fact, it should also not do HV calls on older pseries that didn't have an hypervisor either. Finally, it has the plumbing to be a module but is a "bool" Kconfig option. This fixes the whole lot by turning it into a machine_device_initcall that is only created on pseries, and adding the necessary hypervisor check before calling the H_GET_EM_PARMS hypercall Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/lparcfg.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index d92f3871e9cf..e2a0a162299b 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -35,7 +35,13 @@ #include #include #include +#include + +/* + * This isn't a module but we expose that to userspace + * via /proc so leave the definitions here + */ #define MODULE_VERS "1.9" #define MODULE_NAME "lparcfg" @@ -418,7 +424,8 @@ static void parse_em_data(struct seq_file *m) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + if (firmware_has_feature(FW_FEATURE_LPAR) && + plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); } @@ -677,7 +684,6 @@ static int lparcfg_open(struct inode *inode, struct file *file) } static const struct file_operations lparcfg_fops = { - .owner = THIS_MODULE, .read = seq_read, .write = lparcfg_write, .open = lparcfg_open, @@ -699,14 +705,4 @@ static int __init lparcfg_init(void) } return 0; } - -static void __exit lparcfg_cleanup(void) -{ - remove_proc_subtree("powerpc/lparcfg", NULL); -} - -module_init(lparcfg_init); -module_exit(lparcfg_cleanup); -MODULE_DESCRIPTION("Interface for LPAR configuration data"); -MODULE_AUTHOR("Dave Engebretsen"); -MODULE_LICENSE("GPL"); +machine_device_initcall(pseries, lparcfg_init); From 6f3615326d368cbcc97c98d78fbc1036567520d1 Mon Sep 17 00:00:00 2001 From: Eugene Surovegin Date: Mon, 26 Aug 2013 11:53:32 -0700 Subject: [PATCH 0671/1368] powerpc/hvsi: Increase handshake timeout from 200ms to 400ms. commit d220980b701d838560a70de691b53be007e99e78 upstream. This solves a problem observed in kexec'ed kernel where 200ms timeout is too short and bootconsole fails to initialize. Console did eventually become workable but much later into the boot process. Observed timeout was around 260ms, but I decided to make it a little bigger for more reliability. This has been tested on Power7 machine with Petitboot as a primary bootloader and PowerNV firmware. Signed-off-by: Eugene Surovegin Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvsi_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/hvc/hvsi_lib.c b/drivers/tty/hvc/hvsi_lib.c index 3396eb9d57a3..ac2767100df5 100644 --- a/drivers/tty/hvc/hvsi_lib.c +++ b/drivers/tty/hvc/hvsi_lib.c @@ -341,8 +341,8 @@ void hvsilib_establish(struct hvsi_priv *pv) pr_devel("HVSI@%x: ... waiting handshake\n", pv->termno); - /* Try for up to 200s */ - for (timeout = 0; timeout < 20; timeout++) { + /* Try for up to 400ms */ + for (timeout = 0; timeout < 40; timeout++) { if (pv->established) goto established; if (!hvsi_get_packet(pv)) From ff2b0a7db98e7ffdfafb667e60c25f07e9959737 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 26 Jul 2013 18:43:45 +0200 Subject: [PATCH 0672/1368] SCSI: pm80xx: fix Adaptec 71605H hang commit 9504a923924d663e1953f872f0a828e6454a6cfc upstream. The IO command size is 128 bytes for these new controllers as opposed to 64 for the old 8001 controller. The Adaptec out-of-tree driver did this correctly. After comparing the two this turned out to be the crucial difference. So don't hardcode the IO command size, instead use pm8001_ha->iomb_size as that is the correct value for both old and new controllers. Signed-off-by: Hans Verkuil Acked-by: Anand Kumar Santhanam Acked-by: Jack Wang Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/pm8001/pm8001_hwi.c | 4 ++-- drivers/scsi/pm8001/pm80xx_hwi.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 69dd49c05f1e..ce3f129d39bf 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -221,7 +221,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm8001_tbl.fatal_err_interrupt = 0x01; for (i = 0; i < PM8001_MAX_INB_NUM; i++) { pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt = - PM8001_MPI_QUEUE | (64 << 16) | (0x00<<30); + PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x00<<30); pm8001_ha->inbnd_q_tbl[i].upper_base_addr = pm8001_ha->memoryMap.region[IB + i].phys_addr_hi; pm8001_ha->inbnd_q_tbl[i].lower_base_addr = @@ -247,7 +247,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) } for (i = 0; i < PM8001_MAX_OUTB_NUM; i++) { pm8001_ha->outbnd_q_tbl[i].element_size_cnt = - PM8001_MPI_QUEUE | (64 << 16) | (0x01<<30); + PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x01<<30); pm8001_ha->outbnd_q_tbl[i].upper_base_addr = pm8001_ha->memoryMap.region[OB + i].phys_addr_hi; pm8001_ha->outbnd_q_tbl[i].lower_base_addr = diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 302514d8157b..e1c48961ceac 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -275,7 +275,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) for (i = 0; i < PM8001_MAX_SPCV_INB_NUM; i++) { pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt = - PM8001_MPI_QUEUE | (64 << 16) | (0x00<<30); + PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x00<<30); pm8001_ha->inbnd_q_tbl[i].upper_base_addr = pm8001_ha->memoryMap.region[IB + i].phys_addr_hi; pm8001_ha->inbnd_q_tbl[i].lower_base_addr = @@ -301,7 +301,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) } for (i = 0; i < PM8001_MAX_SPCV_OUTB_NUM; i++) { pm8001_ha->outbnd_q_tbl[i].element_size_cnt = - PM8001_MPI_QUEUE | (64 << 16) | (0x01<<30); + PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x01<<30); pm8001_ha->outbnd_q_tbl[i].upper_base_addr = pm8001_ha->memoryMap.region[OB + i].phys_addr_hi; pm8001_ha->outbnd_q_tbl[i].lower_base_addr = From f1179e006ed4315834bdbf0982b40f7029cf0977 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 14 Aug 2013 16:05:02 -0700 Subject: [PATCH 0673/1368] regmap: Add another missing header for !CONFIG_REGMAP stubs commit 3f0fa9a808f98fa10a18ba2a73f13d65fda990fb upstream. The use of WARN_ON() needs the definitions from bug.h, without it you can get: include/linux/regmap.h: In function 'regmap_write': include/linux/regmap.h:525:2: error: implicit declaration of function 'WARN_ONCE' [-Werror=implicit-function-declaration] Signed-off-by: Kevin Hilman Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f91bb416122d..98c470ced989 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -16,6 +16,7 @@ #include #include #include +#include struct module; struct device; From b3772c81e3490f1ddc0547ee479598f3f4221699 Mon Sep 17 00:00:00 2001 From: Nathan Zimmer Date: Wed, 28 Aug 2013 16:35:14 -0700 Subject: [PATCH 0674/1368] timer_list: correct the iterator for timer_list commit 84a78a6504f5c5394a8e558702e5b54131f01d14 upstream. Correct an issue with /proc/timer_list reported by Holger. When reading from the proc file with a sufficiently small buffer, 2k so not really that small, there was one could get hung trying to read the file a chunk at a time. The timer_list_start function failed to account for the possibility that the offset was adjusted outside the timer_list_next. Signed-off-by: Nathan Zimmer Reported-by: Holger Hans Peter Freyther Cc: John Stultz Cc: Thomas Gleixner Cc: Berke Durak Cc: Jeff Layton Tested-by: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer_list.c | 43 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 3bdf28323012..61ed862cdd37 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -265,10 +265,9 @@ static inline void timer_list_header(struct seq_file *m, u64 now) static int timer_list_show(struct seq_file *m, void *v) { struct timer_list_iter *iter = v; - u64 now = ktime_to_ns(ktime_get()); if (iter->cpu == -1 && !iter->second_pass) - timer_list_header(m, now); + timer_list_header(m, iter->now); else if (!iter->second_pass) print_cpu(m, iter->cpu, iter->now); #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -298,33 +297,41 @@ void sysrq_timer_list_show(void) return; } +static void *move_iter(struct timer_list_iter *iter, loff_t offset) +{ + for (; offset; offset--) { + iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); + if (iter->cpu >= nr_cpu_ids) { +#ifdef CONFIG_GENERIC_CLOCKEVENTS + if (!iter->second_pass) { + iter->cpu = -1; + iter->second_pass = true; + } else + return NULL; +#else + return NULL; +#endif + } + } + return iter; +} + static void *timer_list_start(struct seq_file *file, loff_t *offset) { struct timer_list_iter *iter = file->private; - if (!*offset) { - iter->cpu = -1; + if (!*offset) iter->now = ktime_to_ns(ktime_get()); - } else if (iter->cpu >= nr_cpu_ids) { -#ifdef CONFIG_GENERIC_CLOCKEVENTS - if (!iter->second_pass) { - iter->cpu = -1; - iter->second_pass = true; - } else - return NULL; -#else - return NULL; -#endif - } - return iter; + iter->cpu = -1; + iter->second_pass = false; + return move_iter(iter, *offset); } static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset) { struct timer_list_iter *iter = file->private; - iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); ++*offset; - return timer_list_start(file, offset); + return move_iter(iter, 1); } static void timer_list_stop(struct seq_file *seq, void *v) From 07245f175c74d7d50663dc6758094c683c07f510 Mon Sep 17 00:00:00 2001 From: Svenning Soerensen Date: Wed, 28 Aug 2013 16:35:17 -0700 Subject: [PATCH 0675/1368] IPC: bugfix for msgrcv with msgtyp < 0 commit 368ae537e056acd3f751fa276f48423f06803922 upstream. According to 'man msgrcv': "If msgtyp is less than 0, the first message of the lowest type that is less than or equal to the absolute value of msgtyp shall be received." Bug: The kernel only returns a message if its type is 1; other messages with type < abs(msgtype) will never get returned. Fix: After having traversed the list to find the first message with the lowest type, we need to actually return that message. This regression was introduced by commit daaf74cf0867 ("ipc: refactor msg list search into separate function") Signed-off-by: Svenning Soerensen Reviewed-by: Peter Hurley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/msg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index d0c6d967b390..f8fbe2c095ce 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -795,7 +795,7 @@ static inline void free_copy(struct msg_msg *copy) static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) { - struct msg_msg *msg; + struct msg_msg *msg, *found = NULL; long count = 0; list_for_each_entry(msg, &msq->q_messages, m_list) { @@ -804,6 +804,7 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) *msgtyp, mode)) { if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { *msgtyp = msg->m_type - 1; + found = msg; } else if (mode == SEARCH_NUMBER) { if (*msgtyp == count) return msg; @@ -813,7 +814,7 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) } } - return ERR_PTR(-EAGAIN); + return found ?: ERR_PTR(-EAGAIN); } From ea35d7d69ee590d44aba3f41dea8b492dce0ca72 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Wed, 28 Aug 2013 16:35:18 -0700 Subject: [PATCH 0676/1368] drivers/base/memory.c: fix show_mem_removable() to handle missing sections commit 21ea9f5ace3a7317cc3ba1fbc749758021a83136 upstream. "cat /sys/devices/system/memory/memory*/removable" crashed the system. The problem is that show_mem_removable() is passing a bad pfn to is_mem_section_removable(), which causes if (!node_online(page_to_nid(page))) to blow up. Why is it passing in a bad pfn? The reason is that show_mem_removable() will loop sections_per_block times. sections_per_block is 16, but mem->section_count is 8, indicating holes in this memory block. Checking that the memory section is present before checking to see if the memory section is removable fixes the problem. harp5-sys:~ # cat /sys/devices/system/memory/memory*/removable 0 1 1 1 1 1 1 1 1 1 1 1 1 1 BUG: unable to handle kernel paging request at ffffea00c3200000 IP: [] is_pageblock_removable_nolock+0x1/0x90 PGD 83ffd4067 PUD 37bdfce067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: autofs4 binfmt_misc rdma_ucm rdma_cm iw_cm ib_addr ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_cm ib_uverbs ib_umad iw_cxgb3 cxgb3 mdio mlx4_en mlx4_ib ib_sa mlx4_core ib_mthca ib_mad ib_core fuse nls_iso8859_1 nls_cp437 vfat fat joydev loop hid_generic usbhid hid hwperf(O) numatools(O) dm_mod iTCO_wdt ipv6 iTCO_vendor_support igb i2c_i801 ioatdma i2c_algo_bit ehci_pci pcspkr lpc_ich i2c_core ehci_hcd ptp sg mfd_core dca rtc_cmos pps_core mperf button xhci_hcd sd_mod crc_t10dif usbcore usb_common scsi_dh_emc scsi_dh_hp_sw scsi_dh_alua scsi_dh_rdac scsi_dh gru(O) xvma(O) xfs crc32c libcrc32c thermal sata_nv processor piix mptsas mptscsih scsi_transport_sas mptbase megaraid_sas fan thermal_sys hwmon ext3 jbd ata_piix ahci libahci libata scsi_mod CPU: 4 PID: 5991 Comm: cat Tainted: G O 3.11.0-rc5-rja-uv+ #10 Hardware name: SGI UV2000/ROMLEY, BIOS SGI UV 2000/3000 series BIOS 01/15/2013 task: ffff88081f034580 ti: ffff880820022000 task.ti: ffff880820022000 RIP: 0010:[] [] is_pageblock_removable_nolock+0x1/0x90 RSP: 0018:ffff880820023df8 EFLAGS: 00010287 RAX: 0000000000040000 RBX: ffffea00c3200000 RCX: 0000000000000004 RDX: ffffea00c30b0000 RSI: 00000000001c0000 RDI: ffffea00c3200000 RBP: ffff880820023e38 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000001 R12: ffffea00c33c0000 R13: 0000160000000000 R14: 6db6db6db6db6db7 R15: 0000000000000001 FS: 00007ffff7fb2700(0000) GS:ffff88083fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffea00c3200000 CR3: 000000081b954000 CR4: 00000000000407e0 Call Trace: show_mem_removable+0x41/0x70 dev_attr_show+0x2a/0x60 sysfs_read_file+0xf7/0x1c0 vfs_read+0xc8/0x130 SyS_read+0x5d/0xa0 system_call_fastpath+0x16/0x1b Signed-off-by: Russ Anderson Cc: "Rafael J. Wysocki" Cc: Yinghai Lu Reviewed-by: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 14f8a6954da0..86abbff912ec 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -152,6 +152,8 @@ static ssize_t show_mem_removable(struct device *dev, container_of(dev, struct memory_block, dev); for (i = 0; i < sections_per_block; i++) { + if (!present_section_nr(mem->start_section_nr + i)) + continue; pfn = section_nr_to_pfn(mem->start_section_nr + i); ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); } From a0d9a083e450d581b98707bdb6837f55554e673b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 28 Aug 2013 16:35:20 -0700 Subject: [PATCH 0677/1368] memcg: check that kmem_cache has memcg_params before accessing it commit 6f6b8951897e487ea6f77b90ea01f70a9c363770 upstream. If the system had a few memory groups and all of them were destroyed, memcg_limited_groups_array_size has non-zero value, but all new caches are created without memcg_params, because memcg_kmem_enabled() returns false. We try to enumirate child caches in a few places and all of them are potentially dangerous. For example my kernel is compiled with CONFIG_SLAB and it crashed when I tryed to mount a NFS share after a few experiments with kmemcg. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] do_tune_cpucache+0x8a/0xd0 PGD b942a067 PUD b999f067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: fscache(+) ip6table_filter ip6_tables iptable_filter ip_tables i2c_piix4 pcspkr virtio_net virtio_balloon i2c_core floppy CPU: 0 PID: 357 Comm: modprobe Not tainted 3.11.0-rc7+ #59 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800b9f98240 ti: ffff8800ba32e000 task.ti: ffff8800ba32e000 RIP: 0010:[] [] do_tune_cpucache+0x8a/0xd0 RSP: 0018:ffff8800ba32fb70 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000000 RSI: ffff8800b9f98910 RDI: 0000000000000246 RBP: ffff8800ba32fba0 R08: 0000000000000002 R09: 0000000000000004 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000010 R13: 0000000000000008 R14: 00000000000000d0 R15: ffff8800375d0200 FS: 00007f55f1378740(0000) GS:ffff8800bfa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f24feba57a0 CR3: 0000000037b51000 CR4: 00000000000006f0 Call Trace: enable_cpucache+0x49/0x100 setup_cpu_cache+0x215/0x280 __kmem_cache_create+0x2fa/0x450 kmem_cache_create_memcg+0x214/0x350 kmem_cache_create+0x2b/0x30 fscache_init+0x19b/0x230 [fscache] do_one_initcall+0xfa/0x1b0 load_module+0x1c41/0x26d0 SyS_finit_module+0x86/0xb0 system_call_fastpath+0x16/0x1b Signed-off-by: Andrey Vagin Cc: Pekka Enberg Cc: Christoph Lameter Cc: Glauber Costa Cc: Joonsoo Kim Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slab.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/slab.h b/mm/slab.h index f96b49e4704e..4d6d836247dd 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -162,6 +162,8 @@ static inline const char *cache_name(struct kmem_cache *s) static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx) { + if (!s->memcg_params) + return NULL; return s->memcg_params->memcg_caches[idx]; } From 6ff96f7340e8a0b7b7e2c40a26bc47fb320e6475 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 28 Aug 2013 17:33:37 -0400 Subject: [PATCH 0678/1368] workqueue: cond_resched() after processing each work item commit b22ce2785d97423846206cceec4efee0c4afd980 upstream. If !PREEMPT, a kworker running work items back to back can hog CPU. This becomes dangerous when a self-requeueing work item which is waiting for something to happen races against stop_machine. Such self-requeueing work item would requeue itself indefinitely hogging the kworker and CPU it's running on while stop_machine would wait for that CPU to enter stop_machine while preventing anything else from happening on all other CPUs. The two would deadlock. Jamie Liu reports that this deadlock scenario exists around scsi_requeue_run_queue() and libata port multiplier support, where one port may exclude command processing from other ports. With the right timing, scsi_requeue_run_queue() can end up requeueing itself trying to execute an IO which is asked to be retried while another device has an exclusive access, which in turn can't make forward progress due to stop_machine. Fix it by invoking cond_resched() after executing each work item. Signed-off-by: Tejun Heo Reported-by: Jamie Liu References: http://thread.gmane.org/gmane.linux.kernel/1552567 Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6f019219ddbd..e52d002d3893 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2188,6 +2188,15 @@ __acquires(&pool->lock) dump_stack(); } + /* + * The following prevents a kworker from hogging CPU on !PREEMPT + * kernels, where a requeueing work item waiting for something to + * happen could deadlock with stop_machine as such work item could + * indefinitely requeue itself while all other CPUs are trapped in + * stop_machine. + */ + cond_resched(); + spin_lock_irq(&pool->lock); /* clear cpu intensive status */ From 9c3a5a86636175686a547d6022a50373cc75adbc Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 29 Aug 2013 02:32:53 +0200 Subject: [PATCH 0679/1368] drm/vmwgfx: Split GMR2_REMAP commands if they are to large commit 6e4dcff3adbf25acb87e74500a58e3c07bdec40f upstream. This fixes the piglit test texturing/max-texture-size causing the VM to die due to a too large SVGA command. Signed-off-by: Jakob Bornecrantz Reviewed-by: Biran Paul Reviewed-by: Zack Rusin Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c | 58 +++++++++++++++++++---------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c index 3751730764a5..1a0bf07fe54b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c @@ -29,7 +29,9 @@ #include #include -#define VMW_PPN_SIZE sizeof(unsigned long) +#define VMW_PPN_SIZE (sizeof(unsigned long)) +/* A future safe maximum remap size. */ +#define VMW_PPN_PER_REMAP ((31 * 1024) / VMW_PPN_SIZE) static int vmw_gmr2_bind(struct vmw_private *dev_priv, struct page *pages[], @@ -38,43 +40,61 @@ static int vmw_gmr2_bind(struct vmw_private *dev_priv, { SVGAFifoCmdDefineGMR2 define_cmd; SVGAFifoCmdRemapGMR2 remap_cmd; - uint32_t define_size = sizeof(define_cmd) + 4; - uint32_t remap_size = VMW_PPN_SIZE * num_pages + sizeof(remap_cmd) + 4; uint32_t *cmd; uint32_t *cmd_orig; + uint32_t define_size = sizeof(define_cmd) + sizeof(*cmd); + uint32_t remap_num = num_pages / VMW_PPN_PER_REMAP + ((num_pages % VMW_PPN_PER_REMAP) > 0); + uint32_t remap_size = VMW_PPN_SIZE * num_pages + (sizeof(remap_cmd) + sizeof(*cmd)) * remap_num; + uint32_t remap_pos = 0; + uint32_t cmd_size = define_size + remap_size; uint32_t i; - cmd_orig = cmd = vmw_fifo_reserve(dev_priv, define_size + remap_size); + cmd_orig = cmd = vmw_fifo_reserve(dev_priv, cmd_size); if (unlikely(cmd == NULL)) return -ENOMEM; define_cmd.gmrId = gmr_id; define_cmd.numPages = num_pages; + *cmd++ = SVGA_CMD_DEFINE_GMR2; + memcpy(cmd, &define_cmd, sizeof(define_cmd)); + cmd += sizeof(define_cmd) / sizeof(*cmd); + + /* + * Need to split the command if there are too many + * pages that goes into the gmr. + */ + remap_cmd.gmrId = gmr_id; remap_cmd.flags = (VMW_PPN_SIZE > sizeof(*cmd)) ? SVGA_REMAP_GMR2_PPN64 : SVGA_REMAP_GMR2_PPN32; - remap_cmd.offsetPages = 0; - remap_cmd.numPages = num_pages; - *cmd++ = SVGA_CMD_DEFINE_GMR2; - memcpy(cmd, &define_cmd, sizeof(define_cmd)); - cmd += sizeof(define_cmd) / sizeof(uint32); + while (num_pages > 0) { + unsigned long nr = min(num_pages, (unsigned long)VMW_PPN_PER_REMAP); - *cmd++ = SVGA_CMD_REMAP_GMR2; - memcpy(cmd, &remap_cmd, sizeof(remap_cmd)); - cmd += sizeof(remap_cmd) / sizeof(uint32); + remap_cmd.offsetPages = remap_pos; + remap_cmd.numPages = nr; - for (i = 0; i < num_pages; ++i) { - if (VMW_PPN_SIZE <= 4) - *cmd = page_to_pfn(*pages++); - else - *((uint64_t *)cmd) = page_to_pfn(*pages++); + *cmd++ = SVGA_CMD_REMAP_GMR2; + memcpy(cmd, &remap_cmd, sizeof(remap_cmd)); + cmd += sizeof(remap_cmd) / sizeof(*cmd); - cmd += VMW_PPN_SIZE / sizeof(*cmd); + for (i = 0; i < nr; ++i) { + if (VMW_PPN_SIZE <= 4) + *cmd = page_to_pfn(*pages++); + else + *((uint64_t *)cmd) = page_to_pfn(*pages++); + + cmd += VMW_PPN_SIZE / sizeof(*cmd); + } + + num_pages -= nr; + remap_pos += nr; } - vmw_fifo_commit(dev_priv, define_size + remap_size); + BUG_ON(cmd != cmd_orig + cmd_size / sizeof(*cmd)); + + vmw_fifo_commit(dev_priv, cmd_size); return 0; } From 4067d65ec039bf5fc175c123c588d09380e72fdc Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2013 23:50:23 +0300 Subject: [PATCH 0680/1368] drm/i915: ivb: fix edp voltage swing reg val commit 77fa4cbd5fa389e28419bbe8ac491b5fdd54840d upstream. Fix the typo introduced in commit 1a2eb4604b85c5efb343da8a4dcf41288fcfca85 Author: Keith Packard Date: Wed Nov 16 16:26:07 2011 -0800 drm/i915: Hook up Ivybridge eDP This fixes eDP link-training failures and cases where all voltage swing /pre-emphasis levels were tried and failed during clock recovery and - as a fallback - we go on to do channel equalization with the last voltage swing/pre-emphasis level which will succeed. Both issues can lead to a blank screen. v2: - improve commit message Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64880 Tested-by: Jeremy Moles Signed-off-by: Imre Deak Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 01f6c2cf471f..a365780aeb1e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4246,7 +4246,7 @@ #define EDP_LINK_TRAIN_600MV_0DB_IVB (0x30 <<22) #define EDP_LINK_TRAIN_600MV_3_5DB_IVB (0x36 <<22) #define EDP_LINK_TRAIN_800MV_0DB_IVB (0x38 <<22) -#define EDP_LINK_TRAIN_800MV_3_5DB_IVB (0x33 <<22) +#define EDP_LINK_TRAIN_800MV_3_5DB_IVB (0x3e <<22) /* legacy values */ #define EDP_LINK_TRAIN_500MV_0DB_IVB (0x00 <<22) From 344033d4079c22c92581675d7416c748d9d8201b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 28 Aug 2013 13:35:13 -0400 Subject: [PATCH 0681/1368] SUNRPC: Fix memory corruption issue on 32-bit highmem systems commit 347e2233b7667e336d9f671f1a52dfa3f0416e2c upstream. Some architectures, such as ARM-32 do not return the same base address when you call kmap_atomic() twice on the same page. This causes problems for the memmove() call in the XDR helper routine "_shift_data_right_pages()", since it defeats the detection of overlapping memory ranges, and has been seen to corrupt memory. The fix is to distinguish between the case where we're doing an inter-page copy or not. In the former case of we know that the memory ranges cannot possibly overlap, so we can additionally micro-optimise by replacing memmove() with memcpy(). Reported-by: Mark Young Reported-by: Matt Craighead Cc: Bruce Fields Signed-off-by: Trond Myklebust Tested-by: Matt Craighead Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xdr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 75edcfad6e26..1504bb11e4f3 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -207,10 +207,13 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, pgfrom_base -= copy; vto = kmap_atomic(*pgto); - vfrom = kmap_atomic(*pgfrom); - memmove(vto + pgto_base, vfrom + pgfrom_base, copy); + if (*pgto != *pgfrom) { + vfrom = kmap_atomic(*pgfrom); + memcpy(vto + pgto_base, vfrom + pgfrom_base, copy); + kunmap_atomic(vfrom); + } else + memmove(vto + pgto_base, vto + pgfrom_base, copy); flush_dcache_page(*pgto); - kunmap_atomic(vfrom); kunmap_atomic(vto); } while ((len -= copy) != 0); From 7c886152ed0b4fdfc4412f0a28ad68500b552660 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Aug 2013 16:43:24 -0700 Subject: [PATCH 0682/1368] x86/mm: Fix boot crash with DEBUG_PAGE_ALLOC=y and more than 512G RAM commit 527bf129f9a780e11b251cf2467dc30118a57d16 upstream. Dave Hansen reported that systems between 500G and 600G RAM crash early if DEBUG_PAGEALLOC is selected. > [ 0.000000] init_memory_mapping: [mem 0x00000000-0x000fffff] > [ 0.000000] [mem 0x00000000-0x000fffff] page 4k > [ 0.000000] BRK [0x02086000, 0x02086fff] PGTABLE > [ 0.000000] BRK [0x02087000, 0x02087fff] PGTABLE > [ 0.000000] BRK [0x02088000, 0x02088fff] PGTABLE > [ 0.000000] init_memory_mapping: [mem 0xe80ee00000-0xe80effffff] > [ 0.000000] [mem 0xe80ee00000-0xe80effffff] page 4k > [ 0.000000] BRK [0x02089000, 0x02089fff] PGTABLE > [ 0.000000] BRK [0x0208a000, 0x0208afff] PGTABLE > [ 0.000000] Kernel panic - not syncing: alloc_low_page: ran out of memory It turns out that we missed increasing needed pages in BRK to mapping initial 2M and [0,1M) when we switched to use the #PF handler to set memory mappings: > commit 8170e6bed465b4b0c7687f93e9948aca4358a33b > Author: H. Peter Anvin > Date: Thu Jan 24 12:19:52 2013 -0800 > > x86, 64bit: Use a #PF handler to materialize early mappings on demand Before that, we had the maping from [0,512M) in head_64.S, and we can spare two pages [0-1M). After that change, we can not reuse pages anymore. When we have more than 512M ram, we need an extra page for pgd page with [512G, 1024g). Increase pages in BRK for page table to solve the boot crash. Reported-by: Dave Hansen Bisected-by: Dave Hansen Tested-by: Dave Hansen Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1376351004-4015-1-git-send-email-yinghai@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1f34e9219775..7a5bf1b76e2f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -78,8 +78,8 @@ __ref void *alloc_low_pages(unsigned int num) return __va(pfn << PAGE_SHIFT); } -/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ -#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) +/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */ +#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE) RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); void __init early_alloc_pgt_buf(void) { From a6dbf50f5b305c51443657f91eabfc2521fa34a9 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 16 Aug 2013 21:39:40 +0200 Subject: [PATCH 0683/1368] ath9k_htc: Restore skb headroom when returning skb to mac80211 commit d2e9fc141e2aa21f4b35ee27072d84e9aa6e2ba0 upstream. ath9k_htc adds padding between the 802.11 header and the payload during TX by moving the header. When handing the frame back to mac80211 for TX status handling the header is not moved back into its original position. This can result in a too small skb headroom when entering ath9k_htc again (due to a soft retransmission for example) causing an skb_under_panic oops. Fix this by moving the 802.11 header back into its original position before returning the frame to mac80211 as other drivers like rt2x00 or ath5k do. Reported-by: Marc Kleine-Budde Signed-off-by: Helmut Schaa Tested-by: Marc Kleine-Budde Signed-off-by: Marc Kleine-Budde Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 6bd0e92ea2aa..417a089cd7e1 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -448,6 +448,7 @@ static void ath9k_htc_tx_process(struct ath9k_htc_priv *priv, struct ieee80211_conf *cur_conf = &priv->hw->conf; bool txok; int slot; + int hdrlen, padsize; slot = strip_drv_header(priv, skb); if (slot < 0) { @@ -504,6 +505,15 @@ static void ath9k_htc_tx_process(struct ath9k_htc_priv *priv, ath9k_htc_tx_clear_slot(priv, slot); + /* Remove padding before handing frame back to mac80211 */ + hdrlen = ieee80211_get_hdrlen_from_skb(skb); + + padsize = hdrlen & 3; + if (padsize && skb->len > hdrlen + padsize) { + memmove(skb->data + padsize, skb->data, hdrlen); + skb_pull(skb, padsize); + } + /* Send status to mac80211 */ ieee80211_tx_status(priv->hw, skb); } From ece7fd7a00604ee6729e878b4d95971e1b55dfff Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Tue, 20 Aug 2013 10:05:59 +0530 Subject: [PATCH 0684/1368] ath9k: Enable PLL fix only for AR9340/AR9330 commit 19c361608ce3e73f352e323262f7e0a8264be3af upstream. The PLL hang workaround is required only for AR9330 and AR9340. This issue was first observed on an AP121 and the WAR is enabled for AR9340 also (DB120 etc.), since it uses a PLL design identical to AR9330. This is not required for AR9485 and AR9550. Various bugs have been reported regarding this: https://bugzilla.redhat.com/show_bug.cgi?id=997217 https://bugzilla.redhat.com/show_bug.cgi?id=994648 Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 35ced100c183..e33a659e224b 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -173,8 +173,7 @@ static void ath_restart_work(struct ath_softc *sc) { ieee80211_queue_delayed_work(sc->hw, &sc->tx_complete_work, 0); - if (AR_SREV_9340(sc->sc_ah) || AR_SREV_9485(sc->sc_ah) || - AR_SREV_9550(sc->sc_ah)) + if (AR_SREV_9340(sc->sc_ah) || AR_SREV_9330(sc->sc_ah)) ieee80211_queue_delayed_work(sc->hw, &sc->hw_pll_work, msecs_to_jiffies(ATH_PLL_WORK_INTERVAL)); From 543037a3023b41ce13e98f62e1f3c2c591e00c5c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 20 Aug 2013 11:28:50 +0200 Subject: [PATCH 0685/1368] mac80211: add missing channel context release commit 2a3ba63c235fdcd37f6451bdf4a0c7865a3930cf upstream. IBSS needs to release the channel context when leaving but I evidently missed that. Fix it. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/ibss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 170f9a7fa319..3052672e37f7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1166,6 +1166,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); + ieee80211_vif_release_channel(sdata); synchronize_rcu(); kfree(presp); From 9ddc34b565615be1ffd267ffd8c826d9ef097179 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 20 Aug 2013 19:43:54 +0200 Subject: [PATCH 0686/1368] mac80211: add a flag to indicate CCK support for HT clients commit 2dfca312a91631311c1cf7c090246cc8103de038 upstream. brcm80211 cannot handle sending frames with CCK rates as part of an A-MPDU session. Other drivers may have issues too. Set the flag in all drivers that have been tested with CCK rates. This fixes a reported brcmsmac regression introduced in commit ef47a5e4f1aaf1d0e2e6875e34b2c9595897bef6 "mac80211/minstrel_ht: fix cck rate sampling" Reported-by: Tom Gundersen Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/init.c | 3 ++- drivers/net/wireless/ath/carl9170/main.c | 3 ++- drivers/net/wireless/rt2x00/rt2800lib.c | 3 ++- include/net/mac80211.h | 1 + net/mac80211/rc80211_minstrel_ht.c | 3 +++ 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 2ba494567777..bd126c25a727 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -767,7 +767,8 @@ void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_SPECTRUM_MGMT | IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_RC_TABLE; + IEEE80211_HW_SUPPORTS_RC_TABLE | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_HT) hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index e9010a481dfd..0686375a97b9 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1857,7 +1857,8 @@ void *carl9170_alloc(size_t priv_size) IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC | - IEEE80211_HW_SIGNAL_DBM; + IEEE80211_HW_SIGNAL_DBM | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; if (!modparam_noht) { /* diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 705aa3386ee0..7e66a903cfee 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -5912,7 +5912,8 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_REPORTS_TX_ACK_STATUS; + IEEE80211_HW_REPORTS_TX_ACK_STATUS | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; /* * Don't set IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING for USB devices diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 885898a40d13..4e50d3674431 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1484,6 +1484,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_RC_TABLE = 1<<24, IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF = 1<<25, IEEE80211_HW_TIMING_BEACON_ONLY = 1<<26, + IEEE80211_HW_SUPPORTS_HT_CCK_RATES = 1<<27, }; /** diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index f5aed963b22e..f3bbea1eb9e7 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -828,6 +828,9 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (sband->band != IEEE80211_BAND_2GHZ) return; + if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES)) + return; + mi->cck_supported = 0; mi->cck_supported_short = 0; for (i = 0; i < 4; i++) { From fde233a59d1d73459804759f280346082cfcc5be Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 21 Aug 2013 10:18:19 +0200 Subject: [PATCH 0687/1368] iwl4965: fix rfkill set state regression commit b2fcc0aee58a3435566dd6d8501a0b355552f28b upstream. My current 3.11 fix: commit 788f7a56fce1bcb2067b62b851a086fca48a0056 Author: Stanislaw Gruszka Date: Thu Aug 1 12:07:55 2013 +0200 iwl4965: reset firmware after rfkill off broke rfkill notification to user-space . I missed that bug, because I compiled without CONFIG_RFKILL, sorry about that. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlegacy/4965-mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 900f5f8c93ee..a8eff95ee677 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -4446,9 +4446,9 @@ il4965_irq_tasklet(struct il_priv *il) set_bit(S_RFKILL, &il->status); } else { clear_bit(S_RFKILL, &il->status); - wiphy_rfkill_set_hw_state(il->hw->wiphy, hw_rf_kill); il_force_reset(il, true); } + wiphy_rfkill_set_hw_state(il->hw->wiphy, hw_rf_kill); handled |= CSR_INT_BIT_RF_KILL; } From 95ca0f394bc6fe4127d62745b6ad641dae988c02 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 24 Jul 2013 16:15:08 -0700 Subject: [PATCH 0688/1368] target: Fix trailing ASCII space usage in INQUIRY vendor+model commit ee60bddba5a5f23e39598195d944aa0eb2d455e5 upstream. This patch fixes spc_emulate_inquiry_std() to add trailing ASCII spaces for INQUIRY vendor + model fields following SPC-4 text: "ASCII data fields described as being left-aligned shall have any unused bytes at the end of the field (i.e., highest offset) and the unused bytes shall be filled with ASCII space characters (20h)." This addresses a problem with Falconstor NSS multipathing. Reported-by: Tomas Molota Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_spc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 4cb667d720a7..9fabbf7214cd 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -97,9 +97,12 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) buf[7] = 0x2; /* CmdQue=1 */ - snprintf(&buf[8], 8, "LIO-ORG"); - snprintf(&buf[16], 16, "%s", dev->t10_wwn.model); - snprintf(&buf[32], 4, "%s", dev->t10_wwn.revision); + memcpy(&buf[8], "LIO-ORG ", 8); + memset(&buf[16], 0x20, 16); + memcpy(&buf[16], dev->t10_wwn.model, + min_t(size_t, strlen(dev->t10_wwn.model), 16)); + memcpy(&buf[32], dev->t10_wwn.revision, + min_t(size_t, strlen(dev->t10_wwn.revision), 4)); buf[4] = 31; /* Set additional length to 31 */ return 0; From ce799c71c8c08ad500f4ad56c3bac2c8ac435d9f Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 22 Aug 2013 00:05:45 -0700 Subject: [PATCH 0689/1368] iscsi-target: Fix ImmediateData=Yes failure regression in >= v3.10 commit 9d86a2befceb06ee83c1a588915e6d6e0abef797 upstream. This patch addresses a regression bug within ImmediateData=Yes failure handling that ends up triggering an OOPs within >= v3.10 iscsi-target code. The problem occurs when iscsit_process_scsi_cmd() does the call to target_put_sess_cmd(), and once again in iscsit_get_immediate_data() that is triggered during two different cases: - When iscsit_sequence_cmd() returns CMDSN_LOWER_THAN_EXP, for which the descriptor state will already have been set to ISTATE_REMOVE by iscsit_sequence_cmd(), and - When iscsi_cmd->sense_reason is set, for which iscsit_execute_cmd() will have already called transport_send_check_condition_and_sense() to queue the exception response. It changes iscsit_process_scsi_cmd() to drop the early call, and makes iscsit_get_immediate_data() call target_put_sess_cmd() from a single location after dumping the immediate data for the failed command. The regression was initially introduced in commit: commit 561bf15892375597ee59d473a704a3e634c4f311 Author: Nicholas Bellinger Date: Wed Jul 3 03:58:58 2013 -0700 iscsi-target: Fix iscsit_sequence_cmd reject handling for iser Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 012ff8bab0f8..981f5a5479fd 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1086,7 +1086,6 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (cmd->reject_reason) return 0; - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); return 1; } /* @@ -1124,14 +1123,10 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, */ cmdsn_ret = iscsit_sequence_cmd(cmd->conn, cmd, (unsigned char *)hdr, hdr->cmdsn); - if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return -1; - } else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); - return 0; - } - if (cmd->sense_reason) { + if (cmd->sense_reason || cmdsn_ret == CMDSN_LOWER_THAN_EXP) { int rc; rc = iscsit_dump_data_payload(cmd->conn, From 258d670469ce3b3e8d465c8a503406181e7bede0 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 18 Aug 2013 15:07:44 -0700 Subject: [PATCH 0690/1368] iscsi-target: Fix iscsit_transport reference leak during NP thread reset commit c9a03c12464c851e691e8d5b6c9deba779c512e0 upstream. This patch fixes a bug in __iscsi_target_login_thread() where an explicit network portal thread reset ends up leaking the iscsit_transport module reference, along with the associated iscsi_conn allocation. This manifests itself with iser-target where a NP reset causes the extra iscsit_transport reference to be taken in iscsit_conn_set_transport() during the reset, which prevents the ib_isert module from being unloaded after the NP thread shutdown has finished. Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_login.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 3402241be87c..bc788c52b6cc 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1163,12 +1163,11 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); - if (ret == -ENODEV) { - iscsit_put_transport(conn->conn_transport); - kfree(conn); - conn = NULL; + iscsit_put_transport(conn->conn_transport); + kfree(conn); + conn = NULL; + if (ret == -ENODEV) goto out; - } /* Get another socket */ return 1; } From 382e9ff11306c83aa73219a4749e55c9146205cf Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 23 Aug 2013 22:28:56 -0700 Subject: [PATCH 0691/1368] iscsi-target: Fix potential NULL pointer in solicited NOPOUT reject commit 28aaa950320fc7b8df3f6d2d34fa7833391a9b72 upstream. This patch addresses a potential NULL pointer dereference regression in iscsit_setup_nop_out() code, specifically for two cases when a solicited NOPOUT triggers a ISCSI_REASON_PROTOCOL_ERROR reject to be generated. This is because iscsi_cmd is expected to be NULL for solicited NOPOUT case before iscsit_process_nop_out() locates the descriptor via TTT using iscsit_find_cmd_from_ttt(). This regression was originally introduced in: commit ba159914086f06532079fc15141f46ffe7e04a41 Author: Nicholas Bellinger Date: Wed Jul 3 03:48:24 2013 -0700 iscsi-target: Fix iscsit_add_reject* usage for iser Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 981f5a5479fd..4c1b8dbdcb36 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1536,6 +1536,10 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (hdr->itt == RESERVED_ITT && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { pr_err("NOPOUT ITT is reserved, but Immediate Bit is" " not set, protocol error.\n"); + if (!cmd) + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, (unsigned char *)hdr); } @@ -1545,6 +1549,10 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, " greater than MaxXmitDataSegmentLength: %u, protocol" " error.\n", payload_length, conn->conn_ops->MaxXmitDataSegmentLength); + if (!cmd) + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, (unsigned char *)hdr); } From 9c289c0c140b31ce6e3b29898fd1deb80ec04b88 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 30 Jul 2013 14:11:51 +0300 Subject: [PATCH 0692/1368] mei: me: fix hardware reset flow commit ff96066e3171acdea356b331163495957cb833d0 upstream. Both H_IS and H_IE needs to be set to receive H_RDY interrupt 1. Assert H_IS to clear the interrupts during hw reset and use mei_me_reg_write instead of mei_hcsr_set as the later strips down the H_IS 2. fix interrupt disablement embarrassing typo hcsr |= ~H_IE -> hcsr &= ~H_IE; this will remove the unwanted interrupt on power down 3. remove useless debug print outs Signed-off-by: Tomas Winkler Cc: Shuah Khan Cc: Konstantin Khlebnikov Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 700fe55095ba..1bf3f8b5ce3a 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -176,16 +176,14 @@ static void mei_me_hw_reset(struct mei_device *dev, bool intr_enable) struct mei_me_hw *hw = to_me_hw(dev); u32 hcsr = mei_hcsr_read(hw); - dev_dbg(&dev->pdev->dev, "before reset HCSR = 0x%08x.\n", hcsr); - - hcsr |= (H_RST | H_IG); + hcsr |= H_RST | H_IG | H_IS; if (intr_enable) hcsr |= H_IE; else - hcsr |= ~H_IE; + hcsr &= ~H_IE; - mei_hcsr_set(hw, hcsr); + mei_me_reg_write(hw, H_CSR, hcsr); if (dev->dev_state == MEI_DEV_POWER_DOWN) mei_me_hw_reset_release(dev); From ea599655d74e7b00caf83a0d5c056b00d19d7c2a Mon Sep 17 00:00:00 2001 From: Graham Williams Date: Wed, 28 Aug 2013 16:36:14 -0700 Subject: [PATCH 0693/1368] usb: acm gadget: Null termintate strings table commit d257221854f0b34cca3247e6c45344d0470f7398 upstream. The gadget strings table should be null terminated. usb_gadget_get_string() loops through the table expecting a null at the end of the list. Signed-off-by: Graham Williams Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/f_acm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c index 4b7e33e5d9c6..ab1065afbbd0 100644 --- a/drivers/usb/gadget/f_acm.c +++ b/drivers/usb/gadget/f_acm.c @@ -285,6 +285,7 @@ static struct usb_string acm_string_defs[] = { [ACM_CTRL_IDX].s = "CDC Abstract Control Model (ACM)", [ACM_DATA_IDX].s = "CDC ACM Data", [ACM_IAD_IDX ].s = "CDC Serial", + { } /* end of list */ }; static struct usb_gadget_strings acm_string_table = { From c2fcdfd1a93802ad38b4076a18034f30c1aee18f Mon Sep 17 00:00:00 2001 From: Wei Hu Date: Fri, 23 Aug 2013 13:14:03 -0700 Subject: [PATCH 0694/1368] hwmon: (k10temp) Add support for Fam16h (Kabini) commit 30b146d1cb5e7560192057098eb705118bd5511f upstream. The temperature reporting interface stays the same, so we just add the PCI-ID to the list. Verified on AMD Olive Hill. Signed-off-by: Wei Hu Acked-by: Clemens Ladisch Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Documentation/hwmon/k10temp | 1 + drivers/hwmon/Kconfig | 4 ++-- drivers/hwmon/k10temp.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp index 90956b618025..4dfdc8f83633 100644 --- a/Documentation/hwmon/k10temp +++ b/Documentation/hwmon/k10temp @@ -12,6 +12,7 @@ Supported chips: * AMD Family 12h processors: "Llano" (E2/A4/A6/A8-Series) * AMD Family 14h processors: "Brazos" (C/E/G/Z-Series) * AMD Family 15h processors: "Bulldozer" (FX-Series), "Trinity" +* AMD Family 16h processors: "Kabini" Prefix: 'k10temp' Addresses scanned: PCI space diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 0428e8a74b19..df064e8cd9dc 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -296,8 +296,8 @@ config SENSORS_K10TEMP If you say yes here you get support for the temperature sensor(s) inside your CPU. Supported are later revisions of the AMD Family 10h and all revisions of the AMD Family 11h, - 12h (Llano), 14h (Brazos) and 15h (Bulldozer/Trinity) - microarchitectures. + 12h (Llano), 14h (Brazos), 15h (Bulldozer/Trinity) and + 16h (Kabini) microarchitectures. This driver can also be built as a module. If so, the module will be called k10temp. diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index e3b037c73a7e..e633856370cf 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -1,5 +1,5 @@ /* - * k10temp.c - AMD Family 10h/11h/12h/14h/15h processor hardware monitoring + * k10temp.c - AMD Family 10h/11h/12h/14h/15h/16h processor hardware monitoring * * Copyright (c) 2009 Clemens Ladisch * @@ -211,6 +211,7 @@ static DEFINE_PCI_DEVICE_TABLE(k10temp_id_table) = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, {} }; MODULE_DEVICE_TABLE(pci, k10temp_id_table); From 2880e37d6f3ef1a9dd94939a42abf35f41158d74 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Mon, 26 Aug 2013 10:19:18 +0800 Subject: [PATCH 0695/1368] ACPI / EC: Add ASUSTEK L4R to quirk list in order to validate ECDT commit 524f42fab787a9510be826ce3d736b56d454ac6d upstream. The ECDT of ASUSTEK L4R doesn't provide correct command and data I/O ports. The DSDT provides the correct information instead. For this reason, add this machine to quirk list for ECDT validation and use the EC information from the DSDT. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=60765 Reported-and-tested-by: Daniele Esposti Signed-off-by: Lan Tianyu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/ec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 80403c1a89f8..45af90a1ec1b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -987,6 +987,10 @@ static struct dmi_system_id __initdata ec_dmi_table[] = { ec_skip_dsdt_scan, "HP Folio 13", { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13"),}, NULL}, + { + ec_validate_ecdt, "ASUS hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek Computer Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "L4R"),}, NULL}, {}, }; From 39d163b38728dd85291afb2ba84f760368d9e8eb Mon Sep 17 00:00:00 2001 From: Mark Rusk Date: Wed, 14 Aug 2013 15:30:01 -0500 Subject: [PATCH 0696/1368] drivers/misc/hpilo: Correct panic when an AUX iLO is detected commit eefbc594abbb1b7e6e7eeadb65ae7c7538474210 upstream. Using an uninitialized variable 'devnum' after 'goto out;' was causing panic. Just go ahead and return, we need to ignore AUX iLO devs. Oops: 0002 [#1] SMP . . . RIP [] ilo_probe+0xec/0xe7c [hpilo] Signed-off-by: Mark Rusk Signed-off-by: Greg Kroah-Hartman --- drivers/misc/hpilo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c index 621c7a373390..b83e3ca12a41 100644 --- a/drivers/misc/hpilo.c +++ b/drivers/misc/hpilo.c @@ -759,7 +759,7 @@ static int ilo_probe(struct pci_dev *pdev, /* Ignore subsystem_device = 0x1979 (set by BIOS) */ if (pdev->subsystem_device == 0x1979) - goto out; + return 0; if (max_ccb > MAX_CCB) max_ccb = MAX_CCB; @@ -899,7 +899,7 @@ static void __exit ilo_exit(void) class_destroy(ilo_class); } -MODULE_VERSION("1.4"); +MODULE_VERSION("1.4.1"); MODULE_ALIAS(ILO_NAME); MODULE_DESCRIPTION(ILO_NAME); MODULE_AUTHOR("David Altobelli "); From 9e50b358ed4b8af18955ab07549ee2264b5b44f2 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Mon, 29 Jul 2013 17:06:05 +0100 Subject: [PATCH 0697/1368] xen/arm: missing put_cpu in xen_percpu_init commit 0d7febe58413884f6428143221971618fbf3a47d upstream. When CONFIG_PREEMPT is enabled, Linux will not be able to boot and warn: [ 4.127825] ------------[ cut here ]------------ [ 4.133376] WARNING: at init/main.c:699 do_one_initcall+0x150/0x158() [ 4.140738] initcall xen_init_events+0x0/0x10c returned with preemption imbalance This is because xen_percpu_init uses get_cpu but doesn't have the corresponding put_cpu. Signed-off-by: Julien Grall Signed-off-by: Stefano Stabellini Signed-off-by: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- arch/arm/xen/enlighten.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 13609e01f4b7..05db95d010ae 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -170,6 +170,7 @@ static void __init xen_percpu_init(void *unused) per_cpu(xen_vcpu, cpu) = vcpup; enable_percpu_irq(xen_events_irq, 0); + put_cpu(); } static void xen_restart(char str, const char *cmd) From 2ac13a8f46246a8a3773bfca0a003f0524cf7cf0 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 28 Jun 2013 13:55:27 -0300 Subject: [PATCH 0698/1368] imx-drm: imx-drm-core: Export imx_drm_encoder_get_mux_id commit ea8d15832016b0d07a8121159904e6b1d21b5b8b upstream. When building imx_v6_v7_defconfig with imx-drm drivers selected as modules, we get the following build error: ERROR: "imx_drm_encoder_get_mux_id" [drivers/staging/imx-drm/imx-ldb.ko] undefined! Export the required function to avoid this problem. Signed-off-by: Fabio Estevam Acked-by: Sascha Hauer Acked-by: Philipp Zabel Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/imx-drm/imx-drm-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/imx-drm/imx-drm-core.c b/drivers/staging/imx-drm/imx-drm-core.c index 64553058b67e..a532ca568526 100644 --- a/drivers/staging/imx-drm/imx-drm-core.c +++ b/drivers/staging/imx-drm/imx-drm-core.c @@ -681,6 +681,7 @@ int imx_drm_encoder_get_mux_id(struct imx_drm_encoder *imx_drm_encoder, return i; } +EXPORT_SYMBOL_GPL(imx_drm_encoder_get_mux_id); /* * imx_drm_remove_encoder - remove an encoder From 0c6471cc8eb5e6f745b3da21a75096a95548432c Mon Sep 17 00:00:00 2001 From: David Jander Date: Wed, 21 Aug 2013 17:37:22 +0200 Subject: [PATCH 0699/1368] regmap: rbtree: Fix overlapping rbnodes. commit 4e67fb5f5e336250db944921e3c68057d6203034 upstream. Avoid overlapping register regions by making the initial blklen of a new node 1. If a register write occurs to a yet uncached register, that is lower than but near an existing node's base_reg, a new node is created and it's blklen is set to an arbitrary value (sizeof(*rbnode)). That may cause this node to overlap with another node. Those nodes should be merged, but this merge doesn't happen yet, so this patch at least makes the initial blklen small enough to avoid hitting the wrong node, which may otherwise lead to severe breakage. Signed-off-by: David Jander Signed-off-by: Mark Brown Signed-off-by: Zhouping Liu Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regcache-rbtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 02f490bad30f..bb8c3bbc7812 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -362,7 +362,7 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg, rbnode = kzalloc(sizeof *rbnode, GFP_KERNEL); if (!rbnode) return -ENOMEM; - rbnode->blklen = sizeof(*rbnode); + rbnode->blklen = 1; rbnode->base_reg = reg; rbnode->block = kmalloc(rbnode->blklen * map->cache_word_size, GFP_KERNEL); From 85cdabba08d484bdcc4b25f0bbc23ac60c75aa5b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 7 Sep 2013 22:10:14 -0700 Subject: [PATCH 0700/1368] Linux 3.10.11 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b119684ddb2d..595076dc14a0 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 10 +SUBLEVEL = 11 EXTRAVERSION = NAME = TOSSUG Baby Fish From f4cc06ac2d9fc7a483eb778abec66f60487687f4 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 1 Aug 2013 22:32:07 -0700 Subject: [PATCH 0701/1368] htb: fix sign extension bug [ Upstream commit cbd375567f7e4811b1c721f75ec519828ac6583f ] When userspace passes a large priority value the assignment of the unsigned value hopt->prio to signed int cl->prio causes cl->prio to become negative and the comparison is with TC_HTB_NUMPRIO is always false. The result is that HTB crashes by referencing outside the array when processing packets. With this patch the large value wraps around like other values outside the normal range. See: https://bugzilla.kernel.org/show_bug.cgi?id=60669 Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_htb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adaedd79389c..5c9f0b7b2109 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -87,7 +87,7 @@ struct htb_class { unsigned int children; struct htb_class *parent; /* parent class */ - int prio; /* these two are used only by leaves... */ + u32 prio; /* these two are used only by leaves... */ int quantum; /* but stored for parent-to-leaf return */ union { From b4f55925ace5646d72df5af6fdab74fdbb288229 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Aug 2013 11:32:43 +0200 Subject: [PATCH 0702/1368] net: rtm_to_ifaddr: free ifa if ifa_cacheinfo processing fails [ Upstream commit 446266b0c742a2c9ee8f0dce759a0117bce58a86 ] Commit 5c766d642 ("ipv4: introduce address lifetime") leaves the ifa resource that was allocated via inet_alloc_ifa() unfreed when returning the function with -EINVAL. Thus, free it first via inet_free_ifa(). Signed-off-by: Daniel Borkmann Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/devinet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b7..9e38217c3931 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -771,7 +771,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -779,6 +779,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } From 4691236cedfb12e2644f4c84b4b14a6882e1fd7b Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 2 Aug 2013 18:36:40 +0400 Subject: [PATCH 0703/1368] net: check net.core.somaxconn sysctl values [ Upstream commit 5f671d6b4ec3e6d66c2a868738af2cdea09e7509 ] It's possible to assign an invalid value to the net.core.somaxconn sysctl variable, because there is no checks at all. The sk_max_ack_backlog field of the sock structure is defined as unsigned short. Therefore, the backlog argument in inet_listen() shouldn't exceed USHRT_MAX. The backlog argument in the listen() syscall is truncated to the somaxconn value. So, the somaxconn value shouldn't exceed 65535 (USHRT_MAX). Also, negative values of somaxconn are meaningless. before: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 net.core.somaxconn = 65536 $ sysctl -w net.core.somaxconn=-100 net.core.somaxconn = -100 after: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 error: "Invalid argument" setting key "net.core.somaxconn" $ sysctl -w net.core.somaxconn=-100 error: "Invalid argument" setting key "net.core.somaxconn" Based on a prior patch from Changli Gao. Signed-off-by: Roman Gushchin Reported-by: Changli Gao Suggested-by: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sysctl_net_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cfdb46ab3a7f..2ff093b7c45e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -20,7 +20,9 @@ #include #include +static int zero = 0; static int one = 1; +static int ushort_max = USHRT_MAX; #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(ctl_table *table, int write, @@ -204,7 +206,9 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .extra1 = &zero, + .extra2 = &ushort_max, + .proc_handler = proc_dointvec_minmax }, { } }; From 3b8dd03efa3008933d14e7203b7b29200330f039 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 5 Aug 2013 18:25:54 +0300 Subject: [PATCH 0704/1368] macvlan: validate flags [ Upstream commit 1512747820367c8b3b8b72035f0f78c62f2bf1e9 ] commit df8ef8f3aaa6692970a436204c4429210addb23a macvlan: add FDB bridge ops and macvlan flags added a flags field to macvlan, which can be controlled from userspace. The idea is to make the interface future-proof so we can add flags and not new fields. However, flags value isn't validated, as a result, userspace can't detect which flags are supported. Signed-off-by: Michael S. Tsirkin Cc: "David S. Miller" Cc: John Fastabend Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 6e91931a1c2c..06eba6e480c9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -727,6 +727,10 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) return -EADDRNOTAVAIL; } + if (data && data[IFLA_MACVLAN_FLAGS] && + nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC) + return -EINVAL; + if (data && data[IFLA_MACVLAN_MODE]) { switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) { case MACVLAN_MODE_PRIVATE: From 5cf1ad6c6a29186821843e5d6f70ebbbce587f4f Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 2 Aug 2013 19:07:38 +0200 Subject: [PATCH 0705/1368] neighbour: populate neigh_parms on alloc before calling ndo_neigh_setup [ Upstream commit 63134803a6369dcf7dddf7f0d5e37b9566b308d2 ] dev->ndo_neigh_setup() might need some of the values of neigh_parms, so populate them before calling it. Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ce90b0264db5..0034b611fa5e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1445,16 +1445,18 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, atomic_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); - - if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - kfree(p); - return NULL; - } - dev_hold(dev); p->dev = dev; write_pnet(&p->net, hold_net(net)); p->sysctl_table = NULL; + + if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { + release_net(net); + dev_put(dev); + kfree(p); + return NULL; + } + write_lock_bh(&tbl->lock); p->next = tbl->parms.next; tbl->parms.next = p; From 92986ce4e59db8c4c44caf9633095ab07f1d79f5 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 2 Aug 2013 19:07:39 +0200 Subject: [PATCH 0706/1368] bonding: modify only neigh_parms owned by us [ Upstream commit 9918d5bf329d0dc5bb2d9d293bcb772bdb626e65 ] Otherwise, on neighbour creation, bond_neigh_init() will be called with a foreign netdev. Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f97569613526..666cf3a49b6d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3770,11 +3770,17 @@ static int bond_neigh_init(struct neighbour *n) * The bonding ndo_neigh_setup is called at init time beofre any * slave exists. So we must declare proxy setup function which will * be used at run time to resolve the actual slave neigh param setup. + * + * It's also called by master devices (such as vlans) to setup their + * underlying devices. In that case - do nothing, we're already set up from + * our init. */ static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) { - parms->neigh_setup = bond_neigh_init; + /* modify only our neigh_parms */ + if (parms->dev == dev) + parms->neigh_setup = bond_neigh_init; return 0; } From 8b2b5e27cae0bd2572687869111e516ab445be34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 11:18:49 -0700 Subject: [PATCH 0707/1368] fib_trie: remove potential out of bound access [ Upstream commit aab515d7c32a34300312416c50314e755ea6f765 ] AddressSanitizer [1] dynamic checker pointed a potential out of bound access in leaf_walk_rcu() We could allocate one more slot in tnode_new() to leave the prefetch() in-place but it looks not worth the pain. Bug added in commit 82cfbb008572b ("[IPV4] fib_trie: iterator recode") [1] : https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Cc: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_trie.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed9340..6e8a13da6cbd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -71,7 +71,6 @@ #include #include #include -#include #include #include #include @@ -1761,10 +1760,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) if (!c) continue; - if (IS_LEAF(c)) { - prefetch(rcu_dereference_rtnl(p->child[idx])); + if (IS_LEAF(c)) return (struct leaf *) c; - } /* Rescan start scanning in new node */ p = (struct tnode *) c; From 5ddf771720b5761435133058842f274072afa8ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 6 Aug 2013 00:32:05 +0200 Subject: [PATCH 0708/1368] bridge: don't try to update timers in case of broken MLD queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 248ba8ec05a2c3b118c2224e57eb10c128176ab1 ] Currently we are reading an uninitialized value for the max_delay variable when snooping an MLD query message of invalid length and would update our timers with that. Fixing this by simply ignoring such broken MLD queries (just like we do for IGMP already). This is a regression introduced by: "bridge: disable snooping if there is no querier" (b00589af3b04) Reported-by: Paul Bolle Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d6448e35e027..75a81281c979 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1185,7 +1185,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); if (max_delay) group = &mld->mld_mca; - } else if (skb->len >= sizeof(*mld2q)) { + } else { if (!pskb_may_pull(skb, sizeof(*mld2q))) { err = -EINVAL; goto out; From 16f033319c6805588375afbda88ea5a513393dd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 17:10:15 -0700 Subject: [PATCH 0709/1368] tcp: cubic: fix overflow error in bictcp_update() [ Upstream commit 2ed0edf9090bf4afa2c6fc4f38575a85a80d4b20 ] commit 17a6e9f1aa9 ("tcp_cubic: fix clock dependency") added an overflow error in bictcp_update() in following code : /* change the unit from HZ to bictcp_HZ */ t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - ca->epoch_start) << BICTCP_HZ) / HZ; Because msecs_to_jiffies() being unsigned long, compiler does implicit type promotion. We really want to constrain (tcp_time_stamp - ca->epoch_start) to a signed 32bit value, or else 't' has unexpected high values. This bugs triggers an increase of retransmit rates ~24 days after boot [1], as the high order bit of tcp_time_stamp flips. [1] for hosts with HZ=1000 Big thanks to Van Jacobson for spotting this problem. Diagnosed-by: Van Jacobson Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Cc: Stephen Hemminger Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_cubic.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index a9077f441cb2..b6b591f0a788 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -206,8 +206,8 @@ static u32 cubic_root(u64 a) */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { - u64 offs; - u32 delta, t, bic_target, max_cnt; + u32 delta, bic_target, max_cnt; + u64 offs, t; ca->ack_cnt++; /* count the number of ACKs */ @@ -250,9 +250,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * if the cwnd < 1 million packets !!! */ + t = (s32)(tcp_time_stamp - ca->epoch_start); + t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - - ca->epoch_start) << BICTCP_HZ) / HZ; + t <<= BICTCP_HZ; + do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; From ca02d414915693909f9d7f455c4598d3f8b514b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 20:05:12 -0700 Subject: [PATCH 0710/1368] tcp: cubic: fix bug in bictcp_acked() [ Upstream commit cd6b423afd3c08b27e1fed52db828ade0addbc6b ] While investigating about strange increase of retransmit rates on hosts ~24 days after boot, Van found hystart was disabled if ca->epoch_start was 0, as following condition is true when tcp_time_stamp high order bit is set. (s32)(tcp_time_stamp - ca->epoch_start) < HZ Quoting Van : At initialization & after every loss ca->epoch_start is set to zero so I believe that the above line will turn off hystart as soon as the 2^31 bit is set in tcp_time_stamp & hystart will stay off for 24 days. I think we've observed that cubic's restart is too aggressive without hystart so this might account for the higher drop rate we observe. Diagnosed-by: Van Jacobson Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_cubic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b6b591f0a788..b6ae92a51f58 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -416,7 +416,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) return; /* Discard delay samples right after fast recovery */ - if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; delay = (rtt_us << 3) / USEC_PER_MSEC; From 24e8ac721e0e13cab478bd190977daac48b6146c Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 7 Aug 2013 02:34:31 +0200 Subject: [PATCH 0711/1368] ipv6: don't stop backtracking in fib6_lookup_1 if subtree does not match [ Upstream commit 3e3be275851bc6fc90bfdcd732cd95563acd982b ] In case a subtree did not match we currently stop backtracking and return NULL (root table from fib_lookup). This could yield in invalid routing table lookups when using subtrees. Instead continue to backtrack until a valid subtree or node is found and return this match. Also remove unneeded NULL check. Reported-by: Teco Boot Cc: YOSHIFUJI Hideaki Cc: David Lamparter Cc: Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5fc9c7a68d8d..2221ff6a308b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -993,14 +993,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) - fn = fib6_lookup_1(fn->subtree, args + 1); + if (fn->subtree) { + struct fib6_node *sfn; + sfn = fib6_lookup_1(fn->subtree, + args + 1); + if (!sfn) + goto backtrack; + fn = sfn; + } #endif - if (!fn || fn->fn_flags & RTN_RTINFO) + if (fn->fn_flags & RTN_RTINFO) return fn; } } - +#ifdef CONFIG_IPV6_SUBTREES +backtrack: +#endif if (fn->fn_flags & RTN_ROOT) break; From 27c1c98bd3b44b7c5f5c0ecfe1a1ec1240b73829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Tue, 6 Aug 2013 13:45:43 +0300 Subject: [PATCH 0712/1368] ip_gre: fix ipgre_header to return correct offset MIME-Version: 1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 77a482bdb2e68d13fae87541b341905ba70d572b ] Fix ipgre_header() (header_ops->create) to return the correct amount of bytes pushed. Most callers of dev_hard_header() seem to care only if it was success, but af_packet.c uses it as offset to the skb to copy from userspace only once. In practice this fixes packet socket sendto()/sendmsg() to gre tunnels. Regression introduced in c54419321455631079c7d6e60bc732dd0c5914c5 ("GRE: Refactor GRE tunneling code.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 855004f08325..c52fee0976da 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -572,7 +572,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (daddr) memcpy(&iph->daddr, daddr, 4); if (iph->daddr) - return t->hlen; + return t->hlen + sizeof(*iph); return -(t->hlen + sizeof(*iph)); } From 6c1a4bb872100c852b07480396744608278dd9ee Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 9 Aug 2013 11:16:34 -0700 Subject: [PATCH 0713/1368] 8139cp: Fix skb leak in rx_status_loop failure path. [ Upstream commit d06f5187469eee1b2932c02fd093d113cfc60d5e ] Introduced in cf3c4c03060b688cbc389ebc5065ebcce5653e96 ("8139cp: Add dma_mapping_error checking") Signed-off-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/8139cp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 887aebea298b..9095ff930f29 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -524,6 +524,7 @@ static int cp_rx_poll(struct napi_struct *napi, int budget) PCI_DMA_FROMDEVICE); if (dma_mapping_error(&cp->pdev->dev, new_mapping)) { dev->stats.rx_dropped++; + kfree_skb(new_skb); goto rx_next; } From 21db4be1321b1fe80a28eb122e459c8ab3c2bd1f Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 8 Aug 2013 15:19:48 -0700 Subject: [PATCH 0714/1368] rtnetlink: Fix inverted check in ndo_dflt_fdb_del() [ Upstream commit 645359930231d5e78fd3296a38b98c1a658a7ade ] Fix inverted check when deleting an fdb entry. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a08bd2b7fe3f..373a8e76af00 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2142,7 +2142,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, /* If aging addresses are supported device will need to * implement its own handler for this. */ - if (ndm->ndm_state & NUD_PERMANENT) { + if (!(ndm->ndm_state & NUD_PERMANENT)) { pr_info("%s: FDB only supports static addresses\n", dev->name); return -EINVAL; } From e307a8acf8c075261f8110b7088f20d2f7206f56 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:44:55 -0700 Subject: [PATCH 0715/1368] genl: Fix genl dumpit() locking. [ Upstream commit 9b96309c5b0b9e466773c07a5bc8b7b68fcf010a ] In case of genl-family with parallel ops off, dumpif() callback is expected to run under genl_lock, But commit def3117493eafd9df (genl: Allow concurrent genl callbacks.) changed this behaviour where only first dumpit() op was called under genl-lock. For subsequent dump, only nlk->cb_lock was taken. Following patch fixes it by defining locked dumpit() and done() callback which takes care of genl-locking. Signed-off-by: Pravin B Shelar CC: Jesse Gross CC: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/genetlink.c | 51 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1076fe16b122..8ee0b4506beb 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -544,6 +544,30 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc; + + genl_lock(); + rc = ops->dumpit(skb, cb); + genl_unlock(); + return rc; +} + +static int genl_lock_done(struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->done) { + genl_lock(); + rc = ops->done(cb); + genl_unlock(); + } + return rc; +} + static int genl_family_rcv_msg(struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) @@ -572,15 +596,32 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EPERM; if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; + int rc; if (ops->dumpit == NULL) return -EOPNOTSUPP; - return netlink_dump_start(net->genl_sock, skb, nlh, &c); + if (!family->parallel_ops) { + struct netlink_dump_control c = { + .data = ops, + .dump = genl_lock_dumpit, + .done = genl_lock_done, + }; + + genl_unlock(); + rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + genl_lock(); + + } else { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + + rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + } + + return rc; } if (ops->doit == NULL) From a8077ef001460c41bc0509b096d3d342002c4d9b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:45:04 -0700 Subject: [PATCH 0716/1368] genl: Hold reference on correct module while netlink-dump. [ Upstream commit 33c6b1f6b154894321f5734e50c66621e9134e7e ] netlink dump operations take module as parameter to hold reference for entire netlink dump duration. Currently it holds ref only on genl module which is not correct when we use ops registered to genl from another module. Following patch adds module pointer to genl_ops so that netlink can hold ref count on it. Signed-off-by: Pravin B Shelar CC: Jesse Gross CC: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/genetlink.h | 20 ++++++++++++++++++-- net/netlink/genetlink.c | 20 +++++++++++--------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 93024a47e0e2..8e0b6c856a13 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -61,6 +61,7 @@ struct genl_family { struct list_head ops_list; /* private */ struct list_head family_list; /* private */ struct list_head mcast_groups; /* private */ + struct module *module; }; /** @@ -121,9 +122,24 @@ struct genl_ops { struct list_head ops_list; }; -extern int genl_register_family(struct genl_family *family); -extern int genl_register_family_with_ops(struct genl_family *family, +extern int __genl_register_family(struct genl_family *family); + +static inline int genl_register_family(struct genl_family *family) +{ + family->module = THIS_MODULE; + return __genl_register_family(family); +} + +extern int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops); + +static inline int genl_register_family_with_ops(struct genl_family *family, + struct genl_ops *ops, size_t n_ops) +{ + family->module = THIS_MODULE; + return __genl_register_family_with_ops(family, ops, n_ops); +} + extern int genl_unregister_family(struct genl_family *family); extern int genl_register_ops(struct genl_family *, struct genl_ops *ops); extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 8ee0b4506beb..393f17eea1a2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -364,7 +364,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) EXPORT_SYMBOL(genl_unregister_ops); /** - * genl_register_family - register a generic netlink family + * __genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one @@ -374,7 +374,7 @@ EXPORT_SYMBOL(genl_unregister_ops); * * Return 0 on success or a negative error code. */ -int genl_register_family(struct genl_family *family) +int __genl_register_family(struct genl_family *family) { int err = -EINVAL; @@ -430,10 +430,10 @@ int genl_register_family(struct genl_family *family) errout: return err; } -EXPORT_SYMBOL(genl_register_family); +EXPORT_SYMBOL(__genl_register_family); /** - * genl_register_family_with_ops - register a generic netlink family + * __genl_register_family_with_ops - register a generic netlink family * @family: generic netlink family * @ops: operations to be registered * @n_ops: number of elements to register @@ -457,12 +457,12 @@ EXPORT_SYMBOL(genl_register_family); * * Return 0 on success or a negative error code. */ -int genl_register_family_with_ops(struct genl_family *family, +int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops) { int err, i; - err = genl_register_family(family); + err = __genl_register_family(family); if (err) return err; @@ -476,7 +476,7 @@ int genl_register_family_with_ops(struct genl_family *family, genl_unregister_family(family); return err; } -EXPORT_SYMBOL(genl_register_family_with_ops); +EXPORT_SYMBOL(__genl_register_family_with_ops); /** * genl_unregister_family - unregister generic netlink family @@ -603,22 +603,24 @@ static int genl_family_rcv_msg(struct genl_family *family, if (!family->parallel_ops) { struct netlink_dump_control c = { + .module = family->module, .data = ops, .dump = genl_lock_dumpit, .done = genl_lock_done, }; genl_unlock(); - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); genl_lock(); } else { struct netlink_dump_control c = { + .module = family->module, .dump = ops->dumpit, .done = ops->done, }; - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); } return rc; From e8d11678770b9bbdcc5c2a9b3a041d136575f322 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 13 Aug 2013 01:41:06 -0700 Subject: [PATCH 0717/1368] ip_tunnel: Do not use inner ip-header-id for tunnel ip-header-id. [ Upstream commit 4221f40513233fa8edeef7fc82e44163fde03b9b ] Using inner-id for tunnel id is not safe in some rare cases. E.g. packets coming from multiple sources entering same tunnel can have same id. Therefore on tunnel packet receive we could have packets from two different stream but with same source and dst IP with same ip-id which could confuse ip packet reassembly. Following patch reverts optimization from commit 490ab08127 (IP_GRE: Fix IP-Identification.) Signed-off-by: Pravin B Shelar CC: Jarno Rajahalme CC: Ansis Atteka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 2 +- include/net/ip_tunnels.h | 14 -------------- net/ipv4/ip_tunnel.c | 2 +- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 57325f356d4f..054489fdf54a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1090,7 +1090,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, iph->daddr = dst; iph->saddr = fl4.saddr; iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - tunnel_ip_select_ident(skb, old_iph, &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); nf_reset(skb); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 09b1360e10bf..a9942e1faefb 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -141,20 +141,6 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -static inline void tunnel_ip_select_ident(struct sk_buff *skb, - const struct iphdr *old_iph, - struct dst_entry *dst) -{ - struct iphdr *iph = ip_hdr(skb); - - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); -} - static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) { int err; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cbfc37f5f05a..b7a4c21c06ec 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -686,7 +686,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; - tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); iptunnel_xmit(skb, dev); return; From f784dbb9b9b868185b78295b120dbc1f79513e55 Mon Sep 17 00:00:00 2001 From: Asbjoern Sloth Toennesen Date: Mon, 12 Aug 2013 16:30:09 +0000 Subject: [PATCH 0718/1368] rtnetlink: rtnl_bridge_getlink: Call nlmsg_find_attr() with ifinfomsg header [ Upstream commit 3e805ad288c524bb65aad3f1e004402223d3d504 ] Fix the iproute2 command `bridge vlan show`, after switching from rtgenmsg to ifinfomsg. Let's start with a little history: Feb 20: Vlad Yasevich got his VLAN-aware bridge patchset included in the 3.9 merge window. In the kernel commit 6cbdceeb, he added attribute support to bridge GETLINK requests sent with rtgenmsg. Mar 6th: Vlad got this iproute2 reference implementation of the bridge vlan netlink interface accepted (iproute2 9eff0e5c) Apr 25th: iproute2 switched from using rtgenmsg to ifinfomsg (63338dca) http://patchwork.ozlabs.org/patch/239602/ http://marc.info/?t=136680900700007 Apr 28th: Linus released 3.9 Apr 30th: Stephen released iproute2 3.9.0 The `bridge vlan show` command haven't been working since the switch to ifinfomsg, or in a released version of iproute2. Since the kernel side only supports rtgenmsg, which iproute2 switched away from just prior to the iproute2 3.9.0 release. I haven't been able to find any documentation, about neither rtgenmsg nor ifinfomsg, and in which situation to use which, but kernel commit 88c5b5ce seams to suggest that ifinfomsg should be used. Fixing this in kernel will break compatibility, but I doubt that anybody have been using it due to this bug in the user space reference implementation, at least not without noticing this bug. That said the functionality is still fully functional in 3.9, when reversing iproute2 commit 63338dca. This could also be fixed in iproute2, but thats an ugly patch that would reintroduce rtgenmsg in iproute2, and from searching in netdev it seams like rtgenmsg usage is discouraged. I'm assuming that the only reason that Vlad implemented the kernel side to use rtgenmsg, was because iproute2 was using it at the time. Signed-off-by: Asbjoern Sloth Toennesen Reviewed-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 373a8e76af00..fd01eca52a13 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2374,7 +2374,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr *extfilt; u32 filter_mask = 0; - extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), IFLA_EXT_MASK); if (extfilt) filter_mask = nla_get_u32(extfilt); From b59bde78dbf049a2671603034562940ac6eb1181 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Aug 2013 15:52:57 +0300 Subject: [PATCH 0719/1368] tun: signedness bug in tun_get_user() [ Upstream commit 15718ea0d844e4816dbd95d57a8a0e3e264ba90e ] The recent fix d9bf5f1309 "tun: compare with 0 instead of total_len" is not totally correct. Because "len" and "sizeof()" are size_t type, that means they are never less than zero. Signed-off-by: Dan Carpenter Acked-by: Michael S. Tsirkin Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2491eb28a8d2..7b54f4f052d0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1076,8 +1076,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, u32 rxhash; if (!(tun->flags & TUN_NO_PI)) { - if ((len -= sizeof(pi)) > total_len) + if (len < sizeof(pi)) return -EINVAL; + len -= sizeof(pi); if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi))) return -EFAULT; @@ -1085,8 +1086,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (tun->flags & TUN_VNET_HDR) { - if ((len -= tun->vnet_hdr_sz) > total_len) + if (len < tun->vnet_hdr_sz) return -EINVAL; + len -= tun->vnet_hdr_sz; if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) return -EFAULT; From ad558a2970f60e8a52f048c96f0f6ea7dc691fb7 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Aug 2013 13:02:27 +0200 Subject: [PATCH 0720/1368] ipv6: remove max_addresses check from ipv6_create_tempaddr [ Upstream commit 4b08a8f1bd8cb4541c93ec170027b4d0782dab52 ] Because of the max_addresses check attackers were able to disable privacy extensions on an interface by creating enough autoconfigured addresses: But the check is not actually needed: max_addresses protects the kernel to install too many ipv6 addresses on an interface and guards addrconf_prefix_rcv to install further addresses as soon as this limit is reached. We only generate temporary addresses in direct response of a new address showing up. As soon as we filled up the maximum number of addresses of an interface, we stop installing more addresses and thus also stop generating more temp addresses. Even if the attacker tries to generate a lot of temporary addresses by announcing a prefix and removing it again (lifetime == 0) we won't install more temp addresses, because the temporary addresses do count to the maximum number of addresses, thus we would stop installing new autoconfigured addresses when the limit is reached. This patch fixes CVE-2013-0343 (but other layer-2 attacks are still possible). Thanks to Ding Tianhong to bring this topic up again. Signed-off-by: Hannes Frederic Sowa Cc: Ding Tianhong Cc: George Kargiotakis Cc: P J P Cc: YOSHIFUJI Hideaki Acked-by: Ding Tianhong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fb8c94c4ab86..d3057f9b04a1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1124,12 +1124,10 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i if (ifp->flags & IFA_F_OPTIMISTIC) addr_flags |= IFA_F_OPTIMISTIC; - ift = !max_addresses || - ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, - addr_flags) : NULL; - if (IS_ERR_OR_NULL(ift)) { + ift = ipv6_add_addr(idev, &addr, tmp_plen, + ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, + addr_flags); + if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); From a829a28873ec2f5daedd77ef91e430b260af1521 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Aug 2013 13:30:07 +0200 Subject: [PATCH 0721/1368] ipv6: drop packets with multiple fragmentation headers [ Upstream commit f46078cfcd77fa5165bf849f5e568a7ac5fa569c ] It is not allowed for an ipv6 packet to contain multiple fragmentation headers. So discard packets which were already reassembled by fragmentation logic and send back a parameter problem icmp. The updates for RFC 6980 will come in later, I have to do a bit more research here. Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ipv6.h | 1 + net/ipv6/reassembly.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 850e95bc766c..b8b7dc755752 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -101,6 +101,7 @@ struct inet6_skb_parm { #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 #define IP6SKB_ROUTERALERT 8 +#define IP6SKB_FRAGMENTED 16 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 790d9f4b8b0b..1aeb473b2cc6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_hdr(head)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; + IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct net *net = dev_net(skb_dst(skb)->dev); int evicted; + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) + goto fail_hdr; + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ @@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; return 1; } From f3f905389f1aaae9e091a28d018c66e08c85eddd Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 16 Aug 2013 19:04:36 +0400 Subject: [PATCH 0722/1368] tcp: set timestamps for restored skb-s [ Upstream commit 7ed5c5ae96d23da22de95e1c7a239537acd378b1 ] When the repair mode is turned off, the write queue seqs are updated so that the whole queue is considered to be 'already sent. The "when" field must be set for such skb. It's used in tcp_rearm_rto for example. If the "when" field isn't set, the retransmit timeout can be calculated incorrectly and a tcp connected can stop for two minutes (TCP_RTO_MAX). Acked-by: Pavel Emelyanov Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ab450c099aa4..2005561861ad 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1117,6 +1117,13 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!skb) goto wait_for_memory; + /* + * All packets are restored as if they have + * already been sent. + */ + if (tp->repair) + TCP_SKB_CB(skb)->when = tcp_time_stamp; + /* * Check whether we can use HW checksum. */ From fc26e4cf6f6be400ea7b5442982c58e76b6beda4 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 19 Aug 2013 16:40:22 -0400 Subject: [PATCH 0723/1368] packet: restore packet statistics tp_packets to include drops [ Upstream commit 8bcdeaff5ed544704a9a691d4aef0adb3f9c5b8f ] getsockopt PACKET_STATISTICS returns tp_packets + tp_drops. Commit ee80fbf301 ("packet: account statistics only in tpacket_stats_u") cleaned up the getsockopt PACKET_STATISTICS code. This also changed semantics. Historically, tp_packets included tp_drops on return. The commit removed the line that adds tp_drops into tp_packets. This patch reinstates the old semantics. Signed-off-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 20a1bd0e6549..a6895ab597c2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3259,9 +3259,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); + st.stats3.tp_packets += st.stats3.tp_drops; data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); + st.stats1.tp_packets += st.stats1.tp_drops; data = &st.stats1; } From 9f0bd377e1210501cd11eef80159e5d7f6160fef Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 20 Aug 2013 17:10:18 +0900 Subject: [PATCH 0724/1368] bridge: Use the correct bit length for bitmap functions in the VLAN code [ Upstream commit ef40b7ef181b7b1a24df2ef2d1ef84956bffa635 ] The VLAN code needs to know the length of the per-port VLAN bitmap to perform its most basic operations (retrieving VLAN informations, removing VLANs, forwarding database manipulation, etc). Unfortunately, in the current implementation we are using a macro that indicates the bitmap size in longs in places where the size in bits is expected, which in some cases can cause what appear to be random failures. Use the correct macro. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_fdb.c | 10 +++++----- net/bridge/br_netlink.c | 4 ++-- net/bridge/br_vlan.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ebfa4443c69b..84dd783abe5c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -161,7 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (!pv) return; - for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) { f = __br_fdb_get(br, br->dev->dev_addr, vid); if (f && f->is_local && !f->dst) fdb_delete(br, f); @@ -725,7 +725,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. */ err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); goto out; } @@ -734,7 +734,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); if (err) goto out; @@ -812,7 +812,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], err = __br_fdb_delete(p, addr, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { err = __br_fdb_delete(p, addr, 0); goto out; } @@ -822,7 +822,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], * vlan on this port. */ err = -ENOENT; - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err &= __br_fdb_delete(p, addr, vid); } } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8e3abf564798..f66a03453a8e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -128,7 +128,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, else pv = br_get_vlan_info(br); - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) goto done; af = nla_nest_start(skb, IFLA_AF_SPEC); @@ -136,7 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { vinfo.vid = vid; vinfo.flags = 0; if (vid == pvid) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index bd58b45f5f90..9a9ffe7e4019 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -108,7 +108,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) clear_bit(vid, v->vlan_bitmap); v->num_vlans--; - if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); else @@ -122,7 +122,7 @@ static void __vlan_flush(struct net_port_vlans *v) { smp_wmb(); v->pvid = 0; - bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); + bitmap_zero(v->vlan_bitmap, VLAN_N_VID); if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); else From c06ab09127706af38e9e2869afef82a0f63e5fd7 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 14 Aug 2013 23:47:11 +0200 Subject: [PATCH 0725/1368] net_sched: restore "linklayer atm" handling [ Upstream commit 8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 ] commit 56b765b79 ("htb: improved accuracy at high rates") broke the "linklayer atm" handling. tc class add ... htb rate X ceil Y linklayer atm The linklayer setting is implemented by modifying the rate table which is send to the kernel. No direct parameter were transferred to the kernel indicating the linklayer setting. The commit 56b765b79 ("htb: improved accuracy at high rates") removed the use of the rate table system. To keep compatible with older iproute2 utils, this patch detects the linklayer by parsing the rate table. It also supports future versions of iproute2 to send this linklayer parameter to the kernel directly. This is done by using the __reserved field in struct tc_ratespec, to convey the choosen linklayer option, but only using the lower 4 bits of this field. Linklayer detection is limited to speeds below 100Mbit/s, because at high rates the rtab is gets too inaccurate, so bad that several fields contain the same values, this resembling the ATM detect. Fields even start to contain "0" time to send, e.g. at 1000Mbit/s sending a 96 bytes packet cost "0", thus the rtab have been more broken than we first realized. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 9 +++++++- include/uapi/linux/pkt_sched.h | 10 ++++++++- net/sched/sch_api.c | 41 ++++++++++++++++++++++++++++++++++ net/sched/sch_generic.c | 1 + net/sched/sch_htb.c | 13 +++++++++++ 5 files changed, 72 insertions(+), 2 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e7f4e21cc3e1..63ed1d1dd9e7 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -682,13 +682,19 @@ struct psched_ratecfg { u64 rate_bps; u32 mult; u16 overhead; + u8 linklayer; u8 shift; }; static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, unsigned int len) { - return ((u64)(len + r->overhead) * r->mult) >> r->shift; + len += r->overhead; + + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) + return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; + + return ((u64)len * r->mult) >> r->shift; } extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf); @@ -699,6 +705,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, memset(res, 0, sizeof(*res)); res->rate = r->rate_bps >> 3; res->overhead = r->overhead; + res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } #endif diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index dbd71b0c7d8c..09d62b9228ff 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -73,9 +73,17 @@ struct tc_estimator { #define TC_H_ROOT (0xFFFFFFFFU) #define TC_H_INGRESS (0xFFFFFFF1U) +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + struct tc_ratespec { unsigned char cell_log; - unsigned char __reserved; + __u8 linklayer; /* lower 4 bits */ unsigned short overhead; short cell_align; unsigned short mpu; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 281c1bded1f6..51b968d3febb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) return q; } +/* The linklayer setting were not transferred from iproute2, in older + * versions, and the rate tables lookup systems have been dropped in + * the kernel. To keep backward compatible with older iproute2 tc + * utils, we detect the linklayer setting by detecting if the rate + * table were modified. + * + * For linklayer ATM table entries, the rate table will be aligned to + * 48 bytes, thus some table entries will contain the same value. The + * mpu (min packet unit) is also encoded into the old rate table, thus + * starting from the mpu, we find low and high table entries for + * mapping this cell. If these entries contain the same value, when + * the rate tables have been modified for linklayer ATM. + * + * This is done by rounding mpu to the nearest 48 bytes cell/entry, + * and then roundup to the next cell, calc the table entry one below, + * and compare. + */ +static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) +{ + int low = roundup(r->mpu, 48); + int high = roundup(low+1, 48); + int cell_low = low >> r->cell_log; + int cell_high = (high >> r->cell_log) - 1; + + /* rtab is too inaccurate at rates > 100Mbit/s */ + if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { + pr_debug("TC linklayer: Giving up ATM detection\n"); + return TC_LINKLAYER_ETHERNET; + } + + if ((cell_high > cell_low) && (cell_high < 256) + && (rtab[cell_low] == rtab[cell_high])) { + pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", + cell_low, cell_high, rtab[cell_high]); + return TC_LINKLAYER_ATM; + } + return TC_LINKLAYER_ETHERNET; +} + static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) @@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); + if (r->linklayer == TC_LINKLAYER_UNAWARE) + r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 20224086cc28..a7f838b45dc8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -908,6 +908,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; r->rate_bps = (u64)conf->rate << 3; + r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; /* * Calibrate mult, shift so that token counting is accurate diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5c9f0b7b2109..910667cbc1e3 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1312,6 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; + struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; @@ -1333,6 +1334,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + /* Keeping backward compatible with rate_table based iproute2 tc */ + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) { + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); + if (rtab) + qdisc_put_rtab(rtab); + } + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) { + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); + if (ctab) + qdisc_put_rtab(ctab); + } + if (!cl) { /* new class */ struct Qdisc *new_q; int prio; From 40361e8fcc76eb603970d4da02592d4dc4a6f631 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 9 Jul 2013 17:12:49 +0100 Subject: [PATCH 0726/1368] sfc: Fix lookup of default RX MAC filters when steered using ethtool [ Upstream commit f3851b0acc5a75bd33c6d344a2e4f920e1622ff0 ] commit 385904f819e3 ('sfc: Don't use efx_filter_{build,hash,increment}() for default MAC filters') used the wrong name to find the index of default RX MAC filters at insertion/ update time. This could result in memory corruption and would in any case silently fail to update the filter. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c index 2738b5f1c81f..a52046581a6c 100644 --- a/drivers/net/ethernet/sfc/filter.c +++ b/drivers/net/ethernet/sfc/filter.c @@ -675,7 +675,7 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, BUILD_BUG_ON(EFX_FILTER_INDEX_UC_DEF != 0); BUILD_BUG_ON(EFX_FILTER_INDEX_MC_DEF != EFX_FILTER_MC_DEF - EFX_FILTER_UC_DEF); - rep_index = spec->type - EFX_FILTER_INDEX_UC_DEF; + rep_index = spec->type - EFX_FILTER_UC_DEF; ins_index = rep_index; spin_lock_bh(&state->lock); From 3d740e64f6e333efb2c539d5ea7ed56a5ba2757d Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Thu, 22 Aug 2013 12:23:41 +0530 Subject: [PATCH 0727/1368] be2net: fix disabling TX in be_close() [ Upstream commit 6e1f99757a2b24b7255263b2240a0eb04215174d ] commit fba875591 ("disable TX in be_close()") disabled TX in be_close() to protect be_xmit() from touching freed up queues in the AER recovery flow. But, TX must be disabled *before* cleaning up TX completions in the close() path, not after. This allows be_tx_compl_clean() to free up all TX-req skbs that were notified to the HW. Signed-off-by: Sathya Perla Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 6e4342695fa7..7371626c56a1 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2561,8 +2561,8 @@ static int be_close(struct net_device *netdev) /* Wait for all pending tx completions to arrive so that * all tx skbs are freed. */ - be_tx_compl_clean(adapter); netif_tx_disable(netdev); + be_tx_compl_clean(adapter); be_rx_qs_destroy(adapter); From f2e39b5ea18377e079ab44323d9c8653a15fc5d6 Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Sun, 25 Aug 2013 16:02:23 -0600 Subject: [PATCH 0728/1368] net: usb: Add HP hs2434 device to ZLP exception table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 03803a59e32453ee5737c6096a295f748f03cc49 ] This patch adds another entry (HP hs2434 Mobile Broadband) to the list of exceptional devices that require a zero length packet in order to function properly. This list was added in commit 844e88f0. The hs2434 is manufactured by Sierra Wireless, who also produces the MC7710, which the ZLP exception list was created for in the first place. So hopefully it is just this one producer's devices that will need this workaround. Tested on a DM1-4310NR HP notebook, which does not function without this change. Signed-off-by: Rob Gardner Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_mbim.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 872819851aef..25ba7eca9a13 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -400,6 +400,10 @@ static const struct usb_device_id mbim_devs[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68a2, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_zlp, }, + /* HP hs2434 Mobile Broadband Module needs ZLPs */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3f0, 0x4b1d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_zlp, + }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info, }, From 6f198dcab737db9335f1f4c7d97f801e0d5de186 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 27 Aug 2013 12:20:40 +0400 Subject: [PATCH 0729/1368] tcp: initialize rcv_tstamp for restored sockets [ Upstream commit c7781a6e3c4a9a17e144ec2db00ebfea327bd627 ] u32 rcv_tstamp; /* timestamp of last received ACK */ Its value used in tcp_retransmit_timer, which closes socket if the last ack was received more then TCP_RTO_MAX ago. Currently rcv_tstamp is initialized to zero and if tcp_retransmit_timer is called before receiving a first ack, the connection is closed. This patch initializes rcv_tstamp to a timestamp, when a socket was restored. Reported-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ec335fabd5cc..2c48d51f47a0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2808,6 +2808,8 @@ void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; + else + tp->rcv_tstamp = tcp_time_stamp; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; From 6fe6efd941d49f7169e78b2e60ca2dc6a56ca8b4 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 27 Aug 2013 12:21:55 +0400 Subject: [PATCH 0730/1368] tcp: don't apply tsoffset if rcv_tsecr is zero [ Upstream commit e3e12028315749b7fa2edbc37328e5847be9ede9 ] The zero value means that tsecr is not valid, so it's a special case. tsoffset is used to customize tcp_time_stamp for one socket. tsoffset is usually zero, it's used when a socket was moved from one host to another host. Currently this issue affects logic of tcp_rcv_rtt_measure_ts. Due to incorrect value of rcv_tsecr, tcp_rcv_rtt_measure_ts sets rto to TCP_RTO_MAX. Reported-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c6225780bd5..4b75aad14b04 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3598,7 +3598,10 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + if (*ptr) + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + else + tp->rx_opt.rcv_tsecr = 0; return true; } return false; @@ -3623,7 +3626,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, } tcp_parse_options(skb, &tp->rx_opt, 1, NULL); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; return true; @@ -5376,7 +5379,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, 0, &foc); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { From 5612d36ca1438c23280b962a80943d14b2e9f778 Mon Sep 17 00:00:00 2001 From: Chris Clark Date: Tue, 27 Aug 2013 12:02:15 -0600 Subject: [PATCH 0731/1368] ipv4: sendto/hdrincl: don't use destination address found in header [ Upstream commit c27c9322d015dc1d9dfdf31724fca71c0476c4d1 ] ipv4: raw_sendmsg: don't use header's destination address A sendto() regression was bisected and found to start with commit f8126f1d5136be1 (ipv4: Adjust semantics of rt->rt_gateway.) The problem is that it tries to ARP-lookup the constructed packet's destination address rather than the explicitly provided address. Fix this using FLOWI_FLAG_KNOWN_NH so that given nexthop is used. cf. commit 2ad5b9e4bd314fc685086b99e90e5de3bc59e26b Reported-by: Chris Clark Bisected-by: Chris Clark Tested-by: Chris Clark Suggested-by: Julian Anastasov Signed-off-by: Chris Clark Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/raw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dd44e0ab600c..61e60d67adca 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -571,7 +571,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | + (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); if (!inet->hdrincl) { From 4b7ead801d3e174ae14ccaed02773041419ae278 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Sep 2013 13:37:01 +0200 Subject: [PATCH 0732/1368] ipv6: Don't depend on per socket memory for neighbour discovery messages [ Upstream commit 25a6e6b84fba601eff7c28d30da8ad7cfbef0d43 ] Allocating skbs when sending out neighbour discovery messages currently uses sock_alloc_send_skb() based on a per net namespace socket and thus share a socket wmem buffer space. If a netdevice is temporarily unable to transmit due to carrier loss or for other reasons, the queued up ndisc messages will cosnume all of the wmem space and will thus prevent from any more skbs to be allocated even for netdevices that are able to transmit packets. The number of neighbour discovery messages sent is very limited, use of alloc_skb() bypasses the socket wmem buffer size enforcement while the manual call to skb_set_owner_w() maintains the socket reference needed for the IPv6 output path. This patch has orginally been posted by Eric Dumazet in a modified form. Signed-off-by: Thomas Graf Cc: Eric Dumazet Cc: Hannes Frederic Sowa Cc: Stephen Warren Cc: Fabio Estevam Tested-by: Fabio Estevam Tested-by: Stephen Warren Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ca4ffcc287f1..060a0449acaa 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int tlen = dev->needed_tailroom; struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; - int err; - skb = sock_alloc_send_skb(sk, - hlen + sizeof(struct ipv6hdr) + len + tlen, - 1, &err); + skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", - __func__, err); + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", + __func__); return NULL; } @@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); + /* Manually assign socket ownership as we avoid calling + * sock_alloc_send_pskb() to bypass wmem buffer limits + */ + skb_set_owner_w(skb, sk); + return skb; } From b70a23ab4ab5a95ab9be1bf77b73c1ad9f4e15a4 Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Tue, 27 Aug 2013 16:41:40 -0700 Subject: [PATCH 0733/1368] tcp: tcp_make_synack() should use sock_wmalloc [ Upstream commit eb8895debe1baba41fcb62c78a16f0c63c21662a ] In commit 90ba9b19 (tcp: tcp_make_synack() can use alloc_skb()), Eric changed the call to sock_wmalloc in tcp_make_synack to alloc_skb. In doing so, the netfilter owner match lost its ability to block the SYNACK packet on outbound listening sockets. Revert the change, restoring the owner match functionality. This closes netfilter bugzilla #847. Signed-off-by: Phil Oester Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2c48d51f47a0..0145ce7e6098 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2664,7 +2664,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, int tcp_header_size; int mss; - skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; From 8db07b82b70897d868d864402b43a68da5e0cd59 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Wed, 28 Aug 2013 09:29:58 +0200 Subject: [PATCH 0734/1368] tipc: set sk_err correctly when connection fails [ Upstream commit 2c8d85182348021fc0a1bed193a4be4161dc8364 ] Should a connect fail, if the publication/server is unavailable or due to some other error, a positive value will be returned and errno is never set. If the application code checks for an explicit zero return from connect (success) or a negative return (failure), it will not catch the error and subsequent send() calls will fail as shown from the strace snippet below. socket(0x1e /* PF_??? */, SOCK_SEQPACKET, 0) = 3 connect(3, {sa_family=0x1e /* AF_??? */, sa_data="\2\1\322\4\0\0\322\4\0\0\0\0\0\0"}, 16) = 111 sendto(3, "test", 4, 0, NULL, 0) = -1 EPIPE (Broken pipe) The reason for this behaviour is that TIPC wrongly inverts error codes set in sk_err. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 515ce38e4f4c..7e26ad416af1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1179,7 +1179,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) /* Accept only ACK or NACK message */ if (unlikely(msg_errcode(msg))) { sock->state = SS_DISCONNECTING; - sk->sk_err = -ECONNREFUSED; + sk->sk_err = ECONNREFUSED; retval = TIPC_OK; break; } @@ -1190,7 +1190,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) res = auto_connect(sock, msg); if (res) { sock->state = SS_DISCONNECTING; - sk->sk_err = res; + sk->sk_err = -res; retval = TIPC_OK; break; } From 56a12acebcbd08342f7287a5870fe7ec2c0de91a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Aug 2013 18:10:43 -0700 Subject: [PATCH 0735/1368] net: revert 8728c544a9c ("net: dev_pick_tx() fix") [ Upstream commit 702821f4ea6f68db18aa1de7d8ed62c6ba586a64 ] commit 8728c544a9cbdc ("net: dev_pick_tx() fix") and commit b6fe83e9525a ("bonding: refine IFF_XMIT_DST_RELEASE capability") are quite incompatible : Queue selection is disabled because skb dst was dropped before entering bonding device. This causes major performance regression, mainly because TCP packets for a given flow can be sent to multiple queues. This is particularly visible when using the new FQ packet scheduler with MQ + FQ setup on the slaves. We can safely revert the first commit now that 416186fbf8c5b ("net: Split core bits of netdev_pick_tx into __netdev_pick_tx") properly caps the queue_index. Reported-by: Xi Wang Diagnosed-by: Xi Wang Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Alexander Duyck Cc: Denys Fedorysychenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 00ee068efc1c..c99cc371bbd7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -345,14 +345,9 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) if (new_index < 0) new_index = skb_tx_hash(dev, skb); - if (queue_index != new_index && sk) { - struct dst_entry *dst = - rcu_dereference_check(sk->sk_dst_cache, 1); - - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - - } + if (queue_index != new_index && sk && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, queue_index); queue_index = new_index; } From 00897febb83864e2dd388d719b256cb362198e27 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Aug 2013 23:55:05 +0200 Subject: [PATCH 0736/1368] net: bridge: convert MLDv2 Query MRC into msecs_to_jiffies for max_delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2d98c29b6fb3de44d9eaa73c09f9cf7209346383 ] While looking into MLDv1/v2 code, I noticed that bridging code does not convert it's max delay into jiffies for MLDv2 messages as we do in core IPv6' multicast code. RFC3810, 5.1.3. Maximum Response Code says: The Maximum Response Code field specifies the maximum time allowed before sending a responding Report. The actual time allowed, called the Maximum Response Delay, is represented in units of milliseconds, and is derived from the Maximum Response Code as follows: [...] As we update timers that work with jiffies, we need to convert it. Signed-off-by: Daniel Borkmann Cc: Linus Lüssing Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 75a81281c979..d82058f6fc79 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1193,7 +1193,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; + + max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL); } if (!group) From 2aae409672a9ec6078702b2fe92bd41ecf05d826 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Fri, 30 Aug 2013 11:18:45 +0200 Subject: [PATCH 0737/1368] ICMPv6: treat dest unreachable codes 5 and 6 as EACCES, not EPROTO [ Upstream commit 61e76b178dbe7145e8d6afa84bb4ccea71918994 ] RFC 4443 has defined two additional codes for ICMPv6 type 1 (destination unreachable) messages: 5 - Source address failed ingress/egress policy 6 - Reject route to destination Now they are treated as protocol error and icmpv6_err_convert() converts them to EPROTO. RFC 4443 says: "Codes 5 and 6 are more informative subsets of code 1." Treat codes 5 and 6 as code 1 (EACCES) Btw, connect() returning -EPROTO confuses firefox, so that fallback to other/IPv4 addresses does not work: https://bugzilla.mozilla.org/show_bug.cgi?id=910773 Signed-off-by: Jiri Bohac Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/icmpv6.h | 2 ++ net/ipv6/icmp.c | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index e0133c73c304..590beda78ea0 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -115,6 +115,8 @@ struct icmp6hdr { #define ICMPV6_NOT_NEIGHBOUR 2 #define ICMPV6_ADDR_UNREACH 3 #define ICMPV6_PORT_UNREACH 4 +#define ICMPV6_POLICY_FAIL 5 +#define ICMPV6_REJECT_ROUTE 6 /* * Codes for Time Exceeded diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4ff0a42b8c7..70e704d49007 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -931,6 +931,14 @@ static const struct icmp6_err { .err = ECONNREFUSED, .fatal = 1, }, + { /* POLICY_FAIL */ + .err = EACCES, + .fatal = 1, + }, + { /* REJECT_ROUTE */ + .err = EACCES, + .fatal = 1, + }, }; int icmpv6_err_convert(u8 type, u8 code, int *err) @@ -942,7 +950,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) switch (type) { case ICMPV6_DEST_UNREACH: fatal = 1; - if (code <= ICMPV6_PORT_UNREACH) { + if (code < ARRAY_SIZE(tab_unreach)) { *err = tab_unreach[code].err; fatal = tab_unreach[code].fatal; } From 84a38c47c2bbcb361caa1631c92eaf5916e178d6 Mon Sep 17 00:00:00 2001 From: Nithin Sujir Date: Fri, 30 Aug 2013 17:01:36 -0700 Subject: [PATCH 0738/1368] tg3: Don't turn off led on 5719 serdes port 0 [ Upstream commit 989038e217e94161862a959e82f9a1ecf8dda152 ] Turning off led on port 0 of the 5719 serdes causes all other ports to lose power and stop functioning. Add tg3_phy_led_bug() function to check for this condition. We use a switch() in tg3_phy_led_bug() for consistency with the tg3_phy_power_bug() function. Signed-off-by: Nithin Nayak Sujir Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a13463e8a2c3..0877a052e3ee 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -3003,6 +3003,19 @@ static bool tg3_phy_power_bug(struct tg3 *tp) return false; } +static bool tg3_phy_led_bug(struct tg3 *tp) +{ + switch (tg3_asic_rev(tp)) { + case ASIC_REV_5719: + if ((tp->phy_flags & TG3_PHYFLG_MII_SERDES) && + !tp->pci_fn) + return true; + return false; + } + + return false; +} + static void tg3_power_down_phy(struct tg3 *tp, bool do_low_power) { u32 val; @@ -3050,8 +3063,9 @@ static void tg3_power_down_phy(struct tg3 *tp, bool do_low_power) } return; } else if (do_low_power) { - tg3_writephy(tp, MII_TG3_EXT_CTRL, - MII_TG3_EXT_CTRL_FORCE_LED_OFF); + if (!tg3_phy_led_bug(tp)) + tg3_writephy(tp, MII_TG3_EXT_CTRL, + MII_TG3_EXT_CTRL_FORCE_LED_OFF); val = MII_TG3_AUXCTL_PCTL_100TX_LPWR | MII_TG3_AUXCTL_PCTL_SPR_ISOLATE | From bd35c1a7f659dfa01179e456881285a9f057d30c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 2 Sep 2013 16:41:00 +0800 Subject: [PATCH 0739/1368] vhost_net: poll vhost queue after marking DMA is done [ Upstream commit 19c73b3e08d16ee923f3962df4abf6205127896a ] We used to poll vhost queue before making DMA is done, this is racy if vhost thread were waked up before marking DMA is done which can result the signal to be missed. Fix this by always polling the vhost thread before DMA is done. Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8ca5ac71b845..d6a518ce4d6d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -307,6 +307,11 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) struct vhost_virtqueue *vq = ubufs->vq; int cnt = atomic_read(&ubufs->kref.refcount); + /* set len to mark this desc buffers done DMA */ + vq->heads[ubuf->desc].len = success ? + VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; + vhost_net_ubuf_put(ubufs); + /* * Trigger polling thread if guest stopped submitting new buffers: * in this case, the refcount after decrement will eventually reach 1 @@ -317,10 +322,6 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) */ if (cnt <= 2 || !(cnt % 16)) vhost_poll_queue(&vq->poll); - /* set len to mark this desc buffers done DMA */ - vq->heads[ubuf->desc].len = success ? - VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; - vhost_net_ubuf_put(ubufs); } /* Expects to be always run from workqueue - which acts as From ef1f8bcdc2febd53978905b5b0a5201104cce653 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 3 Sep 2013 02:13:31 +0200 Subject: [PATCH 0740/1368] ipv6: fix null pointer dereference in __ip6addrlbl_add [ Upstream commit 639739b5e609a5074839bb22fc061b37baa06269 ] Commit b67bfe0d42cac56c512dd5da4b1b347a23f4b70a ("hlist: drop the node parameter from iterators") changed the behavior of hlist_for_each_entry_safe to leave the p argument NULL. Fix this up by tracking the last argument. Reported-by: Michele Baldessari Cc: Hideaki YOSHIFUJI Cc: Sasha Levin Signed-off-by: Hannes Frederic Sowa Tested-by: Michele Baldessari Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrlabel.c | 48 +++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f083a583a05c..b30ad3741b46 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, /* add a label */ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) { + struct hlist_node *n; + struct ip6addrlbl_entry *last = NULL, *p = NULL; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", - __func__, - newp, replace); + ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, + replace); - if (hlist_empty(&ip6addrlbl_table.head)) { - hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); - } else { - struct hlist_node *n; - struct ip6addrlbl_entry *p = NULL; - hlist_for_each_entry_safe(p, n, - &ip6addrlbl_table.head, list) { - if (p->prefixlen == newp->prefixlen && - net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && - p->ifindex == newp->ifindex && - ipv6_addr_equal(&p->prefix, &newp->prefix)) { - if (!replace) { - ret = -EEXIST; - goto out; - } - hlist_replace_rcu(&p->list, &newp->list); - ip6addrlbl_put(p); - goto out; - } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || - (p->prefixlen < newp->prefixlen)) { - hlist_add_before_rcu(&newp->list, &p->list); + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + if (p->prefixlen == newp->prefixlen && + net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && + p->ifindex == newp->ifindex && + ipv6_addr_equal(&p->prefix, &newp->prefix)) { + if (!replace) { + ret = -EEXIST; goto out; } + hlist_replace_rcu(&p->list, &newp->list); + ip6addrlbl_put(p); + goto out; + } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || + (p->prefixlen < newp->prefixlen)) { + hlist_add_before_rcu(&newp->list, &p->list); + goto out; } - hlist_add_after_rcu(&p->list, &newp->list); + last = p; } + if (last) + hlist_add_after_rcu(&last->list, &newp->list); + else + hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); out: if (!ret) ip6addrlbl_table.seq++; From a22eb149b18ed385c72d527c42dc398c97b6387f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 3 Sep 2013 19:29:12 +0200 Subject: [PATCH 0741/1368] net: ipv6: tcp: fix potential use after free in tcp_v6_do_rcv [ Upstream commit 3a1c756590633c0e86df606e5c618c190926a0df ] In tcp_v6_do_rcv() code, when processing pkt options, we soley work on our skb clone opt_skb that we've created earlier before entering tcp_rcv_established() on our way. However, only in condition ... if (np->rxopt.bits.rxtclass) np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ... we work on skb itself. As we extract every other information out of opt_skb in ipv6_pktoptions path, this seems wrong, since skb can already be released by tcp_rcv_established() earlier on. When we try to access it in ipv6_hdr(), we will dereference freed skb. [ Bug added by commit 4c507d2897bd9b ("net: implement IP_RECVTOS for IP_PKTOPTIONS") ] Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Acked-by: Eric Dumazet Acked-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf39..66c718854e5a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1426,7 +1426,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); From 4c54b9db01842eb0f4eb54af6949b7606ea39e7a Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 4 Sep 2013 16:21:18 +0200 Subject: [PATCH 0742/1368] net: mvneta: properly disable HW PHY polling and ensure adjust_link() works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 714086029116b6b0a34e67ba1dd2f0d1cf26770c ] This commit fixes a long-standing bug that has been reported by many users: on some Armada 370 platforms, only the network interface that has been used in U-Boot to tftp the kernel works properly in Linux. The other network interfaces can see a 'link up', but are unable to transmit data. The reports were generally made on the Armada 370-based Mirabox, but have also been given on the Armada 370-RD board. The network MAC in the Armada 370/XP (supported by the mvneta driver in Linux) has a functionality that allows it to continuously poll the PHY and directly update the MAC configuration accordingly (speed, duplex, etc.). The very first versions of the driver submitted for review were using this hardware mechanism, but due to this, the driver was not integrated with the kernel phylib. Following reviews, the driver was changed to use the phylib, and therefore a software based polling. In software based polling, Linux regularly talks to the PHY over the MDIO bus, and sees if the link status has changed. If it's the case then the adjust_link() callback of the driver is called to update the MAC configuration accordingly. However, it turns out that the adjust_link() callback was not configuring the hardware in a completely correct way: while it was setting the speed and duplex bits correctly, it wasn't telling the hardware to actually take into account those bits rather than what the hardware-based PHY polling mechanism has concluded. So, in fact the adjust_link() callback was basically a no-op. However, the network happened to be working because on the network interfaces used by U-Boot for tftp on Armada 370 platforms because the hardware PHY polling was enabled by the bootloader, and left enabled by Linux. However, the second network interface not used for tftp (or both network interfaces if the kernel is loaded from USB, NAND or SD card) didn't had the hardware PHY polling enabled. This patch fixes this situation by: (1) Making sure that the hardware PHY polling is disabled by clearing the MVNETA_PHY_POLLING_ENABLE bit in the MVNETA_UNIT_CONTROL register in the driver ->probe() function. (2) Making sure that the duplex and speed selections made by the adjust_link() callback are taken into account by clearing the MVNETA_GMAC_AN_SPEED_EN and MVNETA_GMAC_AN_DUPLEX_EN bits in the MVNETA_GMAC_AUTONEG_CONFIG register. This patch has been tested on Armada 370 Mirabox, and now both network interfaces are usable after boot. [ Problem introduced by commit c5aff18 ("net: mvneta: driver for Marvell Armada 370/XP network unit") ] Signed-off-by: Thomas Petazzoni Cc: Willy Tarreau Cc: Jochen De Smet Cc: Peter Sanford Cc: Ethan Tuttle Cc: Chény Yves-Gael Cc: Ryan Press Cc: Simon Guinot Cc: vdonnefort@lacie.com Cc: stable@vger.kernel.org Acked-by: Jason Cooper Tested-by: Vincent Donnefort Tested-by: Yves-Gael Cheny Tested-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c96678555233..254f255204f9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -136,7 +136,9 @@ #define MVNETA_GMAC_FORCE_LINK_PASS BIT(1) #define MVNETA_GMAC_CONFIG_MII_SPEED BIT(5) #define MVNETA_GMAC_CONFIG_GMII_SPEED BIT(6) +#define MVNETA_GMAC_AN_SPEED_EN BIT(7) #define MVNETA_GMAC_CONFIG_FULL_DUPLEX BIT(12) +#define MVNETA_GMAC_AN_DUPLEX_EN BIT(13) #define MVNETA_MIB_COUNTERS_BASE 0x3080 #define MVNETA_MIB_LATE_COLLISION 0x7c #define MVNETA_DA_FILT_SPEC_MCAST 0x3400 @@ -911,6 +913,13 @@ static void mvneta_defaults_set(struct mvneta_port *pp) /* Assign port SDMA configuration */ mvreg_write(pp, MVNETA_SDMA_CONFIG, val); + /* Disable PHY polling in hardware, since we're using the + * kernel phylib to do this. + */ + val = mvreg_read(pp, MVNETA_UNIT_CONTROL); + val &= ~MVNETA_PHY_POLLING_ENABLE; + mvreg_write(pp, MVNETA_UNIT_CONTROL, val); + mvneta_set_ucast_table(pp, -1); mvneta_set_special_mcast_table(pp, -1); mvneta_set_other_mcast_table(pp, -1); @@ -2288,7 +2297,9 @@ static void mvneta_adjust_link(struct net_device *ndev) val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | MVNETA_GMAC_CONFIG_GMII_SPEED | - MVNETA_GMAC_CONFIG_FULL_DUPLEX); + MVNETA_GMAC_CONFIG_FULL_DUPLEX | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); if (phydev->duplex) val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX; From 751190a69d7c885b98716e2aca339f6c4b988704 Mon Sep 17 00:00:00 2001 From: John Haxby Date: Wed, 14 Aug 2013 16:23:18 +0100 Subject: [PATCH 0743/1368] crypto: xor - Check for osxsave as well as avx in crypto/xor commit edb6f29464afc65fc73767540b854abf63ae7144 upstream. This affects xen pv guests with sufficiently old versions of xen and sufficiently new hardware. On such a system, a guest with a btrfs root won't even boot. Signed-off-by: John Haxby Signed-off-by: Herbert Xu Reported-by: Michael Marineau Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/xor_avx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 7ea79c5fa1f2..492b29802f57 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = { #define AVX_XOR_SPEED \ do { \ - if (cpu_has_avx) \ + if (cpu_has_avx && cpu_has_osxsave) \ xor_speed(&xor_block_avx); \ } while (0) #define AVX_SELECT(FASTEST) \ - (cpu_has_avx ? &xor_block_avx : FASTEST) + (cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST) #else From 70aa7b8f105b0b5bb82d2082a230efb7f34645a0 Mon Sep 17 00:00:00 2001 From: Sangjung Woo Date: Wed, 11 Sep 2013 14:24:21 -0700 Subject: [PATCH 0744/1368] drivers/rtc/rtc-max77686.c: Fix wrong register commit 1748cbf7f7c464593232cde914f5a103181a83b5 upstream. Fix a read of the wrong register when checking whether the RTC timer has reached the alarm time. Signed-off-by: Sangjung Woo Signed-off-by: Myugnjoo Ham Reviewed-by: Jonghwa Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-max77686.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 771812d62e6b..3bb9401f1cae 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -240,9 +240,9 @@ static int max77686_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) } alrm->pending = 0; - ret = regmap_read(info->max77686->regmap, MAX77686_REG_STATUS1, &val); + ret = regmap_read(info->max77686->regmap, MAX77686_REG_STATUS2, &val); if (ret < 0) { - dev_err(info->dev, "%s:%d fail to read status1 reg(%d)\n", + dev_err(info->dev, "%s:%d fail to read status2 reg(%d)\n", __func__, __LINE__, ret); goto out; } From 7bf0f5bd9213cd0d75383ec36d3aae5f08e185c4 Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Mon, 19 Aug 2013 16:10:21 -0700 Subject: [PATCH 0745/1368] mwifiex: do not create AP and P2P interfaces upon driver loading commit 1211c961170cedb21c30d5bb7e2033c8720b38db upstream. Bug 60747 - 1286:2044 [Microsoft Surface Pro] Marvell 88W8797 wifi show 3 interface under network https://bugzilla.kernel.org/show_bug.cgi?id=60747 This issue was also reported previously by OLPC and some folks from the community. There are 3 network interfaces with different types being created when mwifiex driver is loaded: 1. mlan0 (infra. STA) 2. uap0 (AP) 3. p2p0 (P2P_CLIENT) The Network Manager attempts to use all 3 interfaces above without filtering the managed interface type. As the result, 3 identical interfaces are displayed under network manager. If user happens to click on an entry under which its interface is uap0 or p2p0, the association will fail. Work around it by removing the creation of AP and P2P interfaces at driver loading time. These interfaces can be added with 'iw' or other applications manually when they are needed. Signed-off-by: Bing Zhao Signed-off-by: Avinash Patil Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/main.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index 2eb88ea9acf7..c4a2e775fe1a 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -363,20 +363,6 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) dev_err(adapter->dev, "cannot create default STA interface\n"); goto err_add_intf; } - - /* Create AP interface by default */ - if (!mwifiex_add_virtual_intf(adapter->wiphy, "uap%d", - NL80211_IFTYPE_AP, NULL, NULL)) { - dev_err(adapter->dev, "cannot create default AP interface\n"); - goto err_add_intf; - } - - /* Create P2P interface by default */ - if (!mwifiex_add_virtual_intf(adapter->wiphy, "p2p%d", - NL80211_IFTYPE_P2P_CLIENT, NULL, NULL)) { - dev_err(adapter->dev, "cannot create default P2P interface\n"); - goto err_add_intf; - } rtnl_unlock(); mwifiex_drv_get_driver_version(adapter, fmt, sizeof(fmt) - 1); From 9cfceacb4983272c1f0a6926aec2b127143df692 Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Sun, 26 May 2013 16:55:59 +0800 Subject: [PATCH 0746/1368] ARM: at91: dt: sam9260: add i2c gpio pinctrl commit f89ae61bd74ae195c464bdd97a134e30908884d5 upstream. Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Nicolas Ferre Cc: Boris BREZILLON Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91sam9260.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index 84c4bef2d726..43a18f74cddb 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -340,6 +340,14 @@ pinctrl_spi1: spi1-0 { }; }; + i2c_gpio0 { + pinctrl_i2c_gpio0: i2c_gpio0-0 { + atmel,pins = + <0 23 0x0 0x3 /* PA23 gpio I2C_SDA pin */ + 0 24 0x0 0x3>; /* PA24 gpio I2C_SCL pin */ + }; + }; + pioA: gpio@fffff400 { compatible = "atmel,at91rm9200-gpio"; reg = <0xfffff400 0x200>; @@ -592,6 +600,8 @@ &pioA 24 0 /* scl */ i2c-gpio,delay-us = <2>; /* ~100 kHz */ #address-cells = <1>; #size-cells = <0>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c_gpio0>; status = "disabled"; }; }; From 410dbb746130bc052bb3c7a337a86252874adbdf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 14 Sep 2013 06:55:12 -0700 Subject: [PATCH 0747/1368] Linux 3.10.12 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 595076dc14a0..afe001e3b2d6 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 11 +SUBLEVEL = 12 EXTRAVERSION = NAME = TOSSUG Baby Fish From e62b0f0196a7ea462d04e320cdd727aed51dd60f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 3 Sep 2013 15:00:11 -0700 Subject: [PATCH 0748/1368] SCSI: Allow MPT Fusion SAS 3.0 driver to be built into the kernel commit 9807b4d94911be4e4efb9a08481b24292a9edf8a upstream. Right now the Makefile for the mpt3sas driver does not even allow the driver to be built into the kernel. So fix that up, as there doesn't seem to be any obvious reason why this shouldn't be done. Signed-off-by: Greg Kroah-Hartman Acked-by: Sreekanth Reddy Signed-off-by: James Bottomley --- drivers/scsi/mpt3sas/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/Makefile b/drivers/scsi/mpt3sas/Makefile index 4c1d2e7a1176..efb0c4c2e310 100644 --- a/drivers/scsi/mpt3sas/Makefile +++ b/drivers/scsi/mpt3sas/Makefile @@ -1,5 +1,5 @@ # mpt3sas makefile -obj-m += mpt3sas.o +obj-$(CONFIG_SCSI_MPT3SAS) += mpt3sas.o mpt3sas-y += mpt3sas_base.o \ mpt3sas_config.o \ mpt3sas_scsih.o \ From e7d63334e76b3cbb0ef599fef1643701fdb28aad Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 19 Aug 2013 08:48:12 +0200 Subject: [PATCH 0749/1368] UBI: Fix PEB leak in wear_leveling_worker() commit 5ef4414f4bc26a19cfd5cd11aee9697a863e4d51 upstream. get_peb_for_wl() removes the PEB from the free list. If the WL subsystem detects that no wear leveling is needed it cancels the operation and drops the gained PEB. In this case we have to put the PEB back into the free list. This issue was introduced with commit ed4b7021c (UBI: remove PEB from free tree in get_peb_for_wl()). Signed-off-by: Richard Weinberger Signed-off-by: Artem Bityutskiy Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 5df49d3cb5c7..c95bfb183c62 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1069,6 +1069,9 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { dbg_wl("no WL needed: min used EC %d, max free EC %d", e1->ec, e2->ec); + + /* Give the unused PEB back */ + wl_tree_add(e2, &ubi->free); goto out_cancel; } self_check_in_wl_tree(ubi, e1, &ubi->used); From 13915e203f991e77eb5a52bcd4c61e5f155e3bbc Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 6 Sep 2013 11:49:51 -0400 Subject: [PATCH 0750/1368] SCSI: sd: Fix potential out-of-bounds access commit 984f1733fcee3fbc78d47e26c5096921c5d9946a upstream. This patch fixes an out-of-bounds error in sd_read_cache_type(), found by Google's AddressSanitizer tool. When the loop ends, we know that "offset" lies beyond the end of the data in the buffer, so no Caching mode page was found. In theory it may be present, but the buffer size is limited to 512 bytes. Signed-off-by: Alan Stern Reported-by: Dmitry Vyukov Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 610417ec45af..c39863441337 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2409,14 +2409,9 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer) } } - if (modepage == 0x3F) { - sd_printk(KERN_ERR, sdkp, "No Caching mode page " - "present\n"); - goto defaults; - } else if ((buffer[offset] & 0x3f) != modepage) { - sd_printk(KERN_ERR, sdkp, "Got wrong page\n"); - goto defaults; - } + sd_printk(KERN_ERR, sdkp, "No Caching mode page found\n"); + goto defaults; + Page_found: if (modepage == 8) { sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0); From 139653e11c9eec4d5eff614e688d6bec99b78d7f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 8 Sep 2013 14:33:50 +1000 Subject: [PATCH 0751/1368] crypto: api - Fix race condition in larval lookup commit 77dbd7a95e4a4f15264c333a9e9ab97ee27dc2aa upstream. crypto_larval_lookup should only return a larval if it created one. Any larval created by another entity must be processed through crypto_larval_wait before being returned. Otherwise this will lead to a larval being killed twice, which will most likely lead to a crash. Reported-by: Kees Cook Tested-by: Kees Cook Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/api.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crypto/api.c b/crypto/api.c index 3b6180336d3d..37c4c7213de0 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -34,6 +34,8 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem); BLOCKING_NOTIFIER_HEAD(crypto_chain); EXPORT_SYMBOL_GPL(crypto_chain); +static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); + struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; @@ -144,8 +146,11 @@ static struct crypto_alg *crypto_larval_add(const char *name, u32 type, } up_write(&crypto_alg_sem); - if (alg != &larval->alg) + if (alg != &larval->alg) { kfree(larval); + if (crypto_is_larval(alg)) + alg = crypto_larval_wait(alg); + } return alg; } From d8ba750f9a2feb07bb6f0448807907b674ca4525 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 7 Aug 2013 02:01:19 +1000 Subject: [PATCH 0752/1368] powerpc: Handle unaligned ldbrx/stdbrx commit 230aef7a6a23b6166bd4003bfff5af23c9bd381f upstream. Normally when we haven't implemented an alignment handler for a load or store instruction the process will be terminated. The alignment handler uses the DSISR (or a pseudo one) to locate the right handler. Unfortunately ldbrx and stdbrx overlap lfs and stfs so we incorrectly think ldbrx is an lfs and stdbrx is an stfs. This bug is particularly nasty - instead of terminating the process we apply an incorrect fixup and continue on. With more and more overlapping instructions we should stop creating a pseudo DSISR and index using the instruction directly, but for now add a special case to catch ldbrx/stdbrx. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/align.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index ee5b690a0bed..52e5758ea368 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -764,6 +764,16 @@ int fix_alignment(struct pt_regs *regs) nb = aligninfo[instr].len; flags = aligninfo[instr].flags; + /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */ + if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) { + nb = 8; + flags = LD+SW; + } else if (IS_XFORM(instruction) && + ((instruction >> 1) & 0x3ff) == 660) { + nb = 8; + flags = ST+SW; + } + /* Byteswap little endian loads and stores */ swiz = 0; if (regs->msr & MSR_LE) { From d08bfff9ef39c029befa68b2641ad2f7a2c894b6 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Fri, 6 Sep 2013 00:25:06 +0530 Subject: [PATCH 0753/1368] powerpc: Default arch idle could cede processor on pseries commit 363edbe2614aa90df706c0f19ccfa2a6c06af0be upstream. When adding cpuidle support to pSeries, we introduced two regressions: - The new cpuidle backend driver only works under hypervisors supporting the "SLPLAR" option, which isn't the case of the old POWER4 hypervisor and the HV "light" used on js2x blades - The cpuidle driver registers fairly late, meaning that for a significant portion of the boot process, we end up having all threads spinning. This slows down the boot process and increases the overall resource usage if the hypervisor has shared processors. This fixes both by implementing a "default" idle that will cede to the hypervisor when possible, in a very simple way without all the bells and whisles of cpuidle. Reported-by: Paul Mackerras Signed-off-by: Vaidyanathan Srinivasan Acked-by: Deepthi Dharwar Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/setup.c | 31 +++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c11c8238797c..54b998f2750d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -354,7 +354,7 @@ static int alloc_dispatch_log_kmem_cache(void) } early_initcall(alloc_dispatch_log_kmem_cache); -static void pSeries_idle(void) +static void pseries_lpar_idle(void) { /* This would call on the cpuidle framework, and the back-end pseries * driver to go to idle states @@ -362,10 +362,22 @@ static void pSeries_idle(void) if (cpuidle_idle_call()) { /* On error, execute default handler * to go into low thread priority and possibly - * low power mode. + * low power mode by cedeing processor to hypervisor */ - HMT_low(); - HMT_very_low(); + + /* Indicate to hypervisor that we are idle. */ + get_lppaca()->idle = 1; + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); + + get_lppaca()->idle = 0; } } @@ -456,15 +468,14 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); - ppc_md.power_save = pSeries_idle; - } - - if (firmware_has_feature(FW_FEATURE_LPAR)) + ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; - else + } else { + /* No special idle routine */ ppc_md.enable_pmcs = power4_enable_pmcs; + } ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; From 93326685f124a1b2a71843b1c41328be13f97bbb Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 31 Jul 2013 17:00:42 +0200 Subject: [PATCH 0754/1368] xen-gnt: prevent adding duplicate gnt callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5f338d9001094a56cf87bd8a280b4e7ff953bb59 upstream. With the current implementation, the callback in the tail of the list can be added twice, because the check done in gnttab_request_free_callback is bogus, callback->next can be NULL if it is the last callback in the list. If we add the same callback twice we end up with an infinite loop, were callback == callback->next. Replace this check with a proper one that iterates over the list to see if the callback has already been added. Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Matt Wilson Reviewed-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/grant-table.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 04c1b2d9b775..d5418c1be9aa 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -729,9 +729,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, u16 count) { unsigned long flags; + struct gnttab_free_callback *cb; + spin_lock_irqsave(&gnttab_list_lock, flags); - if (callback->next) - goto out; + + /* Check if the callback is already on the list */ + cb = gnttab_free_callback_list; + while (cb) { + if (cb == callback) + goto out; + cb = cb->next; + } + callback->fn = fn; callback->arg = arg; callback->count = count; From 863008a9b50259d3a4b1c9a387804f9509413de4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 29 Aug 2013 07:43:52 -0500 Subject: [PATCH 0755/1368] ARM: xen: only set pm function ptrs for Xen guests commit 9dd4b2944c46e1fdbd0a516c221c8a2670cbf005 upstream. xen_pm_init was unconditionally setting pm_power_off and arm_pm_restart function pointers. This breaks multi-platform kernels. Make this conditional on running as a Xen guest and make it a late_initcall to ensure it is setup after platform code for Dom0. Signed-off-by: Rob Herring Signed-off-by: Stefano Stabellini Signed-off-by: Greg Kroah-Hartman --- arch/arm/xen/enlighten.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 05db95d010ae..81edd31bb4ac 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -273,12 +273,15 @@ core_initcall(xen_guest_init); static int __init xen_pm_init(void) { + if (!xen_domain()) + return -ENODEV; + pm_power_off = xen_power_off; arm_pm_restart = xen_restart; return 0; } -subsys_initcall(xen_pm_init); +late_initcall(xen_pm_init); static irqreturn_t xen_arm_callback(int irq, void *arg) { From 61704f036693ca93a7d916899288eba9a98b006f Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 28 Aug 2013 18:41:47 -0700 Subject: [PATCH 0756/1368] cpuidle: coupled: abort idle if pokes are pending commit f983827bcb9d2c34c4d8935861a1e9128aec2baf upstream. Joseph Lo reported a lockup on Tegra20 caused by a race condition in coupled cpuidle. When two or more cpus enter idle at the same time, the first cpus to arrive may go to the ready loop without processing pending pokes from the last cpu to arrive. This patch adds a check for pending pokes once all cpus have been synchronized in the ready loop and resets the coupled state and retries if any cpus failed to handle their pending poke. Retrying on all cpus may trigger the same issue again, so this patch also adds a check to ensure that each cpu has received at least one poke between when it enters the waiting loop and when it moves on to the ready loop. Reported-and-tested-by: Joseph Lo Tested-by: Stephen Warren Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/coupled.c | 107 +++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 25 deletions(-) diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 2a297f86dbad..dc9e64764022 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -106,6 +106,7 @@ struct cpuidle_coupled { cpumask_t coupled_cpus; int requested_state[NR_CPUS]; atomic_t ready_waiting_counts; + atomic_t abort_barrier; int online_count; int refcnt; int prevent; @@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock); static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); /* - * The cpuidle_coupled_poked_mask mask is used to avoid calling + * The cpuidle_coupled_poke_pending mask is used to avoid calling * __smp_call_function_single with the per cpu call_single_data struct already * in use. This prevents a deadlock where two cpus are waiting for each others * call_single_data struct to be available */ -static cpumask_t cpuidle_coupled_poked_mask; +static cpumask_t cpuidle_coupled_poke_pending; + +/* + * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked + * once to minimize entering the ready loop with a poke pending, which would + * require aborting and retrying. + */ +static cpumask_t cpuidle_coupled_poked; /** * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus @@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev, return state; } -static void cpuidle_coupled_poked(void *info) +static void cpuidle_coupled_handle_poke(void *info) { int cpu = (unsigned long)info; - cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask); + cpumask_set_cpu(cpu, &cpuidle_coupled_poked); + cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending); } /** @@ -313,7 +322,7 @@ static void cpuidle_coupled_poke(int cpu) { struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); - if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask)) + if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) __smp_call_function_single(cpu, csd, 0); } @@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu, * @coupled: the struct coupled that contains the current cpu * @next_state: the index in drv->states of the requested state for this cpu * - * Updates the requested idle state for the specified cpuidle device, - * poking all coupled cpus out of idle if necessary to let them see the new - * state. + * Updates the requested idle state for the specified cpuidle device. + * Returns the number of waiting cpus. */ -static void cpuidle_coupled_set_waiting(int cpu, +static int cpuidle_coupled_set_waiting(int cpu, struct cpuidle_coupled *coupled, int next_state) { - int w; - coupled->requested_state[cpu] = next_state; /* - * If this is the last cpu to enter the waiting state, poke - * all the other cpus out of their waiting state so they can - * enter a deeper state. This can race with one of the cpus - * exiting the waiting state due to an interrupt and - * decrementing waiting_count, see comment below. - * * The atomic_inc_return provides a write barrier to order the write * to requested_state with the later write that increments ready_count. */ - w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; - if (w == coupled->online_count) - cpuidle_coupled_poke_others(cpu, coupled); + return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; } /** @@ -418,13 +416,24 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) static int cpuidle_coupled_clear_pokes(int cpu) { local_irq_enable(); - while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask)) + while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) cpu_relax(); local_irq_disable(); return need_resched() ? -EINTR : 0; } +static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled) +{ + cpumask_t cpus; + int ret; + + cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus); + ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus); + + return ret; +} + /** * cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus * @dev: struct cpuidle_device for the current cpu @@ -449,6 +458,7 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, { int entered_state = -1; struct cpuidle_coupled *coupled = dev->coupled; + int w; if (!coupled) return -EINVAL; @@ -465,14 +475,33 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, /* Read barrier ensures online_count is read after prevent is cleared */ smp_rmb(); - cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); +reset: + cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked); + + w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); + /* + * If this is the last cpu to enter the waiting state, poke + * all the other cpus out of their waiting state so they can + * enter a deeper state. This can race with one of the cpus + * exiting the waiting state due to an interrupt and + * decrementing waiting_count, see comment below. + */ + if (w == coupled->online_count) { + cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked); + cpuidle_coupled_poke_others(dev->cpu, coupled); + } retry: /* * Wait for all coupled cpus to be idle, using the deepest state - * allowed for a single cpu. + * allowed for a single cpu. If this was not the poking cpu, wait + * for at least one poke before leaving to avoid a race where + * two cpus could arrive at the waiting loop at the same time, + * but the first of the two to arrive could skip the loop without + * processing the pokes from the last to arrive. */ - while (!cpuidle_coupled_cpus_waiting(coupled)) { + while (!cpuidle_coupled_cpus_waiting(coupled) || + !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) { if (cpuidle_coupled_clear_pokes(dev->cpu)) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; @@ -492,6 +521,12 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, goto out; } + /* + * Make sure final poke status for this cpu is visible before setting + * cpu as ready. + */ + smp_wmb(); + /* * All coupled cpus are probably idle. There is a small chance that * one of the other cpus just became active. Increment the ready count, @@ -511,6 +546,28 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, cpu_relax(); } + /* + * Make sure read of all cpus ready is done before reading pending pokes + */ + smp_rmb(); + + /* + * There is a small chance that a cpu left and reentered idle after this + * cpu saw that all cpus were waiting. The cpu that reentered idle will + * have sent this cpu a poke, which will still be pending after the + * ready loop. The pending interrupt may be lost by the interrupt + * controller when entering the deep idle state. It's not possible to + * clear a pending interrupt without turning interrupts on and handling + * it, and it's too late to turn on interrupts here, so reset the + * coupled idle state of all cpus and retry. + */ + if (cpuidle_coupled_any_pokes_pending(coupled)) { + cpuidle_coupled_set_done(dev->cpu, coupled); + /* Wait for all cpus to see the pending pokes */ + cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier); + goto reset; + } + /* all cpus have acked the coupled state */ next_state = cpuidle_coupled_get_state(dev, coupled); @@ -596,7 +653,7 @@ int cpuidle_coupled_register_device(struct cpuidle_device *dev) coupled->refcnt++; csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); - csd->func = cpuidle_coupled_poked; + csd->func = cpuidle_coupled_handle_poke; csd->info = (void *)(unsigned long)dev->cpu; return 0; From 736899ab702dc85345217e124686b661a56ecb6a Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Fri, 23 Aug 2013 12:45:12 -0700 Subject: [PATCH 0757/1368] cpuidle: coupled: fix race condition between pokes and safe state commit 9e19b73c30a5fa42a53583a1f7817dd857126156 upstream. The coupled cpuidle waiting loop clears pending pokes before entering the safe state. If a poke arrives just before the pokes are cleared, but after the while loop condition checks, the poke will be lost and the cpu will stay in the safe state until another interrupt arrives. This may cause the cpu that sent the poke to spin in the ready loop with interrupts off until another cpu receives an interrupt, and if no other cpus have interrupts routed to them it can spin forever. Change the return value of cpuidle_coupled_clear_pokes to return if a poke was cleared, and move the need_resched() checks into the callers. In the waiting loop, if a poke was cleared restart the loop to repeat the while condition checks. Reported-by: Neil Zhang Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/coupled.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index dc9e64764022..fe853903fe10 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -408,19 +408,22 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) * been processed and the poke bit has been cleared. * * Other interrupts may also be processed while interrupts are enabled, so - * need_resched() must be tested after turning interrupts off again to make sure + * need_resched() must be tested after this function returns to make sure * the interrupt didn't schedule work that should take the cpu out of idle. * - * Returns 0 if need_resched was false, -EINTR if need_resched was true. + * Returns 0 if no poke was pending, 1 if a poke was cleared. */ static int cpuidle_coupled_clear_pokes(int cpu) { + if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) + return 0; + local_irq_enable(); while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) cpu_relax(); local_irq_disable(); - return need_resched() ? -EINTR : 0; + return 1; } static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled) @@ -464,7 +467,8 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, return -EINVAL; while (coupled->prevent) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { local_irq_enable(); return entered_state; } @@ -502,7 +506,10 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, */ while (!cpuidle_coupled_cpus_waiting(coupled) || !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + if (cpuidle_coupled_clear_pokes(dev->cpu)) + continue; + + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } @@ -516,7 +523,8 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, dev->safe_state_index); } - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } From 0fc9593a11f12e1c8cbe28cecf7a0ef3b2797822 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 18 Apr 2013 18:31:35 +0100 Subject: [PATCH 0758/1368] ARM: dts: at91: cpus/cpu node dts updates commit e757a6ee3e6fc1583b12b156588e8583f798d35c upstream. This patch updates the in-kernel dts files according to the latest cpus and cpu bindings updates for ARM. Signed-off-by: Lorenzo Pieralisi Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91rm9200.dtsi | 6 +++++- arch/arm/boot/dts/at91sam9260.dtsi | 8 ++++++-- arch/arm/boot/dts/at91sam9263.dtsi | 8 ++++++-- arch/arm/boot/dts/at91sam9g45.dtsi | 8 ++++++-- arch/arm/boot/dts/at91sam9n12.dtsi | 8 ++++++-- arch/arm/boot/dts/at91sam9x5.dtsi | 8 ++++++-- arch/arm/boot/dts/sama5d3.dtsi | 2 ++ 7 files changed, 37 insertions(+), 11 deletions(-) diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index 5d3ed5aafc69..0af879a4eafa 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -35,8 +35,12 @@ aliases { ssc2 = &ssc2; }; cpus { - cpu@0 { + #address-cells = <0>; + #size-cells = <0>; + + cpu { compatible = "arm,arm920t"; + device_type = "cpu"; }; }; diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index 43a18f74cddb..0dbdb846f90a 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -32,8 +32,12 @@ aliases { ssc0 = &ssc0; }; cpus { - cpu@0 { - compatible = "arm,arm926ejs"; + #address-cells = <0>; + #size-cells = <0>; + + cpu { + compatible = "arm,arm926ej-s"; + device_type = "cpu"; }; }; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index 94b58ab2cc08..fcd38f89904e 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -29,8 +29,12 @@ aliases { ssc1 = &ssc1; }; cpus { - cpu@0 { - compatible = "arm,arm926ejs"; + #address-cells = <0>; + #size-cells = <0>; + + cpu { + compatible = "arm,arm926ej-s"; + device_type = "cpu"; }; }; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index bf18a735c37d..479a0622cdb8 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -35,8 +35,12 @@ aliases { ssc1 = &ssc1; }; cpus { - cpu@0 { - compatible = "arm,arm926ejs"; + #address-cells = <0>; + #size-cells = <0>; + + cpu { + compatible = "arm,arm926ej-s"; + device_type = "cpu"; }; }; diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index 8d25f889928e..a92ec78349a2 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -31,8 +31,12 @@ aliases { ssc0 = &ssc0; }; cpus { - cpu@0 { - compatible = "arm,arm926ejs"; + #address-cells = <0>; + #size-cells = <0>; + + cpu { + compatible = "arm,arm926ej-s"; + device_type = "cpu"; }; }; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index b5833d1f7cff..2b2b6923d16b 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -33,8 +33,12 @@ aliases { ssc0 = &ssc0; }; cpus { - cpu@0 { - compatible = "arm,arm926ejs"; + #address-cells = <0>; + #size-cells = <0>; + + cpu { + compatible = "arm,arm926ej-s"; + device_type = "cpu"; }; }; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 5000e0d42849..ea324767f7e7 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -36,7 +36,9 @@ aliases { }; cpus { cpu@0 { + device_type = "cpu"; compatible = "arm,cortex-a5"; + reg = <0x0>; }; }; From ed58fb796257c770cd9f1de07b49ed9a054f93eb Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 18 Apr 2013 18:41:57 +0100 Subject: [PATCH 0759/1368] ARM: dts: sunxi: cpus/cpu nodes dts updates commit 14c44aa541744d4cf06db89c27a1e6df293c64d5 upstream. This patch updates the in-kernel dts files according to the latest cpus and cpu bindings updates for ARM. Signed-off-by: Lorenzo Pieralisi Acked-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sun4i-a10.dtsi | 2 ++ arch/arm/boot/dts/sun5i-a13.dtsi | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index e7ef619a70a2..39a8f61528d9 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -17,7 +17,9 @@ / { cpus { cpu@0 { + device_type = "cpu"; compatible = "arm,cortex-a8"; + reg = <0x0>; }; }; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 31fa38f8cc98..00a2637da62e 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -18,7 +18,9 @@ / { cpus { cpu@0 { + device_type = "cpu"; compatible = "arm,cortex-a8"; + reg = <0x0>; }; }; From 464de2dc1539e6355fad1342ad106f08f7642edd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Jun 2013 16:48:36 +0200 Subject: [PATCH 0760/1368] ARM: dts: add missing cpu #address-cell values commit 8b2efa896cc618e055e90c9d9600e7c8388ae3b7 upstream. A recent series has added CPU numbers to a lot of dts files, but unfortunately in a few cases the #address-cells and #size-cells values are missing, which causes build warnings. This adds the missing ones for sunxi and sama5 that I found through build testing. Signed-off-by: Arnd Bergmann Cc: Lorenzo Pieralisi Cc: Maxime Ripard Cc: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d3.dtsi | 2 ++ arch/arm/boot/dts/sun4i-a10.dtsi | 2 ++ arch/arm/boot/dts/sun5i-a13.dtsi | 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index ea324767f7e7..642775d7ca67 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -35,6 +35,8 @@ aliases { ssc1 = &ssc1; }; cpus { + #address-cells = <1>; + #size-cells = <0>; cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a5"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 39a8f61528d9..06ef8b625dba 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -16,6 +16,8 @@ / { interrupt-parent = <&intc>; cpus { + #address-cells = <1>; + #size-cells = <0>; cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a8"; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 00a2637da62e..d2852547b572 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -17,6 +17,8 @@ / { interrupt-parent = <&intc>; cpus { + #address-cells = <1>; + #size-cells = <0>; cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a8"; From 008fe511c11fa20d1060c9e2ddb9c829ac44e0a7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 5 Aug 2013 18:08:41 -0700 Subject: [PATCH 0761/1368] ARM: KVM: Fix 64-bit coprocessor handling commit 240e99cbd00aa541b572480e3ea7ecb0d480bc79 upstream. The PAR was exported as CRn == 7 and CRm == 0, but in fact the primary coprocessor register number was determined by CRm for 64-bit coprocessor registers as the user space API was modeled after the coprocessor access instructions (see the ARM ARM rev. C - B3-1445). However, just changing the CRn to CRm breaks the sorting check when booting the kernel, because the internal kernel logic always treats CRn as the primary register number, and it makes the table sorting impossible to understand for humans. Alternatively we could change the logic to always have CRn == CRm, but that becomes unclear in the number of ways we do look up of a coprocessor register. We could also have a separate 64-bit table but that feels somewhat over-engineered. Instead, keep CRn the primary representation of the primary coproc. register number in-kernel and always export the primary number as CRm as per the existing user space ABI. Note: The TTBR registers just magically worked because they happened to follow the CRn(0) regs and were considered CRn(0) in the in-kernel representation. Signed-off-by: Christoffer Dall Signed-off-by: Kim Phillips Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Russell King Cc: Jonghwan Choi Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/coproc.c | 26 +++++++++++++++++++------- arch/arm/kvm/coproc.h | 3 +++ arch/arm/kvm/coproc_a15.c | 6 +++++- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 4a5199070430..db9cf692d4dd 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -146,7 +146,11 @@ static bool pm_fake(struct kvm_vcpu *vcpu, #define access_pmintenclr pm_fake /* Architected CP15 registers. - * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. */ static const struct coproc_reg cp15_regs[] = { /* CSSELR: swapped by interrupt.S. */ @@ -154,8 +158,8 @@ static const struct coproc_reg cp15_regs[] = { NULL, reset_unknown, c0_CSSELR }, /* TTBR0/TTBR1: swapped by interrupt.S. */ - { CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, - { CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, + { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, + { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, /* TTBCR: swapped by interrupt.S. */ { CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32, @@ -182,7 +186,7 @@ static const struct coproc_reg cp15_regs[] = { NULL, reset_unknown, c6_IFAR }, /* PAR swapped by interrupt.S */ - { CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, + { CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, /* * DC{C,I,CI}SW operations: @@ -399,12 +403,13 @@ static bool index_to_params(u64 id, struct coproc_params *params) | KVM_REG_ARM_OPC1_MASK)) return false; params->is_64bit = true; - params->CRm = ((id & KVM_REG_ARM_CRM_MASK) + /* CRm to CRn: see cp15_to_index for details */ + params->CRn = ((id & KVM_REG_ARM_CRM_MASK) >> KVM_REG_ARM_CRM_SHIFT); params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK) >> KVM_REG_ARM_OPC1_SHIFT); params->Op2 = 0; - params->CRn = 0; + params->CRm = 0; return true; default: return false; @@ -898,7 +903,14 @@ static u64 cp15_to_index(const struct coproc_reg *reg) if (reg->is_64) { val |= KVM_REG_SIZE_U64; val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT); - val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT); + /* + * CRn always denotes the primary coproc. reg. nr. for the + * in-kernel representation, but the user space API uses the + * CRm for the encoding, because it is modelled after the + * MRRC/MCRR instructions: see the ARM ARM rev. c page + * B3-1445 + */ + val |= (reg->CRn << KVM_REG_ARM_CRM_SHIFT); } else { val |= KVM_REG_SIZE_U32; val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT); diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index b7301d3e4799..0461d5c8d3de 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -135,6 +135,8 @@ static inline int cmp_reg(const struct coproc_reg *i1, return -1; if (i1->CRn != i2->CRn) return i1->CRn - i2->CRn; + if (i1->is_64 != i2->is_64) + return i2->is_64 - i1->is_64; if (i1->CRm != i2->CRm) return i1->CRm - i2->CRm; if (i1->Op1 != i2->Op1) @@ -145,6 +147,7 @@ static inline int cmp_reg(const struct coproc_reg *i1, #define CRn(_x) .CRn = _x #define CRm(_x) .CRm = _x +#define CRm64(_x) .CRn = _x, .CRm = 0 #define Op1(_x) .Op1 = _x #define Op2(_x) .Op2 = _x #define is64 .is_64 = true diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index 685063a6d0cf..cf93472b9dd6 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -114,7 +114,11 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, /* * A15-specific CP15 registers. - * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. */ static const struct coproc_reg a15_regs[] = { /* MPIDR: we use VMPIDR for guest access. */ From 1f96d83b38937294584ede9473c2b2961528f287 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:41 +0100 Subject: [PATCH 0762/1368] arm64: perf: fix group validation when using enable_on_exec commit 8455e6ec70f33b0e8c3ffd47067e00481f09f454 upstream. This is a port of cb2d8b342aa0 ("ARM: 7698/1: perf: fix group validation when using enable_on_exec") to arm64, which fixes the event validation checking so that events in the OFF state are still considered when enable_on_exec is true. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 12e6ccb88691..2a1e9163d67a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -325,7 +325,10 @@ validate_event(struct pmu_hw_events *hw_events, if (is_software_event(event)) return 1; - if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; return armpmu->get_event_idx(hw_events, &fake_event) >= 0; From 0fe9a0dc92a64c088e76fcd3d35b2ba36b4d7f3c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:42 +0100 Subject: [PATCH 0763/1368] arm64: perf: fix ARMv8 EVTYPE_MASK to include NSH bit commit 178cd9ce377232518ec17ff2ecab2e80fa60784c upstream. This is a port of f2fe09b055e2 ("ARM: 7663/1: perf: fix ARMv7 EVTYPE_MASK to include NSH bit") to arm64, which fixes the broken evtype mask to include the NSH bit, allowing profiling at EL2. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2a1e9163d67a..cea1594ff933 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -784,7 +784,7 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_EVTYPE_MASK 0xc00000ff /* Mask for writable bits */ +#define ARMV8_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ #define ARMV8_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ /* From 3e4d5c23583779763a3393ba1bcc2ad4ba481fa2 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 22 Aug 2013 17:47:48 +0100 Subject: [PATCH 0764/1368] ARM: PCI: versatile: Fix map_irq function to match hardware commit f9b71fef12f0d6ac5c7051cfd87f7700f78c56b6 upstream. The PCI controller code for the Versatile board has never had the correct IRQ mapping for hardware. For many years it had an odd mapping ("all interrupts are int 27") which aligned with the equivalent bug in QEMU. However as of commit 1bc39ac5dab265 the mapping changed and no longer matched either hardware or QEMU, with the result that any PCI card beyond the first in QEMU would not have functioning interrupts; for example a boot with a SCSI controller would time out as follows: ------------ sym0: <895a> rev 0x0 at pci 0000:00:0d.0 irq 92 sym0: SCSI BUS has been reset. scsi0 : sym-2.2.3 [...] scsi 0:0:0:0: ABORT operation started scsi 0:0:0:0: ABORT operation timed-out. scsi 0:0:0:0: DEVICE RESET operation started scsi 0:0:0:0: DEVICE RESET operation timed-out. scsi 0:0:0:0: BUS RESET operation started scsi 0:0:0:0: BUS RESET operation timed-out. scsi 0:0:0:0: HOST RESET operation started sym0: SCSI BUS has been reset ------------ Fix the mapping so that it matches real hardware (checked against the schematics for PB926 and backplane, and tested against the hardware). This allows PCI cards using interrupts to work on hardware for the first time; this change will also work with QEMU 1.5 or later, where the equivalent bugs in the modelling of the hardware have been fixed. Although QEMU will attempt to autodetect whether the kernel is expecting the long-standing "everything is int 27" mapping or the one hardware has, for certainty we force it into "definitely behave like hardware mode"; this will avoid unexpected surprises later if we implement sparse irqs. This is harmless on hardware. Thanks to Paul Gortmaker for bisecting the problem and finding an initial solution, to Russell King for providing the correct interrupt mapping, and to Guenter Roeck for providing an initial version of this patch and prodding me into relocating the hardware and retesting everything. Signed-off-by: Peter Maydell Reviewed-by: Linus Walleij Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-versatile/pci.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c index e92e5e0705bc..234740d90385 100644 --- a/arch/arm/mach-versatile/pci.c +++ b/arch/arm/mach-versatile/pci.c @@ -294,6 +294,19 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) __raw_writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_1); __raw_writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_2); + /* + * For many years the kernel and QEMU were symbiotically buggy + * in that they both assumed the same broken IRQ mapping. + * QEMU therefore attempts to auto-detect old broken kernels + * so that they still work on newer QEMU as they did on old + * QEMU. Since we now use the correct (ie matching-hardware) + * IRQ mapping we write a definitely different value to a + * PCI_INTERRUPT_LINE register to tell QEMU that we expect + * real hardware behaviour and it need not be backwards + * compatible for us. This write is harmless on real hardware. + */ + __raw_writel(0, VERSATILE_PCI_VIRT_BASE+PCI_INTERRUPT_LINE); + /* * Do not to map Versatile FPGA PCI device into memory space */ @@ -327,13 +340,13 @@ static int __init versatile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; - /* slot, pin, irq - * 24 1 IRQ_SIC_PCI0 - * 25 1 IRQ_SIC_PCI1 - * 26 1 IRQ_SIC_PCI2 - * 27 1 IRQ_SIC_PCI3 + /* + * Slot INTA INTB INTC INTD + * 31 PCI1 PCI2 PCI3 PCI0 + * 30 PCI0 PCI1 PCI2 PCI3 + * 29 PCI3 PCI0 PCI1 PCI2 */ - irq = IRQ_SIC_PCI0 + ((slot - 24 + pin - 1) & 3); + irq = IRQ_SIC_PCI0 + ((slot + 2 + pin - 1) & 3); return irq; } From d6e770beb3489067fc68e0d468c99f0e44a42052 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 22 Aug 2013 17:47:49 +0100 Subject: [PATCH 0765/1368] ARM: PCI: versatile: Fix PCI I/O commit 829f9fedee30cde2ec15e88d57ec11074db791e2 upstream. The versatile PCI controller code was confused between the PCI I/O window (at 0x43000000) and the first PCI memory window (at 0x44000000). Pass the correct base address to pci_remap_io() so that PCI I/O accesses work. Since the first PCI memory window isn't used at all (it's an odd size), rename the associated variables and labels so that it's clear that it isn't related to the I/O window. This has been tested and confirmed to fix PCI I/O accesses both on physical PB926+PCI backplane hardware and on QEMU. Signed-off-by: Peter Maydell Reviewed-by: Linus Walleij Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-versatile/include/mach/platform.h | 2 ++ arch/arm/mach-versatile/pci.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-versatile/include/mach/platform.h b/arch/arm/mach-versatile/include/mach/platform.h index ec087407b163..6f938ccb0c54 100644 --- a/arch/arm/mach-versatile/include/mach/platform.h +++ b/arch/arm/mach-versatile/include/mach/platform.h @@ -231,12 +231,14 @@ /* PCI space */ #define VERSATILE_PCI_BASE 0x41000000 /* PCI Interface */ #define VERSATILE_PCI_CFG_BASE 0x42000000 +#define VERSATILE_PCI_IO_BASE 0x43000000 #define VERSATILE_PCI_MEM_BASE0 0x44000000 #define VERSATILE_PCI_MEM_BASE1 0x50000000 #define VERSATILE_PCI_MEM_BASE2 0x60000000 /* Sizes of above maps */ #define VERSATILE_PCI_BASE_SIZE 0x01000000 #define VERSATILE_PCI_CFG_BASE_SIZE 0x02000000 +#define VERSATILE_PCI_IO_BASE_SIZE 0x01000000 #define VERSATILE_PCI_MEM_BASE0_SIZE 0x0c000000 /* 32Mb */ #define VERSATILE_PCI_MEM_BASE1_SIZE 0x10000000 /* 256Mb */ #define VERSATILE_PCI_MEM_BASE2_SIZE 0x10000000 /* 256Mb */ diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c index 234740d90385..1e48878cc768 100644 --- a/arch/arm/mach-versatile/pci.c +++ b/arch/arm/mach-versatile/pci.c @@ -170,8 +170,8 @@ static struct pci_ops pci_versatile_ops = { .write = versatile_write_config, }; -static struct resource io_mem = { - .name = "PCI I/O space", +static struct resource unused_mem = { + .name = "PCI unused", .start = VERSATILE_PCI_MEM_BASE0, .end = VERSATILE_PCI_MEM_BASE0+VERSATILE_PCI_MEM_BASE0_SIZE-1, .flags = IORESOURCE_MEM, @@ -195,9 +195,9 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) { int ret = 0; - ret = request_resource(&iomem_resource, &io_mem); + ret = request_resource(&iomem_resource, &unused_mem); if (ret) { - printk(KERN_ERR "PCI: unable to allocate I/O " + printk(KERN_ERR "PCI: unable to allocate unused " "memory region (%d)\n", ret); goto out; } @@ -205,7 +205,7 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) if (ret) { printk(KERN_ERR "PCI: unable to allocate non-prefetchable " "memory region (%d)\n", ret); - goto release_io_mem; + goto release_unused_mem; } ret = request_resource(&iomem_resource, &pre_mem); if (ret) { @@ -225,8 +225,8 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) release_non_mem: release_resource(&non_mem); - release_io_mem: - release_resource(&io_mem); + release_unused_mem: + release_resource(&unused_mem); out: return ret; } @@ -246,7 +246,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) goto out; } - ret = pci_ioremap_io(0, VERSATILE_PCI_MEM_BASE0); + ret = pci_ioremap_io(0, VERSATILE_PCI_IO_BASE); if (ret) goto out; From d9a004a0a3bb56b62f36a460ec61c76112dd8442 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 22 Aug 2013 17:47:50 +0100 Subject: [PATCH 0766/1368] ARM: PCI: versatile: Fix SMAP register offsets commit 99f2b130370b904ca5300079243fdbcafa2c708b upstream. The SMAP register offsets in the versatile PCI controller code were all off by four. (This didn't have any observable bad effects because on this board PHYS_OFFSET is zero, and (a) writing zero to the flags register at offset 0x10 has no effect and (b) the reset value of the SMAP register is zero anyway, so failing to write SMAP2 didn't matter.) Signed-off-by: Peter Maydell Reviewed-by: Linus Walleij Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-versatile/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c index 1e48878cc768..c97be4ea76d2 100644 --- a/arch/arm/mach-versatile/pci.c +++ b/arch/arm/mach-versatile/pci.c @@ -43,9 +43,9 @@ #define PCI_IMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) #define PCI_IMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) #define PCI_IMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) -#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) -#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) -#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) +#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x1c) #define PCI_SELFID __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) #define DEVICE_ID_OFFSET 0x00 From faa94b0365522a939cd10d05d526a355a7eeaadf Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 6 Aug 2013 14:13:44 +1000 Subject: [PATCH 0767/1368] KVM: PPC: Book3S: Fix compile error in XICS emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7bfa9ad55d691f2b836b576769b11eca2cf50816 upstream. Commit 8e44ddc3f3 ("powerpc/kvm/book3s: Add support for H_IPOLL and H_XIRR_X in XICS emulation") added a call to get_tb() but didn't include the header that defines it, and on some configs this means book3s_xics.c fails to compile: arch/powerpc/kvm/book3s_xics.c: In function ‘kvmppc_xics_hcall’: arch/powerpc/kvm/book3s_xics.c:812:3: error: implicit declaration of function ‘get_tb’ [-Werror=implicit-function-declaration] Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_xics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 94c1dd46b83d..a3a5cb8ee7ea 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include From a6025b95e6d05833f19eaca2f4e181bf77a854ed Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Thu, 8 Aug 2013 10:08:34 -0700 Subject: [PATCH 0768/1368] xhci-plat: Don't enable legacy PCI interrupts. commit 52fb61250a7a132b0cfb9f4a1060a1f3c49e5a25 upstream. The xHCI platform driver calls into usb_add_hcd to register the irq for its platform device. It does not want the xHCI generic driver to register an interrupt for it at all. The original code did that by setting the XHCI_BROKEN_MSI quirk, which tells the xHCI driver to not enable MSI or MSI-X for a PCI host. Unfortunately, if CONFIG_PCI is enabled, and CONFIG_USB_DW3 is enabled, the xHCI generic driver will attempt to register a legacy PCI interrupt for the xHCI platform device in xhci_try_enable_msi(). This will result in a bogus irq being registered, since the underlying device is a platform_device, not a pci_device, and thus the pci_device->irq pointer will be bogus. Add a new quirk, XHCI_PLAT, so that the xHCI generic driver can distinguish between a PCI device that can't handle MSI or MSI-X, and a platform device that should not have its interrupts touched at all. This quirk may be useful in the future, in case other corner cases like this arise. This patch should be backported to kernels as old as 3.9, that contain the commit 00eed9c814cb8f281be6f0f5d8f45025dc0a97eb "USB: xhci: correctly enable interrupts". Signed-off-by: Sarah Sharp Reported-by: Yu Y Wang Tested-by: Yu Y Wang Reviewed-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 2 +- drivers/usb/host/xhci.c | 7 ++++++- drivers/usb/host/xhci.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 93ad67eca053..6e70ce976769 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -24,7 +24,7 @@ static void xhci_plat_quirks(struct device *dev, struct xhci_hcd *xhci) * here that the generic code does not try to make a pci_dev from our * dev struct in order to setup MSI */ - xhci->quirks |= XHCI_BROKEN_MSI; + xhci->quirks |= XHCI_PLAT; } /* called during probe() after chip reset completes */ diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 9a550b6ad01d..a9f925b32b61 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -342,9 +342,14 @@ static void xhci_msix_sync_irqs(struct xhci_hcd *xhci) static int xhci_try_enable_msi(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); - struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); + struct pci_dev *pdev; int ret; + /* The xhci platform device has set up IRQs through usb_add_hcd. */ + if (xhci->quirks & XHCI_PLAT) + return 0; + + pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); /* * Some Fresco Logic host controllers advertise MSI, but fail to * generate interrupts. Don't even try to enable MSI. diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 77600cefcaf1..3638f1d779e6 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1516,6 +1516,7 @@ struct xhci_hcd { #define XHCI_SPURIOUS_REBOOT (1 << 13) #define XHCI_COMP_MODE_QUIRK (1 << 14) #define XHCI_AVOID_BEI (1 << 15) +#define XHCI_PLAT (1 << 16) unsigned int num_active_eps; unsigned int limit_active_eps; /* There are two roothubs to keep track of bus suspend info for */ From 54d8c40da1d8763cfa5699516626f8ed37db3eed Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Mon, 19 Aug 2013 10:36:13 -0700 Subject: [PATCH 0769/1368] usb: xhci: Disable runtime PM suspend for quirky controllers commit c8476fb855434c733099079063990e5bfa7ecad6 upstream. If a USB controller with XHCI_RESET_ON_RESUME goes to runtime suspend, a reset will be performed upon runtime resume. Any previously suspended devices attached to the controller will be re-enumerated at this time. This will cause problems, for example, if an open system call on the device triggered the resume (the open call will fail). Note that this change is only relevant when persist_enabled is not set for USB devices. This patch should be backported to kernels as old as 3.0, that contain the commit c877b3b2ad5cb9d4fe523c5496185cc328ff3ae9 "xhci: Add reset on resume quirk for asrock p67 host". Signed-off-by: Shawn Nematbakhsh Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index a9f925b32b61..f4a49c45299c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3511,10 +3511,21 @@ void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct xhci_virt_device *virt_dev; + struct device *dev = hcd->self.controller; unsigned long flags; u32 state; int i, ret; +#ifndef CONFIG_USB_DEFAULT_PERSIST + /* + * We called pm_runtime_get_noresume when the device was attached. + * Decrement the counter here to allow controller to runtime suspend + * if no devices remain. + */ + if (xhci->quirks & XHCI_RESET_ON_RESUME) + pm_runtime_put_noidle(dev); +#endif + ret = xhci_check_args(hcd, udev, NULL, 0, true, __func__); /* If the host is halted due to driver unload, we still need to free the * device. @@ -3586,6 +3597,7 @@ static int xhci_reserve_host_control_ep_resources(struct xhci_hcd *xhci) int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct device *dev = hcd->self.controller; unsigned long flags; int timeleft; int ret; @@ -3638,6 +3650,16 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) goto disable_slot; } udev->slot_id = xhci->slot_id; + +#ifndef CONFIG_USB_DEFAULT_PERSIST + /* + * If resetting upon resume, we can't put the controller into runtime + * suspend if there is a device attached. + */ + if (xhci->quirks & XHCI_RESET_ON_RESUME) + pm_runtime_get_noresume(dev); +#endif + /* Is this a LS or FS device under a HS hub? */ /* Hub or peripherial? */ return 1; From 734b2fe93f4bcce29f7d971a106ea94e263e8676 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 27 Jun 2013 10:00:18 +0300 Subject: [PATCH 0770/1368] usb: dwc3: gadget: don't request IRQs in atomic commit b0d7ffd44ba9cd2dfbf299674418193a5f9ed21a upstream. We cannot request an IRQ with spinlocks held as that would trigger a sleeping inside spinlock warning. Reported-by: Stephen Boyd Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f77083fedc68..14d28d6184f6 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1508,6 +1508,15 @@ static int dwc3_gadget_start(struct usb_gadget *g, int irq; u32 reg; + irq = platform_get_irq(to_platform_device(dwc->dev), 0); + ret = request_threaded_irq(irq, dwc3_interrupt, dwc3_thread_interrupt, + IRQF_SHARED | IRQF_ONESHOT, "dwc3", dwc); + if (ret) { + dev_err(dwc->dev, "failed to request irq #%d --> %d\n", + irq, ret); + goto err0; + } + spin_lock_irqsave(&dwc->lock, flags); if (dwc->gadget_driver) { @@ -1515,7 +1524,7 @@ static int dwc3_gadget_start(struct usb_gadget *g, dwc->gadget.name, dwc->gadget_driver->driver.name); ret = -EBUSY; - goto err0; + goto err1; } dwc->gadget_driver = driver; @@ -1551,42 +1560,38 @@ static int dwc3_gadget_start(struct usb_gadget *g, ret = __dwc3_gadget_ep_enable(dep, &dwc3_gadget_ep0_desc, NULL, false); if (ret) { dev_err(dwc->dev, "failed to enable %s\n", dep->name); - goto err0; + goto err2; } dep = dwc->eps[1]; ret = __dwc3_gadget_ep_enable(dep, &dwc3_gadget_ep0_desc, NULL, false); if (ret) { dev_err(dwc->dev, "failed to enable %s\n", dep->name); - goto err1; + goto err3; } /* begin to receive SETUP packets */ dwc->ep0state = EP0_SETUP_PHASE; dwc3_ep0_out_start(dwc); - irq = platform_get_irq(to_platform_device(dwc->dev), 0); - ret = request_threaded_irq(irq, dwc3_interrupt, dwc3_thread_interrupt, - IRQF_SHARED | IRQF_ONESHOT, "dwc3", dwc); - if (ret) { - dev_err(dwc->dev, "failed to request irq #%d --> %d\n", - irq, ret); - goto err1; - } - dwc3_gadget_enable_irq(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return 0; -err1: +err3: __dwc3_gadget_ep_disable(dwc->eps[0]); -err0: +err2: dwc->gadget_driver = NULL; + +err1: spin_unlock_irqrestore(&dwc->lock, flags); + free_irq(irq, dwc); + +err0: return ret; } @@ -1600,9 +1605,6 @@ static int dwc3_gadget_stop(struct usb_gadget *g, spin_lock_irqsave(&dwc->lock, flags); dwc3_gadget_disable_irq(dwc); - irq = platform_get_irq(to_platform_device(dwc->dev), 0); - free_irq(irq, dwc); - __dwc3_gadget_ep_disable(dwc->eps[0]); __dwc3_gadget_ep_disable(dwc->eps[1]); @@ -1610,6 +1612,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g, spin_unlock_irqrestore(&dwc->lock, flags); + irq = platform_get_irq(to_platform_device(dwc->dev), 0); + free_irq(irq, dwc); + return 0; } From d5cc290b92837eb9844579e297e969cc7a804da0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 15 Sep 2013 17:50:26 +0200 Subject: [PATCH 0771/1368] tty: disassociate_ctty() sends the extra SIGCONT commit 03e1261778cca782d41a3d8e3945ca88cf93e01e upstream. Starting from v3.10 (probably commit f91e2590410b: "tty: Signal foreground group processes in hangup") disassociate_ctty() sends SIGCONT if tty && on_exit. This breaks LSB test-suite, in particular test8 in _exit.c and test40 in sigcon5.c. Put the "!on_exit" check back to restore the old behaviour. Review by Peter Hurley: "Yes, this regression was introduced by me in that commit. The effect of the regression is that ptys will receive a SIGCONT when, in similar circumstances, ttys would not. The fact that two test vectors accidentally tripped over this regression suggests that some other apps may as well. Thanks for catching this" Signed-off-by: Oleg Nesterov Reported-by: Karel Srot Reviewed-by: Peter Hurley Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 447668213900..59d26ef538d8 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -850,7 +850,8 @@ void disassociate_ctty(int on_exit) struct pid *tty_pgrp = tty_get_pgrp(tty); if (tty_pgrp) { kill_pgrp(tty_pgrp, SIGHUP, on_exit); - kill_pgrp(tty_pgrp, SIGCONT, on_exit); + if (!on_exit) + kill_pgrp(tty_pgrp, SIGCONT, on_exit); put_pid(tty_pgrp); } } From 9b8ace6745bf4722ff9c48089f5b0723478a849b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Sep 2013 08:38:10 -0400 Subject: [PATCH 0772/1368] cifs: ensure that srv_mutex is held when dealing with ssocket pointer commit 73e216a8a42c0ef3d08071705c946c38fdbe12b0 upstream. Oleksii reported that he had seen an oops similar to this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 IP: [] sock_sendmsg+0x93/0xd0 PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: ipt_MASQUERADE xt_REDIRECT xt_tcpudp iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack ip_tables x_tables carl9170 ath usb_storage f2fs nfnetlink_log nfnetlink md4 cifs dns_resolver hid_generic usbhid hid af_packet uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_core videodev rfcomm btusb bnep bluetooth qmi_wwan qcserial cdc_wdm usb_wwan usbnet usbserial mii snd_hda_codec_hdmi snd_hda_codec_realtek iwldvm mac80211 coretemp intel_powerclamp kvm_intel kvm iwlwifi snd_hda_intel cfg80211 snd_hda_codec xhci_hcd e1000e ehci_pci snd_hwdep sdhci_pci snd_pcm ehci_hcd microcode psmouse sdhci thinkpad_acpi mmc_core i2c_i801 pcspkr usbcore hwmon snd_timer snd_page_alloc snd ptp rfkill pps_core soundcore evdev usb_common vboxnetflt(O) vboxdrv(O)Oops#2 Part8 loop tun binfmt_misc fuse msr acpi_call(O) ipv6 autofs4 CPU: 0 PID: 21612 Comm: kworker/0:1 Tainted: G W O 3.10.1SIGN #28 Hardware name: LENOVO 2306CTO/2306CTO, BIOS G2ET92WW (2.52 ) 02/22/2013 Workqueue: cifsiod cifs_echo_request [cifs] task: ffff8801e1f416f0 ti: ffff880148744000 task.ti: ffff880148744000 RIP: 0010:[] [] sock_sendmsg+0x93/0xd0 RSP: 0000:ffff880148745b00 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880148745b78 RCX: 0000000000000048 RDX: ffff880148745c90 RSI: ffff880181864a00 RDI: ffff880148745b78 RBP: ffff880148745c48 R08: 0000000000000048 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff880181864a00 R13: ffff880148745c90 R14: 0000000000000048 R15: 0000000000000048 FS: 0000000000000000(0000) GS:ffff88021e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000088 CR3: 000000020c42c000 CR4: 00000000001407b0 Oops#2 Part7 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff880148745b30 ffffffff810c4af9 0000004848745b30 ffff880181864a00 ffffffff81ffbc40 0000000000000000 ffff880148745c90 ffffffff810a5aab ffff880148745bc0 ffffffff81ffbc40 ffff880148745b60 ffffffff815a9fb8 Call Trace: [] ? finish_task_switch+0x49/0xe0 [] ? lock_timer_base.isra.36+0x2b/0x50 [] ? _raw_spin_unlock_irqrestore+0x18/0x40 [] ? try_to_del_timer_sync+0x4f/0x70 [] ? _raw_spin_unlock_bh+0x1f/0x30 [] kernel_sendmsg+0x37/0x50 [] smb_send_kvec+0xd0/0x1d0 [cifs] [] smb_send_rqst+0x83/0x1f0 [cifs] [] cifs_call_async+0xec/0x1b0 [cifs] [] ? free_rsp_buf+0x40/0x40 [cifs] Oops#2 Part6 [] SMB2_echo+0x8e/0xb0 [cifs] [] cifs_echo_request+0x79/0xa0 [cifs] [] process_one_work+0x173/0x4a0 [] worker_thread+0x121/0x3a0 [] ? manage_workers.isra.27+0x2b0/0x2b0 [] kthread+0xc0/0xd0 [] ? kthread_create_on_node+0x120/0x120 [] ret_from_fork+0x7c/0xb0 [] ? kthread_create_on_node+0x120/0x120 Code: 84 24 b8 00 00 00 4c 89 f1 4c 89 ea 4c 89 e6 48 89 df 4c 89 60 18 48 c7 40 28 00 00 00 00 4c 89 68 30 44 89 70 14 49 8b 44 24 28 90 88 00 00 00 3d ef fd ff ff 74 10 48 8d 65 e0 5b 41 5c 41 RIP [] sock_sendmsg+0x93/0xd0 RSP CR2: 0000000000000088 The client was in the middle of trying to send a frame when the server->ssocket pointer got zeroed out. In most places, that we access that pointer, the srv_mutex is held. There's only one spot that I see that the server->ssocket pointer gets set and the srv_mutex isn't held. This patch corrects that. The upstream bug report was here: https://bugzilla.kernel.org/show_bug.cgi?id=60557 Reported-by: Oleksii Shevchuk Signed-off-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d6a5c5ac737b..d05a30072023 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -377,6 +377,7 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); /* we should try only the port we connected to before */ + mutex_lock(&server->srv_mutex); rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); @@ -388,6 +389,7 @@ cifs_reconnect(struct TCP_Server_Info *server) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); } + mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; From af66f40c4cb1d31b3b403d5f9a8471261a0cc945 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 5 Sep 2013 15:04:04 +0400 Subject: [PATCH 0773/1368] CIFS: Fix a memory leak when a lease break comes commit 1a05096de82f3cd672c76389f63964952678506f upstream. This happens when we receive a lease break from a server, then find an appropriate lease key in opened files and schedule the oplock_break slow work. lw pointer isn't freed in this case. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2misc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 10383d8c015b..2b1dc7f4464a 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -469,6 +469,7 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) queue_work(cifsiod_wq, &cfile->oplock_break); + kfree(lw); spin_unlock(&cifs_file_list_lock); spin_unlock(&cifs_tcp_ses_lock); return true; From b08d9b572074b3d0899349070bb61688ad6eb630 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 5 Sep 2013 15:00:07 +0400 Subject: [PATCH 0774/1368] CIFS: Fix missing lease break commit 933d4b36576c951d0371bbfed05ec0135d516a6e upstream. If a server sends a lease break to a connection that doesn't have opens with a lease key specified in the server response, we can't find an open file to send an ack. Fix this by walking through all connections we have. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2misc.c | 149 ++++++++++++++++++++++++--------------------- 1 file changed, 80 insertions(+), 69 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 2b1dc7f4464a..4f791e0e98d7 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -413,19 +413,76 @@ cifs_ses_oplock_break(struct work_struct *work) } static bool -smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) +smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, + struct smb2_lease_break_work *lw) +{ + bool found; + __u8 lease_state; + struct list_head *tmp; + struct cifsFileInfo *cfile; + struct cifs_pending_open *open; + struct cifsInodeInfo *cinode; + int ack_req = le32_to_cpu(rsp->Flags & + SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); + + lease_state = smb2_map_lease_to_oplock(rsp->NewLeaseState); + + list_for_each(tmp, &tcon->openFileList) { + cfile = list_entry(tmp, struct cifsFileInfo, tlist); + cinode = CIFS_I(cfile->dentry->d_inode); + + if (memcmp(cinode->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + cifs_dbg(FYI, "found in the open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); + + smb2_set_oplock_level(cinode, lease_state); + + if (ack_req) + cfile->oplock_break_cancelled = false; + else + cfile->oplock_break_cancelled = true; + + queue_work(cifsiod_wq, &cfile->oplock_break); + kfree(lw); + return true; + } + + found = false; + list_for_each_entry(open, &tcon->pending_opens, olist) { + if (memcmp(open->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + if (!found && ack_req) { + found = true; + memcpy(lw->lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + lw->tlink = cifs_get_tlink(open->tlink); + queue_work(cifsiod_wq, &lw->lease_break); + } + + cifs_dbg(FYI, "found in the pending open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); + + open->oplock = lease_state; + } + return found; +} + +static bool +smb2_is_valid_lease_break(char *buffer) { struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; struct list_head *tmp, *tmp1, *tmp2; + struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct cifsInodeInfo *cinode; - struct cifsFileInfo *cfile; - struct cifs_pending_open *open; struct smb2_lease_break_work *lw; - bool found; - int ack_req = le32_to_cpu(rsp->Flags & - SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); if (!lw) @@ -438,72 +495,26 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp, &cifs_tcp_ses_list) { + server = list_entry(tmp, struct TCP_Server_Info, tcp_ses_list); - spin_lock(&cifs_file_list_lock); - list_for_each(tmp1, &ses->tcon_list) { - tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + list_for_each(tmp1, &server->smb_ses_list) { + ses = list_entry(tmp1, struct cifs_ses, smb_ses_list); - cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); - list_for_each(tmp2, &tcon->openFileList) { - cfile = list_entry(tmp2, struct cifsFileInfo, - tlist); - cinode = CIFS_I(cfile->dentry->d_inode); - - if (memcmp(cinode->lease_key, rsp->LeaseKey, - SMB2_LEASE_KEY_SIZE)) - continue; - - cifs_dbg(FYI, "found in the open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", - le32_to_cpu(rsp->NewLeaseState)); - - smb2_set_oplock_level(cinode, - smb2_map_lease_to_oplock(rsp->NewLeaseState)); - - if (ack_req) - cfile->oplock_break_cancelled = false; - else - cfile->oplock_break_cancelled = true; - - queue_work(cifsiod_wq, &cfile->oplock_break); - - kfree(lw); - spin_unlock(&cifs_file_list_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; - } - - found = false; - list_for_each_entry(open, &tcon->pending_opens, olist) { - if (memcmp(open->lease_key, rsp->LeaseKey, - SMB2_LEASE_KEY_SIZE)) - continue; - - if (!found && ack_req) { - found = true; - memcpy(lw->lease_key, open->lease_key, - SMB2_LEASE_KEY_SIZE); - lw->tlink = cifs_get_tlink(open->tlink); - queue_work(cifsiod_wq, - &lw->lease_break); + spin_lock(&cifs_file_list_lock); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); + if (smb2_tcon_has_lease(tcon, rsp, lw)) { + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; } - - cifs_dbg(FYI, "found in the pending open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", - le32_to_cpu(rsp->NewLeaseState)); - - open->oplock = - smb2_map_lease_to_oplock(rsp->NewLeaseState); - } - if (found) { - spin_unlock(&cifs_file_list_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; } + spin_unlock(&cifs_file_list_lock); } - spin_unlock(&cifs_file_list_lock); } spin_unlock(&cifs_tcp_ses_lock); kfree(lw); @@ -529,7 +540,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) - return smb2_is_valid_lease_break(buffer, server); + return smb2_is_valid_lease_break(buffer); else return false; } From ac78ae630c039e548e2b8d41b6036f240386d770 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 26 Aug 2013 15:01:40 -0400 Subject: [PATCH 0775/1368] USB: OHCI: Allow runtime PM without system sleep commit 69820e01aa756b8d228143d997f71523c1e97984 upstream. Since ohci-hcd supports runtime PM, the .pm field in its pci_driver structure should be protected by CONFIG_PM rather than CONFIG_PM_SLEEP. Without this change, OHCI controllers won't do runtime suspend if system suspend or hibernation isn't enabled. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 951514ef446d..ef6782bd1fa9 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -371,7 +371,7 @@ static struct pci_driver ohci_pci_driver = { .remove = usb_hcd_pci_remove, .shutdown = usb_hcd_pci_shutdown, -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM .driver = { .pm = &usb_hcd_pci_pm_ops }, From 979ad974d2d7d1e98e21b582a11acdf0713914e6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Aug 2013 11:39:15 -0700 Subject: [PATCH 0776/1368] net: Check the correct namespace when spoofing pid over SCM_RIGHTS commit d661684cf6820331feae71146c35da83d794467e upstream. This is a security bug. The follow-up will fix nsproxy to discourage this type of issue from happening again. Signed-off-by: Andy Lutomirski Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/scm.c b/net/core/scm.c index 03795d0147f2..b4da80b1cc07 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -54,7 +54,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || - ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && + ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || From 6b844e7d601f709942cf15e5f5808953f5382402 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 20 Aug 2013 11:57:35 +0300 Subject: [PATCH 0777/1368] staging: comedi: dt282x: dt282x_ai_insn_read() always fails commit 2c4283ca7cdcc6605859c836fc536fcd83a4525f upstream. In dt282x_ai_insn_read() we call this macro like: wait_for(!mux_busy(), comedi_error(dev, "timeout\n"); return -ETIME;); Because the if statement doesn't have curly braces it means we always return -ETIME and the function never succeeds. Signed-off-by: Dan Carpenter Acked-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/dt282x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/dt282x.c b/drivers/staging/comedi/drivers/dt282x.c index 90f2de9bc402..f4c1e998cbe9 100644 --- a/drivers/staging/comedi/drivers/dt282x.c +++ b/drivers/staging/comedi/drivers/dt282x.c @@ -269,8 +269,9 @@ struct dt282x_private { } \ udelay(5); \ } \ - if (_i) \ + if (_i) { \ b \ + } \ } while (0) static int prep_ai_dma(struct comedi_device *dev, int chan, int size); From abea758371f89b0cdb5f0b2dad24674e0148f2ee Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 29 Jun 2013 22:20:00 +0100 Subject: [PATCH 0778/1368] iio: mxs-lradc: Fix misuse of iio->trig commit e1b1fa66a0398f0b52ae79a2bdc7de87c205d074 upstream. The struct iio_dev .trig field is to be used only by the IIO core, the driver shall not fill this field. This fixes ugly crash when the driver is compiled as a module and the module is rmmod'd. Signed-off-by: Marek Vasut Cc: Fabio Estevam Cc: Jonathan Cameron Cc: Shawn Guo Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/adc/mxs-lradc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index 163c638e4095..59809566bc42 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -661,12 +661,13 @@ static int mxs_lradc_trigger_init(struct iio_dev *iio) { int ret; struct iio_trigger *trig; + struct mxs_lradc *lradc = iio_priv(iio); trig = iio_trigger_alloc("%s-dev%i", iio->name, iio->id); if (trig == NULL) return -ENOMEM; - trig->dev.parent = iio->dev.parent; + trig->dev.parent = lradc->dev; iio_trigger_set_drvdata(trig, iio); trig->ops = &mxs_lradc_trigger_ops; @@ -676,15 +677,17 @@ static int mxs_lradc_trigger_init(struct iio_dev *iio) return ret; } - iio->trig = trig; + lradc->trig = trig; return 0; } static void mxs_lradc_trigger_remove(struct iio_dev *iio) { - iio_trigger_unregister(iio->trig); - iio_trigger_free(iio->trig); + struct mxs_lradc *lradc = iio_priv(iio); + + iio_trigger_unregister(lradc->trig); + iio_trigger_free(lradc->trig); } static int mxs_lradc_buffer_preenable(struct iio_dev *iio) From f7e0ca347bcb540e2894833eccc33f9a65f4c822 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 3 Jul 2013 22:25:00 +0100 Subject: [PATCH 0779/1368] iio: mxs-lradc: Remove useless check in read_raw commit 2a961d0995cdadbfba565b28beada59c5ae7ebae upstream. The removed check in the read_raw implementation was always true, therefore remove it. This also fixes a bug, by closely inspecting the code, one can notice the iio_validate_scan_mask_onehot() will always return 1 and therefore the subsequent condition will always succeed, therefore making the mxs_lradc_read_raw() function always return -EINVAL; . Signed-off-by: Marek Vasut Tested-by: Otavio Salvador Acked-by: Hector Palacios Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/adc/mxs-lradc.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index 59809566bc42..972a0723afac 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -234,7 +234,6 @@ static int mxs_lradc_read_raw(struct iio_dev *iio_dev, { struct mxs_lradc *lradc = iio_priv(iio_dev); int ret; - unsigned long mask; if (m != IIO_CHAN_INFO_RAW) return -EINVAL; @@ -243,12 +242,6 @@ static int mxs_lradc_read_raw(struct iio_dev *iio_dev, if (chan->channel > LRADC_MAX_TOTAL_CHANS) return -EINVAL; - /* Validate the channel if it doesn't intersect with reserved chans. */ - bitmap_set(&mask, chan->channel, 1); - ret = iio_validate_scan_mask_onehot(iio_dev, &mask); - if (ret) - return -EINVAL; - /* * See if there is no buffered operation in progess. If there is, simply * bail out. This can be improved to support both buffered and raw IO at From 3fa0d2f124263d0080f266c4c7bfc60153f29b91 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Sep 2013 13:30:25 +0300 Subject: [PATCH 0780/1368] ACPI / LPSS: don't crash if a device has no MMIO resources commit af65cfe9aeae03e0682bebdf4db94582d75562dd upstream. Intel LPSS devices that are enumerated from ACPI have both MMIO and IRQ resources returned in their _CRS method. However, Apple Macbook Air with Haswell has LPSS devices enumerated from PCI bus instead and _CRS method returns only an interrupt number (but the device has _HID set that causes the scan handler to match it). The current ACPI / LPSS code sets pdata->dev_desc only when MMIO resource is found for the device and in case of Macbook Air it is never found. That leads to a NULL pointer dereference in register_device_clock(). Correct this by always setting the pdata->dev_desc. Reported-and-tested-by: Imre Kaloz Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_lpss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index cab13f2fc28e..7c451cb26254 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -155,12 +155,13 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(&rentry->res); pdata->mmio_base = ioremap(rentry->res.start, pdata->mmio_size); - pdata->dev_desc = dev_desc; break; } acpi_dev_free_resource_list(&resource_list); + pdata->dev_desc = dev_desc; + if (dev_desc->clk_required) { ret = register_device_clock(adev, pdata); if (ret) { From 3765762187edd49446c94884aa88c952cec41116 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 16 Aug 2013 10:16:59 +0300 Subject: [PATCH 0781/1368] USB: mos7720: use GFP_ATOMIC under spinlock commit d0bd9a41186e076ea543c397ad8a67a6cf604b55 upstream. The write_parport_reg_nonblock() function shouldn't sleep because it's called with spinlocks held. Signed-off-by: Dan Carpenter Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 5050cc8584ba..6577b7d1d2b8 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -374,7 +374,7 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, kfree(urbtrack); return -ENOMEM; } - urbtrack->setup = kmalloc(sizeof(*urbtrack->setup), GFP_KERNEL); + urbtrack->setup = kmalloc(sizeof(*urbtrack->setup), GFP_ATOMIC); if (!urbtrack->setup) { usb_free_urb(urbtrack->urb); kfree(urbtrack); From 7965eef853ccd9e30f1ee5d5df27a48b77a22a4b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 19 Aug 2013 13:05:45 +0200 Subject: [PATCH 0782/1368] USB: mos7720: fix big-endian control requests commit 3b716caf190ccc6f2a09387210e0e6a26c1d81a4 upstream. Fix endianess bugs in parallel-port code which caused corrupt control-requests to be issued on big-endian machines. Reported-by: kbuild test robot Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 6577b7d1d2b8..0f16bf6ea71c 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -382,8 +382,8 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, } urbtrack->setup->bRequestType = (__u8)0x40; urbtrack->setup->bRequest = (__u8)0x0e; - urbtrack->setup->wValue = get_reg_value(reg, dummy); - urbtrack->setup->wIndex = get_reg_index(reg); + urbtrack->setup->wValue = cpu_to_le16(get_reg_value(reg, dummy)); + urbtrack->setup->wIndex = cpu_to_le16(get_reg_index(reg)); urbtrack->setup->wLength = 0; usb_fill_control_urb(urbtrack->urb, usbdev, usb_sndctrlpipe(usbdev, 0), From b096d2211a059f6b22f089e3907e061409dbb6c5 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 21 Aug 2013 11:17:21 +0200 Subject: [PATCH 0783/1368] usb: ehci-mxc: check for pdata before dereferencing commit f375fc520d4df0cd9fcb570f33c103c6c0311f9e upstream. Commit 7e8d5cd93fac ("USB: Add EHCI support for MX27 and MX31 based boards") introduced code that could potentially lead to a NULL pointer dereference on driver removal. Fix this by checking for the value of pdata before dereferencing it. Signed-off-by: Daniel Mack Reported-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-mxc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-mxc.c b/drivers/usb/host/ehci-mxc.c index c369767b00e2..ec128bc72deb 100644 --- a/drivers/usb/host/ehci-mxc.c +++ b/drivers/usb/host/ehci-mxc.c @@ -184,7 +184,7 @@ static int ehci_mxc_drv_remove(struct platform_device *pdev) if (pdata && pdata->exit) pdata->exit(pdev); - if (pdata->otg) + if (pdata && pdata->otg) usb_phy_shutdown(pdata->otg); clk_disable_unprepare(priv->usbclk); From c552e84c9bfd1b7feb5cd51e970b8110a31c4b89 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 6 Aug 2013 14:22:59 +0200 Subject: [PATCH 0784/1368] USB: cdc-wdm: fix race between interrupt handler and tasklet commit 6dd433e6cf2475ce8abec1b467720858c24450eb upstream. Both could want to submit the same URB. Some checks of the flag intended to prevent that were missing. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 8a230f0ef77c..d3318a0df8ee 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -209,6 +209,7 @@ static void wdm_in_callback(struct urb *urb) static void wdm_int_callback(struct urb *urb) { int rv = 0; + int responding; int status = urb->status; struct wdm_device *desc; struct usb_cdc_notification *dr; @@ -262,8 +263,8 @@ static void wdm_int_callback(struct urb *urb) spin_lock(&desc->iuspin); clear_bit(WDM_READ, &desc->flags); - set_bit(WDM_RESPONDING, &desc->flags); - if (!test_bit(WDM_DISCONNECTING, &desc->flags) + responding = test_and_set_bit(WDM_RESPONDING, &desc->flags); + if (!responding && !test_bit(WDM_DISCONNECTING, &desc->flags) && !test_bit(WDM_SUSPENDING, &desc->flags)) { rv = usb_submit_urb(desc->response, GFP_ATOMIC); dev_dbg(&desc->intf->dev, "%s: usb_submit_urb %d", @@ -685,16 +686,20 @@ static void wdm_rxwork(struct work_struct *work) { struct wdm_device *desc = container_of(work, struct wdm_device, rxwork); unsigned long flags; - int rv; + int rv = 0; + int responding; spin_lock_irqsave(&desc->iuspin, flags); if (test_bit(WDM_DISCONNECTING, &desc->flags)) { spin_unlock_irqrestore(&desc->iuspin, flags); } else { + responding = test_and_set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irqrestore(&desc->iuspin, flags); - rv = usb_submit_urb(desc->response, GFP_KERNEL); + if (!responding) + rv = usb_submit_urb(desc->response, GFP_KERNEL); if (rv < 0 && rv != -EPERM) { spin_lock_irqsave(&desc->iuspin, flags); + clear_bit(WDM_RESPONDING, &desc->flags); if (!test_bit(WDM_DISCONNECTING, &desc->flags)) schedule_work(&desc->rxwork); spin_unlock_irqrestore(&desc->iuspin, flags); From ec7329b595fa4ef248e1ef9bbb6b49cee416ac7c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 29 Apr 2013 22:18:01 +0200 Subject: [PATCH 0785/1368] usb: gadget: uvc: Fix error handling in uvc_queue_buffer() commit ebe864a6cb8e087ede047fa1fa6b6d06fcb9a9e4 upstream. The conversion to videobuf2 failed to check the return value of vb2_qbuf(). Fix it. Reported-by: Michael Grzeschik Signed-off-by: Laurent Pinchart Tested-By: Michael Grzeschik Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/uvc_queue.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/uvc_queue.c b/drivers/usb/gadget/uvc_queue.c index 7ce27e35550b..de456a5a5e0a 100644 --- a/drivers/usb/gadget/uvc_queue.c +++ b/drivers/usb/gadget/uvc_queue.c @@ -177,12 +177,16 @@ static int uvc_queue_buffer(struct uvc_video_queue *queue, mutex_lock(&queue->mutex); ret = vb2_qbuf(&queue->queue, buf); + if (ret < 0) + goto done; + spin_lock_irqsave(&queue->irqlock, flags); ret = (queue->flags & UVC_QUEUE_PAUSED) != 0; queue->flags &= ~UVC_QUEUE_PAUSED; spin_unlock_irqrestore(&queue->irqlock, flags); - mutex_unlock(&queue->mutex); +done: + mutex_unlock(&queue->mutex); return ret; } From ddcc49fb2db6457861dbb0b99844b2a5b53b5c88 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Mon, 5 Aug 2013 18:58:15 -0700 Subject: [PATCH 0786/1368] usb: Don't fail port power resume on device disconnect. commit d49dad3e11638f66be4e16573ffaa8c46a09e3b3 upstream. Userspace can tell the kernel to power off any USB port, including ones that are visible and connectible to users. When an attached USB device goes into suspend, the port will be powered off if the pm_qos_no_port_poweroff file for its port is set to 0, the device does not have remote wakeup enabled, and the device is marked as persistent. If the user disconnects the USB device while the port is powered off, the current code does not handle that properly. If you disconnect a device, and then run `lsusb -v -s` for the device, the device disconnect does not get handled by the USB core. The runtime resume of the port fails, because hub_port_debounce_be_connected() returns -ETIMEDOUT. This means the port resume fails and khubd doesn't handle the USB device disconnect. This leaves the device listed in lsusb, and the port's runtime_status will be permanently marked as "error". Fix this by ignoring the return value of hub_port_debounce_be_connected. Users can disconnect USB devices while the ports are powered off, and we must be able to handle that. This patch should be backported to kernels as old as 3.9, that contain the commit ad493e5e580546e6c3024b76a41535476da1546a "usb: add usb port auto power off mechanism" Signed-off-by: Sarah Sharp Cc: Lan Tianyu Cc: Alan Stern Cc: Rafael J. Wysocki Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index b8bad294eeb8..ef07b3596d06 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -89,22 +89,19 @@ static int usb_port_runtime_resume(struct device *dev) retval = usb_hub_set_port_power(hdev, port1, true); if (port_dev->child && !retval) { /* - * Wait for usb hub port to be reconnected in order to make - * the resume procedure successful. + * Attempt to wait for usb hub port to be reconnected in order + * to make the resume procedure successful. The device may have + * disconnected while the port was powered off, so ignore the + * return status. */ retval = hub_port_debounce_be_connected(hub, port1); - if (retval < 0) { + if (retval < 0) dev_dbg(&port_dev->dev, "can't get reconnection after setting port power on, status %d\n", retval); - goto out; - } usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); - - /* Set return value to 0 if debounce successful */ retval = 0; } -out: clear_bit(port1, hub->busy_bits); usb_autopm_put_interface(intf); return retval; From 94cc662c4bd9f8487d54262bbfc2307d9b1fc6b1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 30 Aug 2013 10:46:00 -0400 Subject: [PATCH 0787/1368] USB: fix build error when CONFIG_PM_SLEEP isn't enabled commit 9d8924297cd9c256c23c02abae40202563452453 upstream. This patch fixes a build error that occurs when CONFIG_PM is enabled and CONFIG_PM_SLEEP isn't: >> drivers/usb/host/ohci-pci.c:294:10: error: 'usb_hcd_pci_pm_ops' undeclared here (not in a function) .pm = &usb_hcd_pci_pm_ops Since the usb_hcd_pci_pm_ops structure is defined and used when CONFIG_PM is enabled, its declaration should not be protected by CONFIG_PM_SLEEP. Signed-off-by: Alan Stern Reported-by: kbuild test robot Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index f5f5c7dfda90..0fdff28d5015 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -410,7 +410,7 @@ extern int usb_hcd_pci_probe(struct pci_dev *dev, extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif #endif /* CONFIG_PCI */ From ef7198be2830f7d6951512724203326746756a86 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 3 Aug 2013 16:37:48 +0200 Subject: [PATCH 0788/1368] usb: config->desc.bLength may not exceed amount of data returned by the device commit b4f17a488ae2e09bfcf95c0e0b4219c246f1116a upstream. While reading the config parsing code I noticed this check is missing, without this check config->desc.wTotalLength can end up with a value larger then the dev->rawdescriptors length for the config, and when userspace then tries to get the rawdescriptors bad things may happen. Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 7199adccf444..a6b2cabe7930 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -424,7 +424,8 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE); if (config->desc.bDescriptorType != USB_DT_CONFIG || - config->desc.bLength < USB_DT_CONFIG_SIZE) { + config->desc.bLength < USB_DT_CONFIG_SIZE || + config->desc.bLength > size) { dev_err(ddev, "invalid descriptor for config index %d: " "type = 0x%X, length = %d\n", cfgidx, config->desc.bDescriptorType, config->desc.bLength); From b504b4a1e3484b4b40a25ff228432df55cb24758 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 30 Jul 2013 15:39:02 -0400 Subject: [PATCH 0789/1368] USB: handle LPM errors during device suspend correctly commit aa5ceae24bf8dff1d6fe87c6c4b08e69c6d33550 upstream. The hub driver's usb_port_suspend() routine doesn't handle errors related to Link Power Management properly. It always returns failure, it doesn't try to clean up the wakeup setting, (in the case of system sleep) it doesn't try to go ahead with the port suspend regardless, and it doesn't try to apply the new power-off mechanism. This patch fixes these problems. Note: Sarah fixed this patch to apply against 3.11, since the original commit (4fae6f0fa86f92e6bc7429371b1e177ad0aaac66 "USB: handle LPM errors during device suspend correctly") called usb_disable_remote_wakeup, which won't be added until 3.12. This patch should be backported to kernels as old as 3.5, that contain the commit 8306095fd2c1100e8244c09bf560f97aca5a311d "USB: Disable USB 3.0 LPM in critical sections.". There will be merge conflicts, since LTM wasn't added until 3.6. Signed-off-by: Alan Stern Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 71 ++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index da2905a1a18b..b6ea8b6fe90b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2916,7 +2916,6 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); struct usb_port *port_dev = hub->ports[udev->portnum - 1]; - enum pm_qos_flags_status pm_qos_stat; int port1 = udev->portnum; int status; bool really_suspend = true; @@ -2954,7 +2953,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) status); /* bail if autosuspend is requested */ if (PMSG_IS_AUTO(msg)) - return status; + goto err_wakeup; } } @@ -2963,14 +2962,16 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) usb_set_usb2_hardware_lpm(udev, 0); if (usb_disable_ltm(udev)) { - dev_err(&udev->dev, "%s Failed to disable LTM before suspend\n.", - __func__); - return -ENOMEM; + dev_err(&udev->dev, "Failed to disable LTM before suspend\n."); + status = -ENOMEM; + if (PMSG_IS_AUTO(msg)) + goto err_ltm; } if (usb_unlocked_disable_lpm(udev)) { - dev_err(&udev->dev, "%s Failed to disable LPM before suspend\n.", - __func__); - return -ENOMEM; + dev_err(&udev->dev, "Failed to disable LPM before suspend\n."); + status = -ENOMEM; + if (PMSG_IS_AUTO(msg)) + goto err_lpm3; } /* see 7.1.7.6 */ @@ -2998,28 +2999,31 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) if (status) { dev_dbg(hub->intfdev, "can't suspend port %d, status %d\n", port1, status); - /* paranoia: "should not happen" */ - if (udev->do_remote_wakeup) { - if (!hub_is_superspeed(hub->hdev)) { - (void) usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_CLEAR_FEATURE, - USB_RECIP_DEVICE, - USB_DEVICE_REMOTE_WAKEUP, 0, - NULL, 0, - USB_CTRL_SET_TIMEOUT); - } else - (void) usb_disable_function_remotewakeup(udev); - - } + /* Try to enable USB3 LPM and LTM again */ + usb_unlocked_enable_lpm(udev); + err_lpm3: + usb_enable_ltm(udev); + err_ltm: /* Try to enable USB2 hardware LPM again */ if (udev->usb2_hw_lpm_capable == 1) usb_set_usb2_hardware_lpm(udev, 1); - /* Try to enable USB3 LTM and LPM again */ - usb_enable_ltm(udev); - usb_unlocked_enable_lpm(udev); + if (udev->do_remote_wakeup) { + if (udev->speed < USB_SPEED_SUPER) + usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + USB_REQ_CLEAR_FEATURE, + USB_RECIP_DEVICE, + USB_DEVICE_REMOTE_WAKEUP, 0, + NULL, 0, USB_CTRL_SET_TIMEOUT); + else + usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + USB_REQ_CLEAR_FEATURE, + USB_RECIP_INTERFACE, + USB_INTRF_FUNC_SUSPEND, 0, + NULL, 0, USB_CTRL_SET_TIMEOUT); + } + err_wakeup: /* System sleep transitions should never fail */ if (!PMSG_IS_AUTO(msg)) @@ -3041,14 +3045,15 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) * Check whether current status meets the requirement of * usb port power off mechanism */ - pm_qos_stat = dev_pm_qos_flags(&port_dev->dev, - PM_QOS_FLAG_NO_POWER_OFF); - if (!udev->do_remote_wakeup - && pm_qos_stat != PM_QOS_FLAGS_ALL - && udev->persist_enabled - && !status) { - pm_runtime_put_sync(&port_dev->dev); - port_dev->did_runtime_put = true; + if (status == 0 && !udev->do_remote_wakeup && udev->persist_enabled) { + enum pm_qos_flags_status pm_qos_stat; + + pm_qos_stat = dev_pm_qos_flags(&port_dev->dev, + PM_QOS_FLAG_NO_POWER_OFF); + if (pm_qos_stat != PM_QOS_FLAGS_ALL) { + pm_runtime_put_sync(&port_dev->dev); + port_dev->did_runtime_put = true; + } } usb_mark_last_busy(hub->hdev); From e083505592ec12ab57fd930e286607583ecc641c Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 3 Jul 2013 22:17:54 +0800 Subject: [PATCH 0790/1368] usb: don't check pm qos NO_POWER_OFF flag in usb_port_suspend() commit 98a4f1ff7bea8002ab79d6776e30d27932e88244 upstream. The pm qos NO_POWER_OFF flag is checked twice during usb device suspend to see if the usb port power off condition is met. This is redundant and also will prevent the port from being powered off if the NO_POWER_OFF flag is changed to 1 from 0 after the device was already suspended. More detail in the following link. http://marc.info/?l=linux-usb&m=136543949130865&w=2 This patch should be backported to kernels as old as 3.7, that contain the commit f7ac7787ad361e31a7972e2854ed8dc2eedfac3b "usb/acpi: Use ACPI methods to power off ports." Signed-off-by: Lan Tianyu Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b6ea8b6fe90b..6cf2ae0aa1f7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3041,19 +3041,9 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) usb_set_device_state(udev, USB_STATE_SUSPENDED); } - /* - * Check whether current status meets the requirement of - * usb port power off mechanism - */ if (status == 0 && !udev->do_remote_wakeup && udev->persist_enabled) { - enum pm_qos_flags_status pm_qos_stat; - - pm_qos_stat = dev_pm_qos_flags(&port_dev->dev, - PM_QOS_FLAG_NO_POWER_OFF); - if (pm_qos_stat != PM_QOS_FLAGS_ALL) { - pm_runtime_put_sync(&port_dev->dev); - port_dev->did_runtime_put = true; - } + pm_runtime_put_sync(&port_dev->dev); + port_dev->did_runtime_put = true; } usb_mark_last_busy(hub->hdev); From 215840ab83bfdd15deca0a8b0f140369afab7f52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Jun 2013 10:34:48 -0700 Subject: [PATCH 0791/1368] rculist: list_first_or_null_rcu() should use list_entry_rcu() commit c34ac00caefbe49d40058ae7200bd58725cebb45 upstream. list_first_or_null() should test whether the list is empty and return pointer to the first entry if not in a RCU safe manner. It's broken in several ways. * It compares __kernel @__ptr with __rcu @__next triggering the following sparse warning. net/core/dev.c:4331:17: error: incompatible types in comparison expression (different address spaces) * It doesn't perform rcu_dereference*() and computes the entry address using container_of() directly from the __rcu pointer which is inconsitent with other rculist interface. As a result, all three in-kernel users - net/core/dev.c, macvlan, cgroup - are buggy. They dereference the pointer w/o going through read barrier. * While ->next dereference passes through list_next_rcu(), the compiler is still free to fetch ->next more than once and thus nullify the "__ptr != __next" condition check. Fix it by making list_first_or_null_rcu() dereference ->next directly using ACCESS_ONCE() and then use list_entry_rcu() on it like other rculist accessors. v2: Paul pointed out that the compiler may fetch the pointer more than once nullifying the condition check. ACCESS_ONCE() added on ->next dereference. v3: Restored () around macro param which was accidentally removed. Spotted by Paul. Signed-off-by: Tejun Heo Reported-by: Fengguang Wu Cc: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Cc: Li Zefan Cc: Patrick McHardy Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Signed-off-by: Greg Kroah-Hartman --- include/linux/rculist.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index f4b1001a4676..4106721c4e5e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -267,8 +267,9 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_first_or_null_rcu(ptr, type, member) \ ({struct list_head *__ptr = (ptr); \ - struct list_head __rcu *__next = list_next_rcu(__ptr); \ - likely(__ptr != __next) ? container_of(__next, type, member) : NULL; \ + struct list_head *__next = ACCESS_ONCE(__ptr->next); \ + likely(__ptr != __next) ? \ + list_entry_rcu(__next, type, member) : NULL; \ }) /** From a1b8ce5ac7db6a7c29a4ca658d5aff5384bd073e Mon Sep 17 00:00:00 2001 From: Mike Dyer Date: Fri, 16 Aug 2013 18:36:28 +0100 Subject: [PATCH 0792/1368] ASoC: wm8960: Fix PLL register writes commit 85fa532b6ef920b32598df86b194571a7059a77c upstream. Bit 9 of PLL2,3 and 4 is reserved as '0'. The 24bit fractional part should be split across each register in 8bit chunks. Signed-off-by: Mike Dyer Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8960.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 0a4ffdd1d2a7..5e5af898f7f8 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -857,9 +857,9 @@ static int wm8960_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, if (pll_div.k) { reg |= 0x20; - snd_soc_write(codec, WM8960_PLL2, (pll_div.k >> 18) & 0x3f); - snd_soc_write(codec, WM8960_PLL3, (pll_div.k >> 9) & 0x1ff); - snd_soc_write(codec, WM8960_PLL4, pll_div.k & 0x1ff); + snd_soc_write(codec, WM8960_PLL2, (pll_div.k >> 16) & 0xff); + snd_soc_write(codec, WM8960_PLL3, (pll_div.k >> 8) & 0xff); + snd_soc_write(codec, WM8960_PLL4, pll_div.k & 0xff); } snd_soc_write(codec, WM8960_PLL1, reg); From 87d5b89a6a77f1e141d41c5447f6a4e83feb8d11 Mon Sep 17 00:00:00 2001 From: Steffen Trumtrar Date: Mon, 9 Sep 2013 18:09:12 +0200 Subject: [PATCH 0793/1368] ASoC: mc13783: add spi errata fix commit 9f6f0afbb9fdabf6dcac642dfec457f28981e3f8 upstream. The MC13783 Chip Errata, Rev. 4 says, that depending on SPI clock and main audio clock speed, the Audio Codec or Stereo DAC do sometimes not start when programmed to do so. This is due to an internal clock timing issue related to the loading of the SPI bits into the audio block. On an i.MX27 based system, this issue lead to switched audio channels under certain circumstances: RTC + Touch + Audio are used and loaded at startup. The mentioned workaround of writing registers 40 and 41 two times is implemented here. Signed-off-by: Steffen Trumtrar Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/mc13783.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/mc13783.c b/sound/soc/codecs/mc13783.c index 5402dfbbb716..8a8d9364e87f 100644 --- a/sound/soc/codecs/mc13783.c +++ b/sound/soc/codecs/mc13783.c @@ -126,6 +126,10 @@ static int mc13783_write(struct snd_soc_codec *codec, ret = mc13xxx_reg_write(priv->mc13xxx, reg, value); + /* include errata fix for spi audio problems */ + if (reg == MC13783_AUDIO_CODEC || reg == MC13783_AUDIO_DAC) + ret = mc13xxx_reg_write(priv->mc13xxx, reg, value); + mc13xxx_unlock(priv->mc13xxx); return ret; From 1159cd1ded90b41db788f84bf94644a810dd322a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 30 Aug 2013 15:43:03 -0700 Subject: [PATCH 0794/1368] x86, smap: Handle csum_partial_copy_*_user() commit 7263dda41b5a28ae6566fd126d9b06ada73dd721 upstream. Add SMAP annotations to csum_partial_copy_to/from_user(). These functions legitimately access user space and thus need to set the AC flag. TODO: add explicit checks that the side with the kernel space pointer really points into kernel space. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-2aps0u00eer658fd5xyanan7@git.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/checksum_32.h | 22 +++++++++++++++++----- arch/x86/lib/csum-wrappers_64.c | 12 ++++++++++-- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 46fc474fd819..f50de6951738 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -49,9 +49,15 @@ static inline __wsum csum_partial_copy_from_user(const void __user *src, int len, __wsum sum, int *err_ptr) { + __wsum ret; + might_sleep(); - return csum_partial_copy_generic((__force void *)src, dst, - len, sum, err_ptr, NULL); + stac(); + ret = csum_partial_copy_generic((__force void *)src, dst, + len, sum, err_ptr, NULL); + clac(); + + return ret; } /* @@ -176,10 +182,16 @@ static inline __wsum csum_and_copy_to_user(const void *src, int len, __wsum sum, int *err_ptr) { + __wsum ret; + might_sleep(); - if (access_ok(VERIFY_WRITE, dst, len)) - return csum_partial_copy_generic(src, (__force void *)dst, - len, sum, NULL, err_ptr); + if (access_ok(VERIFY_WRITE, dst, len)) { + stac(); + ret = csum_partial_copy_generic(src, (__force void *)dst, + len, sum, NULL, err_ptr); + clac(); + return ret; + } if (len) *err_ptr = -EFAULT; diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 25b7ae8d058a..7609e0e421ec 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -6,6 +6,7 @@ */ #include #include +#include /** * csum_partial_copy_from_user - Copy and checksum from user space. @@ -52,8 +53,10 @@ csum_partial_copy_from_user(const void __user *src, void *dst, len -= 2; } } + stac(); isum = csum_partial_copy_generic((__force const void *)src, dst, len, isum, errp, NULL); + clac(); if (unlikely(*errp)) goto out_err; @@ -82,6 +85,8 @@ __wsum csum_partial_copy_to_user(const void *src, void __user *dst, int len, __wsum isum, int *errp) { + __wsum ret; + might_sleep(); if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { @@ -105,8 +110,11 @@ csum_partial_copy_to_user(const void *src, void __user *dst, } *errp = 0; - return csum_partial_copy_generic(src, (void __force *)dst, - len, isum, NULL, errp); + stac(); + ret = csum_partial_copy_generic(src, (void __force *)dst, + len, isum, NULL, errp); + clac(); + return ret; } EXPORT_SYMBOL(csum_partial_copy_to_user); From a168ad2687045d24e98b2525d1ee678c4f2a9a96 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 1 Sep 2013 20:35:01 +0100 Subject: [PATCH 0795/1368] Introduce [compat_]save_altstack_ex() to unbreak x86 SMAP commit bd1c149aa9915b9abb6d83d0f01dfd2ace0680b5 upstream. For performance reasons, when SMAP is in use, SMAP is left open for an entire put_user_try { ... } put_user_catch(); block, however, calling __put_user() in the middle of that block will close SMAP as the STAC..CLAC constructs intentionally do not nest. Furthermore, using __put_user() rather than put_user_ex() here is bad for performance. Thus, introduce new [compat_]save_altstack_ex() helpers that replace __[compat_]save_altstack() for x86, being currently the only architecture which supports put_user_try { ... } put_user_catch(). Reported-by: H. Peter Anvin Signed-off-by: Al Viro Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-es5p6y64if71k8p5u08agv9n@git.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/ia32/ia32_signal.c | 2 +- arch/x86/kernel/signal.c | 6 +++--- include/linux/compat.h | 7 +++++++ include/linux/signal.h | 8 ++++++++ 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index cf1a471a18a2..10adb41f162e 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -459,7 +459,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); + compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = ksig->ka.sa.sa_restorer; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 69562992e457..087ab2af381a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -364,7 +364,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - err |= __save_altstack(&frame->uc.uc_stack, regs->sp); + save_altstack_ex(&frame->uc.uc_stack, regs->sp); /* Set up to return from userspace. */ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -429,7 +429,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - err |= __save_altstack(&frame->uc.uc_stack, regs->sp); + save_altstack_ex(&frame->uc.uc_stack, regs->sp); /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -496,7 +496,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); + compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); if (ksig->ka.sa.sa_flags & SA_RESTORER) { diff --git a/include/linux/compat.h b/include/linux/compat.h index 7f0c1dd09079..ec1aee4aec9c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -669,6 +669,13 @@ asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, int compat_restore_altstack(const compat_stack_t __user *uss); int __compat_save_altstack(compat_stack_t __user *, unsigned long); +#define compat_save_altstack_ex(uss, sp) do { \ + compat_stack_t __user *__uss = uss; \ + struct task_struct *t = current; \ + put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ + put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \ + put_user_ex(t->sas_ss_size, &__uss->ss_size); \ +} while (0); asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval); diff --git a/include/linux/signal.h b/include/linux/signal.h index d897484730c0..2ac423bdb676 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -434,6 +434,14 @@ void signals_init(void); int restore_altstack(const stack_t __user *); int __save_altstack(stack_t __user *, unsigned long); +#define save_altstack_ex(uss, sp) do { \ + stack_t __user *__uss = uss; \ + struct task_struct *t = current; \ + put_user_ex((void __user *)t->sas_ss_sp, &__uss->ss_sp); \ + put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \ + put_user_ex(t->sas_ss_size, &__uss->ss_size); \ +} while (0); + #ifdef CONFIG_PROC_FS struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); From 5e3db40138469c967ea0d323eeb8143f2d46a6aa Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Fri, 2 Aug 2013 17:43:02 -0500 Subject: [PATCH 0796/1368] pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models. commit 6bdaa63c2957ac04e8d596880f732b79f9c06c3c upstream. Add PCI device IDs for AMD F15h, model 30h. They will be used in amd_nb.c and amd64_edac.c Signed-off-by: Aravind Gopalakrishnan Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c12916248469..6c7bb35ad6d1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -518,6 +518,8 @@ #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 #define PCI_DEVICE_ID_AMD_15H_M10H_F3 0x1403 +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F3 0x141d +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F4 0x141e #define PCI_DEVICE_ID_AMD_15H_NB_F0 0x1600 #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 From b50361f39ca5a27cdadbd53b7a7d46daa3cde151 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Fri, 2 Aug 2013 17:43:03 -0500 Subject: [PATCH 0797/1368] x86, amd_nb: Clarify F15h, model 30h GART and L3 support commit 7d64ac6422092adbbdaa279ab32f9d4c90a84558 upstream. F15h, models 0x30 and later don't have a GART. Note that. Also check CPUID leaf 0x80000006 for L3 prescence because there are models which don't sport an L3 cache. Signed-off-by: Aravind Gopalakrishnan [ Boris: rewrite commit message, cleanup comments. ] Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/amd_nb.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 3048ded1b598..59554dca96ec 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, {} }; @@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids); static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, {} }; @@ -81,12 +83,19 @@ int amd_cache_northbridges(void) next_northbridge(misc, amd_nb_misc_ids); node_to_amd_nb(i)->link = link = next_northbridge(link, amd_nb_link_ids); - } + } + /* GART present only on Fam15h upto model 0fh */ if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || - boot_cpu_data.x86 == 0x15) + (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) amd_northbridges.flags |= AMD_NB_GART; + /* + * Check for L3 cache presence. + */ + if (!cpuid_edx(0x80000006)) + return 0; + /* * Some CPU families support L3 Cache Index Disable. There are some * limitations because of E382 and E388 on family 0x10. From 8ceb02f11454cf5102bfede5a6e112abe3990bb2 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 24 Jul 2013 13:54:20 -0700 Subject: [PATCH 0798/1368] x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors commit 0ca06c0857aee11911f91621db14498496f2c2cd upstream. The 0x1000 bit of the MCACOD field of machine check MCi_STATUS registers is only defined for corrected errors (where it means that hardware may be filtering errors see SDM section 15.9.2.1). For uncorrected errors it may, or may not be set - so we should mask it out when checking for the architecturaly defined recoverable error signatures (see SDM 15.9.3.1 and 15.9.3.2) Acked-by: Naveen N. Rao Signed-off-by: Tony Luck Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mce.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index fa5f71e021d5..e6833c655e59 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -32,11 +32,20 @@ #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ -#define MCACOD 0xffff /* MCA Error Code */ + +/* + * Note that the full MCACOD field of IA32_MCi_STATUS MSR is + * bits 15:0. But bit 12 is the 'F' bit, defined for corrected + * errors to indicate that errors are being filtered by hardware. + * We should mask out bit 12 when looking for specific signatures + * of uncorrected errors - so the F bit is deliberately skipped + * in this #define. + */ +#define MCACOD 0xefff /* MCA Error Code */ /* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ -#define MCACOD_SCRUBMSK 0xfff0 +#define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */ #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ #define MCACOD_DATA 0x0134 /* Data Load */ #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ From c18c0f9da9d97ea6710804971b981456a40cd01a Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 31 Jul 2013 22:14:21 -0400 Subject: [PATCH 0799/1368] sched/x86: Optimize switch_mm() for multi-threaded workloads commit 8f898fbbe5ee5e20a77c4074472a1fd088dc47d1 upstream. Dick Fowles, Don Zickus and Joe Mario have been working on improvements to perf, and noticed heavy cache line contention on the mm_cpumask, running linpack on a 60 core / 120 thread system. The cause turned out to be unnecessary atomic accesses to the mm_cpumask. When in lazy TLB mode, the CPU is only removed from the mm_cpumask if there is a TLB flush event. Most of the time, no such TLB flush happens, and the kernel skips the TLB reload. It can also skip the atomic memory set & test. Here is a summary of Joe's test results: * The __schedule function dropped from 24% of all program cycles down to 5.5%. * The cacheline contention/hotness for accesses to that bitmask went from being the 1st/2nd hottest - down to the 84th hottest (0.3% of all shared misses which is now quite cold) * The average load latency for the bit-test-n-set instruction in __schedule dropped from 10k-15k cycles down to an average of 600 cycles. * The linpack program results improved from 133 GFlops to 144 GFlops. Peak GFlops rose from 133 to 153. Reported-by: Don Zickus Reported-by: Joe Mario Tested-by: Joe Mario Signed-off-by: Rik van Riel Reviewed-by: Paul Turner Acked-by: Linus Torvalds Link: http://lkml.kernel.org/r/20130731221421.616d3d20@annuminas.surriel.com [ Made the comments consistent around the modified code. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index cdbf36776106..be12c534fd59 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -45,22 +45,28 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Re-load page tables */ load_cr3(next->pgd); - /* stop flush ipis for the previous mm */ + /* Stop flush ipis for the previous mm */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); - /* - * load the LDT, if the LDT is different: - */ + /* Load the LDT, if the LDT is different: */ if (unlikely(prev->context.ldt != next->context.ldt)) load_LDT_nolock(&next->context); } #ifdef CONFIG_SMP - else { + else { this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); - if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { - /* We were in lazy tlb mode and leave_mm disabled + if (!cpumask_test_cpu(cpu, mm_cpumask(next))) { + /* + * On established mms, the mm_cpumask is only changed + * from irq context, from ptep_clear_flush() while in + * lazy tlb mode, and here. Irqs are blocked during + * schedule, protecting us from simultaneous changes. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); + /* + * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. */ From dc780f7fc64b6ca11854cd5ac724b85fa1853495 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Sep 2013 12:33:02 +0200 Subject: [PATCH 0800/1368] ALSA: hda - Re-setup HDMI pin and audio infoframe on stream switches commit b054087dbacee30a9dddaef2c9a96312146be04e upstream. When the transcoder:port mapping on Haswell HDMI/DP audio is changed during the stream playback, the sound gets lost. Typically this problem is seen when the user switches the graphics mode from eDP+DP to DP-only configuration, where CRTC 1 is used for DP in the former while CRTC 0 is used for the latter. The graphics controller notifies the change via the normal ELD update procedure, so we get the intrinsic event. For enabling the sound again, the HDMI audio driver needs to reset the pin and set up the audio infoframe again. This patch achieves it by: - keep the current status of channels and info frame setup in per_pin struct, - check the reconnection in the intrinsic event handler, - reset the pin and the re-invoke hdmi_setup_audio_infoframe() accordingly. The hdmi_setup_audio_infoframe() function has been changed, too, so that it can be invoked without passing the substream instance. The patch is mostly based on the work by Mengdong Lin. Cc: Mengdong Lin Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 41 ++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 5bc419452198..83c1ad54ae75 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -67,6 +67,8 @@ struct hdmi_spec_per_pin { struct delayed_work work; struct snd_kcontrol *eld_ctl; int repoll_count; + bool setup; /* the stream has been set up by prepare callback */ + int channels; /* current number of channels */ bool non_pcm; bool chmap_set; /* channel-map override by ALSA API? */ unsigned char chmap[8]; /* ALSA API channel-map */ @@ -868,18 +870,19 @@ static bool hdmi_infoframe_uptodate(struct hda_codec *codec, hda_nid_t pin_nid, return true; } -static void hdmi_setup_audio_infoframe(struct hda_codec *codec, int pin_idx, - bool non_pcm, - struct snd_pcm_substream *substream) +static void hdmi_setup_audio_infoframe(struct hda_codec *codec, + struct hdmi_spec_per_pin *per_pin, + bool non_pcm) { - struct hdmi_spec *spec = codec->spec; - struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx); hda_nid_t pin_nid = per_pin->pin_nid; - int channels = substream->runtime->channels; + int channels = per_pin->channels; struct hdmi_eld *eld; int ca; union audio_infoframe ai; + if (!channels) + return; + eld = &per_pin->sink_eld; if (!eld->monitor_present) return; @@ -1263,6 +1266,7 @@ static void hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) eld_changed = true; } if (update_eld) { + bool old_eld_valid = pin_eld->eld_valid; pin_eld->eld_valid = eld->eld_valid; eld_changed = pin_eld->eld_size != eld->eld_size || memcmp(pin_eld->eld_buffer, eld->eld_buffer, @@ -1272,6 +1276,18 @@ static void hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) eld->eld_size); pin_eld->eld_size = eld->eld_size; pin_eld->info = eld->info; + + /* Haswell-specific workaround: re-setup when the transcoder is + * changed during the stream playback + */ + if (codec->vendor_id == 0x80862807 && + eld->eld_valid && !old_eld_valid && per_pin->setup) { + snd_hda_codec_write(codec, pin_nid, 0, + AC_VERB_SET_AMP_GAIN_MUTE, + AMP_OUT_UNMUTE); + hdmi_setup_audio_infoframe(codec, per_pin, + per_pin->non_pcm); + } } mutex_unlock(&pin_eld->lock); @@ -1444,14 +1460,17 @@ static int generic_hdmi_playback_pcm_prepare(struct hda_pcm_stream *hinfo, hda_nid_t cvt_nid = hinfo->nid; struct hdmi_spec *spec = codec->spec; int pin_idx = hinfo_to_pin_index(spec, hinfo); - hda_nid_t pin_nid = get_pin(spec, pin_idx)->pin_nid; + struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx); + hda_nid_t pin_nid = per_pin->pin_nid; bool non_pcm; non_pcm = check_non_pcm_per_cvt(codec, cvt_nid); + per_pin->channels = substream->runtime->channels; + per_pin->setup = true; hdmi_set_channel_count(codec, cvt_nid, substream->runtime->channels); - hdmi_setup_audio_infoframe(codec, pin_idx, non_pcm, substream); + hdmi_setup_audio_infoframe(codec, per_pin, non_pcm); return hdmi_setup_stream(codec, cvt_nid, pin_nid, stream_tag, format); } @@ -1491,6 +1510,9 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, snd_hda_spdif_ctls_unassign(codec, pin_idx); per_pin->chmap_set = false; memset(per_pin->chmap, 0, sizeof(per_pin->chmap)); + + per_pin->setup = false; + per_pin->channels = 0; } return 0; @@ -1626,8 +1648,7 @@ static int hdmi_chmap_ctl_put(struct snd_kcontrol *kcontrol, per_pin->chmap_set = true; memcpy(per_pin->chmap, chmap, sizeof(chmap)); if (prepared) - hdmi_setup_audio_infoframe(codec, pin_idx, per_pin->non_pcm, - substream); + hdmi_setup_audio_infoframe(codec, per_pin, per_pin->non_pcm); return 0; } From 686edbc09d22070e8ff3edaf7a6f53f80b4517b0 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Sun, 1 Sep 2013 14:36:47 +0300 Subject: [PATCH 0801/1368] ALSA: hda - hdmi: Fallback to ALSA allocation when selecting CA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 18e391862cceaf43ddb8eb5cca05e1a83abdebaa upstream. hdmi_channel_allocation() tries to find a HDMI channel allocation that matches the number channels in the playback stream and contains only speakers that the HDMI sink has reported as available via EDID. If no such allocation is found, 0 (stereo audio) is used. Using CA 0 causes the audio causes the sink to discard everything except the first two channels (front left and front right). However, the sink may be capable of receiving more channels than it has speakers (and then perform downmix or discard the extra channels), in which case it is preferable to use a CA that contains extra channels than to use CA 0 which discards all the non-stereo channels. Additionally, it seems that HBR (HD) passthrough output does not work on Intel HDMI codecs when CA is set to 0 (possibly the codec zeroes channels not present in CA). This happens with all receivers that report a 5.1 speaker mask since a HBR stream is carried on 8 channels to the codec. Add a fallback in the CA selection so that the CA channel count at least matches the stream channel count, even if the stream contains channels not present in the sink speaker descriptor. Thanks to GrimGriefer at OpenELEC forums for discovering that changing the sink speaker mask allowed HBR output. Reported-by: GrimGriefer Reported-by: Ashecrow Reported-by: Frank Zafka Reported-by: Peter Frühberger Signed-off-by: Anssi Hannula Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 83c1ad54ae75..b5375ed022d7 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -553,6 +553,17 @@ static int hdmi_channel_allocation(struct hdmi_eld *eld, int channels) } } + if (!ca) { + /* if there was no match, select the regular ALSA channel + * allocation with the matching number of channels */ + for (i = 0; i < ARRAY_SIZE(channel_allocations); i++) { + if (channels == channel_allocations[i].channels) { + ca = channel_allocations[i].ca_index; + break; + } + } + } + snd_print_channel_allocation(eld->info.spk_alloc, buf, sizeof(buf)); snd_printdd("HDMI: select CA 0x%x for %d-channel allocation: %s\n", ca, channels, buf); From ff70cfafb27ead6e06ba3d2e2f648fee7137d9e8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Sep 2013 10:20:48 +0200 Subject: [PATCH 0802/1368] ALSA: hda - Add Toshiba Satellite C870 to MSI blacklist commit 83f72151352791836a1b9c1542614cc9bf71ac61 upstream. Toshiba Satellite C870 shows interrupt problems occasionally when certain mixer controls like "Mic Switch" is toggled. This seems worked around by not using MSI. Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=833585 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index de18722c4873..624e6c044a44 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -3335,6 +3335,7 @@ static struct snd_pci_quirk msi_black_list[] = { SND_PCI_QUIRK(0x1043, 0x81f2, "ASUS", 0), /* Athlon64 X2 + nvidia */ SND_PCI_QUIRK(0x1043, 0x81f6, "ASUS", 0), /* nvidia */ SND_PCI_QUIRK(0x1043, 0x822d, "ASUS", 0), /* Athlon64 X2 + nvidia MCP55 */ + SND_PCI_QUIRK(0x1179, 0xfb44, "Toshiba Satellite C870", 0), /* AMD Hudson */ SND_PCI_QUIRK(0x1849, 0x0888, "ASRock", 0), /* Athlon64 X2 + nvidia */ SND_PCI_QUIRK(0xa0a0, 0x0575, "Aopen MZ915-M", 0), /* ICH6 */ {} From 25a870d4dc930531f545aa1ae0cdca281d99f980 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Tue, 27 Aug 2013 15:19:21 +0200 Subject: [PATCH 0803/1368] pinctrl: at91: fix get_pullup/down function return commit 05d3534a321d7fe4524b3b83bb20318282f3ec2c upstream. In PIO_PUSR and PIO_PPDSR register if a given bit is set 1 this means the pullup/down for this pin (pin is represented as a bit position) is disabled. Signed-off-by: Boris BREZILLON Acked-by: Nicolas Ferre Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-at91.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 5d7529ed5392..314e5e8e9122 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -325,7 +325,7 @@ static void at91_mux_disable_interrupt(void __iomem *pio, unsigned mask) static unsigned at91_mux_get_pullup(void __iomem *pio, unsigned pin) { - return (readl_relaxed(pio + PIO_PUSR) >> pin) & 0x1; + return !((readl_relaxed(pio + PIO_PUSR) >> pin) & 0x1); } static void at91_mux_set_pullup(void __iomem *pio, unsigned mask, bool on) @@ -445,7 +445,7 @@ static void at91_mux_pio3_set_debounce(void __iomem *pio, unsigned mask, static bool at91_mux_pio3_get_pulldown(void __iomem *pio, unsigned pin) { - return (__raw_readl(pio + PIO_PPDSR) >> pin) & 0x1; + return !((__raw_readl(pio + PIO_PPDSR) >> pin) & 0x1); } static void at91_mux_pio3_set_pulldown(void __iomem *pio, unsigned mask, bool is_on) From 263c784f2b66e18e34208a2f0e56df65c039d918 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 17 Aug 2013 10:07:17 -0400 Subject: [PATCH 0804/1368] ext4: simplify truncation code in ext4_setattr() commit 5208386c501276df18fee464e21d3c58d2d79517 upstream. Merge conditions in ext4_setattr() handling inode size changes, also move ext4_begin_ordered_truncate() call somewhat earlier because it simplifies error recovery in case of failure. Also add error handling in case i_disksize update fails. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 109 ++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 60 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ccbfbbb12dc5..904ca1a21dce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4706,7 +4706,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_journal_stop(handle); } - if (attr->ia_valid & ATTR_SIZE) { + if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { + handle_t *handle; + loff_t oldsize = inode->i_size; if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -4714,73 +4716,60 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size > sbi->s_bitmap_maxbytes) return -EFBIG; } - } - - if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && - (attr->ia_size < inode->i_size)) { - handle_t *handle; - - handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - goto err_out; - } - if (ext4_handle_valid(handle)) { - error = ext4_orphan_add(handle, inode); - orphan = 1; - } - EXT4_I(inode)->i_disksize = attr->ia_size; - rc = ext4_mark_inode_dirty(handle, inode); - if (!error) - error = rc; - ext4_journal_stop(handle); - - if (ext4_should_order_data(inode)) { - error = ext4_begin_ordered_truncate(inode, + if (S_ISREG(inode->i_mode) && + (attr->ia_size < inode->i_size)) { + if (ext4_should_order_data(inode)) { + error = ext4_begin_ordered_truncate(inode, attr->ia_size); - if (error) { - /* Do as much error cleanup as possible */ - handle = ext4_journal_start(inode, - EXT4_HT_INODE, 3); - if (IS_ERR(handle)) { - ext4_orphan_del(NULL, inode); + if (error) goto err_out; - } - ext4_orphan_del(handle, inode); - orphan = 0; - ext4_journal_stop(handle); + } + handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } + if (ext4_handle_valid(handle)) { + error = ext4_orphan_add(handle, inode); + orphan = 1; + } + EXT4_I(inode)->i_disksize = attr->ia_size; + rc = ext4_mark_inode_dirty(handle, inode); + if (!error) + error = rc; + ext4_journal_stop(handle); + if (error) { + ext4_orphan_del(NULL, inode); goto err_out; } } - } - if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != inode->i_size) { - loff_t oldsize = inode->i_size; - - i_size_write(inode, attr->ia_size); - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. Temporarily disable - * dioread_nolock to prevent livelock. - */ - if (orphan) { - if (!ext4_should_journal_data(inode)) { - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - ext4_inode_resume_unlocked_dio(inode); - } else - ext4_wait_for_tail_page_commit(inode); - } - /* - * Truncate pagecache after we've waited for commit - * in data=journal mode to make pages freeable. - */ - truncate_pagecache(inode, oldsize, inode->i_size); + i_size_write(inode, attr->ia_size); + /* + * Blocks are going to be removed from the inode. Wait + * for dio in flight. Temporarily disable + * dioread_nolock to prevent livelock. + */ + if (orphan) { + if (!ext4_should_journal_data(inode)) { + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + ext4_inode_resume_unlocked_dio(inode); + } else + ext4_wait_for_tail_page_commit(inode); } - ext4_truncate(inode); + /* + * Truncate pagecache after we've waited for commit + * in data=journal mode to make pages freeable. + */ + truncate_pagecache(inode, oldsize, inode->i_size); } + /* + * We want to call ext4_truncate() even if attr->ia_size == + * inode->i_size for cases like truncation of fallocated space + */ + if (attr->ia_valid & ATTR_SIZE) + ext4_truncate(inode); if (!rc) { setattr_copy(inode, attr); From d31a13c733d14114da0c7f6cf01396a6a2e1ccb2 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 9 Aug 2013 13:36:21 -0400 Subject: [PATCH 0805/1368] brcmsmac: Fix WARNING caused by lack of calls to dma_mapping_error() commit 67d0cf50bd32b66eab709871714e55725ee30ce4 upstream. The driver fails to check the results of DMA mapping in twp places, which results in the following warning: [ 28.078515] ------------[ cut here ]------------ [ 28.078529] WARNING: at lib/dma-debug.c:937 check_unmap+0x47e/0x930() [ 28.078533] bcma-pci-bridge 0000:0e:00.0: DMA-API: device driver failed to check map error[device address=0x00000000b5d60d6c] [size=1876 bytes] [mapped as single] [ 28.078536] Modules linked in: bnep bluetooth vboxpci(O) vboxnetadp(O) vboxnetflt(O) vboxdrv(O) ipv6 b43 brcmsmac rtl8192cu rtl8192c_common rtlwifi mac802 11 brcmutil cfg80211 snd_hda_codec_conexant rng_core snd_hda_intel kvm_amd snd_hda_codec ssb kvm mmc_core snd_pcm snd_seq snd_timer snd_seq_device snd k8temp cordic joydev serio_raw hwmon sr_mod sg pcmcia pcmcia_core soundcore cdrom i2c_nforce2 i2c_core forcedeth bcma snd_page_alloc autofs4 ext4 jbd2 mbcache crc1 6 scsi_dh_alua scsi_dh_hp_sw scsi_dh_rdac scsi_dh_emc scsi_dh ata_generic pata_amd [ 28.078602] CPU: 1 PID: 2570 Comm: NetworkManager Tainted: G O 3.10.0-rc7-wl+ #42 [ 28.078605] Hardware name: Hewlett-Packard HP Pavilion dv2700 Notebook PC/30D6, BIOS F.27 11/27/2008 [ 28.078607] 0000000000000009 ffff8800bbb03ad8 ffffffff8144f898 ffff8800bbb03b18 [ 28.078612] ffffffff8103e1eb 0000000000000002 ffff8800b719f480 ffff8800b7b9c010 [ 28.078617] ffffffff824204c0 ffffffff81754d57 0000000000000754 ffff8800bbb03b78 [ 28.078622] Call Trace: [ 28.078624] [] dump_stack+0x19/0x1b [ 28.078634] [] warn_slowpath_common+0x6b/0xa0 [ 28.078638] [] warn_slowpath_fmt+0x41/0x50 [ 28.078650] [] check_unmap+0x47e/0x930 [ 28.078655] [] debug_dma_unmap_page+0x5c/0x70 [ 28.078679] [] dma64_getnextrxp+0x10c/0x190 [brcmsmac] [ 28.078691] [] dma_rx+0x62/0x240 [brcmsmac] [ 28.078707] [] brcms_c_dpc+0x211/0x9d0 [brcmsmac] [ 28.078717] [] ? brcms_dpc+0x27/0xf0 [brcmsmac] [ 28.078731] [] brcms_dpc+0x47/0xf0 [brcmsmac] [ 28.078736] [] tasklet_action+0x6c/0xf0 --snip-- [ 28.078974] [] SyS_sendmsg+0xd/0x20 [ 28.078979] [] tracesys+0xdd/0xe2 [ 28.078982] ---[ end trace 6164d1a08148e9c8 ]--- [ 28.078984] Mapped at: [ 28.078985] [] debug_dma_map_page+0x9d/0x150 [ 28.078989] [] dma_rxfill+0x102/0x3d0 [brcmsmac] [ 28.079001] [] brcms_c_init+0x87d/0x1100 [brcmsmac] [ 28.079010] [] brcms_init+0x21/0x30 [brcmsmac] [ 28.079018] [] brcms_c_up+0x150/0x430 [brcmsmac] As the patch adds a new failure mechanism to dma_rxfill(). When I changed the comment at the start of the routine to add that information, I also polished the wording. Signed-off-by: Larry Finger Cc: Brett Rudley Cc: Franky (Zhenhui) Lin Cc: Hante Meuleman Cc: brcm80211-dev-list@broadcom.com Acked-by: Arend van Spriel Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/brcm80211/brcmsmac/dma.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmsmac/dma.c b/drivers/net/wireless/brcm80211/brcmsmac/dma.c index 1860c572b3c4..4fb9635d3919 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/dma.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/dma.c @@ -1015,9 +1015,10 @@ static bool dma64_txidle(struct dma_info *di) /* * post receive buffers - * return false is refill failed completely and ring is empty this will stall - * the rx dma and user might want to call rxfill again asap. This unlikely - * happens on memory-rich NIC, but often on memory-constrained dongle + * Return false if refill failed completely or dma mapping failed. The ring + * is empty, which will stall the rx dma and user might want to call rxfill + * again asap. This is unlikely to happen on a memory-rich NIC, but often on + * memory-constrained dongle. */ bool dma_rxfill(struct dma_pub *pub) { @@ -1078,6 +1079,8 @@ bool dma_rxfill(struct dma_pub *pub) pa = dma_map_single(di->dmadev, p->data, di->rxbufsize, DMA_FROM_DEVICE); + if (dma_mapping_error(di->dmadev, pa)) + return false; /* save the free packet pointer */ di->rxp[rxout] = p; @@ -1284,7 +1287,11 @@ static void dma_txenq(struct dma_info *di, struct sk_buff *p) /* get physical address of buffer start */ pa = dma_map_single(di->dmadev, data, len, DMA_TO_DEVICE); - + /* if mapping failed, free skb */ + if (dma_mapping_error(di->dmadev, pa)) { + brcmu_pkt_buf_free_skb(p); + return; + } /* With a DMA segment list, Descriptor table is filled * using the segment list instead of looping over * buffers in multi-chain DMA. Therefore, EOF for SGLIST From 31f34c79a7e8fb75c7bf7d61d56fc8eeff2a7886 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 6 Aug 2013 14:18:10 +0200 Subject: [PATCH 0806/1368] ath9k: always clear ps filter bit on new assoc commit 026d5b07c03458f9c0ccd19c3850564a5409c325 upstream. Otherwise in some cases, EAPOL frames might be filtered during the initial handshake, causing delays and assoc failures. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/xmit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 83ab6be3fe6d..e752f5d4995d 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2387,6 +2387,7 @@ void ath_tx_node_init(struct ath_softc *sc, struct ath_node *an) for (acno = 0, ac = &an->ac[acno]; acno < IEEE80211_NUM_ACS; acno++, ac++) { ac->sched = false; + ac->clear_ps_filter = true; ac->txq = sc->tx.txq_map[acno]; INIT_LIST_HEAD(&ac->tid_q); } From 8e4d4c932d23091953185297d40315e8ba76837f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 10 Aug 2013 15:59:15 +0200 Subject: [PATCH 0807/1368] ath9k: fix rx descriptor related race condition commit e96542e55a2aacf4bdeccfe2f17b77c4895b4df2 upstream. Similar to a race condition that exists in the tx path, the hardware might re-read the 'next' pointer of a descriptor of the last completed frame. This only affects non-EDMA (pre-AR93xx) devices. To deal with this race, defer clearing and re-linking a completed rx descriptor until the next one has been processed. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ath9k.h | 5 +---- drivers/net/wireless/ath/ath9k/recv.c | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 42b03dc39d14..4ebd9fd8a0a4 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -79,10 +79,6 @@ struct ath_config { sizeof(struct ath_buf_state)); \ } while (0) -#define ATH_RXBUF_RESET(_bf) do { \ - (_bf)->bf_stale = false; \ - } while (0) - /** * enum buffer_type - Buffer type flags * @@ -316,6 +312,7 @@ struct ath_rx { struct ath_descdma rxdma; struct ath_rx_edma rx_edma[ATH9K_RX_QUEUE_MAX]; + struct ath_buf *buf_hold; struct sk_buff *frag; u32 ampdu_ref; diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 8be2b5d8c155..f53dbd1133ce 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -42,8 +42,6 @@ static void ath_rx_buf_link(struct ath_softc *sc, struct ath_buf *bf) struct ath_desc *ds; struct sk_buff *skb; - ATH_RXBUF_RESET(bf); - ds = bf->bf_desc; ds->ds_link = 0; /* link to null */ ds->ds_data = bf->bf_buf_addr; @@ -70,6 +68,14 @@ static void ath_rx_buf_link(struct ath_softc *sc, struct ath_buf *bf) sc->rx.rxlink = &ds->ds_link; } +static void ath_rx_buf_relink(struct ath_softc *sc, struct ath_buf *bf) +{ + if (sc->rx.buf_hold) + ath_rx_buf_link(sc, sc->rx.buf_hold); + + sc->rx.buf_hold = bf; +} + static void ath_setdefantenna(struct ath_softc *sc, u32 antenna) { /* XXX block beacon interrupts */ @@ -117,7 +123,6 @@ static bool ath_rx_edma_buf_link(struct ath_softc *sc, skb = bf->bf_mpdu; - ATH_RXBUF_RESET(bf); memset(skb->data, 0, ah->caps.rx_status_len); dma_sync_single_for_device(sc->dev, bf->bf_buf_addr, ah->caps.rx_status_len, DMA_TO_DEVICE); @@ -432,6 +437,7 @@ int ath_startrecv(struct ath_softc *sc) if (list_empty(&sc->rx.rxbuf)) goto start_recv; + sc->rx.buf_hold = NULL; sc->rx.rxlink = NULL; list_for_each_entry_safe(bf, tbf, &sc->rx.rxbuf, list) { ath_rx_buf_link(sc, bf); @@ -677,6 +683,9 @@ static struct ath_buf *ath_get_next_rx_buf(struct ath_softc *sc, } bf = list_first_entry(&sc->rx.rxbuf, struct ath_buf, list); + if (bf == sc->rx.buf_hold) + return NULL; + ds = bf->bf_desc; /* @@ -1378,7 +1387,7 @@ int ath_rx_tasklet(struct ath_softc *sc, int flush, bool hp) if (edma) { ath_rx_edma_buf_link(sc, qtype); } else { - ath_rx_buf_link(sc, bf); + ath_rx_buf_relink(sc, bf); ath9k_hw_rxena(ah); } } while (1); From 4bd13a76d67743f11e4f05c60340a333f54bb92b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 13 Aug 2013 12:33:28 +0200 Subject: [PATCH 0808/1368] ath9k: avoid accessing MRC registers on single-chain devices commit a1c781bb20ac1e03280e420abd47a99eb8bbdd3b upstream. They are not implemented, and accessing them might trigger errors Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar9003_phy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index e1714d7c9eeb..3457ca5382f4 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -1076,6 +1076,10 @@ static bool ar9003_hw_ani_control(struct ath_hw *ah, * is_on == 0 means MRC CCK is OFF (more noise imm) */ bool is_on = param ? 1 : 0; + + if (ah->caps.rx_chainmask == 1) + break; + REG_RMW_FIELD(ah, AR_PHY_MRC_CCK_CTRL, AR_PHY_MRC_CCK_ENABLE, is_on); REG_RMW_FIELD(ah, AR_PHY_MRC_CCK_CTRL, From 32a190b73789d7d26846ca52d4d7eaacbf9ad6b7 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Sun, 1 Sep 2013 15:31:44 +0200 Subject: [PATCH 0809/1368] HID: Correct the USB IDs for the new Macbook Air 6 commit 8c89cc17b91992845bd635813cd162fe8dfcec6e upstream. A recent patch (9d9a04ee) added support for the new machine, but got the sequence of USB ids wrong. Reports from both Ian and Linus T show that the 0x0291 id is for ISO, not ANSI, which should have the missing number 0x0290. This patchs moves the three numbers accordingly, fixing the problem. Reported-and-tested-by: Ian Munsie Tested-by: Linus G Thiel Signed-off-by: Henrik Rydberg Acked-by: Dmitry Torokhov Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 6 +++--- drivers/input/mouse/bcm5974.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 216888538c50..2a6d15d00e64 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -135,9 +135,9 @@ #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS 0x023b #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI 0x0255 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO 0x0256 -#define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0291 -#define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0292 -#define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0293 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0290 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0291 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0292 #define USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY 0x030a #define USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY 0x030b #define USB_DEVICE_ID_APPLE_IRCONTROL 0x8240 diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 4ef4d5e198ae..a73f9618b0ad 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -89,9 +89,9 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO 0x025a #define USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS 0x025b /* MacbookAir6,2 (unibody, June 2013) */ -#define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0291 -#define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0292 -#define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0293 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0290 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0291 +#define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0292 #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ From 769eea247c41519e72675c72d0da8d95fd6b7c09 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:30:49 +0200 Subject: [PATCH 0810/1368] HID: pantherlord: validate output report details commit 412f30105ec6735224535791eed5cdc02888ecb4 upstream. A HID device could send a malicious output report that would cause the pantherlord HID driver to write beyond the output report allocation during initialization, causing a heap overflow: [ 310.939483] usb 1-1: New USB device found, idVendor=0e8f, idProduct=0003 ... [ 315.980774] BUG kmalloc-192 (Tainted: G W ): Redzone overwritten CVE-2013-2892 Signed-off-by: Kees Cook Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-pl.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-pl.c b/drivers/hid/hid-pl.c index d29112fa5cd5..2dcd7d98dbd6 100644 --- a/drivers/hid/hid-pl.c +++ b/drivers/hid/hid-pl.c @@ -132,8 +132,14 @@ static int plff_init(struct hid_device *hid) strong = &report->field[0]->value[2]; weak = &report->field[0]->value[3]; debug("detected single-field device"); - } else if (report->maxfield >= 4 && report->field[0]->maxusage == 1 && - report->field[0]->usage[0].hid == (HID_UP_LED | 0x43)) { + } else if (report->field[0]->maxusage == 1 && + report->field[0]->usage[0].hid == + (HID_UP_LED | 0x43) && + report->maxfield >= 4 && + report->field[0]->report_count >= 1 && + report->field[1]->report_count >= 1 && + report->field[2]->report_count >= 1 && + report->field[3]->report_count >= 1) { report->field[0]->value[0] = 0x00; report->field[1]->value[0] = 0x00; strong = &report->field[2]->value[0]; From 469e7f80f25f79a4c11bed47027da81fcf30d045 Mon Sep 17 00:00:00 2001 From: Stefan Kriwanek Date: Sun, 25 Aug 2013 10:46:13 +0200 Subject: [PATCH 0811/1368] HID: Fix Speedlink VAD Cezanne support for some devices commit 06bb5219118fb098f4b0c7dcb484b28a52bf1c14 upstream. Some devices of the "Speedlink VAD Cezanne" model need more aggressive fixing than already done. I made sure through testing that this patch would not interfere with the proper working of a device that is bug-free. (The driver drops EV_REL events with abs(val) >= 256, which are not achievable even on the highest laser resolution hardware setting.) Signed-off-by: Stefan Kriwanek Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-speedlink.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-speedlink.c b/drivers/hid/hid-speedlink.c index a2f587d004e1..7112f3e832ee 100644 --- a/drivers/hid/hid-speedlink.c +++ b/drivers/hid/hid-speedlink.c @@ -3,7 +3,7 @@ * Fixes "jumpy" cursor and removes nonexistent keyboard LEDS from * the HID descriptor. * - * Copyright (c) 2011 Stefan Kriwanek + * Copyright (c) 2011, 2013 Stefan Kriwanek */ /* @@ -46,8 +46,13 @@ static int speedlink_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { /* No other conditions due to usage_table. */ - /* Fix "jumpy" cursor (invalid events sent by device). */ - if (value == 256) + + /* This fixes the "jumpy" cursor occuring due to invalid events sent + * by the device. Some devices only send them with value==+256, others + * don't. However, catching abs(value)>=256 is restrictive enough not + * to interfere with devices that were bug-free (has been tested). + */ + if (abs(value) >= 256) return 1; /* Drop useless distance 0 events (on button clicks etc.) as well */ if (value == 0) From a3957df756ccf3a46c24c8e2d4f8b26c932357b3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:31:44 +0200 Subject: [PATCH 0812/1368] HID: sensor-hub: validate feature report details commit 9e8910257397372633e74b333ef891f20c800ee4 upstream. A HID device could send a malicious feature report that would cause the sensor-hub HID driver to read past the end of heap allocation, leaking kernel memory contents to the caller. CVE-2013-2898 Signed-off-by: Kees Cook Reviewed-by: Mika Westerberg Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-sensor-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index ca7498107327..aa34755ca205 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -221,7 +221,8 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); - if (!report || (field_index >= report->maxfield)) { + if (!report || (field_index >= report->maxfield) || + report->field[field_index]->report_count < 1) { ret = -EINVAL; goto done_proc; } From 56085cec9ef90f5133effbcf71149d19ea2f5170 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:29:55 +0200 Subject: [PATCH 0813/1368] HID: validate HID report id size commit 43622021d2e2b82ea03d883926605bdd0525e1d1 upstream. The "Report ID" field of a HID report is used to build indexes of reports. The kernel's index of these is limited to 256 entries, so any malicious device that sets a Report ID greater than 255 will trigger memory corruption on the host: [ 1347.156239] BUG: unable to handle kernel paging request at ffff88094958a878 [ 1347.156261] IP: [] hid_register_report+0x2a/0x8b CVE-2013-2888 Signed-off-by: Kees Cook Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 10 +++++++--- include/linux/hid.h | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 402f48689943..eb202481fb09 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -63,6 +63,8 @@ struct hid_report *hid_register_report(struct hid_device *device, unsigned type, struct hid_report_enum *report_enum = device->report_enum + type; struct hid_report *report; + if (id >= HID_MAX_IDS) + return NULL; if (report_enum->report_id_hash[id]) return report_enum->report_id_hash[id]; @@ -404,8 +406,10 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) case HID_GLOBAL_ITEM_TAG_REPORT_ID: parser->global.report_id = item_udata(item); - if (parser->global.report_id == 0) { - hid_err(parser->device, "report_id 0 is invalid\n"); + if (parser->global.report_id == 0 || + parser->global.report_id >= HID_MAX_IDS) { + hid_err(parser->device, "report_id %u is invalid\n", + parser->global.report_id); return -1; } return 0; @@ -575,7 +579,7 @@ static void hid_close_report(struct hid_device *device) for (i = 0; i < HID_REPORT_TYPES; i++) { struct hid_report_enum *report_enum = device->report_enum + i; - for (j = 0; j < 256; j++) { + for (j = 0; j < HID_MAX_IDS; j++) { struct hid_report *report = report_enum->report_id_hash[j]; if (report) hid_free_report(report); diff --git a/include/linux/hid.h b/include/linux/hid.h index 0c48991b0402..ff545cc33c3a 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -393,10 +393,12 @@ struct hid_report { struct hid_device *device; /* associated device */ }; +#define HID_MAX_IDS 256 + struct hid_report_enum { unsigned numbered; struct list_head report_list; - struct hid_report *report_id_hash[256]; + struct hid_report *report_id_hash[HID_MAX_IDS]; }; #define HID_REPORT_TYPES 3 From 0697d8057661db5032a55dd5b0d115262c883cb4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:31:52 +0200 Subject: [PATCH 0814/1368] HID: picolcd_core: validate output report details MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1e87a2456b0227ca4ab881e19a11bb99d164e792 upstream. A HID device could send a malicious output report that would cause the picolcd HID driver to trigger a NULL dereference during attr file writing. [jkosina@suse.cz: changed report->maxfield < 1 to report->maxfield != 1 as suggested by Bruno]. CVE-2013-2899 Signed-off-by: Kees Cook Reviewed-by: Bruno Prémont Acked-by: Bruno Prémont Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-picolcd_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-picolcd_core.c b/drivers/hid/hid-picolcd_core.c index b48092d0e139..acbb021065ec 100644 --- a/drivers/hid/hid-picolcd_core.c +++ b/drivers/hid/hid-picolcd_core.c @@ -290,7 +290,7 @@ static ssize_t picolcd_operation_mode_store(struct device *dev, buf += 10; cnt -= 10; } - if (!report) + if (!report || report->maxfield != 1) return -EINVAL; while (cnt > 0 && (buf[cnt-1] == '\n' || buf[cnt-1] == '\r')) From 7c91362f8c00c6fb75da374c749a024d6d3563fb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:31:28 +0200 Subject: [PATCH 0815/1368] HID: ntrig: validate feature report details commit 875b4e3763dbc941f15143dd1a18d10bb0be303b upstream. A HID device could send a malicious feature report that would cause the ntrig HID driver to trigger a NULL dereference during initialization: [57383.031190] usb 3-1: New USB device found, idVendor=1b96, idProduct=0001 ... [57383.315193] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 [57383.315308] IP: [] ntrig_probe+0x25e/0x420 [hid_ntrig] CVE-2013-2896 Signed-off-by: Kees Cook Signed-off-by: Rafi Rubin Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ntrig.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c index ef95102515e4..5482156ab4de 100644 --- a/drivers/hid/hid-ntrig.c +++ b/drivers/hid/hid-ntrig.c @@ -115,7 +115,8 @@ static inline int ntrig_get_mode(struct hid_device *hdev) struct hid_report *report = hdev->report_enum[HID_FEATURE_REPORT]. report_id_hash[0x0d]; - if (!report) + if (!report || report->maxfield < 1 || + report->field[0]->report_count < 1) return -EINVAL; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); From 69f2af2ddcfd0d9984e764bdf0953a8d9248755a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= Date: Sat, 31 Aug 2013 14:07:48 +0200 Subject: [PATCH 0816/1368] HID: picolcd: Prevent NULL pointer dereference on _remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1cde501bb4655e98fb832194beb88ac73be5a05d upstream. When picolcd is switched into bootloader mode (for FW flashing) make sure not to try to dereference NULL-pointers of feature-devices during unplug/unbind. This fixes following BUG: BUG: unable to handle kernel NULL pointer dereference at 00000298 IP: [] picolcd_exit_framebuffer+0x1b/0x80 [hid_picolcd] *pde = 00000000 Oops: 0000 [#1] Modules linked in: hid_picolcd syscopyarea sysfillrect sysimgblt fb_sys_fops CPU: 0 PID: 15 Comm: khubd Not tainted 3.11.0-rc7-00002-g50d62d4 #2 EIP: 0060:[] EFLAGS: 00010292 CPU: 0 EIP is at picolcd_exit_framebuffer+0x1b/0x80 [hid_picolcd] Call Trace: [] picolcd_remove+0xcb/0x120 [hid_picolcd] [] hid_device_remove+0x59/0xc0 [] __device_release_driver+0x5a/0xb0 [] device_release_driver+0x1f/0x30 [] bus_remove_device+0x9d/0xd0 [] device_del+0xd5/0x150 [] hid_destroy_device+0x24/0x60 [] usbhid_disconnect+0x1b/0x40 ... Signed-off-by: Bruno Prémont Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-picolcd_cir.c | 3 ++- drivers/hid/hid-picolcd_fb.c | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-picolcd_cir.c b/drivers/hid/hid-picolcd_cir.c index e346038f0f11..59d5eb1e742c 100644 --- a/drivers/hid/hid-picolcd_cir.c +++ b/drivers/hid/hid-picolcd_cir.c @@ -145,6 +145,7 @@ void picolcd_exit_cir(struct picolcd_data *data) struct rc_dev *rdev = data->rc_dev; data->rc_dev = NULL; - rc_unregister_device(rdev); + if (rdev) + rc_unregister_device(rdev); } diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c index 591f6b22aa94..c930ab8554ea 100644 --- a/drivers/hid/hid-picolcd_fb.c +++ b/drivers/hid/hid-picolcd_fb.c @@ -593,10 +593,14 @@ int picolcd_init_framebuffer(struct picolcd_data *data) void picolcd_exit_framebuffer(struct picolcd_data *data) { struct fb_info *info = data->fb_info; - struct picolcd_fb_data *fbdata = info->par; + struct picolcd_fb_data *fbdata; unsigned long flags; + if (!info) + return; + device_remove_file(&data->hdev->dev, &dev_attr_fb_update_rate); + fbdata = info->par; /* disconnect framebuffer from HID dev */ spin_lock_irqsave(&fbdata->lock, flags); From 65a839f4b7fd73d2a84c2d404c94a162751e8edb Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 2 Sep 2013 13:43:00 +0200 Subject: [PATCH 0817/1368] HID: battery: don't do DMA from stack commit 6c2794a2984f4c17a58117a68703cc7640f01c5a upstream. Instead of using data from stack for DMA in hidinput_get_battery_property(), allocate the buffer dynamically. Reported-by: Richard Ryniker Reported-by: Alan Stern Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-input.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 945b8158ec4c..ac5e93528dfa 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -340,7 +340,7 @@ static int hidinput_get_battery_property(struct power_supply *psy, { struct hid_device *dev = container_of(psy, struct hid_device, battery); int ret = 0; - __u8 buf[2] = {}; + __u8 *buf; switch (prop) { case POWER_SUPPLY_PROP_PRESENT: @@ -349,13 +349,20 @@ static int hidinput_get_battery_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CAPACITY: + + buf = kmalloc(2 * sizeof(__u8), GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + break; + } ret = dev->hid_get_raw_report(dev, dev->battery_report_id, - buf, sizeof(buf), + buf, 2, dev->battery_report_type); if (ret != 2) { if (ret >= 0) ret = -EINVAL; + kfree(buf); break; } @@ -364,6 +371,7 @@ static int hidinput_get_battery_property(struct power_supply *psy, buf[1] <= dev->battery_max) val->intval = (100 * (buf[1] - dev->battery_min)) / (dev->battery_max - dev->battery_min); + kfree(buf); break; case POWER_SUPPLY_PROP_MODEL_NAME: From d0de24dd0e200c83382cd3fff8e8c06cb6dcef2b Mon Sep 17 00:00:00 2001 From: Manoj Chourasia Date: Mon, 22 Jul 2013 15:33:13 +0530 Subject: [PATCH 0818/1368] HID: hidraw: correctly deallocate memory on device disconnect commit 212a871a3934beccf43431608c27ed2e05a476ec upstream. This changes puts the commit 4fe9f8e203f back in place with the fixes for slab corruption because of the commit. When a device is unplugged, wait for all processes that have opened the device to close before deallocating the device. This commit was solving kernel crash because of the corruption in rb tree of vmalloc. The rootcause was the device data pointer was geting excessed after the memory associated with hidraw was freed. The commit 4fe9f8e203f was buggy as it was also freeing the hidraw first and then calling delete operation on the list associated with that hidraw leading to slab corruption. Signed-off-by: Manoj Chourasia Tested-by: Peter Wu Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hidraw.c | 60 ++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index a7451632ceb4..612a655bc9f0 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -113,7 +113,7 @@ static ssize_t hidraw_send_report(struct file *file, const char __user *buffer, __u8 *buf; int ret = 0; - if (!hidraw_table[minor]) { + if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { ret = -ENODEV; goto out; } @@ -261,7 +261,7 @@ static int hidraw_open(struct inode *inode, struct file *file) } mutex_lock(&minors_lock); - if (!hidraw_table[minor]) { + if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { err = -ENODEV; goto out_unlock; } @@ -302,39 +302,38 @@ static int hidraw_fasync(int fd, struct file *file, int on) return fasync_helper(fd, file, on, &list->fasync); } +static void drop_ref(struct hidraw *hidraw, int exists_bit) +{ + if (exists_bit) { + hid_hw_close(hidraw->hid); + hidraw->exist = 0; + if (hidraw->open) + wake_up_interruptible(&hidraw->wait); + } else { + --hidraw->open; + } + + if (!hidraw->open && !hidraw->exist) { + device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor)); + hidraw_table[hidraw->minor] = NULL; + kfree(hidraw); + } +} + static int hidraw_release(struct inode * inode, struct file * file) { unsigned int minor = iminor(inode); - struct hidraw *dev; struct hidraw_list *list = file->private_data; - int ret; - int i; mutex_lock(&minors_lock); - if (!hidraw_table[minor]) { - ret = -ENODEV; - goto unlock; - } list_del(&list->node); - dev = hidraw_table[minor]; - if (!--dev->open) { - if (list->hidraw->exist) { - hid_hw_power(dev->hid, PM_HINT_NORMAL); - hid_hw_close(dev->hid); - } else { - kfree(list->hidraw); - } - } - - for (i = 0; i < HIDRAW_BUFFER_SIZE; ++i) - kfree(list->buffer[i].value); kfree(list); - ret = 0; -unlock: - mutex_unlock(&minors_lock); - return ret; + drop_ref(hidraw_table[minor], 0); + + mutex_unlock(&minors_lock); + return 0; } static long hidraw_ioctl(struct file *file, unsigned int cmd, @@ -539,18 +538,9 @@ void hidraw_disconnect(struct hid_device *hid) struct hidraw *hidraw = hid->hidraw; mutex_lock(&minors_lock); - hidraw->exist = 0; - device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor)); + drop_ref(hidraw, 1); - hidraw_table[hidraw->minor] = NULL; - - if (hidraw->open) { - hid_hw_close(hid); - wake_up_interruptible(&hidraw->wait); - } else { - kfree(hidraw); - } mutex_unlock(&minors_lock); } EXPORT_SYMBOL_GPL(hidraw_disconnect); From 8bb7aacefb408f829835f3692dd34fab9801d86d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:32:01 +0200 Subject: [PATCH 0819/1368] HID: check for NULL field when setting values commit be67b68d52fa28b9b721c47bb42068f0c1214855 upstream. Defensively check that the field to be worked on is not NULL. Signed-off-by: Kees Cook Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index eb202481fb09..ed626e01514b 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1156,7 +1156,12 @@ EXPORT_SYMBOL_GPL(hid_output_report); int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) { - unsigned size = field->report_size; + unsigned size; + + if (!field) + return -1; + + size = field->report_size; hid_dump_input(field->report->device, field->usage + offset, value); From 5f24e1842d5d65d479818b719adb1286d9925840 Mon Sep 17 00:00:00 2001 From: Vasily Titskiy Date: Fri, 30 Aug 2013 18:25:04 -0400 Subject: [PATCH 0820/1368] HID: usbhid: quirk for N-Trig DuoSense Touch Screen commit 9e0bf92c223dabe0789714f8f85f6e26f8f9cda4 upstream. The DuoSense touchscreen device causes a 10 second timeout. This fix removes the delay. Signed-off-by: Vasily Titskiy Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 2a6d15d00e64..ca0219f98c65 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -646,6 +646,7 @@ #define USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_16 0x0012 #define USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_17 0x0013 #define USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_18 0x0014 +#define USB_DEVICE_ID_NTRIG_DUOSENSE 0x1500 #define USB_VENDOR_ID_ONTRAK 0x0a07 #define USB_DEVICE_ID_ONTRAK_ADU100 0x0064 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 19b8360f2330..07345521f421 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -109,6 +109,8 @@ static const struct hid_blacklist { { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_DUOSENSE, HID_QUIRK_NO_INIT_REPORTS }, + { 0, 0 } }; From 1d9d780fcd8df7c70312fdfe43ecc802b99b2e5c Mon Sep 17 00:00:00 2001 From: Arun Kumar K Date: Fri, 26 Jul 2013 07:28:01 -0300 Subject: [PATCH 0821/1368] media: exynos-gsc: Register v4l2 device commit d0b1c31349969973204fad21a076aecf131cc5e4 upstream. Gscaler video device registration was happening without reference to a parent v4l2_dev causing probe to fail. The patch creates a parent v4l2 device and uses it for the gsc m2m video device registration. This fixes regression introduced with comit commit 1c1d86a1ea07506 [media] v4l2: always require v4l2_dev, rename parent to dev_parent Signed-off-by: Arun Kumar K Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/exynos-gsc/gsc-core.c | 9 ++++++++- drivers/media/platform/exynos-gsc/gsc-core.h | 1 + drivers/media/platform/exynos-gsc/gsc-m2m.c | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c index 33b5ffc8d66d..f45b940d6072 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.c +++ b/drivers/media/platform/exynos-gsc/gsc-core.c @@ -1122,10 +1122,14 @@ static int gsc_probe(struct platform_device *pdev) goto err_clk; } - ret = gsc_register_m2m_device(gsc); + ret = v4l2_device_register(dev, &gsc->v4l2_dev); if (ret) goto err_clk; + ret = gsc_register_m2m_device(gsc); + if (ret) + goto err_v4l2; + platform_set_drvdata(pdev, gsc); pm_runtime_enable(dev); ret = pm_runtime_get_sync(&pdev->dev); @@ -1147,6 +1151,8 @@ static int gsc_probe(struct platform_device *pdev) pm_runtime_put(dev); err_m2m: gsc_unregister_m2m_device(gsc); +err_v4l2: + v4l2_device_unregister(&gsc->v4l2_dev); err_clk: gsc_clk_put(gsc); return ret; @@ -1157,6 +1163,7 @@ static int gsc_remove(struct platform_device *pdev) struct gsc_dev *gsc = platform_get_drvdata(pdev); gsc_unregister_m2m_device(gsc); + v4l2_device_unregister(&gsc->v4l2_dev); vb2_dma_contig_cleanup_ctx(gsc->alloc_ctx); pm_runtime_disable(&pdev->dev); diff --git a/drivers/media/platform/exynos-gsc/gsc-core.h b/drivers/media/platform/exynos-gsc/gsc-core.h index cc19bba09bd1..76435d3bf62d 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.h +++ b/drivers/media/platform/exynos-gsc/gsc-core.h @@ -343,6 +343,7 @@ struct gsc_dev { unsigned long state; struct vb2_alloc_ctx *alloc_ctx; struct video_device vdev; + struct v4l2_device v4l2_dev; }; /** diff --git a/drivers/media/platform/exynos-gsc/gsc-m2m.c b/drivers/media/platform/exynos-gsc/gsc-m2m.c index 40a73f7d20da..e576ff2de3de 100644 --- a/drivers/media/platform/exynos-gsc/gsc-m2m.c +++ b/drivers/media/platform/exynos-gsc/gsc-m2m.c @@ -751,6 +751,7 @@ int gsc_register_m2m_device(struct gsc_dev *gsc) gsc->vdev.release = video_device_release_empty; gsc->vdev.lock = &gsc->lock; gsc->vdev.vfl_dir = VFL_DIR_M2M; + gsc->vdev.v4l2_dev = &gsc->v4l2_dev; snprintf(gsc->vdev.name, sizeof(gsc->vdev.name), "%s.%d:m2m", GSC_MODULE_NAME, gsc->id); From cb6ecb39fe845251412e34e1d10bb6f4a3652f2f Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Mon, 29 Jul 2013 06:53:59 -0300 Subject: [PATCH 0822/1368] media: exynos4-is: Fix entity unregistration on error path commit d2b903b4427e417a73863cef36ad0796ea6b7404 upstream. This patch corrects media entities unregistration order to make sure the fimc.N.capture and fimc-lite video nodes are unregistered with fimc->lock mutex held. This prevents races between video device open() and defered probing and NULL pointer dereference in open() callback as follows: [ 77.645000] Unable to handle kernel NULL pointer dereference at virtual address 00000290t [ 77.655000] pgd = ee7a8000 [ 77.660000] [00000290] *pgd=6e13c831, *pte=00000000, *ppte=00000000 [ 77.665000] Internal error: Oops: 17 [#1] PREEMPT SMP ARM [ 77.670000] Modules linked in: s5p_fimc ipv6 exynos_fimc_is exynos_fimc_lite s5p_csis v4l2_mem2mem videobuf2_dma_contig videobuf2_memops exynos4_is_common videobuf2_core [last unloaded: s5p_fimc] [ 77.685000] CPU: 0 PID : 2998 Comm: v4l_id Tainted: G W 3.10.0-next-20130709-00039-g39f491b-dirty #1548 [ 77.695000] task: ee084000 ti: ee46e000 task.ti: ee46e000 [ 77.700000] PC is at __mutex_lock_slowpath+0x54/0x368 [ 77.705000] LR is at __mutex_lock_slowpath+0x24/0x368 [ 77.710000] pc : [] lr : [] psr: 60000093 [ 77.710000] sp : ee46fd70 ip : 000008c8 fp : c054e34c [ 77.725000] r10: ee084000 r9 : 00000000 r8 : ee439480 [ 77.730000] r7 : ee46e000 r6 : 60000013 r5 : 00000290 r4 : 0000028c [ 77.735000] r3 : 00000000 r2 : 00000000 r1 : 20000093 r0 : 00000001 [ 77.740000] Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user [ 77.750000] Control: 10c5387d Table: 6e7a804a DAC: 00000015 [ 77.755000] Process v4l_id (pid: 2998, stack limit = 0xee46e238) [ 77.760000] Stack: (0xee46fd70 to 0xee470000) ... [ 77.935000] [] (__mutex_lock_slowpath+0x54/0x368) from [] (mutex_lock+0xc/0x24) [ 77.945000] [] (mutex_lock+0xc/0x24) from [] (fimc_lite_open+0x12c/0x2bc [exynos_fimc_lite]) [ 77.955000] [] (fimc_lite_open+0x12c/0x2bc [exynos_fimc_lite]) from [] (v4l2_open+0xa0/0xe0) [ 77.965000] [] (v4l2_open+0xa0/0xe0) from [] (chrdev_open+0x88/0x170) [ 77.975000] [] (chrdev_open+0x88/0x170) from [] (do_dentry_open.isra.14+0x1d8/0x258) [ 77.985000] [] (do_dentry_open.isra.14+0x1d8/0x258) from [] (finish_open+0x20/0x38) [ 77.995000] [] (finish_open+0x20/0x38) from [] (do_last.isra.43+0x538/0xb1c) [ 78.000000] [] (do_last.isra.43+0x538/0xb1c) from [] (path_openat+0xb4/0x5c4) [ 78.010000] [] (path_openat+0xb4/0x5c4) from [] (do_filp_open+0x2c/0x80) [ 78.020000] [] (do_filp_open+0x2c/0x80) from [] (do_sys_open+0xf4/0x1a8) [ 78.025000] [] (do_sys_open+0xf4/0x1a8) from [] (ret_fast_syscall+0x0/0x30) [ 78.035000] Code: 1a000093 e10f6000 f10c0080 e2845004 (e1953f9f) Reported-by: Andrzej Hajda Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/exynos4-is/media-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c index 15ef8f28239b..b5b480befcc3 100644 --- a/drivers/media/platform/exynos4-is/media-dev.c +++ b/drivers/media/platform/exynos4-is/media-dev.c @@ -1441,9 +1441,9 @@ static int fimc_md_probe(struct platform_device *pdev) err_unlock: mutex_unlock(&fmd->media_dev.graph_mutex); err_clk: - media_device_unregister(&fmd->media_dev); fimc_md_put_clocks(fmd); fimc_md_unregister_entities(fmd); + media_device_unregister(&fmd->media_dev); err_md: v4l2_device_unregister(&fmd->v4l2_dev); return ret; From 469641b2b568a9295396ff723573bdeaf2c4293e Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 15 Jul 2013 02:36:23 -0300 Subject: [PATCH 0823/1368] media: s5p-g2d: Fix registration failure commit 8a09a4cc9bd9389dc6a3b5b2dd3a7d64d2fab7e1 upstream. Commit 1c1d86a1ea ("[media] v4l2: always require v4l2_dev, rename parent to dev_parent") expects v4l2_dev to be always set. It converted most of the drivers using the parent field of video_device to v4l2_dev field. G2D driver did not set the parent field. Hence it got left out. Without this patch we get the following boot warning and G2D driver fails to register the video device. WARNING: CPU: 0 PID: 1 at drivers/media/v4l2-core/v4l2-dev.c:775 __video_register_device+0xfc0/0x1028() Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.11.0-rc1-00001-g1c3e372-dirty #9 [] (unwind_backtrace+0x0/0xf4) from [] (show_stack+0x10/0x14) [] (show_stack+0x10/0x14) from [] (dump_stack+0x7c/0xb0) [] (dump_stack+0x7c/0xb0) from [] (warn_slowpath_common+0x6c/0x88) [] (warn_slowpath_common+0x6c/0x88) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null+0x1c/0x24) from [] (__video_register_device+0xfc0/0x1028) [] (__video_register_device+0xfc0/0x1028) from [] (g2d_probe+0x1f8/0x398) [] (g2d_probe+0x1f8/0x398) from [] (platform_drv_probe+0x14/0x18) [] (platform_drv_probe+0x14/0x18) from [] (driver_probe_device+0x108/0x220) [] (driver_probe_device+0x108/0x220) from [] (__driver_attach+0x8c/0x90) [] (__driver_attach+0x8c/0x90) from [] (bus_for_each_dev+0x60/0x94) [] (bus_for_each_dev+0x60/0x94) from [] (bus_add_driver+0x1c0/0x24c) [] (bus_add_driver+0x1c0/0x24c) from [] (driver_register+0x78/0x140) [] (driver_register+0x78/0x140) from [] (do_one_initcall+0xf8/0x144) [] (do_one_initcall+0xf8/0x144) from [] (kernel_init_freeable+0x13c/0x1d8) [] (kernel_init_freeable+0x13c/0x1d8) from [] (kernel_init+0xc/0x160) [] (kernel_init+0xc/0x160) from [] (ret_from_fork+0x14/0x3c) ---[ end trace 4e0ec028b0028e02 ]--- s5p-g2d 12800000.g2d: Failed to register video device s5p-g2d: probe of 12800000.g2d failed with error -22 Signed-off-by: Sachin Kamat Cc: Hans Verkuil Signed-off-by: Kamil Debski Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s5p-g2d/g2d.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/s5p-g2d/g2d.c b/drivers/media/platform/s5p-g2d/g2d.c index 553d87e5ceab..fd6289d60cde 100644 --- a/drivers/media/platform/s5p-g2d/g2d.c +++ b/drivers/media/platform/s5p-g2d/g2d.c @@ -784,6 +784,7 @@ static int g2d_probe(struct platform_device *pdev) } *vfd = g2d_videodev; vfd->lock = &dev->mutex; + vfd->v4l2_dev = &dev->v4l2_dev; ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to register video device\n"); From bd7dcb5af0f8771fbcf2fd999545281ec7561d3e Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 28 Jun 2013 05:34:20 -0300 Subject: [PATCH 0824/1368] media: DocBook: upgrade media_api DocBook version to 4.2 commit 8bfd4a68ecc003c1a142f35551be846d6b13e822 upstream. Fixes the last three errors of media_api DocBook validatation: (...) media_api.xml:414: element imagedata: validity error : Value "SVG" for attribute format of imagedata is not among the enumerated set media_api.xml:432: element imagedata: validity error : Value "SVG" for attribute format of imagedata is not among the enumerated set media_api.xml:452: element imagedata: validity error : Value "SVG" for attribute format of imagedata is not among the enumerated set (...) Signed-off-by: Andrzej Hajda Signed-off-by: Kyungmin Park Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/media_api.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/DocBook/media_api.tmpl b/Documentation/DocBook/media_api.tmpl index 6a8b7158697f..9c92bb879b6d 100644 --- a/Documentation/DocBook/media_api.tmpl +++ b/Documentation/DocBook/media_api.tmpl @@ -1,6 +1,6 @@ - %media-entities; From ed6db5dcac3698346227a49afb2635dfcb784057 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 3 Jul 2013 16:17:34 -0300 Subject: [PATCH 0825/1368] media: hdpvr: fix iteration over uninitialized lists in hdpvr_probe() commit 2e923a0527ac439e135b9961e58d3acd876bba10 upstream. free_buff_list and rec_buff_list are initialized in the middle of hdpvr_probe(), but if something bad happens before that, error handling code calls hdpvr_delete(), which contains iteration over the lists (via hdpvr_free_buffers()). The patch moves the lists initialization to the beginning and by the way fixes goto label in error handling of registering videodev. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/hdpvr/hdpvr-core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c index 8247c19d6260..77d7b7fbdc7e 100644 --- a/drivers/media/usb/hdpvr/hdpvr-core.c +++ b/drivers/media/usb/hdpvr/hdpvr-core.c @@ -311,6 +311,11 @@ static int hdpvr_probe(struct usb_interface *interface, dev->workqueue = 0; + /* init video transfer queues first of all */ + /* to prevent oops in hdpvr_delete() on error paths */ + INIT_LIST_HEAD(&dev->free_buff_list); + INIT_LIST_HEAD(&dev->rec_buff_list); + /* register v4l2_device early so it can be used for printks */ if (v4l2_device_register(&interface->dev, &dev->v4l2_dev)) { dev_err(&interface->dev, "v4l2_device_register failed\n"); @@ -333,10 +338,6 @@ static int hdpvr_probe(struct usb_interface *interface, if (!dev->workqueue) goto error; - /* init video transfer queues */ - INIT_LIST_HEAD(&dev->free_buff_list); - INIT_LIST_HEAD(&dev->rec_buff_list); - dev->options = hdpvr_default_options; if (default_video_input < HDPVR_VIDEO_INPUTS) @@ -413,7 +414,7 @@ static int hdpvr_probe(struct usb_interface *interface, video_nr[atomic_inc_return(&dev_nr)]); if (retval < 0) { v4l2_err(&dev->v4l2_dev, "registering videodev failed\n"); - goto error; + goto reg_fail; } /* let the user know what node this device is now attached to */ From cd08ebc07c7cb27785b94a09e4b441e021ce302f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 28 Jun 2013 05:44:22 -0300 Subject: [PATCH 0826/1368] media: v4l2: added missing mutex.h include to v4l2-ctrls.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a19dec6ea94c036af68c31930c1c92681f55af41 upstream. This patch fixes following error: include/media/v4l2-ctrls.h:193:15: error: field ‘_lock’ has incomplete type include/media/v4l2-ctrls.h: In function ‘v4l2_ctrl_lock’: include/media/v4l2-ctrls.h:570:2: error: implicit declaration of function ‘mutex_lock’ [-Werror=implicit-function-declaration] include/media/v4l2-ctrls.h: In function ‘v4l2_ctrl_unlock’: include/media/v4l2-ctrls.h:579:2: error: implicit declaration of function ‘mutex_unlock’ [-Werror=implicit-function-declaration] Signed-off-by: Andrzej Hajda Signed-off-by: Kyungmin Park Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- include/media/v4l2-ctrls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 7343a27fe819..47ada23345a1 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -22,6 +22,7 @@ #define _V4L2_CTRLS_H #include +#include #include /* forward references */ From 4ff5ef250fb348707bad956e6eef7f84f1021675 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sat, 15 Jun 2013 08:09:57 -0300 Subject: [PATCH 0827/1368] media: media: coda: Fix DT driver data pointer for i.MX27 commit 7b0dd9e60e714951b5400dd0740b3c4c3c3cb76f upstream. The data pointer should point to DT data, and not to the ID array. Signed-off-by: Alexander Shiyan Signed-off-by: Kamil Debski Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/coda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c index 9d1481a60bd9..c504f70d4e90 100644 --- a/drivers/media/platform/coda.c +++ b/drivers/media/platform/coda.c @@ -1933,7 +1933,7 @@ MODULE_DEVICE_TABLE(platform, coda_platform_ids); #ifdef CONFIG_OF static const struct of_device_id coda_dt_ids[] = { - { .compatible = "fsl,imx27-vpu", .data = &coda_platform_ids[CODA_IMX27] }, + { .compatible = "fsl,imx27-vpu", .data = &coda_devdata[CODA_IMX27] }, { .compatible = "fsl,imx53-vpu", .data = &coda_devdata[CODA_IMX53] }, { /* sentinel */ } }; From b3fcd91b96de3082d24636478a145bb50947c9eb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 9 Aug 2013 08:53:26 -0300 Subject: [PATCH 0828/1368] media: mb86a20s: Fix TS parallel mode commit 9d32069faacdc81fe1dcb5d297c32a3ac81da8f0 upstream. changeset 768e6dadd74 caused a regression on using mb86a20s in parallel mode, as the parallel mode selection got overriden by mb86a20s_init2. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/mb86a20s.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/media/dvb-frontends/mb86a20s.c b/drivers/media/dvb-frontends/mb86a20s.c index 856374bd3676..2c7217fb1415 100644 --- a/drivers/media/dvb-frontends/mb86a20s.c +++ b/drivers/media/dvb-frontends/mb86a20s.c @@ -157,7 +157,6 @@ static struct regdata mb86a20s_init2[] = { { 0x45, 0x04 }, /* CN symbol 4 */ { 0x48, 0x04 }, /* CN manual mode */ - { 0x50, 0xd5 }, { 0x51, 0x01 }, /* Serial */ { 0x50, 0xd6 }, { 0x51, 0x1f }, { 0x50, 0xd2 }, { 0x51, 0x03 }, { 0x50, 0xd7 }, { 0x51, 0xbf }, @@ -1860,16 +1859,15 @@ static int mb86a20s_initfe(struct dvb_frontend *fe) dev_dbg(&state->i2c->dev, "%s: IF=%d, IF reg=0x%06llx\n", __func__, state->if_freq, (long long)pll); - if (!state->config->is_serial) { + if (!state->config->is_serial) regD5 &= ~1; - rc = mb86a20s_writereg(state, 0x50, 0xd5); - if (rc < 0) - goto err; - rc = mb86a20s_writereg(state, 0x51, regD5); - if (rc < 0) - goto err; - } + rc = mb86a20s_writereg(state, 0x50, 0xd5); + if (rc < 0) + goto err; + rc = mb86a20s_writereg(state, 0x51, regD5); + if (rc < 0) + goto err; rc = mb86a20s_writeregdata(state, mb86a20s_init2); if (rc < 0) From d2310f7119ba394d3256c221886c048189dcfb91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 14 Aug 2013 05:24:39 -0300 Subject: [PATCH 0829/1368] media: siano: fix divide error on 0 counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ec532503209053bbee0c7dac410031e50835e01a upstream. GIT_AUTHOR_DATE=1376465691 I took a quick look at the code and wonder if the problem is caused by an initial zero statistics message? This is all just a wild guess, but if it is correct, then the attached untested patch might fix it... Bjørn >From d78a0599d5b5d4da384eae08bf7da316389dfbe5 Mon Sep 17 00:00:00 2001 ts_packets and ets_packets counters can be 0. Don't fall over if they are. Fixes: [ 846.851711] divide error: 0000 [#1] SMP [ 846.851806] Modules linked in: smsdvb dvb_core ir_lirc_codec lirc_dev ir_sanyo_decoder ir_mce_kbd_decoder ir_sony_decoder ir_jvc_decoder ir_rc6_decoder ir_rc5_decoder ir_nec_decoder rc_hauppauge smsusb smsmdtv rc_core pci_stub vboxpci(O) vboxnetadp(O) vboxnetflt(O) vboxdrv(O) parport_pc ppdev lp parport cpufreq_userspace cpufreq_powersave cpufreq_stats cpufreq_conservative rfcomm bnep binfmt_misc uinput nfsd auth_rpcgss oid_registry nfs_acl nfs lockd dns_resolver fscache sunrpc ext4 jbd2 fuse tp_smapi(O) thinkpad_ec(O) loop firewire_sbp2 dm_crypt snd_hda_codec_conexant snd_hda_intel snd_hda_codec snd_hwdep snd_pcm_oss snd_mixer_oss snd_pcm thinkpad_acpi nvram snd_page_alloc hid_generic snd_seq_midi snd_seq_midi_event arc4 usbhid snd_rawmidi uvcvideo hid iwldvm coretemp kvm_intel mac8021 1 cdc_wdm [ 846.853477] cdc_acm snd_seq videobuf2_vmalloc videobuf2_memops videobuf2_core videodev media kvm radeon r852 ttm joydev cdc_ether usbnet pcmcia mii sm_common nand btusb drm_kms_helper tpm_tis acpi_cpufreq bluetooth iwlwifi nand_ecc drm nand_ids i2c_i801 mtd snd_seq_device iTCO_wdt iTCO_vendor_support r592 memstick lpc_ich mperf tpm yenta_socket pcmcia_rsrc pcmcia_core cfg80211 snd_timer snd pcspkr i2c_algo_bit crc16 i2c_core tpm_bios processor mfd_core wmi psmouse mei_me rfkill mei serio_raw soundcore evdev battery button video ac microcode ext3 mbcache jbd md_mod dm_mirror dm_region_hash dm_log dm_mod sg sr_mod sd_mod cdrom crc_t10dif firewire_ohci sdhci_pci sdhci mmc_core firewire_core crc_itu_t thermal thermal_sys ahci libahci ehci_pci uhci_hcd ehci_hcd libata scsi_mod usbcore e1000 e usb_common [ 846.855310] ptp pps_core [ 846.855356] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 3.10-2-amd64 #1 Debian 3.10.5-1 [ 846.855490] Hardware name: LENOVO 4061WFA/4061WFA, BIOS 6FET92WW (3.22 ) 12/14/2011 [ 846.855609] task: ffffffff81613400 ti: ffffffff81600000 task.ti: ffffffff81600000 [ 846.855636] RIP: 0010:[] [] smsdvb_onresponse+0x264/0xa86 [smsdvb] [ 846.863906] RSP: 0018:ffff88013bc03cf0 EFLAGS: 00010046 [ 846.863906] RAX: 0000000000000000 RBX: ffff880133bf6000 RCX: 0000000000000000 [ 846.863906] RDX: 0000000000000000 RSI: ffff88005d3b58c0 RDI: ffff880133bf6000 [ 846.863906] RBP: ffff88005d1da000 R08: 0000000000000058 R09: 0000000000000015 [ 846.863906] R10: 0000000000001a0d R11: 000000000000021a R12: ffff88005d3b58c0 [ 846.863906] R13: ffff88005d1da008 R14: 00000000ffffff8d R15: ffff880036cf5060 [ 846.863906] FS: 0000000000000000(0000) GS:ffff88013bc00000(0000) knlGS:0000000000000000 [ 846.863906] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 846.863906] CR2: 00007f3a4b69ae50 CR3: 0000000036dac000 CR4: 00000000000407f0 [ 846.863906] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 846.863906] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 846.863906] Stack: [ 846.863906] ffff88007a102000 ffff88005d1da000 ffff88005d3b58c0 0000000000085824 [ 846.863906] ffffffffa08c5aa3 ffff88005d1da000 ffff8800a6907390 ffff8800a69073b0 [ 846.863906] ffff8800a6907000 ffffffffa08b642c 000000000000021a ffff8800a69073b0 [ 846.863906] Call Trace: [ 846.863906] [ 846.863906] [ 846.863906] [] ? smscore_onresponse+0x1d5/0x353 [smsmdtv] [ 846.863906] [] ? smsusb_onresponse+0x146/0x192 [smsusb] [ 846.863906] [] ? usb_hcd_giveback_urb+0x6c/0xac [usbcore] [ 846.863906] [] ? ehci_urb_done+0x62/0x72 [ehci_hcd] [ 846.863906] [] ? qh_completions+0x91/0x364 [ehci_hcd] [ 846.863906] [] ? ehci_work+0x8a/0x68e [ehci_hcd] [ 846.863906] [] ? timekeeping_get_ns.constprop.10+0xd/0x31 [ 846.863906] [] ? update_cfs_rq_blocked_load+0xde/0xec [ 846.863906] [] ? run_posix_cpu_timers+0x25/0x575 [ 846.863906] [] ? ehci_irq+0x211/0x23d [ehci_hcd] [ 846.863906] [] ? usb_hcd_irq+0x31/0x48 [usbcore] [ 846.863906] [] ? handle_irq_event_percpu+0x49/0x1a4 [ 846.863906] [] ? handle_irq_event+0x32/0x4b [ 846.863906] [] ? handle_fasteoi_irq+0x80/0xb6 [ 846.863906] [] ? handle_irq+0x18/0x20 [ 846.863906] [] ? do_IRQ+0x40/0x95 [ 846.863906] [] ? common_interrupt+0x6d/0x6d [ 846.863906] [ 846.863906] [ 846.863906] [] ? arch_local_irq_enable+0x4/0x8 [ 846.863906] [] ? cpuidle_enter_state+0x52/0xc1 [ 846.863906] [] ? cpuidle_idle_call+0xd4/0x143 [ 846.863906] [] ? arch_cpu_idle+0x5/0x17 [ 846.863906] [] ? cpu_startup_entry+0x10d/0x187 [ 846.863906] [] ? start_kernel+0x3e8/0x3f3 [ 846.863906] [] ? repair_env_string+0x54/0x54 [ 846.863906] [] ? x86_64_start_kernel+0xf2/0xfd [ 846.863906] Code: 25 09 00 00 c6 83 da 08 00 00 03 8b 45 54 48 01 83 b6 08 00 00 8b 45 50 48 01 83 db 08 00 00 8b 4d 18 69 c1 ff ff 00 00 03 4d 14 <48> f7 f1 89 83 a8 09 00 00 e9 68 fe ff ff 48 8b 7f 10 e8 79 92 [ 846.863906] RIP [] smsdvb_onresponse+0x264/0xa86 [smsdvb] [ 846.863906] RSP Reference: http://bugs.debian.org/719623 Reported-by: Johannes Rohr Signed-off-by: Bjørn Mork Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/common/siano/smsdvb-main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/common/siano/smsdvb-main.c b/drivers/media/common/siano/smsdvb-main.c index 297f1b2f9a32..8df1aea5fbff 100644 --- a/drivers/media/common/siano/smsdvb-main.c +++ b/drivers/media/common/siano/smsdvb-main.c @@ -275,7 +275,8 @@ static void smsdvb_update_per_slices(struct smsdvb_client_t *client, /* Legacy PER/BER */ tmp = p->ets_packets * 65535; - do_div(tmp, p->ts_packets + p->ets_packets); + if (p->ts_packets + p->ets_packets) + do_div(tmp, p->ts_packets + p->ets_packets); client->legacy_per = tmp; } From 42cc8e56740efe9acc271f4780b81ac5ecef188c Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 19 Aug 2013 18:51:13 +0200 Subject: [PATCH 0830/1368] Btrfs: don't allow the replace procedure on read only filesystems commit bbb651e469d99f0088e286fdeb54acca7bb4ad4e upstream. If you start the replace procedure on a read only filesystem, at the end the procedure fails to write the updated dev_items to the chunk tree. The problem is that this error is not indicated except for a WARN_ON(). If the user now thinks that everything was done as expected and destroys the source device (with mkfs or with a hammer). The next mount fails with "failed to read chunk root" and the filesystem is gone. This commit adds code to fail the attempt to start the replace procedure if the filesystem is mounted read-only. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0f81d67cdc8d..8dedf4019672 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3299,6 +3299,9 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { From 73e2c2b7c105e46344eb409575a4508f24a82eee Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2013 17:47:26 +0200 Subject: [PATCH 0831/1368] uprobes: Fix utask->depth accounting in handle_trampoline() commit 878b5a6efd38030c7a90895dc8346e8fb1e09b4c upstream. Currently utask->depth is simply the number of allocated/pending return_instance's in uprobe_task->return_instances list. handle_trampoline() should decrement this counter every time we handle/free an instance, but due to typo it does this only if ->chained == T. This means that in the likely case this counter is never decremented and the probed task can't report more than MAX_URETPROBE_DEPTH events. Reported-by: Mikhail Kulemin Reported-by: Hemant Kumar Shaw Signed-off-by: Oleg Nesterov Acked-by: Anton Arapov Cc: masami.hiramatsu.pt@hitachi.com Cc: srikar@linux.vnet.ibm.com Cc: systemtap@sourceware.org Link: http://lkml.kernel.org/r/20130911154726.GA8093@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/uprobes.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f3569747d629..ad8e1bdca70e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1682,12 +1682,10 @@ static bool handle_trampoline(struct pt_regs *regs) tmp = ri; ri = ri->next; kfree(tmp); + utask->depth--; if (!chained) break; - - utask->depth--; - BUG_ON(!ri); } From d041e861d49f1f558e9e18f2075345f8a37427dc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 29 Aug 2013 07:18:14 -0700 Subject: [PATCH 0832/1368] leds: wm831x-status: Request a REG resource commit 61abeba5222895d6900b13115f5d8eba7988d7d6 upstream. The wm831x-status driver was not converted to use a REG resource when they were introduced and the rest of the wm831x drivers converted, causing it to fail to probe due to requesting the wrong resource type. Signed-off-by: Mark Brown Signed-off-by: Bryan Wu Signed-off-by: Greg Kroah-Hartman --- drivers/leds/leds-wm831x-status.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-wm831x-status.c b/drivers/leds/leds-wm831x-status.c index 6bd5c679d877..b7d83d6838f3 100644 --- a/drivers/leds/leds-wm831x-status.c +++ b/drivers/leds/leds-wm831x-status.c @@ -230,9 +230,9 @@ static int wm831x_status_probe(struct platform_device *pdev) int id = pdev->id % ARRAY_SIZE(chip_pdata->status); int ret; - res = platform_get_resource(pdev, IORESOURCE_IO, 0); + res = platform_get_resource(pdev, IORESOURCE_REG, 0); if (res == NULL) { - dev_err(&pdev->dev, "No I/O resource\n"); + dev_err(&pdev->dev, "No register resource\n"); ret = -EINVAL; goto err; } From 2008c41d079db04dfc6d6a29bd8a7292ed1299a3 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 28 Aug 2013 10:41:42 +0200 Subject: [PATCH 0833/1368] MIPS: ath79: Fix ar933x watchdog clock commit a1191927ace7e6f827132aa9e062779eb3f11fa5 upstream. The watchdog device on the AR933x is connected to the AHB clock, however the current code uses the reference clock. Due to the wrong rate, the watchdog driver can't calculate correct register values for a given timeout value and the watchdog unexpectedly restarts the system. The code uses the wrong value since the initial commit 04225e1d227c8e68d685936ecf42ac175fec0e54 (MIPS: ath79: add AR933X specific clock init) The patch fixes the code to use the correct clock rate to avoid the problem. Signed-off-by: Felix Fietkau Signed-off-by: Gabor Juhos Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5777/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/ath79/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c index 765ef30e3e1c..733017b3dfe7 100644 --- a/arch/mips/ath79/clock.c +++ b/arch/mips/ath79/clock.c @@ -164,7 +164,7 @@ static void __init ar933x_clocks_init(void) ath79_ahb_clk.rate = freq / t; } - ath79_wdt_clk.rate = ath79_ref_clk.rate; + ath79_wdt_clk.rate = ath79_ahb_clk.rate; ath79_uart_clk.rate = ath79_ref_clk.rate; } From e041da063093c8f0a713b09560dc5bf6985e8a0a Mon Sep 17 00:00:00 2001 From: Gera Kazakov Date: Mon, 9 Sep 2013 15:47:06 -0700 Subject: [PATCH 0834/1368] target: Fix >= v3.9+ regression in PR APTPL + ALUA metadata write-out commit f730f9158f6ee7b5c4d892af6b51a72194445ea4 upstream. This patch fixes a >= v3.9+ regression in __core_scsi3_write_aptpl_to_file() + core_alua_write_tpg_metadata() write-out, where a return value of -EIO was incorrectly being returned upon success. This bug was originally introduced in: commit 0e9b10a90f1c30f25dd6f130130240745ab14010 Author: Al Viro Date: Sat Feb 23 15:22:43 2013 -0500 target: writev() on single-element vector is pointless However, given that the return of core_scsi3_update_and_write_aptpl() was not used to determine if a command should be returned with non GOOD status, this bug was not being triggered in PR logic until v3.11-rc1 by commit: commit 459f213ba162bd13e113d6f92a8fa6c780fd67ed Author: Andy Grover Date: Thu May 16 10:41:02 2013 -0700 target: Allocate aptpl_buf inside update_and_write_aptpl() So, go ahead and only return -EIO if kernel_write() returned a negative value. Reported-by: Gera Kazakov Signed-off-by: Gera Kazakov Cc: Al Viro Cc: Andy Grover Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_alua.c | 2 +- drivers/target/target_core_pr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index cbe48ab41745..f608fbc14a27 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -730,7 +730,7 @@ static int core_alua_write_tpg_metadata( if (ret < 0) pr_err("Error writing ALUA metadata file: %s\n", path); fput(file); - return ret ? -EIO : 0; + return (ret < 0) ? -EIO : 0; } /* diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 3240f2cc81ef..04a74938bb43 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1987,7 +1987,7 @@ static int __core_scsi3_write_aptpl_to_file( pr_debug("Error writing APTPL metadata file: %s\n", path); fput(file); - return ret ? -EIO : 0; + return (ret < 0) ? -EIO : 0; } static int From 1b24e0e487698ae239764165495f45401d5930ce Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Sat, 15 Jun 2013 10:27:19 -0600 Subject: [PATCH 0835/1368] intel-iommu: Fix leaks in pagetable freeing commit 3269ee0bd6686baf86630300d528500ac5b516d7 upstream. At best the current code only seems to free the leaf pagetables and the root. If you're unlucky enough to have a large gap (like any QEMU guest with more than 3G of memory), only the first chunk of leaf pagetables are freed (plus the root). This is a massive memory leak. This patch re-writes the pagetable freeing function to use a recursive algorithm and manages to not only free all the pagetables, but does it without any apparent performance loss versus the current broken version. Signed-off-by: Alex Williamson Reviewed-by: Marcelo Tosatti Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 72 ++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b4f0e28dfa41..fa004b112a89 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain, return order; } +static void dma_pte_free_level(struct dmar_domain *domain, int level, + struct dma_pte *pte, unsigned long pfn, + unsigned long start_pfn, unsigned long last_pfn) +{ + pfn = max(start_pfn, pfn); + pte = &pte[pfn_level_offset(pfn, level)]; + + do { + unsigned long level_pfn; + struct dma_pte *level_pte; + + if (!dma_pte_present(pte) || dma_pte_superpage(pte)) + goto next; + + level_pfn = pfn & level_mask(level - 1); + level_pte = phys_to_virt(dma_pte_addr(pte)); + + if (level > 2) + dma_pte_free_level(domain, level - 1, level_pte, + level_pfn, start_pfn, last_pfn); + + /* If range covers entire pagetable, free it */ + if (!(start_pfn > level_pfn || + last_pfn < level_pfn + level_size(level))) { + dma_clear_pte(pte); + domain_flush_cache(domain, pte, sizeof(*pte)); + free_pgtable_page(level_pte); + } +next: + pfn += level_size(level); + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); +} + /* free page table pages. last level pte should already be cleared */ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - struct dma_pte *first_pte, *pte; - int total = agaw_to_level(domain->agaw); - int level; - unsigned long tmp; - int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); BUG_ON(start_pfn > last_pfn); /* We don't need lock here; nobody else touches the iova range */ - level = 2; - while (level <= total) { - tmp = align_to_level(start_pfn, level); + dma_pte_free_level(domain, agaw_to_level(domain->agaw), + domain->pgd, 0, start_pfn, last_pfn); - /* If we can't even clear one PTE at this level, we're done */ - if (tmp + level_size(level) - 1 > last_pfn) - return; - - do { - large_page = level; - first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); - if (large_page > level) - level = large_page + 1; - if (!pte) { - tmp = align_to_level(tmp + 1, level + 1); - continue; - } - do { - if (dma_pte_present(pte)) { - free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); - dma_clear_pte(pte); - } - pte++; - tmp += level_size(level); - } while (!first_pte_in_page(pte) && - tmp + level_size(level) - 1 <= last_pfn); - - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); - - } while (tmp && tmp + level_size(level) - 1 <= last_pfn); - level++; - } /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { free_pgtable_page(domain->pgd); From 5a48788ca4d6dc78d1856d9ddeea1b1160097cc9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Aug 2013 13:56:50 -0700 Subject: [PATCH 0836/1368] pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup commit a606488513543312805fab2b93070cefe6a3016c upstream. Serge Hallyn writes: > Since commit af4b8a83add95ef40716401395b44a1b579965f4 it's been > possible to get into a situation where a pidns reaper is > , reparented to host pid 1, but never reaped. How to > reproduce this is documented at > > https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1168526 > (and see > https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1168526/comments/13) > In short, run repeated starts of a container whose init is > > Process.exit(0); > > sysrq-t when such a task is playing zombie shows: > > [ 131.132978] init x ffff88011fc14580 0 2084 2039 0x00000000 > [ 131.132978] ffff880116e89ea8 0000000000000002 ffff880116e89fd8 0000000000014580 > [ 131.132978] ffff880116e89fd8 0000000000014580 ffff8801172a0000 ffff8801172a0000 > [ 131.132978] ffff8801172a0630 ffff88011729fff0 ffff880116e14650 ffff88011729fff0 > [ 131.132978] Call Trace: > [ 131.132978] [] schedule+0x29/0x70 > [ 131.132978] [] do_exit+0x6e1/0xa40 > [ 131.132978] [] ? signal_wake_up_state+0x1e/0x30 > [ 131.132978] [] do_group_exit+0x3f/0xa0 > [ 131.132978] [] SyS_exit_group+0x14/0x20 > [ 131.132978] [] tracesys+0xe1/0xe6 > > Further debugging showed that every time this happened, zap_pid_ns_processes() > started with nr_hashed being 3, while we were expecting it to drop to 2. > Any time it didn't happen, nr_hashed was 1 or 2. So the reaper was > waiting for nr_hashed to become 2, but free_pid() only wakes the reaper > if nr_hashed hits 1. The issue is that when the task group leader of an init process exits before other tasks of the init process when the init process finally exits it will be a secondary task sleeping in zap_pid_ns_processes and waiting to wake up when the number of hashed pids drops to two. This case waits forever as free_pid only sends a wake up when the number of hashed pids drops to 1. To correct this the simple strategy of sending a possibly unncessary wake up when the number of hashed pids drops to 2 is adopted. Sending one extraneous wake up is relatively harmless, at worst we waste a little cpu time in the rare case when a pid namespace appropaches exiting. We can detect the case when the pid namespace drops to just two pids hashed race free in free_pid. Dereferencing pid_ns->child_reaper with the pidmap_lock held is safe without out the tasklist_lock because it is guaranteed that the detach_pid will be called on the child_reaper before it is freed and detach_pid calls __change_pid which calls free_pid which takes the pidmap_lock. __change_pid only calls free_pid if this is the last use of the pid. For a thread that is not the thread group leader the threads pid will only ever have one user because a threads pid is not allowed to be the pid of a process, of a process group or a session. For a thread that is a thread group leader all of the other threads of that process will be reaped before it is allowed for the thread group leader to be reaped ensuring there will only be one user of the threads pid as a process pid. Furthermore because the thread is the init process of a pid namespace all of the other processes in the pid namespace will have also been already freed leading to the fact that the pid will not be used as a session pid or a process group pid for any other running process. Acked-by: Serge Hallyn Tested-by: Serge Hallyn Reported-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/pid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/pid.c b/kernel/pid.c index 0db3e791a06d..0eb6d8e8b1da 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -264,6 +264,7 @@ void free_pid(struct pid *pid) struct pid_namespace *ns = upid->ns; hlist_del_rcu(&upid->pid_chain); switch(--ns->nr_hashed) { + case 2: case 1: /* When all that is left in the pid namespace * is the reaper wake up the reaper. The reaper From f608ebd760cb9b48a333700fdc10245e257d9fce Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2013 14:19:38 -0700 Subject: [PATCH 0837/1368] pidns: fix vfork() after unshare(CLONE_NEWPID) commit e79f525e99b04390ca4d2366309545a836c03bf1 upstream. Commit 8382fcac1b81 ("pidns: Outlaw thread creation after unshare(CLONE_NEWPID)") nacks CLONE_VM if the forking process unshared pid_ns, this obviously breaks vfork: int main(void) { assert(unshare(CLONE_NEWUSER | CLONE_NEWPID) == 0); assert(vfork() >= 0); _exit(0); return 0; } fails without this patch. Change this check to use CLONE_SIGHAND instead. This also forbids CLONE_THREAD automatically, and this is what the comment implies. We could probably even drop CLONE_SIGHAND and use CLONE_THREAD, but it would be safer to not do this. The current check denies CLONE_SIGHAND implicitely and there is no reason to change this. Eric said "CLONE_SIGHAND is fine. CLONE_THREAD would be even better. Having shared signal handling between two different pid namespaces is the case that we are fundamentally guarding against." Signed-off-by: Oleg Nesterov Reported-by: Colin Walters Acked-by: Andy Lutomirski Reviewed-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index ffbc0904794e..80d92e987f21 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1171,10 +1171,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, return ERR_PTR(-EINVAL); /* - * If the new process will be in a different pid namespace - * don't allow the creation of threads. + * If the new process will be in a different pid namespace don't + * allow it to share a thread group or signal handlers with the + * forking task. */ - if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && + if ((clone_flags & (CLONE_SIGHAND | CLONE_NEWPID)) && (task_active_pid_ns(current) != current->nsproxy->pid_ns)) return ERR_PTR(-EINVAL); From 3c46f72697d06f219222314b4372562973975995 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 11 Sep 2013 14:20:05 -0700 Subject: [PATCH 0838/1368] ocfs2: fix the end cluster offset of FIEMAP commit 28e8be31803b19d0d8f76216cb11b480b8a98bec upstream. Call fiemap ioctl(2) with given start offset as well as an desired mapping range should show extents if possible. However, we somehow figure out the end offset of mapping via 'mapping_end -= cpos' before iterating the extent records which would cause problems if the given fiemap length is too small to a cluster size, e.g, Cluster size 4096: debugfs.ocfs2 1.6.3 Block Size Bits: 12 Cluster Size Bits: 12 The extended fiemap test utility From David: https://gist.github.com/anonymous/6172331 # dd if=/dev/urandom of=/ocfs2/test_file bs=1M count=1000 # ./fiemap /ocfs2/test_file 4096 10 start: 4096, length: 10 File /ocfs2/test_file has 0 extents: # Logical Physical Length Flags ^^^^^ <-- No extent is shown In this case, at ocfs2_fiemap(): cpos == mapping_end == 1. Hence the loop of searching extent records was not executed at all. This patch remove the in question 'mapping_end -= cpos', and loops until the cpos is larger than the mapping_end as usual. # ./fiemap /ocfs2/test_file 4096 10 start: 4096, length: 10 File /ocfs2/test_file has 1 extents: # Logical Physical Length Flags 0: 0000000000000000 0000000056a01000 0000000006a00000 0000 Signed-off-by: Jie Liu Reported-by: David Weber Tested-by: David Weber Cc: Sunil Mushran Cc: Mark Fashen Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/extent_map.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2487116d0d33..846064726682 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, cpos = map_start >> osb->s_clustersize_bits; mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, map_start + map_len); - mapping_end -= cpos; is_last = 0; while (cpos < mapping_end && !is_last) { u32 fe_flags; From d96fa17975d6b1d1dd1264170ccdfabb254f71c2 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Wed, 11 Sep 2013 14:23:08 -0700 Subject: [PATCH 0839/1368] memcg: fix multiple large threshold notifications commit 2bff24a3707093c435ab3241c47dcdb5f16e432b upstream. A memory cgroup with (1) multiple threshold notifications and (2) at least one threshold >=2G was not reliable. Specifically the notifications would either not fire or would not fire in the proper order. The __mem_cgroup_threshold() signaling logic depends on keeping 64 bit thresholds in sorted order. mem_cgroup_usage_register_event() sorts them with compare_thresholds(), which returns the difference of two 64 bit thresholds as an int. If the difference is positive but has bit[31] set, then sort() treats the difference as negative and breaks sort order. This fix compares the two arbitrary 64 bit thresholds returning the classic -1, 0, 1 result. The test below sets two notifications (at 0x1000 and 0x81001000): cd /sys/fs/cgroup/memory mkdir x for x in 4096 2164264960; do cgroup_event_listener x/memory.usage_in_bytes $x | sed "s/^/$x listener:/" & done echo $$ > x/cgroup.procs anon_leaker 500M v3.11-rc7 fails to signal the 4096 event listener: Leaking... Done leaking pages. Patched v3.11-rc7 properly notifies: Leaking... 4096 listener:2013:8:31:14:13:36 Done leaking pages. The fixed bug is old. It appears to date back to the introduction of memcg threshold notifications in v2.6.34-rc1-116-g2e72b6347c94 "memcg: implement memory thresholds" Signed-off-by: Greg Thelen Acked-by: Michal Hocko Acked-by: Kirill A. Shutemov Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 82a187aea4c0..905ce72c8c4e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5584,7 +5584,13 @@ static int compare_thresholds(const void *a, const void *b) const struct mem_cgroup_threshold *_a = a; const struct mem_cgroup_threshold *_b = b; - return _a->threshold - _b->threshold; + if (_a->threshold > _b->threshold) + return 1; + + if (_a->threshold < _b->threshold) + return -1; + + return 0; } static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) From 8b89ae8a4914ee393c0db530735d933481272e97 Mon Sep 17 00:00:00 2001 From: Libin Date: Wed, 11 Sep 2013 14:20:38 -0700 Subject: [PATCH 0840/1368] mm/huge_memory.c: fix potential NULL pointer dereference commit a8f531ebc33052642b4bd7b812eedf397108ce64 upstream. In collapse_huge_page() there is a race window between releasing the mmap_sem read lock and taking the mmap_sem write lock, so find_vma() may return NULL. So check the return value to avoid NULL pointer dereference. collapse_huge_page khugepaged_alloc_page up_read(&mm->mmap_sem) down_write(&mm->mmap_sem) vma = find_vma(mm, address) Signed-off-by: Libin Acked-by: Kirill A. Shutemov Reviewed-by: Wanpeng Li Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 362c329b83fe..b92d0ce428b1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2286,6 +2286,8 @@ static void collapse_huge_page(struct mm_struct *mm, goto out; vma = find_vma(mm, address); + if (!vma) + goto out; hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (address < hstart || address + HPAGE_PMD_SIZE > hend) From 1ca91545961a92067cb8ad3ebc1558c8d1574456 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 25 Mar 2013 19:57:10 -0700 Subject: [PATCH 0841/1368] proc: Restrict mounting the proc filesystem commit aee1c13dd0f6c2fc56e0e492b349ee8ac655880f upstream. Don't allow mounting the proc filesystem unless the caller has CAP_SYS_ADMIN rights over the pid namespace. The principle here is if you create or have capabilities over it you can mount it, otherwise you get to live with what other people have mounted. Andy pointed out that this is needed to prevent users in a user namespace from remounting proc and specifying different hidepid and gid options on already existing proc mounts. Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/proc/root.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/root.c b/fs/proc/root.c index 41a6ea93f486..04ec276c7bab 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -110,7 +110,8 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ns = task_active_pid_ns(current); options = data; - if (!current_user_ns()->may_mount_proc) + if (!current_user_ns()->may_mount_proc || + !ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); } From 4b5fe51a9a8e6d4c74a6f2717bf25e4132c551d3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 25 Jul 2013 11:49:11 +0200 Subject: [PATCH 0842/1368] isofs: Refuse RW mount of the filesystem instead of making it RO commit 17b7f7cf58926844e1dd40f5eb5348d481deca6a upstream. Refuse RW mount of isofs filesystem. So far we just silently changed it to RO mount but when the media is writeable, block layer won't notice this change and thus will think device is used RW and will block eject button of the drive. That is unexpected by users because for non-writeable media eject button works just fine. Userspace mount(8) command handles this just fine and retries mounting with MS_RDONLY set so userspace shouldn't see any regression. Plus any tool mounting isofs is likely confronted with the case of read-only media where block layer already refuses to mount the filesystem without MS_RDONLY set so our behavior shouldn't be anything new for it. Reported-by: Hui Wang Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/isofs/inode.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d9b8aebdeb22..d3705490ff9c 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -125,8 +125,8 @@ static void destroy_inodecache(void) static int isofs_remount(struct super_block *sb, int *flags, char *data) { - /* we probably want a lot more here */ - *flags |= MS_RDONLY; + if (!(*flags & MS_RDONLY)) + return -EROFS; return 0; } @@ -779,15 +779,6 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) */ s->s_maxbytes = 0x80000000000LL; - /* - * The CDROM is read-only, has no nodes (devices) on it, and since - * all of the files appear to be owned by root, we really do not want - * to allow suid. (suid or devices will not show up unless we have - * Rock Ridge extensions) - */ - - s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; - /* Set this for reference. Its not currently used except on write which we don't have .. */ @@ -1546,6 +1537,9 @@ struct inode *isofs_iget(struct super_block *sb, static struct dentry *isofs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { + /* We don't support read-write mounts */ + if (!(flags & MS_RDONLY)) + return ERR_PTR(-EACCES); return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); } From 34db3c078aab40f22846b76c1a2c960c2a378f40 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 23 Jul 2013 20:01:23 +0200 Subject: [PATCH 0843/1368] amd64_edac: Fix single-channel setups commit f0a56c480196a98479760862468cc95879df3de0 upstream. It can happen that configurations are running in a single-channel mode even with a dual-channel memory controller, by, say, putting the DIMMs only on the one channel and leaving the other empty. This causes a problem in init_csrows which implicitly assumes that when the second channel is enabled, i.e. channel 1, the struct dimm hierarchy will be present. Which is not. So always allocate two channels unconditionally. This provides for the nice side effect that the data structures are initialized so some day, when memory hotplug is supported, it should just work out of the box when all of a sudden a second channel appears. Reported-and-tested-by: Roger Leigh Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/amd64_edac.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8b6a0343c220..8b3d90143514 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2470,8 +2470,15 @@ static int amd64_init_one_instance(struct pci_dev *F2) layers[0].size = pvt->csels[0].b_cnt; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = pvt->channel_count; + + /* + * Always allocate two channels since we can have setups with DIMMs on + * only one channel. Also, this simplifies handling later for the price + * of a couple of KBs tops. + */ + layers[1].size = 2; layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); if (!mci) goto err_siblings; From 579db19eca10b594acd9d4516814b599eb4486ec Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 12 Aug 2013 11:04:29 -0400 Subject: [PATCH 0844/1368] drm/edid: add quirk for Medion MD30217PG commit 118bdbd86b39dbb843155054021d2c59058f1e05 upstream. This LCD monitor (1280x1024 native) has a completely bogus detailed timing (640x350@70hz). User reports that 1280x1024@60 has waves so prefer 1280x1024@75. Manufacturer: MED Model: 7b8 Serial#: 99188 Year: 2005 Week: 5 EDID Version: 1.3 Analog Display Input, Input Voltage Level: 0.700/0.700 V Sync: Separate Max Image Size [cm]: horiz.: 34 vert.: 27 Gamma: 2.50 DPMS capabilities: Off; RGB/Color Display First detailed timing is preferred mode redX: 0.645 redY: 0.348 greenX: 0.280 greenY: 0.605 blueX: 0.142 blueY: 0.071 whiteX: 0.313 whiteY: 0.329 Supported established timings: 720x400@70Hz 640x480@60Hz 640x480@72Hz 640x480@75Hz 800x600@56Hz 800x600@60Hz 800x600@72Hz 800x600@75Hz 1024x768@60Hz 1024x768@70Hz 1024x768@75Hz 1280x1024@75Hz Manufacturer's mask: 0 Supported standard timings: Supported detailed timing: clock: 25.2 MHz Image Size: 337 x 270 mm h_active: 640 h_sync: 688 h_sync_end 784 h_blank_end 800 h_border: 0 v_active: 350 v_sync: 350 v_sync_end 352 v_blanking: 449 v_border: 0 Monitor name: MD30217PG Ranges: V min: 56 V max: 76 Hz, H min: 30 H max: 83 kHz, PixClock max 145 MHz Serial No: 501099188 EDID (in hex): 00ffffffffffff0034a4b80774830100 050f010368221b962a0c55a559479b24 125054afcf00310a0101010101018180 000000000000d60980a0205e63103060 0200510e1100001e000000fc004d4433 3032313750470a202020000000fd0038 4c1e530e000a202020202020000000ff 003530313039393138380a2020200078 Signed-off-by: Alex Deucher Reported-by: friedrich@mailstation.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 9e62bbedb5ad..0cb9b5d8e30a 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -125,6 +125,9 @@ static struct edid_quirk { /* ViewSonic VA2026w */ { "VSC", 5020, EDID_QUIRK_FORCE_REDUCED_BLANKING }, + + /* Medion MD 30217 PG */ + { "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 }, }; /* From 4fdaa3d47985338f80d7262de700533a9e630d29 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sat, 17 Aug 2013 18:46:00 +0200 Subject: [PATCH 0845/1368] um: Implement probe_kernel_read() commit f75b1b1bedfb498cc43a992ce4d7ed8df3b1e770 upstream. UML needs it's own probe_kernel_read() to handle kernel mode faults correctly. The implementation uses mincore() on the host side to detect whether a page is owned by the UML kernel process. This fixes also a possible crash when sysrq-t is used. Starting with 3.10 sysrq-t calls probe_kernel_read() to read details from the kernel workers. As kernel worker are completely async pointers may turn NULL while reading them. Signed-off-by: Richard Weinberger Cc: Cc: Signed-off-by: Greg Kroah-Hartman --- arch/um/include/shared/os.h | 1 + arch/um/kernel/Makefile | 2 +- arch/um/kernel/maccess.c | 24 +++++++++++++++++ arch/um/os-Linux/process.c | 52 +++++++++++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 arch/um/kernel/maccess.c diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 95feaa47a2fb..c70a234a3f8c 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -200,6 +200,7 @@ extern int os_unmap_memory(void *addr, int len); extern int os_drop_memory(void *addr, int length); extern int can_drop_memory(void); extern void os_flush_stdout(void); +extern int os_mincore(void *addr, unsigned long len); /* execvp.c */ extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index babe21826e3e..d8b78a03855c 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -13,7 +13,7 @@ clean-files := obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ physmem.o process.o ptrace.o reboot.o sigio.o \ signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \ - um_arch.o umid.o skas/ + um_arch.o umid.o maccess.o skas/ obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c new file mode 100644 index 000000000000..1f3d5c4910d1 --- /dev/null +++ b/arch/um/kernel/maccess.c @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2013 Richard Weinberger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +long probe_kernel_read(void *dst, const void *src, size_t size) +{ + void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); + + if ((unsigned long)src < PAGE_SIZE || size <= 0) + return -EFAULT; + + if (os_mincore(psrc, size + src - psrc) <= 0) + return -EFAULT; + + return __probe_kernel_read(dst, src, size); +} diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index b8f34c9e53ae..67b9c8f5a89e 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -232,6 +233,57 @@ int __init can_drop_memory(void) return ok; } +static int os_page_mincore(void *addr) +{ + char vec[2]; + int ret; + + ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); + if (ret < 0) { + if (errno == ENOMEM || errno == EINVAL) + return 0; + else + return -errno; + } + + return vec[0] & 1; +} + +int os_mincore(void *addr, unsigned long len) +{ + char *vec; + int ret, i; + + if (len <= UM_KERN_PAGE_SIZE) + return os_page_mincore(addr); + + vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); + if (!vec) + return -ENOMEM; + + ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); + if (ret < 0) { + if (errno == ENOMEM || errno == EINVAL) + ret = 0; + else + ret = -errno; + + goto out; + } + + for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) { + if (!(vec[i] & 1)) { + ret = 0; + goto out; + } + } + + ret = 1; +out: + free(vec); + return ret; +} + void init_new_thread_signals(void) { set_handler(SIGSEGV); From 2ab0ad6af3a87818ea3525914be6779fca833801 Mon Sep 17 00:00:00 2001 From: majianpeng Date: Tue, 16 Jul 2013 15:45:48 +0800 Subject: [PATCH 0846/1368] libceph: unregister request in __map_request failed and nofail == false commit 73d9f7eef3d98c3920e144797cc1894c6b005a1e upstream. For nofail == false request, if __map_request failed, the caller does cleanup work, like releasing the relative pages. It doesn't make any sense to retry this request. Signed-off-by: Jianpeng Ma Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3a246a6cab47..46ec7672cccc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2130,6 +2130,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, dout("osdc_start_request failed map, " " will retry %lld\n", req->r_tid); rc = 0; + } else { + __unregister_request(osdc, req); } goto out_unlock; } From fd5e2dea537bbf0bfb09f79a8b34c148bb502735 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 28 Aug 2013 17:17:29 -0700 Subject: [PATCH 0847/1368] libceph: use pg_num_mask instead of pgp_num_mask for pg.seed calc commit 9542cf0bf9b1a3adcc2ef271edbcbdba03abf345 upstream. Fix a typo that used the wrong bitmask for the pg.seed calculation. This is normally unnoticed because in most cases pg_num == pgp_num. It is, however, a bug that is easily corrected. Signed-off-by: Sage Weil Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- net/ceph/osdmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 603ddd92db19..dbd9a4792427 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1129,7 +1129,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, /* pg_temp? */ pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, - pool->pgp_num_mask); + pool->pg_num_mask); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; From 4f8e2fc10e249dcae05898f4807d9cc450b070bb Mon Sep 17 00:00:00 2001 From: majianpeng Date: Tue, 16 Jul 2013 19:36:21 +0800 Subject: [PATCH 0848/1368] ceph: Don't forget the 'up_read(&osdc->map_sem)' if met error. commit 494ddd11be3e2621096bb425eed2886f8e8446d4 upstream. Signed-off-by: Jianpeng Ma Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- fs/ceph/ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index e0b4ef31d3c8..a5ce62eb7806 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -196,8 +196,10 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, &dl.object_no, &dl.object_offset, &olen); - if (r < 0) + if (r < 0) { + up_read(&osdc->map_sem); return -EIO; + } dl.file_offset -= dl.object_offset; dl.object_size = ceph_file_layout_object_size(ci->i_layout); dl.block_size = ceph_file_layout_su(ci->i_layout); From be4c4b85002b3c20773c2c3d3e997bd1aedc1453 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 26 Aug 2013 17:55:38 -0700 Subject: [PATCH 0849/1368] rbd: fix I/O error propagation for reads commit 17c1cc1d9293a568a00545469078e29555cc7f39 upstream. When a request returns an error, the driver needs to report the entire extent of the request as completed. Writes already did this, since they always set xferred = length, but reads were skipping that step if an error other than -ENOENT occurred. Instead, rbd would end up passing 0 xferred to blk_end_request(), which would always report needing more data. This resulted in an assert failing when more data was required by the block layer, but all the object requests were done: [ 1868.719077] rbd: obj_request read result -108 xferred 0 [ 1868.719077] [ 1868.719518] end_request: I/O error, dev rbd1, sector 0 [ 1868.719739] [ 1868.719739] Assertion failure in rbd_img_obj_callback() at line 1736: [ 1868.719739] [ 1868.719739] rbd_assert(more ^ (which == img_request->obj_request_count)); Without this assert, reads that hit errors would hang forever, since the block layer considered them incomplete. Fixes: http://tracker.ceph.com/issues/5647 Signed-off-by: Josh Durgin Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index aff789d6fccd..8c7421af8f15 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1565,11 +1565,12 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) obj_request, obj_request->img_request, obj_request->result, xferred, length); /* - * ENOENT means a hole in the image. We zero-fill the - * entire length of the request. A short read also implies - * zero-fill to the end of the request. Either way we - * update the xferred count to indicate the whole request - * was satisfied. + * ENOENT means a hole in the image. We zero-fill the entire + * length of the request. A short read also implies zero-fill + * to the end of the request. An error requires the whole + * length of the request to be reported finished with an error + * to the block layer. In each case we update the xferred + * count to indicate the whole request was satisfied. */ rbd_assert(obj_request->type != OBJ_REQUEST_NODATA); if (obj_request->result == -ENOENT) { @@ -1578,14 +1579,13 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) else zero_pages(obj_request->pages, 0, length); obj_request->result = 0; - obj_request->xferred = length; } else if (xferred < length && !obj_request->result) { if (obj_request->type == OBJ_REQUEST_BIO) zero_bio_chain(obj_request->bio_list, xferred); else zero_pages(obj_request->pages, xferred, length); - obj_request->xferred = length; } + obj_request->xferred = length; obj_request_done_set(obj_request); } From c0da08882ef2c738edacd5e7fbca671e663f9951 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 24 Aug 2013 23:38:15 -0400 Subject: [PATCH 0850/1368] mmc: tmio_mmc_dma: fix PIO fallback on SDHI commit f936f9b67b7f8c2eae01dd303a0e90bd777c4679 upstream. I'm testing SH-Mobile SDHI driver in DMA mode with a new DMA controller using 'bonnie++' and getting DMA error after which the tmio_mmc_dma.c code falls back to PIO but all commands time out after that. It turned out that the fallback code calls tmio_mmc_enable_dma() with RX/TX channels already freed and pointers to them cleared, so that the function bails out early instead of clearing the DMA bit in the CTL_DMA_ENABLE register. The regression was introduced by commit 162f43e31c5a376ec16336e5d0ac973373d54c89 (mmc: tmio: fix a deadlock). Moving tmio_mmc_enable_dma() calls to the top of the PIO fallback code in tmio_mmc_start_dma_{rx|tx}() helps. Signed-off-by: Sergei Shtylyov Acked-by: Guennadi Liakhovetski Signed-off-by: Chris Ball Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/tmio_mmc_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index fff928604859..491e9ecc92c2 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -104,6 +104,7 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host) pio: if (!desc) { /* DMA failed, fall back to PIO */ + tmio_mmc_enable_dma(host, false); if (ret >= 0) ret = -EIO; host->chan_rx = NULL; @@ -116,7 +117,6 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host) } dev_warn(&host->pdev->dev, "DMA failed: %d, falling back to PIO\n", ret); - tmio_mmc_enable_dma(host, false); } dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__, @@ -185,6 +185,7 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host) pio: if (!desc) { /* DMA failed, fall back to PIO */ + tmio_mmc_enable_dma(host, false); if (ret >= 0) ret = -EIO; host->chan_tx = NULL; @@ -197,7 +198,6 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host) } dev_warn(&host->pdev->dev, "DMA failed: %d, falling back to PIO\n", ret); - tmio_mmc_enable_dma(host, false); } dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__, From 6830e9ab4b67cee41edd609fb7ebe14c66fca434 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 28 Aug 2013 21:24:17 +0100 Subject: [PATCH 0851/1368] of: Fix missing memory initialization on FDT unflattening commit 0640332e073be9207f0784df43595c0c39716e42 upstream. Any calls to dt_alloc() need to be zeroed. This is a temporary fix, but the allocation function itself needs to zero memory before returning it. This is a follow up to patch 9e4012752, "of: fdt: fix memory initialization for expanded DT" which fixed one call site but missed another. Signed-off-by: Grant Likely Acked-by: Wladislav Wiebe Signed-off-by: Greg Kroah-Hartman --- drivers/of/base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index a6f584a7f4a1..1d10b4ec6814 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1629,6 +1629,7 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) ap = dt_alloc(sizeof(*ap) + len + 1, 4); if (!ap) continue; + memset(ap, 0, sizeof(*ap) + len + 1); ap->alias = start; of_alias_add(ap, np, id, start, len); } From 497587f84f16a5775b91d0a1051930ae8997d02d Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 18 Jul 2013 01:17:02 -0700 Subject: [PATCH 0852/1368] mtd: nand: fix NAND_BUSWIDTH_AUTO for x16 devices commit 68e8078072e802e77134664f11d2ffbfbd2f8fbe upstream. The code for NAND_BUSWIDTH_AUTO is broken. According to Alexander: "I have a problem with attach NAND UBI in 16 bit mode. NAND works fine if I specify NAND_BUSWIDTH_16 option, but not working with NAND_BUSWIDTH_AUTO option. In second case NAND chip is identifyed with ONFI." See his report for the rest of the details: http://lists.infradead.org/pipermail/linux-mtd/2013-July/047515.html Anyway, the problem is that nand_set_defaults() is called twice, we intend it to reset the chip functions to their x16 buswidth verions if the buswidth changed from x8 to x16; however, nand_set_defaults() does exactly nothing if called a second time. Fix this by hacking nand_set_defaults() to reset the buswidth-dependent functions if they were set to the x8 version the first time. Note that this does not do anything to reset from x16 to x8, but that's not the supported use case for NAND_BUSWIDTH_AUTO anyway. Signed-off-by: Brian Norris Reported-by: Alexander Shiyan Tested-by: Alexander Shiyan Cc: Matthieu Castet Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index dfcd0a565c5b..fb8c4deaac27 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2793,7 +2793,9 @@ static void nand_set_defaults(struct nand_chip *chip, int busw) if (!chip->select_chip) chip->select_chip = nand_select_chip; - if (!chip->read_byte) + + /* If called twice, pointers that depend on busw may need to be reset */ + if (!chip->read_byte || chip->read_byte == nand_read_byte) chip->read_byte = busw ? nand_read_byte16 : nand_read_byte; if (!chip->read_word) chip->read_word = nand_read_word; @@ -2801,9 +2803,9 @@ static void nand_set_defaults(struct nand_chip *chip, int busw) chip->block_bad = nand_block_bad; if (!chip->block_markbad) chip->block_markbad = nand_default_block_markbad; - if (!chip->write_buf) + if (!chip->write_buf || chip->write_buf == nand_write_buf) chip->write_buf = busw ? nand_write_buf16 : nand_write_buf; - if (!chip->read_buf) + if (!chip->read_buf || chip->read_buf == nand_read_buf) chip->read_buf = busw ? nand_read_buf16 : nand_read_buf; if (!chip->scan_bbt) chip->scan_bbt = nand_default_bbt; From d662986b4f282ed5d9e42e6dbd5e6dbb2b5e7d0b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 29 Aug 2013 12:21:01 +0100 Subject: [PATCH 0853/1368] clk: wm831x: Initialise wm831x pointer on init commit 08442ce993deeb15a070c14cc3f3459e87d111e0 upstream. Otherwise any attempt to interact with the hardware will crash. This is what happens when drivers get written blind. Signed-off-by: Mark Brown Signed-off-by: Mike Turquette Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-wm831x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/clk-wm831x.c b/drivers/clk/clk-wm831x.c index 16ed06808554..917a3ab482f9 100644 --- a/drivers/clk/clk-wm831x.c +++ b/drivers/clk/clk-wm831x.c @@ -360,6 +360,8 @@ static int wm831x_clk_probe(struct platform_device *pdev) if (!clkdata) return -ENOMEM; + clkdata->wm831x = wm831x; + /* XTAL_ENA can only be set via OTP/InstantConfig so just read once */ ret = wm831x_reg_read(wm831x, WM831X_CLOCK_CONTROL_2); if (ret < 0) { From 3002e63bec05766a5af794b09d9cec34fda60519 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Mon, 12 Aug 2013 20:39:30 +0400 Subject: [PATCH 0854/1368] fuse: postpone end_page_writeback() in fuse_writepage_locked() commit 4a4ac4eba1010ef9a804569058ab29e3450c0315 upstream. The patch fixes a race between ftruncate(2), mmap-ed write and write(2): 1) An user makes a page dirty via mmap-ed write. 2) The user performs shrinking truncate(2) intended to purge the page. 3) Before fuse_do_setattr calls truncate_pagecache, the page goes to writeback. fuse_writepage_locked fills FUSE_WRITE request and releases the original page by end_page_writeback. 4) fuse_do_setattr() completes and successfully returns. Since now, i_mutex is free. 5) Ordinary write(2) extends i_size back to cover the page. Note that fuse_send_write_pages do wait for fuse writeback, but for another page->index. 6) fuse_writepage_locked proceeds by queueing FUSE_WRITE request. fuse_send_writepage is supposed to crop inarg->size of the request, but it doesn't because i_size has already been extended back. Moving end_page_writeback to the end of fuse_writepage_locked fixes the race because now the fact that truncate_pagecache is successfully returned infers that fuse_writepage_locked has already called end_page_writeback. And this, in turn, infers that fuse_flush_writepages has already called fuse_send_writepage, and the latter used valid (shrunk) i_size. write(2) could not extend it because of i_mutex held by ftruncate(2). Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 35f281033142..4943f2c9943d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1530,7 +1530,6 @@ static int fuse_writepage_locked(struct page *page) inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); - end_page_writeback(page); spin_lock(&fc->lock); list_add(&req->writepages_entry, &fi->writepages); @@ -1538,6 +1537,8 @@ static int fuse_writepage_locked(struct page *page) fuse_flush_writepages(inode); spin_unlock(&fc->lock); + end_page_writeback(page); + return 0; err_free: From eb97a45d12c9517b7846ebe3c97ee554b777ad34 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 20 Aug 2013 02:21:07 -0400 Subject: [PATCH 0855/1368] fuse: invalidate inode attributes on xattr modification commit d331a415aef98717393dda0be69b7947da08eba3 upstream. Calls like setxattr and removexattr result in updation of ctime. Therefore invalidate inode attributes to force a refresh. Signed-off-by: Anand Avati Reviewed-by: Brian Foster Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5b1274699b08..e1536f9ae809 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1753,6 +1753,8 @@ static int fuse_setxattr(struct dentry *entry, const char *name, fc->no_setxattr = 1; err = -EOPNOTSUPP; } + if (!err) + fuse_invalidate_attr(inode); return err; } @@ -1882,6 +1884,8 @@ static int fuse_removexattr(struct dentry *entry, const char *name) fc->no_removexattr = 1; err = -EOPNOTSUPP; } + if (!err) + fuse_invalidate_attr(inode); return err; } From 4e208303119d234347b1f4337b84d47306e73811 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 30 Aug 2013 17:06:04 +0400 Subject: [PATCH 0856/1368] fuse: hotfix truncate_pagecache() issue commit 06a7c3c2781409af95000c60a5df743fd4e2f8b4 upstream. The way how fuse calls truncate_pagecache() from fuse_change_attributes() is completely wrong. Because, w/o i_mutex held, we never sure whether 'oldsize' and 'attr->size' are valid by the time of execution of truncate_pagecache(inode, oldsize, attr->size). In fact, as soon as we released fc->lock in the middle of fuse_change_attributes(), we completely loose control of actions which may happen with given inode until we reach truncate_pagecache. The list of potentially dangerous actions includes mmap-ed reads and writes, ftruncate(2) and write(2) extending file size. The typical outcome of doing truncate_pagecache() with outdated arguments is data corruption from user point of view. This is (in some sense) acceptable in cases when the issue is triggered by a change of the file on the server (i.e. externally wrt fuse operation), but it is absolutely intolerable in scenarios when a single fuse client modifies a file without any external intervention. A real life case I discovered by fsx-linux looked like this: 1. Shrinking ftruncate(2) comes to fuse_do_setattr(). The latter sends FUSE_SETATTR to the server synchronously, but before getting fc->lock ... 2. fuse_dentry_revalidate() is asynchronously called. It sends FUSE_LOOKUP to the server synchronously, then calls fuse_change_attributes(). The latter updates i_size, releases fc->lock, but before comparing oldsize vs attr->size.. 3. fuse_do_setattr() from the first step proceeds by acquiring fc->lock and updating attributes and i_size, but now oldsize is equal to outarg.attr.size because i_size has just been updated (step 2). Hence, fuse_do_setattr() returns w/o calling truncate_pagecache(). 4. As soon as ftruncate(2) completes, the user extends file size by write(2) making a hole in the middle of file, then reads data from the hole either by read(2) or mmap-ed read. The user expects to get zero data from the hole, but gets stale data because truncate_pagecache() is not executed yet. The scenario above illustrates one side of the problem: not truncating the page cache even though we should. Another side corresponds to truncating page cache too late, when the state of inode changed significantly. Theoretically, the following is possible: 1. As in the previous scenario fuse_dentry_revalidate() discovered that i_size changed (due to our own fuse_do_setattr()) and is going to call truncate_pagecache() for some 'new_size' it believes valid right now. But by the time that particular truncate_pagecache() is called ... 2. fuse_do_setattr() returns (either having called truncate_pagecache() or not -- it doesn't matter). 3. The file is extended either by write(2) or ftruncate(2) or fallocate(2). 4. mmap-ed write makes a page in the extended region dirty. The result will be the lost of data user wrote on the fourth step. The patch is a hotfix resolving the issue in a simplistic way: let's skip dangerous i_size update and truncate_pagecache if an operation changing file size is in progress. This simplistic approach looks correct for the cases w/o external changes. And to handle them properly, more sophisticated and intrusive techniques (e.g. NFS-like one) would be required. I'd like to postpone it until the issue is well discussed on the mailing list(s). Changed in v2: - improved patch description to cover both sides of the issue. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 7 ++++++- fs/fuse/file.c | 8 +++++++- fs/fuse/fuse_i.h | 2 ++ fs/fuse/inode.c | 3 ++- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e1536f9ae809..a30c60d5ce4f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1594,6 +1594,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, struct file *file) { struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; @@ -1621,8 +1622,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, if (IS_ERR(req)) return PTR_ERR(req); - if (is_truncate) + if (is_truncate) { fuse_set_nowrite(inode); + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + } memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); @@ -1684,12 +1687,14 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, invalidate_inode_pages2(inode->i_mapping); } + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return 0; error: if (is_truncate) fuse_release_nowrite(inode); + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return err; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4943f2c9943d..473e8453a7df 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -630,7 +630,8 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fc->lock); - if (attr_ver == fi->attr_version && size < inode->i_size) { + if (attr_ver == fi->attr_version && size < inode->i_size && + !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { fi->attr_version = ++fc->attr_version; i_size_write(inode, size); } @@ -1033,12 +1034,16 @@ static ssize_t fuse_perform_write(struct file *file, { struct inode *inode = mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); int err = 0; ssize_t res = 0; if (is_bad_inode(inode)) return -EIO; + if (inode->i_size < pos + iov_iter_count(ii)) + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + do { struct fuse_req *req; ssize_t count; @@ -1074,6 +1079,7 @@ static ssize_t fuse_perform_write(struct file *file, if (res > 0) fuse_write_update_size(inode, pos); + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); fuse_invalidate_attr(inode); return res > 0 ? res : err; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fde7249a3a96..5ced199b50bb 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -115,6 +115,8 @@ struct fuse_inode { enum { /** Advise readdirplus */ FUSE_I_ADVISE_RDPLUS, + /** An operation changing file size is in progress */ + FUSE_I_SIZE_UNSTABLE, }; struct fuse_conn; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde14a08..b5718516825b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -201,7 +201,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, struct timespec old_mtime; spin_lock(&fc->lock); - if (attr_version != 0 && fi->attr_version > attr_version) { + if ((attr_version != 0 && fi->attr_version > attr_version) || + test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { spin_unlock(&fc->lock); return; } From e5362560a01dd6fac5922434e16b16a1accea504 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 3 Sep 2013 14:28:38 +0200 Subject: [PATCH 0857/1368] fuse: readdir: check for slash in names commit efeb9e60d48f7778fdcad4a0f3ad9ea9b19e5dfd upstream. Userspace can add names containing a slash character to the directory listing. Don't allow this as it could cause all sorts of trouble. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index a30c60d5ce4f..e67b13de2ebc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1175,6 +1175,8 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, return -EIO; if (reclen > nbytes) break; + if (memchr(dirent->name, '/', dirent->namelen) != NULL) + return -EIO; over = filldir(dstbuf, dirent->name, dirent->namelen, file->f_pos, dirent->ino, dirent->type); @@ -1323,6 +1325,8 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, return -EIO; if (reclen > nbytes) break; + if (memchr(dirent->name, '/', dirent->namelen) != NULL) + return -EIO; if (!over) { /* We fill entries into dstbuf only as much as From cff43fc8785eb4f8b3fa5a1030890fd8cad0cbdd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 26 Sep 2013 17:18:49 -0700 Subject: [PATCH 0858/1368] Linux 3.10.13 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index afe001e3b2d6..25d38b79b3e0 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 12 +SUBLEVEL = 13 EXTRAVERSION = NAME = TOSSUG Baby Fish From 20aab0887ba6b06584fea3a8b46268b5662a1149 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:14 +0100 Subject: [PATCH 0859/1368] arm64: unify smp_psci.c and psci.c The functions in psci.c are only used from smp_psci.c, and smp_psci cannot function without psci.c. Additionally psci.c is built when !SMP, where it's expected that cpu_suspend may be useful. This patch unifies the two files, removing pointless duplication and paving the way for PSCI support in UP systems. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 00ef54bb97389bfe8894272f48496f4191aae946) Signed-off-by: Mark Brown --- arch/arm64/include/asm/psci.h | 19 ------------ arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/psci.c | 54 ++++++++++++++++++++++++++++++++++- arch/arm64/kernel/smp_psci.c | 53 ---------------------------------- 4 files changed, 54 insertions(+), 74 deletions(-) delete mode 100644 arch/arm64/kernel/smp_psci.c diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h index 0604237ecd99..e5312ea0ec1a 100644 --- a/arch/arm64/include/asm/psci.h +++ b/arch/arm64/include/asm/psci.h @@ -14,25 +14,6 @@ #ifndef __ASM_PSCI_H #define __ASM_PSCI_H -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 - -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); -}; - -extern struct psci_operations psci_ops; - int psci_init(void); #endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564961d4..75a90c15b30b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,7 +14,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o +arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 14f73c445ff5..368b78788cb5 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -17,12 +17,31 @@ #include #include +#include #include #include #include +#include -struct psci_operations psci_ops; +#define PSCI_POWER_STATE_TYPE_STANDBY 0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +struct psci_power_state { + u16 id; + u8 type; + u8 affinity_level; +}; + +struct psci_operations { + int (*cpu_suspend)(struct psci_power_state state, + unsigned long entry_point); + int (*cpu_off)(struct psci_power_state state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); + int (*migrate)(unsigned long cpuid); +}; + +static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); @@ -209,3 +228,36 @@ int __init psci_init(void) of_node_put(np); return err; } + +#ifdef CONFIG_SMP + +static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) +{ + return 0; +} + +static int __init smp_psci_prepare_cpu(int cpu) +{ + int err; + + if (!psci_ops.cpu_on) { + pr_err("no cpu_on method, not booting CPU%d\n", cpu); + return -ENODEV; + } + + err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); + if (err) { + pr_err("failed to boot CPU%d (%d)\n", cpu, err); + return err; + } + + return 0; +} + +const struct smp_enable_ops smp_psci_ops __initconst = { + .name = "psci", + .init_cpu = smp_psci_init_cpu, + .prepare_cpu = smp_psci_prepare_cpu, +}; + +#endif diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c deleted file mode 100644 index 0c533301be77..000000000000 --- a/arch/arm64/kernel/smp_psci.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * PSCI SMP initialisation - * - * Copyright (C) 2013 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include -#include - -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) -{ - return 0; -} - -static int __init smp_psci_prepare_cpu(int cpu) -{ - int err; - - if (!psci_ops.cpu_on) { - pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu); - return -ENODEV; - } - - err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); - if (err) { - pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); - return err; - } - - return 0; -} - -const struct smp_enable_ops smp_psci_ops __initconst = { - .name = "psci", - .init_cpu = smp_psci_init_cpu, - .prepare_cpu = smp_psci_prepare_cpu, -}; From eee896b4ad47791576e0becaffe6a7b5e73b1bab Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:15 +0100 Subject: [PATCH 0860/1368] arm64: reorganise smp_enable_ops For hotplug support, we're going to want a place to store operations that do more than bring CPUs online, and it makes sense to group these with our current smp_enable_ops. For cpuidle support, we'll want to group additional functions, and we may want them even for UP kernels. This patch renames smp_enable_ops to the more general cpu_operations, and pulls the definitions out of smp code such that they can be used in UP kernels. While we're at it, fix up instances of the cpu parameter to be an unsigned int, drop the init markings and rename the *_cpu functions to cpu_* to reduce future churn when cpu_operations is extended. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit cd1aebf5277a3a154a9e4c0ea4b3acabb62e5cab) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cpu_ops.h | 33 +++++++++++++++++++++ arch/arm64/include/asm/smp.h | 11 ------- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/cpu_ops.c | 47 ++++++++++++++++++++++++++++++ arch/arm64/kernel/psci.c | 11 +++---- arch/arm64/kernel/smp.c | 39 ++++++------------------- arch/arm64/kernel/smp_spin_table.c | 11 +++---- 7 files changed, 102 insertions(+), 52 deletions(-) create mode 100644 arch/arm64/include/asm/cpu_ops.h create mode 100644 arch/arm64/kernel/cpu_ops.c diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h new file mode 100644 index 000000000000..3c60c8d1d928 --- /dev/null +++ b/arch/arm64/include/asm/cpu_ops.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_CPU_OPS_H +#define __ASM_CPU_OPS_H + +#include +#include + +struct device_node; + +struct cpu_operations { + const char *name; + int (*cpu_init)(struct device_node *, unsigned int); + int (*cpu_prepare)(unsigned int); +}; + +extern const struct cpu_operations *cpu_ops[NR_CPUS]; +extern const struct cpu_operations * __init cpu_get_ops(const char *name); + +#endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 4b8023c5d146..7e34295f78e3 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -66,15 +66,4 @@ extern volatile unsigned long secondary_holding_pen_release; extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -struct device_node; - -struct smp_enable_ops { - const char *name; - int (*init_cpu)(struct device_node *, int); - int (*prepare_cpu)(int); -}; - -extern const struct smp_enable_ops smp_spin_table_ops; -extern const struct smp_enable_ops smp_psci_ops; - #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 75a90c15b30b..5ba2fd43a75b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -9,7 +9,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o + hyp-stub.o psci.o cpu_ops.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c new file mode 100644 index 000000000000..e2652aadd9ea --- /dev/null +++ b/arch/arm64/kernel/cpu_ops.c @@ -0,0 +1,47 @@ +/* + * CPU kernel entry/exit control + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP + &smp_spin_table_ops, + &cpu_psci_ops, +#endif + NULL, +}; + +const struct cpu_operations * __init cpu_get_ops(const char *name) +{ + const struct cpu_operations **ops = supported_cpu_ops; + + while (*ops) { + if (!strcmp(name, (*ops)->name)) + return *ops; + + ops++; + } + + return NULL; +} diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 368b78788cb5..ccec2ca67755 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -231,12 +232,12 @@ int __init psci_init(void) #ifdef CONFIG_SMP -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) { return 0; } -static int __init smp_psci_prepare_cpu(int cpu) +static int __init cpu_psci_cpu_prepare(unsigned int cpu) { int err; @@ -254,10 +255,10 @@ static int __init smp_psci_prepare_cpu(int cpu) return 0; } -const struct smp_enable_ops smp_psci_ops __initconst = { +const struct cpu_operations cpu_psci_ops = { .name = "psci", - .init_cpu = smp_psci_init_cpu, - .prepare_cpu = smp_psci_prepare_cpu, + .cpu_init = cpu_psci_cpu_init, + .cpu_prepare = cpu_psci_cpu_prepare, }; #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5d54e3717bf8..96b14a1d5b02 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -235,28 +236,6 @@ void __init smp_prepare_boot_cpu(void) static void (*smp_cross_call)(const struct cpumask *, unsigned int); -static const struct smp_enable_ops *enable_ops[] __initconst = { - &smp_spin_table_ops, - &smp_psci_ops, - NULL, -}; - -static const struct smp_enable_ops *smp_enable_ops[NR_CPUS]; - -static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name) -{ - const struct smp_enable_ops **ops = enable_ops; - - while (*ops) { - if (!strcmp(name, (*ops)->name)) - return *ops; - - ops++; - } - - return NULL; -} - /* * Enumerate the possible CPU set from the device tree and build the * cpu logical map array containing MPIDR values related to logical @@ -266,7 +245,7 @@ void __init smp_init_cpus(void) { const char *enable_method; struct device_node *dn = NULL; - int i, cpu = 1; + unsigned int i, cpu = 1; bool bootcpu_valid = false; while ((dn = of_find_node_by_type(dn, "cpu"))) { @@ -345,15 +324,15 @@ void __init smp_init_cpus(void) goto next; } - smp_enable_ops[cpu] = smp_get_enable_ops(enable_method); + cpu_ops[cpu] = cpu_get_ops(enable_method); - if (!smp_enable_ops[cpu]) { + if (!cpu_ops[cpu]) { pr_err("%s: invalid enable-method property: %s\n", dn->full_name, enable_method); goto next; } - if (smp_enable_ops[cpu]->init_cpu(dn, cpu)) + if (cpu_ops[cpu]->cpu_init(dn, cpu)) goto next; pr_debug("cpu logical map 0x%llx\n", hwid); @@ -383,8 +362,8 @@ void __init smp_init_cpus(void) void __init smp_prepare_cpus(unsigned int max_cpus) { - int cpu, err; - unsigned int ncores = num_possible_cpus(); + int err; + unsigned int cpu, ncores = num_possible_cpus(); /* * are we trying to boot more cores than exist? @@ -411,10 +390,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (cpu == smp_processor_id()) continue; - if (!smp_enable_ops[cpu]) + if (!cpu_ops[cpu]) continue; - err = smp_enable_ops[cpu]->prepare_cpu(cpu); + err = cpu_ops[cpu]->cpu_prepare(cpu); if (err) continue; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 7c35fa682f76..a8b76e4eccff 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -21,10 +21,11 @@ #include #include +#include static phys_addr_t cpu_release_addr[NR_CPUS]; -static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) +static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) { /* * Determine the address from which the CPU is polling. @@ -40,7 +41,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) return 0; } -static int __init smp_spin_table_prepare_cpu(int cpu) +static int smp_spin_table_cpu_prepare(unsigned int cpu) { void **release_addr; @@ -59,8 +60,8 @@ static int __init smp_spin_table_prepare_cpu(int cpu) return 0; } -const struct smp_enable_ops smp_spin_table_ops __initconst = { +const struct cpu_operations smp_spin_table_ops = { .name = "spin-table", - .init_cpu = smp_spin_table_init_cpu, - .prepare_cpu = smp_spin_table_prepare_cpu, + .cpu_init = smp_spin_table_cpu_init, + .cpu_prepare = smp_spin_table_cpu_prepare, }; From 92201a63ef9b5a692a3c273ea8964aa3ad83a8f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:16 +0100 Subject: [PATCH 0861/1368] arm64: factor out spin-table boot method The arm64 kernel has an internal holding pen, which is necessary for some systems where we can't bring CPUs online individually and must hold multiple CPUs in a safe area until the kernel is able to handle them. The current SMP infrastructure for arm64 is closely coupled to this holding pen, and alternative boot methods must launch CPUs into the pen, where they sit before they are launched into the kernel proper. With PSCI (and possibly other future boot methods), we can bring CPUs online individually, and need not perform the secondary_holding_pen dance. Instead, this patch factors the holding pen management code out to the spin-table boot method code, as it is the only boot method requiring the pen. A new entry point for secondaries, secondary_entry is added for other boot methods to use, which bypasses the holding pen and its associated overhead when bringing CPUs online. The smp.pen.text section is also removed, as the pen can live in head.text without problem. The cpu_operations structure is extended with two new functions, cpu_boot and cpu_postboot, for bringing a cpu into the kernel and performing any post-boot cleanup required by a bootmethod (e.g. resetting the secondary_holding_pen_release to INVALID_HWID). Documentation is added for cpu_operations. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 652af899799354049b273af897b798b8f03fdd88) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cpu_ops.h | 16 +++++++ arch/arm64/include/asm/smp.h | 3 +- arch/arm64/kernel/head.S | 12 ++++- arch/arm64/kernel/psci.c | 18 +++---- arch/arm64/kernel/smp.c | 65 ++------------------------ arch/arm64/kernel/smp_spin_table.c | 75 ++++++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 1 - 7 files changed, 117 insertions(+), 73 deletions(-) diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 3c60c8d1d928..67bc4fd83798 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -21,10 +21,26 @@ struct device_node; +/** + * struct cpu_operations - Callback operations for hotplugging CPUs. + * + * @name: Name of the property as appears in a devicetree cpu node's + * enable-method property. + * @cpu_init: Reads any data necessary for a specific enable-method from the + * devicetree, for a given cpu node and proposed logical id. + * @cpu_prepare: Early one-time preparation step for a cpu. If there is a + * mechanism for doing so, tests whether it is possible to boot + * the given CPU. + * @cpu_boot: Boots a cpu into the kernel. + * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * synchronisation. Called from the cpu being booted. + */ struct cpu_operations { const char *name; int (*cpu_init)(struct device_node *, unsigned int); int (*cpu_prepare)(unsigned int); + int (*cpu_boot)(unsigned int); + void (*cpu_postboot)(void); }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 7e34295f78e3..d64187ce69a2 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -60,8 +60,7 @@ struct secondary_data { void *stack; }; extern struct secondary_data secondary_data; -extern void secondary_holding_pen(void); -extern volatile unsigned long secondary_holding_pen_release; +extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 53dcae49e729..3532ca613718 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -217,7 +217,6 @@ ENTRY(__boot_cpu_mode) .quad PAGE_OFFSET #ifdef CONFIG_SMP - .pushsection .smp.pen.text, "ax" .align 3 1: .quad . .quad secondary_holding_pen_release @@ -242,7 +241,16 @@ pen: ldr x4, [x3] wfe b pen ENDPROC(secondary_holding_pen) - .popsection + + /* + * Secondary entry point that jumps straight into the kernel. Only to + * be used where CPUs are brought online dynamically by the kernel. + */ +ENTRY(secondary_entry) + bl __calc_phys_offset // x2=phys offset + bl el2_setup // Drop to EL1 + b secondary_startup +ENDPROC(secondary_entry) ENTRY(secondary_startup) /* diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ccec2ca67755..fb56b6158344 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -239,26 +239,28 @@ static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) static int __init cpu_psci_cpu_prepare(unsigned int cpu) { - int err; - if (!psci_ops.cpu_on) { pr_err("no cpu_on method, not booting CPU%d\n", cpu); return -ENODEV; } - err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); - if (err) { - pr_err("failed to boot CPU%d (%d)\n", cpu, err); - return err; - } - return 0; } +static int cpu_psci_cpu_boot(unsigned int cpu) +{ + int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); + if (err) + pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); + + return err; +} + const struct cpu_operations cpu_psci_ops = { .name = "psci", .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, + .cpu_boot = cpu_psci_cpu_boot, }; #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 96b14a1d5b02..6e9779c878ad 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -55,7 +55,6 @@ * where to place its SVC stack */ struct secondary_data secondary_data; -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; enum ipi_msg_type { IPI_RESCHEDULE, @@ -64,61 +63,16 @@ enum ipi_msg_type { IPI_CPU_STOP, }; -static DEFINE_RAW_SPINLOCK(boot_lock); - -/* - * Write secondary_holding_pen_release in a way that is guaranteed to be - * visible to all observers, irrespective of whether they're taking part - * in coherency or not. This is necessary for the hotplug code to work - * reliably. - */ -static void __cpuinit write_pen_release(u64 val) -{ - void *start = (void *)&secondary_holding_pen_release; - unsigned long size = sizeof(secondary_holding_pen_release); - - secondary_holding_pen_release = val; - __flush_dcache_area(start, size); -} - /* * Boot a secondary CPU, and assign it the specified idle task. * This also gives us the initial stack to use for this CPU. */ static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) { - unsigned long timeout; + if (cpu_ops[cpu]->cpu_boot) + return cpu_ops[cpu]->cpu_boot(cpu); - /* - * Set synchronisation state between this boot processor - * and the secondary one - */ - raw_spin_lock(&boot_lock); - - /* - * Update the pen release flag. - */ - write_pen_release(cpu_logical_map(cpu)); - - /* - * Send an event, causing the secondaries to read pen_release. - */ - sev(); - - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (secondary_holding_pen_release == INVALID_HWID) - break; - udelay(10); - } - - /* - * Now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - raw_spin_unlock(&boot_lock); - - return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; + return -EOPNOTSUPP; } static DECLARE_COMPLETION(cpu_running); @@ -188,17 +142,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) preempt_disable(); trace_hardirqs_off(); - /* - * Let the primary processor know we're out of the - * pen, then head off into the C entry point - */ - write_pen_release(INVALID_HWID); - - /* - * Synchronise with the boot thread. - */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + if (cpu_ops[cpu]->cpu_postboot) + cpu_ops[cpu]->cpu_postboot(); /* * Enable local interrupts. diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index a8b76e4eccff..27f08367a6e7 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -16,14 +16,37 @@ * along with this program. If not, see . */ +#include #include #include #include #include #include +#include +#include + +extern void secondary_holding_pen(void); +volatile unsigned long secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; +static DEFINE_RAW_SPINLOCK(boot_lock); + +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not. This is necessary for the hotplug code to work + * reliably. + */ +static void write_pen_release(u64 val) +{ + void *start = (void *)&secondary_holding_pen_release; + unsigned long size = sizeof(secondary_holding_pen_release); + + secondary_holding_pen_release = val; + __flush_dcache_area(start, size); +} + static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) { @@ -60,8 +83,60 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) return 0; } +static int smp_spin_table_cpu_boot(unsigned int cpu) +{ + unsigned long timeout; + + /* + * Set synchronisation state between this boot processor + * and the secondary one + */ + raw_spin_lock(&boot_lock); + + /* + * Update the pen release flag. + */ + write_pen_release(cpu_logical_map(cpu)); + + /* + * Send an event, causing the secondaries to read pen_release. + */ + sev(); + + timeout = jiffies + (1 * HZ); + while (time_before(jiffies, timeout)) { + if (secondary_holding_pen_release == INVALID_HWID) + break; + udelay(10); + } + + /* + * Now the secondary core is starting up let it run its + * calibrations, then wait for it to finish + */ + raw_spin_unlock(&boot_lock); + + return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; +} + +void smp_spin_table_cpu_postboot(void) +{ + /* + * Let the primary processor know we're out of the pen. + */ + write_pen_release(INVALID_HWID); + + /* + * Synchronise with the boot thread. + */ + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); +} + const struct cpu_operations smp_spin_table_ops = { .name = "spin-table", .cpu_init = smp_spin_table_cpu_init, .cpu_prepare = smp_spin_table_cpu_prepare, + .cpu_boot = smp_spin_table_cpu_boot, + .cpu_postboot = smp_spin_table_cpu_postboot, }; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be8b016..2c8a95b539ce 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -41,7 +41,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - *(.smp.pen.text) __exception_text_start = .; *(.exception.text) __exception_text_end = .; From 5ad709f5d9cb351caf1be770d03f420bd88bf248 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:17 +0100 Subject: [PATCH 0862/1368] arm64: read enable-method for CPU0 With the advent of CPU_HOTPLUG, the enable-method property for CPU0 may tells us something useful (i.e. how to hotplug it back on), so we must read it along with all the enable-method for all the other CPUs. Even on UP the enable-method may tell us useful information (e.g. if a core has some mechanism that might be usable for cpuidle), so we should always read it. This patch factors out the reading of the enable method, and ensures that CPU0's enable method is read regardless of whether the kernel is built with SMP support. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit e8765b265a69c83504afc6901d6e137b1811d1f0) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cpu_ops.h | 3 +- arch/arm64/kernel/cpu_ops.c | 54 +++++++++++++++++++++++++++++++- arch/arm64/kernel/setup.c | 2 ++ arch/arm64/kernel/smp.c | 18 +---------- 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 67bc4fd83798..1a98dbfc4a5f 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -44,6 +44,7 @@ struct cpu_operations { }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; -extern const struct cpu_operations * __init cpu_get_ops(const char *name); +extern int __init cpu_read_ops(struct device_node *dn, int cpu); +extern void __init cpu_read_bootcpu_ops(void); #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index e2652aadd9ea..aa0c9e78dbe1 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -17,6 +17,9 @@ */ #include +#include +#include +#include #include extern const struct cpu_operations smp_spin_table_ops; @@ -32,7 +35,7 @@ static const struct cpu_operations *supported_cpu_ops[] __initconst = { NULL, }; -const struct cpu_operations * __init cpu_get_ops(const char *name) +static const struct cpu_operations * __init cpu_get_ops(const char *name) { const struct cpu_operations **ops = supported_cpu_ops; @@ -45,3 +48,52 @@ const struct cpu_operations * __init cpu_get_ops(const char *name) return NULL; } + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. + */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) { + /* + * The boot CPU may not have an enable method (e.g. when + * spin-table is used for secondaries). Don't warn spuriously. + */ + if (cpu != 0) + pr_err("%s: missing enable-method property\n", + dn->full_name); + return -ENOENT; + } + + cpu_ops[cpu] = cpu_get_ops(enable_method); + if (!cpu_ops[cpu]) { + pr_err("%s: invalid enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ + struct device_node *dn = NULL; + u64 mpidr = cpu_logical_map(0); + + while ((dn = of_find_node_by_type(dn, "cpu"))) { + u64 hwid; + const __be32 *prop; + + prop = of_get_property(dn, "reg", NULL); + if (!prop) + continue; + + hwid = of_read_number(prop, of_n_addr_cells(dn)); + if (hwid == mpidr) { + cpu_read_ops(dn, 0); + of_node_put(dn); + return; + } + } +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index add6ea616843..3dc5c2f7c6b1 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -269,6 +270,7 @@ void __init setup_arch(char **cmdline_p) psci_init(); cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6e9779c878ad..82eea4a18a85 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -188,7 +188,6 @@ static void (*smp_cross_call)(const struct cpumask *, unsigned int); */ void __init smp_init_cpus(void) { - const char *enable_method; struct device_node *dn = NULL; unsigned int i, cpu = 1; bool bootcpu_valid = false; @@ -259,23 +258,8 @@ void __init smp_init_cpus(void) if (cpu >= NR_CPUS) goto next; - /* - * We currently support only the "spin-table" enable-method. - */ - enable_method = of_get_property(dn, "enable-method", NULL); - if (!enable_method) { - pr_err("%s: missing enable-method property\n", - dn->full_name); + if (cpu_read_ops(dn, cpu) != 0) goto next; - } - - cpu_ops[cpu] = cpu_get_ops(enable_method); - - if (!cpu_ops[cpu]) { - pr_err("%s: invalid enable-method property: %s\n", - dn->full_name, enable_method); - goto next; - } if (cpu_ops[cpu]->cpu_init(dn, cpu)) goto next; From d3bde2dcb4434ae9f905d9286e36feb59953f1e3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:18 +0100 Subject: [PATCH 0863/1368] arm64: add CPU_HOTPLUG infrastructure This patch adds the basic infrastructure necessary to support CPU_HOTPLUG on arm64, based on the arm implementation. Actual hotplug support will depend on an implementation's cpu_operations (e.g. PSCI). Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 9327e2c6bb8cb0131b38a07847cd58c78dc095e9) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 7 +++ arch/arm64/include/asm/cpu_ops.h | 9 +++ arch/arm64/include/asm/irq.h | 1 + arch/arm64/include/asm/smp.h | 5 ++ arch/arm64/kernel/cputable.c | 2 +- arch/arm64/kernel/irq.c | 61 ++++++++++++++++++++ arch/arm64/kernel/process.c | 7 +++ arch/arm64/kernel/smp.c | 96 ++++++++++++++++++++++++++++++++ 8 files changed, 187 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..022f20097930 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -150,6 +150,13 @@ config NR_CPUS depends on SMP default "4" +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + source kernel/Kconfig.preempt config HZ diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 1a98dbfc4a5f..c4cdb5e5b73d 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -34,6 +34,11 @@ struct device_node; * @cpu_boot: Boots a cpu into the kernel. * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary * synchronisation. Called from the cpu being booted. + * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific + * reason, which will cause the hot unplug to be aborted. Called + * from the cpu to be killed. + * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the + * cpu being killed. */ struct cpu_operations { const char *name; @@ -41,6 +46,10 @@ struct cpu_operations { int (*cpu_prepare)(unsigned int); int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); +#ifdef CONFIG_HOTPLUG_CPU + int (*cpu_disable)(unsigned int cpu); + void (*cpu_die)(unsigned int cpu); +#endif }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 0332fc077f6e..e1f7ecdde11f 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -4,6 +4,7 @@ #include extern void (*handle_arch_irq)(struct pt_regs *); +extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); #endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d64187ce69a2..a498f2cd2c2a 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -65,4 +65,9 @@ extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +extern int __cpu_disable(void); + +extern void __cpu_die(unsigned int cpu); +extern void cpu_die(void); + #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c index 63cfc4a43f4e..fd3993cb060f 100644 --- a/arch/arm64/kernel/cputable.c +++ b/arch/arm64/kernel/cputable.c @@ -22,7 +22,7 @@ extern unsigned long __cpu_setup(void); -struct cpu_info __initdata cpu_table[] = { +struct cpu_info cpu_table[] = { { .cpu_id_val = 0x000f0000, .cpu_id_mask = 0x000f0000, diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index ecb3354292ed..473e5dbf8f39 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -81,3 +81,64 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; + ret = true; + } + + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) + pr_debug("IRQ%u: unable to set affinity\n", d->irq); + else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + cpumask_copy(d->affinity, affinity); + + return ret; +} + +/* + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void migrate_irqs(void) +{ + unsigned int i; + struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); + + for_each_irq_desc(i, desc) { + bool affinity_broken; + + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) + pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + i, smp_processor_id()); + } + + local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 46f02c3b5015..4caf198ca44f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -102,6 +102,13 @@ void arch_cpu_idle(void) local_irq_enable(); } +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} +#endif + void machine_shutdown(void) { #ifdef CONFIG_SMP diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 82eea4a18a85..e0b7c0e45b87 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -166,6 +166,102 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_startup_entry(CPUHP_ONLINE); } +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_disable(unsigned int cpu) +{ + /* + * If we don't have a cpu_die method, abort before we reach the point + * of no return. CPU0 may not have an cpu_ops, so test for it. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) + return -EOPNOTSUPP; + + /* + * We may need to abort a hot unplug for some other mechanism-specific + * reason. + */ + if (cpu_ops[cpu]->cpu_disable) + return cpu_ops[cpu]->cpu_disable(cpu); + + return 0; +} + +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + int ret; + + ret = op_cpu_disable(cpu); + if (ret) + return ret; + + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + set_cpu_online(cpu, false); + + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); + + /* + * Remove this CPU from the vm mask set of all processes. + */ + clear_tasks_mm_cpumask(cpu); + + return 0; +} + +static DECLARE_COMPLETION(cpu_died); + +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpu_die(unsigned int cpu) +{ + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + pr_crit("CPU%u: cpu didn't die\n", cpu); + return; + } + pr_notice("CPU%u: shutdown\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. + */ +void cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + + idle_task_exit(); + + local_irq_disable(); + + /* Tell __cpu_die() that this CPU is now safe to dispose of */ + complete(&cpu_died); + + /* + * Actually shutdown the CPU. This must never fail. The specific hotplug + * mechanism must perform all required cache maintenance to ensure that + * no dirty lines are lost in the process of shutting down the CPU. + */ + cpu_ops[cpu]->cpu_die(cpu); + + BUG(); +} +#endif + void __init smp_cpus_done(unsigned int max_cpus) { unsigned long bogosum = loops_per_jiffy * num_online_cpus(); From 8cea5d72615ec55b8e31d8d4a066a38041e0dca2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:19 +0100 Subject: [PATCH 0864/1368] arm64: add PSCI CPU_OFF-based hotplug support This patch adds support for using PSCI CPU_OFF calls for CPU hotplug. With this code it is possible to hot unplug CPUs with "psci" as their boot-method, as long as there's an appropriate cpu_off function id specified in the psci node. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 831ccf79b46e02f31cfd16c66df9d5600f635155) Signed-off-by: Mark Brown --- arch/arm64/kernel/psci.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index fb56b6158344..4f97db3d7363 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -256,11 +256,41 @@ static int cpu_psci_cpu_boot(unsigned int cpu) return err; } +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return -EOPNOTSUPP; + return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ + int ret; + /* + * There are no known implementations of PSCI actually using the + * power state field, pass a sensible default for now. + */ + struct psci_power_state state = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + ret = psci_ops.cpu_off(state); + + pr_crit("psci: unable to power off CPU%u (%d)\n", cpu, ret); +} +#endif + const struct cpu_operations cpu_psci_ops = { .name = "psci", .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = cpu_psci_cpu_disable, + .cpu_die = cpu_psci_cpu_die, +#endif }; #endif From 914866357990d96231a6bb8d2cad870cf21bd44a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 31 Oct 2013 16:37:26 +0000 Subject: [PATCH 0865/1368] arm64: Slightly improve the warning on CPU0 enable-method Commit e8765b265a69 (arm64: read enable-method for CPU0) introduced checks for the enable method on CPU0 (to be later used with CPU suspend). However, if the kernel is compiled for UP and a DT file is used with a method like 'spin-table', Linux complains about 'invalid enable method'. This patch turns it into an 'unsupported enable method' warning. Signed-off-by: Catalin Marinas (cherry picked from commit 264666e62848c19ba9252c80e895ffec9ad1d391) Signed-off-by: Mark Brown --- arch/arm64/kernel/cpu_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index aa0c9e78dbe1..04efea8fe4bc 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -68,8 +68,8 @@ int __init cpu_read_ops(struct device_node *dn, int cpu) cpu_ops[cpu] = cpu_get_ops(enable_method); if (!cpu_ops[cpu]) { - pr_err("%s: invalid enable-method property: %s\n", - dn->full_name, enable_method); + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); return -EOPNOTSUPP; } From 5066e800ba3438724f7541c284d674acd9909ba6 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Mon, 21 Oct 2013 13:29:42 +0100 Subject: [PATCH 0866/1368] ARM64: DT: define ARM64 specific arch_match_cpu_phys_id OF/DT core library provides architecture specific hook to match the logical cpu index with the corresponding physical identifier. On ARM64, the MPIDR_EL1 contains specific bitfields(MPIDR_EL1.Aff{3..0}) which uniquely identify a CPU, in addition to some non-identifying information and reserved bits. The ARM cpu binding defines the 'reg' property to only contain the affinity bits, and any cpu nodes with other bits set in their 'reg' entry are skipped. This patch overrides the weak definition of arch_match_cpu_phys_id with ARM64 specific version using MPIDR_EL1.Aff{3..0} as cpu physical identifiers. Signed-off-by: Sudeep KarkadaNagesha Acked-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 6e15d0e04bfeaa5662a289ee915273307326e45a) --- arch/arm64/kernel/setup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3dc5c2f7c6b1..85afdae9cc05 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -98,6 +98,11 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpu_logical_map(cpu); +} + static void __init setup_processor(void) { struct cpu_info *cpu_info; From b1c2e623df56e29d9304863791dd8d20b5bbea7b Mon Sep 17 00:00:00 2001 From: Damian Hobson-Garcia Date: Tue, 30 Apr 2013 04:02:13 +0100 Subject: [PATCH 0867/1368] arm64: Provide default implementation for dma_{alloc,free}_attrs Most architectures that define CONFIG_HAS_DMA, have implementations for both dma_alloc_attrs() and dma_free_attrs(). All achitectures that do not define CONFIG_HAS_DMA also have both of these definitions provided by dma-mapping-broken.h. Add default implementations for these functions on arm64. Signed-off-by: Damian Hobson-Garcia Signed-off-by: Catalin Marinas (cherry picked from commit d25749afc6f2a40471a70c04a35633e30cbe59a5) Signed-off-by: Mark Brown --- arch/arm64/include/asm/dma-mapping.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 994776894198..8d1810001aef 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -81,8 +81,12 @@ static inline void dma_mark_clean(void *addr, size_t size) { } -static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_free_coherent(d, s, h, f) dma_free_attrs(d, s, h, f, NULL) + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); void *vaddr; @@ -90,13 +94,14 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr)) return vaddr; - vaddr = ops->alloc(dev, size, dma_handle, flags, NULL); + vaddr = ops->alloc(dev, size, dma_handle, flags, attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr); return vaddr; } -static inline void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dev_addr) +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t dev_addr, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -104,7 +109,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, return; debug_dma_free_coherent(dev, size, vaddr, dev_addr); - ops->free(dev, size, vaddr, dev_addr, NULL); + ops->free(dev, size, vaddr, dev_addr, attrs); } /* From d7cc23e7edb26ee1a953294409ff43e58acdfb21 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Thu, 6 Feb 2014 17:21:51 +0530 Subject: [PATCH 0868/1368] arm64: Change misleading function names in dma-mapping arm64_swiotlb_alloc/free_coherent name can be misleading somtimes with CMA support being enabled after this patch (c2104debc235b745265b64d610237a6833fd53) Change this name to be more generic: __dma_alloc/free_coherent Signed-off-by: Ritesh Harjani [catalin.marinas@arm.com: renamed arm64_swiotlb_dma_ops to coherent_swiotlb_dma_ops] Signed-off-by: Catalin Marinas (cherry picked from commit bb10eb7b4d176f408d45fb492df28bed2981a1f3) Signed-off-by: Mark Brown --- arch/arm64/mm/dma-mapping.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4bd7579ec9e6..65ce60fb3746 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -29,9 +29,9 @@ struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) +static void *__dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) @@ -39,16 +39,16 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, return swiotlb_alloc_coherent(dev, size, dma_handle, flags); } -static void arm64_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) +static void __dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { swiotlb_free_coherent(dev, size, vaddr, dma_handle); } -static struct dma_map_ops arm64_swiotlb_dma_ops = { - .alloc = arm64_swiotlb_alloc_coherent, - .free = arm64_swiotlb_free_coherent, +static struct dma_map_ops coherent_swiotlb_dma_ops = { + .alloc = __dma_alloc_coherent, + .free = __dma_free_coherent, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .map_sg = swiotlb_map_sg_attrs, @@ -63,7 +63,7 @@ static struct dma_map_ops arm64_swiotlb_dma_ops = { void __init arm64_swiotlb_init(void) { - dma_ops = &arm64_swiotlb_dma_ops; + dma_ops = &coherent_swiotlb_dma_ops; swiotlb_init(1); } From e816f573ecb2aeb35032cec7f57fe37f224fd979 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 15 Oct 2013 15:49:03 +0000 Subject: [PATCH 0869/1368] arm64: define DMA_ERROR_CODE Signed-off-by: Stefano Stabellini Acked-by: Catalin Marinas CC: will.deacon@arm.com Changes in v8: - cast to dma_addr_t before returning. (cherry picked from commit 25b719d7b45947a79d298414cdfb5ec8fadf0ec8) Signed-off-by: Mark Brown --- arch/arm64/include/asm/dma-mapping.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 8d1810001aef..59206b178494 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -25,6 +25,7 @@ #define ARCH_HAS_DMA_GET_REQUIRED_MASK +#define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) From 184fd2a86928754500ebdbf4b58fd5e3ea20128e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 21 May 2013 17:35:19 +0100 Subject: [PATCH 0870/1368] arm64: Implement coherent DMA API based on swiotlb This patch adds support for DMA API cache maintenance on SoCs without hardware device cache coherency. Signed-off-by: Catalin Marinas (cherry picked from commit 7363590d2c4691593fd280f94b3deaeb5e83dbbd) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/dma-mapping.h (due to Xen) --- arch/arm64/include/asm/cacheflush.h | 7 ++ arch/arm64/include/asm/dma-mapping.h | 7 ++ arch/arm64/mm/cache.S | 78 +++++++++++++ arch/arm64/mm/dma-mapping.c | 162 ++++++++++++++++++++++++++- 4 files changed, 253 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 3300cbd18a89..f3d03f82a319 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -84,6 +84,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma, { } +/* + * Cache maintenance functions used by the DMA API. No to be used directly. + */ +extern void __dma_map_area(const void *, size_t, int); +extern void __dma_unmap_area(const void *, size_t, int); +extern void __dma_flush_range(const void *, const void *); + /* * Copy user data from/to a page which is mapped into a different * processes address space. Really, we want to allow our "user diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 59206b178494..00a41aab4a37 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -27,6 +27,8 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; +extern struct dma_map_ops coherent_swiotlb_dma_ops; +extern struct dma_map_ops noncoherent_swiotlb_dma_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) { @@ -36,6 +38,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return dev->archdata.dma_ops; } +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +{ + dev->archdata.dma_ops = ops; +} + #include static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 48a386094fa3..4726b8209d37 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -166,3 +166,81 @@ ENTRY(__flush_dcache_area) dsb sy ret ENDPROC(__flush_dcache_area) + +/* + * __dma_inv_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +__dma_inv_range: + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x0, x0, x3 + bic x1, x1, x3 +1: dc ivac, x0 // invalidate D / U line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__dma_inv_range) + +/* + * __dma_clean_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +__dma_clean_range: + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x0, x0, x3 +1: dc cvac, x0 // clean D / U line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__dma_clean_range) + +/* + * __dma_flush_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(__dma_flush_range) + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x0, x0, x3 +1: dc civac, x0 // clean & invalidate D / U line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__dma_flush_range) + +/* + * __dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(__dma_map_area) + add x1, x1, x0 + cmp w2, #DMA_FROM_DEVICE + b.eq __dma_inv_range + b __dma_clean_range +ENDPROC(__dma_map_area) + +/* + * __dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(__dma_unmap_area) + add x1, x1, x0 + cmp w2, #DMA_TO_DEVICE + b.ne __dma_inv_range + ret +ENDPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 65ce60fb3746..ba0ff75666ee 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -46,7 +46,166 @@ static void __dma_free_coherent(struct device *dev, size_t size, swiotlb_free_coherent(dev, size, vaddr, dma_handle); } -static struct dma_map_ops coherent_swiotlb_dma_ops = { +static void *__dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + struct page *page, **map; + void *ptr, *coherent_ptr; + int order, i; + + size = PAGE_ALIGN(size); + order = get_order(size); + + ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs); + if (!ptr) + goto no_mem; + map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA); + if (!map) + goto no_map; + + /* remove any dirty cache lines on the kernel alias */ + __dma_flush_range(ptr, ptr + size); + + /* create a coherent mapping */ + page = virt_to_page(ptr); + for (i = 0; i < (size >> PAGE_SHIFT); i++) + map[i] = page + i; + coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP, + pgprot_dmacoherent(pgprot_default)); + kfree(map); + if (!coherent_ptr) + goto no_map; + + return coherent_ptr; + +no_map: + __dma_free_coherent(dev, size, ptr, *dma_handle, attrs); +no_mem: + *dma_handle = ~0; + return NULL; +} + +static void __dma_free_noncoherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); + + vunmap(vaddr); + __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs); +} + +static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + dma_addr_t dev_addr; + + dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + + return dev_addr; +} + + +static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); +} + +static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i, ret; + + ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); + for_each_sg(sgl, sg, ret, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + + return ret; +} + +static void __swiotlb_unmap_sg_attrs(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); +} + +static void __swiotlb_sync_single_for_cpu(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); +} + +static void __swiotlb_sync_single_for_device(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + swiotlb_sync_single_for_device(dev, dev_addr, size, dir); + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); +} + +static void __swiotlb_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); +} + +static void __swiotlb_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); + for_each_sg(sgl, sg, nelems, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); +} + +struct dma_map_ops noncoherent_swiotlb_dma_ops = { + .alloc = __dma_alloc_noncoherent, + .free = __dma_free_noncoherent, + .map_page = __swiotlb_map_page, + .unmap_page = __swiotlb_unmap_page, + .map_sg = __swiotlb_map_sg_attrs, + .unmap_sg = __swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = __swiotlb_sync_single_for_cpu, + .sync_single_for_device = __swiotlb_sync_single_for_device, + .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = __swiotlb_sync_sg_for_device, + .dma_supported = swiotlb_dma_supported, + .mapping_error = swiotlb_dma_mapping_error, +}; +EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops); + +struct dma_map_ops coherent_swiotlb_dma_ops = { .alloc = __dma_alloc_coherent, .free = __dma_free_coherent, .map_page = swiotlb_map_page, @@ -60,6 +219,7 @@ static struct dma_map_ops coherent_swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; +EXPORT_SYMBOL(coherent_swiotlb_dma_ops); void __init arm64_swiotlb_init(void) { From a8e2e0b434c081722330335f7e9628e0d8d17494 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 2 Jul 2013 11:15:15 +0530 Subject: [PATCH 0871/1368] mm/cma: Move dma contiguous changes into a seperate config We want to use CMA for allocating hash page table and real mode area for PPC64. Hence move DMA contiguous related changes into a seperate config so that ppc64 can enable CMA without requiring DMA contiguous. Acked-by: Michal Nazarewicz Acked-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V [removed defconfig changes] Signed-off-by: Marek Szyprowski (cherry picked from commit f825c736e75b11adb59ec52a4a1096efddd2ec97) Signed-off-by: Alex Shi --- arch/arm/include/asm/dma-contiguous.h | 2 +- arch/arm/mm/dma-mapping.c | 6 +++--- drivers/base/Kconfig | 20 ++++---------------- drivers/base/Makefile | 2 +- include/linux/dma-contiguous.h | 2 +- mm/Kconfig | 24 ++++++++++++++++++++++++ 6 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h index 3ed37b4d93da..e072bb2ba1b1 100644 --- a/arch/arm/include/asm/dma-contiguous.h +++ b/arch/arm/include/asm/dma-contiguous.h @@ -2,7 +2,7 @@ #define ASMARM_DMA_CONTIGUOUS_H #ifdef __KERNEL__ -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA #include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3aac96..1fb40dc37ec2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -358,7 +358,7 @@ static int __init atomic_pool_init(void) if (!pages) goto no_pages; - if (IS_ENABLED(CONFIG_CMA)) + if (IS_ENABLED(CONFIG_DMA_CMA)) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, atomic_pool_init); else @@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); - else if (!IS_ENABLED(CONFIG_CMA)) + else if (!IS_ENABLED(CONFIG_DMA_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); else addr = __alloc_from_contiguous(dev, size, prot, &page, caller); @@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, __dma_free_buffer(page, size); } else if (__free_from_pool(cpu_addr, size)) { return; - } else if (!IS_ENABLED(CONFIG_CMA)) { + } else if (!IS_ENABLED(CONFIG_DMA_CMA)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 07abd9d76f7f..10cd80af2aec 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -202,11 +202,9 @@ config DMA_SHARED_BUFFER APIs extension; the file's descriptor can then be passed on to other driver. -config CMA - bool "Contiguous Memory Allocator" - depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK - select MIGRATION - select MEMORY_ISOLATION +config DMA_CMA + bool "DMA Contiguous Memory Allocator" + depends on HAVE_DMA_CONTIGUOUS && CMA help This enables the Contiguous Memory Allocator which allows drivers to allocate big physically-contiguous blocks of memory for use with @@ -215,17 +213,7 @@ config CMA For more information see . If unsure, say "n". -if CMA - -config CMA_DEBUG - bool "CMA debug messages (DEVELOPMENT)" - depends on DEBUG_KERNEL - help - Turns on debug messages in CMA. This produces KERN_DEBUG - messages for every CMA call as well as various messages while - processing calls such as dma_alloc_from_contiguous(). - This option does not affect warning and error messages. - +if DMA_CMA comment "Default contiguous memory area size:" config CMA_SIZE_MBYTES diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 4e22ce3ed73d..5d93bb519753 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,7 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ attribute_container.o transport_class.o \ topology.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o -obj-$(CONFIG_CMA) += dma-contiguous.o +obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 01b5c84be828..00141d3325fe 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -57,7 +57,7 @@ struct cma; struct page; struct device; -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA /* * There is always at least global CMA area and a few optional device diff --git a/mm/Kconfig b/mm/Kconfig index e742d06285b7..26a5f815cfc3 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -477,3 +477,27 @@ config FRONTSWAP and swap data is stored as normal on the matching swap device. If unsure, say Y to enable frontswap. + +config CMA + bool "Contiguous Memory Allocator" + depends on HAVE_MEMBLOCK + select MIGRATION + select MEMORY_ISOLATION + help + This enables the Contiguous Memory Allocator which allows other + subsystems to allocate big physically-contiguous blocks of memory. + CMA reserves a region of memory and allows only movable pages to + be allocated from it. This way, the kernel can use the memory for + pagecache and when a subsystem requests for contiguous area, the + allocated pages are migrated away to serve the contiguous request. + + If unsure, say "n". + +config CMA_DEBUG + bool "CMA debug messages (DEVELOPMENT)" + depends on DEBUG_KERNEL && CMA + help + Turns on debug messages in CMA. This produces KERN_DEBUG + messages for every CMA call as well as various messages while + processing calls such as dma_alloc_from_contiguous(). + This option does not affect warning and error messages. From 1980dd3801892bcf7e4dfffc137d2119e13150b8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 29 Jul 2013 14:31:45 +0200 Subject: [PATCH 0872/1368] drivers: dma-contiguous: clean source code and prepare for device tree This patch cleans the initialization of dma contiguous framework. The all-in-one dma_declare_contiguous() function is now separated into dma_contiguous_reserve_area() which only steals the the memory from memblock allocator and dma_contiguous_add_device() function, which assigns given device to the specified reserved memory area. This improves the flexibility in defining contiguous memory areas and assigning device to them, because now it is possible to assign more than one device to the given contiguous memory area. Such split in initialization procedure is also required for upcoming device tree support. Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Acked-by: Michal Nazarewicz Acked-by: Tomasz Figa (cherry picked from commit a2547380393ac82c659b40182b0da8d05a8365f3) Signed-off-by: Alex Shi --- arch/arm/include/asm/dma-contiguous.h | 1 - arch/x86/include/asm/dma-contiguous.h | 1 - drivers/base/dma-contiguous.c | 119 ++++++++++---------------- include/asm-generic/dma-contiguous.h | 28 ------ include/linux/device.h | 2 +- include/linux/dma-contiguous.h | 62 +++++++++++++- 6 files changed, 105 insertions(+), 108 deletions(-) delete mode 100644 include/asm-generic/dma-contiguous.h diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h index e072bb2ba1b1..4f8e9e5514b1 100644 --- a/arch/arm/include/asm/dma-contiguous.h +++ b/arch/arm/include/asm/dma-contiguous.h @@ -5,7 +5,6 @@ #ifdef CONFIG_DMA_CMA #include -#include void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h index c09241659971..b4b38bacb404 100644 --- a/arch/x86/include/asm/dma-contiguous.h +++ b/arch/x86/include/asm/dma-contiguous.h @@ -4,7 +4,6 @@ #ifdef __KERNEL__ #include -#include static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 0ca54421ce97..99802d6f3c60 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -96,7 +96,7 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif /** - * dma_contiguous_reserve() - reserve area for contiguous memory handling + * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling * @limit: End address of the reserved memory (optional, 0 for any). * * This function reserves memory from early allocator. It should be @@ -124,22 +124,29 @@ void __init dma_contiguous_reserve(phys_addr_t limit) #endif } - if (selected_size) { + if (selected_size && !dma_contiguous_default_area) { pr_debug("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); - dma_declare_contiguous(NULL, selected_size, 0, limit); + dma_contiguous_reserve_area(selected_size, 0, limit, + &dma_contiguous_default_area); } }; static DEFINE_MUTEX(cma_mutex); -static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) +static int __init cma_activate_area(struct cma *cma) { - unsigned long pfn = base_pfn; - unsigned i = count >> pageblock_order; + int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long); + unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; + unsigned i = cma->count >> pageblock_order; struct zone *zone; + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + + if (!cma->bitmap) + return -ENOMEM; + WARN_ON_ONCE(!pfn_valid(pfn)); zone = page_zone(pfn_to_page(pfn)); @@ -153,92 +160,53 @@ static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) } init_cma_reserved_pageblock(pfn_to_page(base_pfn)); } while (--i); + return 0; } -static __init struct cma *cma_create_area(unsigned long base_pfn, - unsigned long count) -{ - int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); - struct cma *cma; - int ret = -ENOMEM; - - pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count); - - cma = kmalloc(sizeof *cma, GFP_KERNEL); - if (!cma) - return ERR_PTR(-ENOMEM); - - cma->base_pfn = base_pfn; - cma->count = count; - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - - if (!cma->bitmap) - goto no_mem; - - ret = cma_activate_area(base_pfn, count); - if (ret) - goto error; - - pr_debug("%s: returned %p\n", __func__, (void *)cma); - return cma; - -error: - kfree(cma->bitmap); -no_mem: - kfree(cma); - return ERR_PTR(ret); -} - -static struct cma_reserved { - phys_addr_t start; - unsigned long size; - struct device *dev; -} cma_reserved[MAX_CMA_AREAS] __initdata; -static unsigned cma_reserved_count __initdata; +static struct cma cma_areas[MAX_CMA_AREAS]; +static unsigned cma_area_count; static int __init cma_init_reserved_areas(void) { - struct cma_reserved *r = cma_reserved; - unsigned i = cma_reserved_count; + int i; - pr_debug("%s()\n", __func__); - - for (; i; --i, ++r) { - struct cma *cma; - cma = cma_create_area(PFN_DOWN(r->start), - r->size >> PAGE_SHIFT); - if (!IS_ERR(cma)) - dev_set_cma_area(r->dev, cma); + for (i = 0; i < cma_area_count; i++) { + int ret = cma_activate_area(&cma_areas[i]); + if (ret) + return ret; } + return 0; } core_initcall(cma_init_reserved_areas); /** - * dma_declare_contiguous() - reserve area for contiguous memory handling - * for particular device - * @dev: Pointer to device structure. - * @size: Size of the reserved memory. - * @base: Start address of the reserved memory (optional, 0 for any). + * dma_contiguous_reserve_area() - reserve custom contiguous area + * @size: Size of the reserved area (in bytes), + * @base: Base address of the reserved area optional, use 0 for any * @limit: End address of the reserved memory (optional, 0 for any). + * @res_cma: Pointer to store the created cma region. * - * This function reserves memory for specified device. It should be - * called by board specific code when early allocator (memblock or bootmem) - * is still activate. + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. This function allows to create custom reserved areas for specific + * devices. */ -int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit) +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma) { - struct cma_reserved *r = &cma_reserved[cma_reserved_count]; + struct cma *cma = &cma_areas[cma_area_count]; phys_addr_t alignment; + int ret = 0; pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, (unsigned long)size, (unsigned long)base, (unsigned long)limit); /* Sanity checks */ - if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) { + if (cma_area_count == ARRAY_SIZE(cma_areas)) { pr_err("Not enough slots for CMA reserved regions!\n"); return -ENOSPC; } @@ -256,7 +224,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, if (base) { if (memblock_is_region_reserved(base, size) || memblock_reserve(base, size) < 0) { - base = -EBUSY; + ret = -EBUSY; goto err; } } else { @@ -266,7 +234,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, */ phys_addr_t addr = __memblock_alloc_base(size, alignment, limit); if (!addr) { - base = -ENOMEM; + ret = -ENOMEM; goto err; } else { base = addr; @@ -277,10 +245,11 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, * Each reserved area must be initialised later, when more kernel * subsystems (like slab allocator) are available. */ - r->start = base; - r->size = size; - r->dev = dev; - cma_reserved_count++; + cma->base_pfn = PFN_DOWN(base); + cma->count = size >> PAGE_SHIFT; + *res_cma = cma; + cma_area_count++; + pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, (unsigned long)base); @@ -289,7 +258,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, return 0; err: pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); - return base; + return ret; } /** diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h deleted file mode 100644 index 294b1e755ab2..000000000000 --- a/include/asm-generic/dma-contiguous.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef ASM_DMA_CONTIGUOUS_H -#define ASM_DMA_CONTIGUOUS_H - -#ifdef __KERNEL__ -#ifdef CONFIG_CMA - -#include -#include - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} - -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) -{ - if (dev) - dev->cma_area = cma; - if (!dev && !dma_contiguous_default_area) - dma_contiguous_default_area = cma; -} - -#endif -#endif - -#endif diff --git a/include/linux/device.h b/include/linux/device.h index c0a126125325..d98ec771de42 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -698,7 +698,7 @@ struct device { struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA struct cma *cma_area; /* contiguous memory area for dma allocations */ #endif diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 00141d3325fe..3b28f937d959 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -67,9 +67,53 @@ struct device; extern struct cma *dma_contiguous_default_area; +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +static inline void dev_set_cma_area(struct device *dev, struct cma *cma) +{ + if (dev) + dev->cma_area = cma; +} + +static inline void dma_contiguous_set_default(struct cma *cma) +{ + dma_contiguous_default_area = cma; +} + void dma_contiguous_reserve(phys_addr_t addr_limit); -int dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit); + +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma); + +/** + * dma_declare_contiguous() - reserve area for contiguous memory handling + * for particular device + * @dev: Pointer to device structure. + * @size: Size of the reserved memory. + * @base: Start address of the reserved memory (optional, 0 for any). + * @limit: End address of the reserved memory (optional, 0 for any). + * + * This function reserves memory for specified device. It should be + * called by board specific code when early allocator (memblock or bootmem) + * is still activate. + */ + +static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, + phys_addr_t base, phys_addr_t limit) +{ + struct cma *cma; + int ret; + ret = dma_contiguous_reserve_area(size, base, limit, &cma); + if (ret == 0) + dev_set_cma_area(dev, cma); + + return ret; +} struct page *dma_alloc_from_contiguous(struct device *dev, int count, unsigned int order); @@ -80,8 +124,22 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, #define MAX_CMA_AREAS (0) +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + return NULL; +} + +static inline void dev_set_cma_area(struct device *dev, struct cma *cma) { } + +static inline void dma_contiguous_set_default(struct cma *cma) { } + static inline void dma_contiguous_reserve(phys_addr_t limit) { } +static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma) { + return -ENOSYS; +} + static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, phys_addr_t base, phys_addr_t limit) From 97e4a880c9677b82ddb41e4613d0d4b6a3479a80 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 12 Dec 2013 19:28:33 +0000 Subject: [PATCH 0873/1368] arm64: Enable CMA arm64 bit targets need the features CMA provides. Add the appropriate hooks, header files, and Kconfig to allow this to happen. Cc: Will Deacon Cc: Marek Szyprowski Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit 6ac2104debc235b745265b64d610237a6833fd53) Signed-off-by: Alex Shi Conflicts: arch/arm64/Kconfig (cherry picked from commit 0a4b0f00f4c9cb80a0fb11b9e0f898f82745e370) Signed-off-by: Alex Shi Conflicts: arch/arm64/mm/dma-mapping.c --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/dma-contiguous.h | 29 ++++++++++++++++++++++++ arch/arm64/mm/dma-mapping.c | 30 +++++++++++++++++++++++-- arch/arm64/mm/init.c | 3 +++ 4 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/dma-contiguous.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..1c650048a4e3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -21,6 +21,7 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS + select HAVE_DMA_CONTIGUOUS select HAVE_GENERIC_DMA_COHERENT select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS diff --git a/arch/arm64/include/asm/dma-contiguous.h b/arch/arm64/include/asm/dma-contiguous.h new file mode 100644 index 000000000000..d6aacb61ff4a --- /dev/null +++ b/arch/arm64/include/asm/dma-contiguous.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ASM_DMA_CONTIGUOUS_H +#define _ASM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_DMA_CMA + +#include +#include + +static inline void +dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } + +#endif +#endif + +#endif diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4bd7579ec9e6..c4c005cc2ed2 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -36,14 +37,39 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA32) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA32; - return swiotlb_alloc_coherent(dev, size, dma_handle, flags); + if (IS_ENABLED(CONFIG_DMA_CMA)) { + struct page *page; + + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size)); + if (!page) + return NULL; + + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + return page_address(page); + } else { + return swiotlb_alloc_coherent(dev, size, dma_handle, flags); + } } static void arm64_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) { - swiotlb_free_coherent(dev, size, vaddr, dma_handle); + if (dev == NULL) { + WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); + return; + } + + if (IS_ENABLED(CONFIG_DMA_CMA)) { + phys_addr_t paddr = dma_to_phys(dev, dma_handle); + + dma_release_from_contiguous(dev, + phys_to_page(paddr), + size >> PAGE_SHIFT); + } else { + swiotlb_free_coherent(dev, size, vaddr, dma_handle); + } } static struct dma_map_ops arm64_swiotlb_dma_ops = { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca77925a..851d3c137e03 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -173,6 +174,8 @@ void __init arm64_memblock_init(void) memblock_reserve(base, size); } + dma_contiguous_reserve(0); + memblock_allow_resize(); memblock_dump_all(); } From 9d93ccedc26f8591ca4e6f2b65dcbf2bdbfd6453 Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Fri, 24 Jan 2014 08:23:08 +0000 Subject: [PATCH 0874/1368] arm64: fix build error if DMA_CMA is enabled arm64/include/asm/dma-contiguous.h is trying to include which does not exist, and thus failing build for arm64 if we enable CONFIG_DMA_CMA. This patch fixes build error by removing unwanted header inclusion from arm64's dma-contiguous.h. Signed-off-by: Pankaj Dubey Signed-off-by: Somraj Mani Acked-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit ac525f59fb011e05e77c5be8b9834883ee1d5613) Signed-off-by: Alex Shi --- arch/arm64/include/asm/dma-contiguous.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/dma-contiguous.h b/arch/arm64/include/asm/dma-contiguous.h index d6aacb61ff4a..14c4c0ca7f2a 100644 --- a/arch/arm64/include/asm/dma-contiguous.h +++ b/arch/arm64/include/asm/dma-contiguous.h @@ -18,7 +18,6 @@ #ifdef CONFIG_DMA_CMA #include -#include static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } From 7e7f87dda0fbc095f27f3ccd97aed18c2542b93c Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 4 Feb 2014 23:08:57 +0000 Subject: [PATCH 0875/1368] arm64: Align CMA sizes to PAGE_SIZE dma_alloc_from_contiguous takes number of pages for a size. Align up the dma size passed in to page size to avoid truncation and allocation failures on sizes less than PAGE_SIZE. Cc: Will Deacon Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit ccc9e244eb1b9654915634827322932cbafd8244) Signed-off-by: Alex Shi --- arch/arm64/mm/dma-mapping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c4c005cc2ed2..0150f5d14eda 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -40,6 +40,7 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; + size = PAGE_ALIGN(size); page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, get_order(size)); if (!page) From 5f525ac6a41ca1875b21f0c35bb9c540c00a11ab Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 23 Apr 2013 12:35:02 +0100 Subject: [PATCH 0876/1368] mm: hugetlb: Copy huge_pmd_share from x86 to mm. Under x86, multiple puds can be made to reference the same bank of huge pmds provided that they represent a full PUD_SIZE of shared huge memory that is aligned to a PUD_SIZE boundary. The code to share pmds does not require any architecture specific knowledge other than the fact that pmds can be indexed, thus can be beneficial to some other architectures. This patch copies the huge pmd sharing (and unsharing) logic from x86/ to mm/ and introduces a new config option to activate it: CONFIG_ARCH_WANTS_HUGE_PMD_SHARE Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++ mm/hugetlb.c | 122 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6b4890fa57e7..981546ad231c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -69,6 +69,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); int dequeue_hwpoisoned_huge_page(struct page *page); void copy_huge_page(struct page *dst, struct page *src); +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +#endif + extern unsigned long hugepages_treat_as_movable; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e2bfbf73a551..15d84e55ceac 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3169,6 +3169,128 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) hugetlb_acct_memory(h, -(chg - freed)); } +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +static unsigned long page_table_shareable(struct vm_area_struct *svma, + struct vm_area_struct *vma, + unsigned long addr, pgoff_t idx) +{ + unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + + svma->vm_start; + unsigned long sbase = saddr & PUD_MASK; + unsigned long s_end = sbase + PUD_SIZE; + + /* Allow segments to share if only one is marked locked */ + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; + + /* + * match the virtual addresses, permission and the alignment of the + * page table page. + */ + if (pmd_index(addr) != pmd_index(saddr) || + vm_flags != svm_flags || + sbase < svma->vm_start || svma->vm_end < s_end) + return 0; + + return saddr; +} + +static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long base = addr & PUD_MASK; + unsigned long end = base + PUD_SIZE; + + /* + * check on proper vm_flags and page table alignment + */ + if (vma->vm_flags & VM_MAYSHARE && + vma->vm_start <= base && end <= vma->vm_end) + return 1; + return 0; +} + +/* + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() + * and returns the corresponding pte. While this is not necessary for the + * !shared pmd case because we can allocate the pmd later as well, it makes the + * code much cleaner. pmd allocation is essential for the shared case because + * pud has to be populated inside the same i_mmap_mutex section - otherwise + * racing tasks could either miss the sharing (see huge_pte_offset) or select a + * bad pmd for sharing. + */ +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + struct vm_area_struct *svma; + unsigned long saddr; + pte_t *spte = NULL; + pte_t *pte; + + if (!vma_shareable(vma, addr)) + return (pte_t *)pmd_alloc(mm, pud, addr); + + mutex_lock(&mapping->i_mmap_mutex); + vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { + if (svma == vma) + continue; + + saddr = page_table_shareable(svma, vma, addr, idx); + if (saddr) { + spte = huge_pte_offset(svma->vm_mm, saddr); + if (spte) { + get_page(virt_to_page(spte)); + break; + } + } + } + + if (!spte) + goto out; + + spin_lock(&mm->page_table_lock); + if (pud_none(*pud)) + pud_populate(mm, pud, + (pmd_t *)((unsigned long)spte & PAGE_MASK)); + else + put_page(virt_to_page(spte)); + spin_unlock(&mm->page_table_lock); +out: + pte = (pte_t *)pmd_alloc(mm, pud, addr); + mutex_unlock(&mapping->i_mmap_mutex); + return pte; +} + +/* + * unmap huge page backed by shared pte. + * + * Hugetlb pte page is ref counted at the time of mapping. If pte is shared + * indicated by page_count > 1, unmap is achieved by clearing pud and + * decrementing the ref count. If count == 1, the pte page is not shared. + * + * called with vma->vm_mm->page_table_lock held. + * + * returns: 1 successfully unmapped a shared pte page + * 0 the underlying pte page is not shared, or it is the last user + */ +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + pgd_t *pgd = pgd_offset(mm, *addr); + pud_t *pud = pud_offset(pgd, *addr); + + BUG_ON(page_count(virt_to_page(ptep)) == 0); + if (page_count(virt_to_page(ptep)) == 1) + return 0; + + pud_clear(pud); + put_page(virt_to_page(ptep)); + *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + return 1; +} +#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ + #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From 4daa1bbe44332f96f55a9d79abdf9bee04da48b7 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Mon, 29 Apr 2013 14:29:48 +0100 Subject: [PATCH 0877/1368] x86: mm: Remove x86 version of huge_pmd_share. The huge_pmd_share code has been copied over to mm/hugetlb.c to make it accessible to other architectures. Remove the x86 copy of the huge_pmd_share code and enable the ARCH_WANT_HUGE_PMD_SHARE config flag. That way we reference the general one. Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- arch/x86/Kconfig | 3 + arch/x86/mm/hugetlbpage.c | 120 -------------------------------------- 2 files changed, 3 insertions(+), 120 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe120da25625..56d606b2497c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -207,6 +207,9 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index ae1aa71d0115..7e522a359972 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,126 +16,6 @@ #include #include -static unsigned long page_table_shareable(struct vm_area_struct *svma, - struct vm_area_struct *vma, - unsigned long addr, pgoff_t idx) -{ - unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + - svma->vm_start; - unsigned long sbase = saddr & PUD_MASK; - unsigned long s_end = sbase + PUD_SIZE; - - /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; - - /* - * match the virtual addresses, permission and the alignment of the - * page table page. - */ - if (pmd_index(addr) != pmd_index(saddr) || - vm_flags != svm_flags || - sbase < svma->vm_start || svma->vm_end < s_end) - return 0; - - return saddr; -} - -static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) -{ - unsigned long base = addr & PUD_MASK; - unsigned long end = base + PUD_SIZE; - - /* - * check on proper vm_flags and page table alignment - */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) - return 1; - return 0; -} - -/* - * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() - * and returns the corresponding pte. While this is not necessary for the - * !shared pmd case because we can allocate the pmd later as well, it makes the - * code much cleaner. pmd allocation is essential for the shared case because - * pud has to be populated inside the same i_mmap_mutex section - otherwise - * racing tasks could either miss the sharing (see huge_pte_offset) or select a - * bad pmd for sharing. - */ -static pte_t * -huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) -{ - struct vm_area_struct *vma = find_vma(mm, addr); - struct address_space *mapping = vma->vm_file->f_mapping; - pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - struct vm_area_struct *svma; - unsigned long saddr; - pte_t *spte = NULL; - pte_t *pte; - - if (!vma_shareable(vma, addr)) - return (pte_t *)pmd_alloc(mm, pud, addr); - - mutex_lock(&mapping->i_mmap_mutex); - vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { - if (svma == vma) - continue; - - saddr = page_table_shareable(svma, vma, addr, idx); - if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr); - if (spte) { - get_page(virt_to_page(spte)); - break; - } - } - } - - if (!spte) - goto out; - - spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) - pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); - else - put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); -out: - pte = (pte_t *)pmd_alloc(mm, pud, addr); - mutex_unlock(&mapping->i_mmap_mutex); - return pte; -} - -/* - * unmap huge page backed by shared pte. - * - * Hugetlb pte page is ref counted at the time of mapping. If pte is shared - * indicated by page_count > 1, unmap is achieved by clearing pud and - * decrementing the ref count. If count == 1, the pte page is not shared. - * - * called with vma->vm_mm->page_table_lock held. - * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user - */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); - - BUG_ON(page_count(virt_to_page(ptep)) == 0); - if (page_count(virt_to_page(ptep)) == 1) - return 0; - - pud_clear(pud); - put_page(virt_to_page(ptep)); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; - return 1; -} - pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { From ba47dd3ca215b7dbdd638bad985db555997506a5 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 08:02:03 +0100 Subject: [PATCH 0878/1368] mm: hugetlb: Copy general hugetlb code from x86 to mm. The huge_pte_alloc, huge_pte_offset and follow_huge_p[mu]d functions in x86/mm/hugetlbpage.c do not rely on any architecture specific knowledge other than the fact that pmds and puds can be treated as huge ptes. To allow other architectures to use this code (and reduce the need for code duplication), this patch copies these functions into mm, replaces the use of pud_large with pud_huge and provides a config flag to activate them: CONFIG_ARCH_WANT_GENERAL_HUGETLB If CONFIG_ARCH_WANT_HUGE_PMD_SHARE is also active then the huge_pmd_share code will be called by huge_pte_alloc (othewise we call pmd_alloc and skip the sharing code). Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- mm/hugetlb.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 15d84e55ceac..dbd3d42ae031 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2931,15 +2931,6 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } -/* Can be overriden by architectures */ -__attribute__((weak)) struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - BUG(); - return NULL; -} - long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, @@ -3289,8 +3280,96 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; return 1; } +#define want_pmd_share() (1) +#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + return NULL; +} +#define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else { + BUG_ON(sz != PMD_SIZE); + if (want_pmd_share() && pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } + } + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) { + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + } + } + return (pte_t *) pmd; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pud); + if (page) + page += ((address & ~PUD_MASK) >> PAGE_SHIFT); + return page; +} + +#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + +/* Can be overriden by architectures */ +__attribute__((weak)) struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + BUG(); + return NULL; +} + +#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From 66ea6bbaf026e39c62d0245e37bfd99170337fcf Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 08:03:42 +0100 Subject: [PATCH 0879/1368] x86: mm: Remove general hugetlb code from x86. huge_pte_alloc, huge_pte_offset and follow_huge_p[mu]d have already been copied over to mm. This patch removes the x86 copies of these functions and activates the general ones by enabling: CONFIG_ARCH_WANT_GENERAL_HUGETLB Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- arch/x86/Kconfig | 3 ++ arch/x86/mm/hugetlbpage.c | 67 --------------------------------------- 2 files changed, 3 insertions(+), 67 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 56d606b2497c..787072769a80 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -210,6 +210,9 @@ config ARCH_SUSPEND_POSSIBLE config ARCH_WANT_HUGE_PMD_SHARE def_bool y +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 7e522a359972..7e73e8c69096 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,49 +16,6 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { - if (sz == PUD_SIZE) { - pte = (pte_t *)pud; - } else { - BUG_ON(sz != PMD_SIZE); - if (pud_none(*pud)) - pte = huge_pmd_share(mm, addr, pud); - else - pte = (pte_t *)pmd_alloc(mm, pud, addr); - } - } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - - return pte; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_large(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } - return (pte_t *) pmd; -} - #if 0 /* This is just for testing */ struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) @@ -120,30 +77,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_PSE); } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - #endif /* x86_64 also uses this file */ From 1d85bc6282e21be5170b4b52413c097bce070258 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 7 May 2013 14:46:03 +0100 Subject: [PATCH 0880/1368] mm: thp: Correct the HPAGE_PMD_ORDER check. All Transparent Huge Pages are allocated by the buddy allocator. A compile time check is in place that fails when the order of a transparent huge page is too large to be allocated by the buddy allocator. Unfortunately that compile time check passes when: HPAGE_PMD_ORDER == MAX_ORDER ( which is incorrect as the buddy allocator can only allocate memory of order strictly less than MAX_ORDER. ) This patch updates the compile time check to fail in the above case. Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 528454c2caa9..26ee56c80dc7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma, } while (0) extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, pmd_t *pmd); -#if HPAGE_PMD_ORDER > MAX_ORDER +#if HPAGE_PMD_ORDER >= MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" #endif extern int hugepage_madvise(struct vm_area_struct *vma, From f45009d07121dfa40d0b9c74c8364669dbe8c4af Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 11:00:33 +0100 Subject: [PATCH 0881/1368] ARM64: mm: Restore memblock limit when map_mem finished. In paging_init the memblock limit is set to restrict any addresses returned by early_alloc to fit within the initial direct kernel mapping in swapper_pg_dir. This allows map_mem to allocate puds, pmds and ptes from the initial direct kernel mapping. The limit stays low after paging_init() though, meaning any bootmem allocations will be from a restricted subset of memory. Gigabyte huge pages, for instance, are normally allocated from bootmem as their order (18) is too large for the default buddy allocator (MAX_ORDER = 11). This patch restores the memblock limit when map_mem has finished, allowing gigabyte huge pages (and other objects) to be allocated from all of bootmem. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index eeecc9c8ed68..9fa027b39156 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -297,6 +297,16 @@ static void __init map_mem(void) { struct memblock_region *reg; + /* + * Temporarily limit the memblock range. We need to do this as + * create_mapping requires puds, pmds and ptes to be allocated from + * memory addressable from the initial direct kernel mapping. + * + * The initial direct kernel mapping, located at swapper_pg_dir, + * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned). + */ + memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); + /* map all the memory banks */ for_each_memblock(memory, reg) { phys_addr_t start = reg->base; @@ -307,6 +317,9 @@ static void __init map_mem(void) create_mapping(start, __phys_to_virt(start), end - start); } + + /* Limit no longer required. */ + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } /* @@ -317,12 +330,6 @@ void __init paging_init(void) { void *zero_page; - /* - * Maximum PGDIR_SIZE addressable via the initial direct kernel - * mapping in swapper_pg_dir. - */ - memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); - init_mem_pgprot(); map_mem(); From 5ea0c51491e4fef3dceef3fe0d3bac9318752c13 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 2 May 2013 16:25:42 +0100 Subject: [PATCH 0882/1368] ARM64: mm: Make PAGE_NONE pages read only and no-execute. If we consider the following code sequence: my_pte = pte_modify(entry, myprot); x = pte_write(my_pte); y = pte_exec(my_pte); If myprot comes from a PROT_NONE page, then x and y will both be true which is undesireable behaviour. This patch sets the no-execute and read-only bits for PAGE_NONE such that the code above will return false for both x and y. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e333a243bfcc..77b09d6fee23 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -66,7 +66,7 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) #define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) @@ -76,7 +76,7 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE) +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) From 7308560ac76c63d199d51490680b70451b471d62 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 28 May 2013 13:35:51 +0100 Subject: [PATCH 0883/1368] ARM64: mm: Move PTE_PROT_NONE bit. Under ARM64, PTEs can be broadly categorised as follows: - Present and valid: Bit #0 is set. The PTE is valid and memory access to the region may fault. - Present and invalid: Bit #0 is clear and bit #1 is set. Represents present memory with PROT_NONE protection. The PTE is an invalid entry, and the user fault handler will raise a SIGSEGV. - Not present (file or swap): Bits #0 and #1 are clear. Memory represented has been paged out. The PTE is an invalid entry, and the fault handler will try and re-populate the memory where necessary. Huge PTEs are block descriptors that have bit #1 clear. If we wish to represent PROT_NONE huge PTEs we then run into a problem as there is no way to distinguish between regular and huge PTEs if we set bit #1. To resolve this ambiguity this patch moves PTE_PROT_NONE from bit #1 to bit #2 and moves PTE_FILE from bit #2 to bit #3. The number of swap/file bits is reduced by 1 as a consequence, leaving 60 bits for file and swap entries. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 77b09d6fee23..2291de0258ed 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,8 +25,8 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ +#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) @@ -281,12 +281,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-8: swap type + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-8: swap type * bits 9-63: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 4 #define __SWP_TYPE_BITS 6 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) @@ -306,15 +306,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-63: file offset / PAGE_SIZE + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-63: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 3) -#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 4) +#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) -#define PTE_FILE_MAX_BITS 61 +#define PTE_FILE_MAX_BITS 60 extern int kern_addr_valid(unsigned long addr); From 597173f474cb5a28b4c9a1dbf865ac002c7e5010 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 10 Apr 2013 13:48:00 +0100 Subject: [PATCH 0884/1368] ARM64: mm: HugeTLB support. Add huge page support to ARM64, different huge page sizes are supported depending on the size of normal pages: PAGE_SIZE is 4KB: 2MB - (pmds) these can be allocated at any time. 1024MB - (puds) usually allocated on bootup with the command line with something like: hugepagesz=1G hugepages=6 PAGE_SIZE is 64KB: 512MB - (pmds) usually allocated on bootup via command line. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 9 ++ arch/arm64/include/asm/hugetlb.h | 117 +++++++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 8 ++ arch/arm64/include/asm/pgtable.h | 13 ++- arch/arm64/mm/Makefile | 1 + arch/arm64/mm/fault.c | 19 +--- arch/arm64/mm/hugetlbpage.c | 70 +++++++++++++++ 7 files changed, 220 insertions(+), 17 deletions(-) create mode 100644 arch/arm64/include/asm/hugetlb.h create mode 100644 arch/arm64/mm/hugetlbpage.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..cd6eca84a21c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -180,6 +180,15 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y if !ARM64_64K_PAGES + source "mm/Kconfig" endmenu diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h new file mode 100644 index 000000000000..5b7ca8ace95f --- /dev/null +++ b/arch/arm64/include/asm/hugetlb.h @@ -0,0 +1,117 @@ +/* + * arch/arm64/include/asm/hugetlb.h + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_HUGETLB_H +#define __ASM_HUGETLB_H + +#include +#include + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 75fd13d289b9..e6e0a0d4cf9a 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -25,12 +25,19 @@ /* * Hardware page table definitions. * + * Level 1 descriptor (PUD). + */ + +#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) + +/* * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) /* * Section @@ -53,6 +60,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2291de0258ed..21771330f809 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -173,8 +173,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Huge pte definitions. */ -#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE) -#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE)) +#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + +/* + * Hugetlb definitions. + */ +#define HUGE_MAX_HSTATE 2 +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define __HAVE_ARCH_PTE_SPECIAL diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 3140a2abcdc2..b51d36401d83 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o tlb.o proc.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1426468b77f3..0ecac8980aae 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -364,17 +364,6 @@ static int __kprobes do_translation_fault(unsigned long addr, return 0; } -/* - * Some section permission faults need to be handled gracefully. They can - * happen due to a __{get,put}_user during an oops. - */ -static int do_sect_fault(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - do_bad_area(addr, esr, regs); - return 0; -} - /* * This abort handler always returns "fault". */ @@ -398,12 +387,12 @@ static struct fault_info { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c new file mode 100644 index 000000000000..2fc8258bab2d --- /dev/null +++ b/arch/arm64/mm/hugetlbpage.c @@ -0,0 +1,70 @@ +/* + * arch/arm64/mm/hugetlbpage.c + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/mm/hugetlbpage.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} +#endif + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !(pmd_val(pmd) & PMD_TABLE_BIT); +} + +int pud_huge(pud_t pud) +{ + return !(pud_val(pud) & PUD_TABLE_BIT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); From 74c50894f33d4bf349499e8f6ac75db3dd1ff3ff Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 25 Apr 2013 15:19:21 +0100 Subject: [PATCH 0885/1368] ARM64: mm: Raise MAX_ORDER for 64KB pages and THP. The buddy allocator has a default MAX_ORDER of 11, which is too low to allocate enough memory for 512MB Transparent HugePages if our base page size is 64KB. This patch introduces MAX_ZONE_ORDER and sets it to 14 when 64KB pages are used in conjuction with THP, otherwise the default value of 11 is used. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cd6eca84a21c..10607d63b945 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -191,6 +191,11 @@ config ARCH_WANT_HUGE_PMD_SHARE source "mm/Kconfig" +config FORCE_MAX_ZONEORDER + int + default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "11" + endmenu menu "Boot options" From a348ad67a1dfd4a7aa2400f8d39082c05dab0303 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 19 Apr 2013 16:23:57 +0100 Subject: [PATCH 0886/1368] ARM64: mm: THP support. Bring Transparent HugePage support to ARM. The size of a transparent huge page depends on the normal page size. A transparent huge page is always represented as a pmd. If PAGE_SIZE is 4KB, THPs are 2MB. If PAGE_SIZE is 64KB, THPs are 512MB. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 3 ++ arch/arm64/include/asm/pgtable-hwdef.h | 4 ++ arch/arm64/include/asm/pgtable.h | 55 ++++++++++++++++++++++++++ arch/arm64/include/asm/tlb.h | 6 +++ arch/arm64/include/asm/tlbflush.h | 2 + 5 files changed, 70 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 10607d63b945..308a55636f76 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -189,6 +189,9 @@ config ARCH_WANT_GENERAL_HUGETLB config ARCH_WANT_HUGE_PMD_SHARE def_bool y if !ARM64_64K_PAGES +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + source "mm/Kconfig" config FORCE_MAX_ZONEORDER diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e6e0a0d4cf9a..63c9d0de05bb 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -42,6 +42,10 @@ /* * Section */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 21771330f809..720fc4a2be49 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -187,6 +187,61 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SPECIAL +/* + * Software PMD bits for THP + */ + +#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57) + +/* + * THP definitions. + */ +#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#endif + +#define PMD_BIT_FUNC(fn,op) \ +static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } + +PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); +PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); +PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN | + PMD_SECT_RDONLY | PMD_SECT_PROT_NONE | + PMD_SECT_VALID; + pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); + return pmd; +} + +#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 654f0968030b..46b3beb4b773 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #define tlb_migrate_finish(mm) do { } while (0) +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 122d6320f745..8b482035cfc2 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, dsb(); } +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif #endif From a2b07b7b16594e55d18891498b5d37acd884f63f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 27 Nov 2013 16:59:27 +0000 Subject: [PATCH 0887/1368] arm64: Move PTE_PROT_NONE higher up PTE_PROT_NONE means that a pte is present but does not have any read/write attributes. However, setting the memory type like pgprot_writecombine() is allowed and such bits overlap with PTE_PROT_NONE. This causes mmap/munmap issues in drivers that change the vma->vm_pg_prot on PROT_NONE mappings. This patch reverts the PTE_FILE/PTE_PROT_NONE shift in commit 59911ca4325d (ARM64: mm: Move PTE_PROT_NONE bit) and moves PTE_PROT_NONE together with the other software bits. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas Tested-by: Steve Capper Cc: # 3.11+ --- arch/arm64/include/asm/pgtable.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 720fc4a2be49..15d05a69af6f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,10 +25,11 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ +#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) + /* bit 57 for PMD_SECT_SPLITTING */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. @@ -345,18 +346,20 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0, 2: present (must both be zero) - * bit 3: PTE_FILE - * bits 4-8: swap type - * bits 9-63: swap offset + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-8: swap type + * bits 9-57: swap offset */ -#define __SWP_TYPE_SHIFT 4 +#define __SWP_TYPE_SHIFT 3 #define __SWP_TYPE_BITS 6 +#define __SWP_OFFSET_BITS 49 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) -#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT) +#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) @@ -370,15 +373,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0, 2: present (must both be zero) - * bit 3: PTE_FILE - * bits 4-63: file offset / PAGE_SIZE + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-57: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 4) -#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 3) +#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) -#define PTE_FILE_MAX_BITS 60 +#define PTE_FILE_MAX_BITS 55 extern int kern_addr_valid(unsigned long addr); From 7e5f0d41d30d8914400e0e4b892bc25b19cd38f1 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 5 Dec 2013 12:04:51 +0000 Subject: [PATCH 0888/1368] arm64: mm: Fix PMD_SECT_PROT_NONE definition Modify the value of PMD_SECT_PROT_NONE to match that of PTE_NONE. This should have been in commit 3676f9ef5481 (Move PTE_PROT_NONE higher up). Signed-off-by: Steve Capper Cc: # 3.11+: 3676f9ef5481: arm64: Move PTE_PROT_NONE higher up Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-hwdef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 63c9d0de05bb..2294f2330960 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -43,7 +43,7 @@ * Section */ #define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) -#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) From 89a6a78ec21e274c8abe1a63f2abc876ba127b60 Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Tue, 21 Jan 2014 15:49:09 -0800 Subject: [PATCH 0889/1368] mm/hugetlb.c: call MMU notifiers when copying a hugetlb page range When copy_hugetlb_page_range() is called to copy a range of hugetlb mappings, the secondary MMUs are not notified if there is a protection downgrade, which breaks COW semantics in KVM. This patch adds the necessary MMU notifier calls. Signed-off-by: Andreas Sandberg Acked-by: Steve Capper Acked-by: Marc Zyngier Cc: Mel Gorman Cc: Rik van Riel Cc: Hugh Dickins Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dbd3d42ae031..a7482d1d2131 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2295,16 +2295,26 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, int cow; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); + unsigned long mmun_start; /* For mmu_notifiers */ + unsigned long mmun_end; /* For mmu_notifiers */ + int ret = 0; cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + mmun_start = vma->vm_start; + mmun_end = vma->vm_end; + if (cow) + mmu_notifier_invalidate_range_start(src, mmun_start, mmun_end); + for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { src_pte = huge_pte_offset(src, addr); if (!src_pte) continue; dst_pte = huge_pte_alloc(dst, addr, sz); - if (!dst_pte) - goto nomem; + if (!dst_pte) { + ret = -ENOMEM; + break; + } /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) @@ -2324,10 +2334,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, spin_unlock(&src->page_table_lock); spin_unlock(&dst->page_table_lock); } - return 0; -nomem: - return -ENOMEM; + if (cow) + mmu_notifier_invalidate_range_end(src, mmun_start, mmun_end); + + return ret; } static int is_hugetlb_entry_migration(pte_t pte) From e0934c26244ab270ec6117e41b06844fbf8dd367 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 15 Jan 2014 14:07:12 +0000 Subject: [PATCH 0890/1368] arm64: mm: Remove PTE_BIT_FUNC macro Expand out the pte manipulation functions. This makes our life easier when using things like tags and cscope. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 49 ++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 15d05a69af6f..be3ac5372424 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -140,16 +140,47 @@ extern struct page *empty_zero_page; #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) -#define PTE_BIT_FUNC(fn,op) \ -static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) |= PTE_RDONLY; + return pte; +} -PTE_BIT_FUNC(wrprotect, |= PTE_RDONLY); -PTE_BIT_FUNC(mkwrite, &= ~PTE_RDONLY); -PTE_BIT_FUNC(mkclean, &= ~PTE_DIRTY); -PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); -PTE_BIT_FUNC(mkold, &= ~PTE_AF); -PTE_BIT_FUNC(mkyoung, |= PTE_AF); -PTE_BIT_FUNC(mkspecial, |= PTE_SPECIAL); +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) &= ~PTE_RDONLY; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~PTE_AF; + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= PTE_AF; + return pte; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= PTE_SPECIAL; + return pte; +} static inline void set_pte(pte_t *ptep, pte_t pte) { From 6d251ba203e4517823c4dea69a7941a77e7580c1 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 15 Jan 2014 14:07:13 +0000 Subject: [PATCH 0891/1368] arm64: mm: Introduce PTE_WRITE We have the following means for encoding writable or dirty ptes: PTE_DIRTY PTE_RDONLY !pte_dirty && !pte_write 0 1 !pte_dirty && pte_write 0 1 pte_dirty && !pte_write 1 1 pte_dirty && pte_write 1 0 So we can't distinguish between writable clean ptes and read only ptes. This can cause problems with ptes being incorrectly flagged as read only when they are writable but not dirty. This patch introduces a new software bit PTE_WRITE which allows us to correctly identify writable ptes. PTE_RDONLY is now only clear for valid ptes where a page is both writable and dirty. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas Conflicts: arch/arm64/include/asm/pgtable.h --- arch/arm64/include/asm/pgtable.h | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index be3ac5372424..ca9746d66d41 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -28,7 +28,7 @@ #define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) - /* bit 57 for PMD_SECT_SPLITTING */ +#define PTE_WRITE (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* @@ -67,23 +67,23 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) -#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) -#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) +define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #endif /* __ASSEMBLY__ */ @@ -134,7 +134,7 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & PTE_AF) #define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_write(pte) (pte_val(pte) & PTE_WRITE) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ @@ -142,13 +142,13 @@ extern struct page *empty_zero_page; static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) |= PTE_RDONLY; + pte_val(pte) &= ~PTE_WRITE; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) &= ~PTE_RDONLY; + pte_val(pte) |= PTE_WRITE; return pte; } @@ -195,8 +195,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_valid_user(pte)) { if (pte_exec(pte)) __sync_icache_dcache(pte, addr); - if (!pte_dirty(pte)) - pte = pte_wrprotect(pte); + if (pte_dirty(pte) && pte_write(pte)) + pte_val(pte) &= ~PTE_RDONLY; + else + pte_val(pte) |= PTE_RDONLY; } set_pte(ptep, pte); @@ -364,7 +366,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } From e0834c5484ef79b4d631eb0ae9dd25d4055c445f Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 25 Feb 2014 11:38:53 +0000 Subject: [PATCH 0892/1368] arm64: mm: Add double logical invert to pte accessors Page table entries on ARM64 are 64 bits, and some pte functions such as pte_dirty return a bitwise-and of a flag with the pte value. If the flag to be tested resides in the upper 32 bits of the pte, then we run into the danger of the result being dropped if downcast. For example: gather_stats(page, md, pte_dirty(*pte), 1); where pte_dirty(*pte) is downcast to an int. This patch adds a double logical invert to all the pte_ accessors to ensure predictable downcasting. Signed-off-by: Steve Capper Cc: Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ca9746d66d41..2aa0ccfd3bbb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -130,11 +130,11 @@ extern struct page *empty_zero_page; /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) -#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & PTE_AF) -#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (pte_val(pte) & PTE_WRITE) +#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) +#define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) +#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) +#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ From 08ff25978588beb417dde2b8d102dac4ac06c07d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 14 Apr 2014 18:25:05 +0100 Subject: [PATCH 0893/1368] arm64: Fix build for __PAGE_NONE define Simple typo. Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2aa0ccfd3bbb..b0e6b8f36f51 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -77,7 +77,7 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE) -define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) From 475e06edbfdad8f7bbc5edb3d803c52e27d84c2a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 9 May 2014 15:58:14 +0100 Subject: [PATCH 0894/1368] DMA-API: provide a helper to set both DMA and coherent DMA masks Provide a helper to set both the DMA and coherent DMA masks to the same value - this avoids duplicated code in a number of drivers, sometimes with buggy error handling, and also allows us identify which drivers do things differently. Signed-off-by: Russell King (cherry picked from commit 4aa806b771d16b810771d86ce23c4c3160888db3) Signed-off-by: Ryan Harkin Signed-off-by: Mark Brown --- Documentation/DMA-API-HOWTO.txt | 37 ++++++++++++++++++++------------- Documentation/DMA-API.txt | 8 +++++++ include/linux/dma-mapping.h | 14 +++++++++++++ 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index 14129f149a75..5e983031cc11 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -101,14 +101,23 @@ style to do this even if your device holds the default setting, because this shows that you did think about these issues wrt. your device. -The query is performed via a call to dma_set_mask(): +The query is performed via a call to dma_set_mask_and_coherent(): - int dma_set_mask(struct device *dev, u64 mask); + int dma_set_mask_and_coherent(struct device *dev, u64 mask); -The query for consistent allocations is performed via a call to -dma_set_coherent_mask(): +which will query the mask for both streaming and coherent APIs together. +If you have some special requirements, then the following two separate +queries can be used instead: - int dma_set_coherent_mask(struct device *dev, u64 mask); + The query for streaming mappings is performed via a call to + dma_set_mask(): + + int dma_set_mask(struct device *dev, u64 mask); + + The query for consistent allocations is performed via a call + to dma_set_coherent_mask(): + + int dma_set_coherent_mask(struct device *dev, u64 mask); Here, dev is a pointer to the device struct of your device, and mask is a bit mask describing which bits of an address your device @@ -137,7 +146,7 @@ exactly why. The standard 32-bit addressing device would do something like this: - if (dma_set_mask(dev, DMA_BIT_MASK(32))) { + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) { printk(KERN_WARNING "mydev: No suitable DMA available.\n"); goto ignore_this_device; @@ -171,22 +180,20 @@ the case would look like this: int using_dac, consistent_using_dac; - if (!dma_set_mask(dev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) { using_dac = 1; consistent_using_dac = 1; - dma_set_coherent_mask(dev, DMA_BIT_MASK(64)); - } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) { + } else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) { using_dac = 0; consistent_using_dac = 0; - dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); } else { printk(KERN_WARNING "mydev: No suitable DMA available.\n"); goto ignore_this_device; } -dma_set_coherent_mask() will always be able to set the same or a -smaller mask as dma_set_mask(). However for the rare case that a +The coherent coherent mask will always be able to set the same or a +smaller mask as the streaming mask. However for the rare case that a device driver only uses consistent allocations, one would have to check the return value from dma_set_coherent_mask(). @@ -199,9 +206,9 @@ address you might do something like: goto ignore_this_device; } -When dma_set_mask() is successful, and returns zero, the kernel saves -away this mask you have provided. The kernel will use this -information later when you make DMA mappings. +When dma_set_mask() or dma_set_mask_and_coherent() is successful, and +returns zero, the kernel saves away this mask you have provided. The +kernel will use this information later when you make DMA mappings. There is a case which we are aware of at this time, which is worth mentioning in this documentation. If your device supports multiple diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 78a6c569d204..e865279cec58 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -141,6 +141,14 @@ won't change the current mask settings. It is more intended as an internal API for use by the platform than an external API for use by driver writers. +int +dma_set_mask_and_coherent(struct device *dev, u64 mask) + +Checks to see if the mask is possible and updates the device +streaming and coherent DMA mask parameters if it is. + +Returns: 0 if successful and a negative error if not. + int dma_set_mask(struct device *dev, u64 mask) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 94af41858513..8f7a2e84e527 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -97,6 +97,20 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask) } #endif +/* + * Set both the DMA mask and the coherent DMA mask to the same thing. + * Note that we don't check the return value from dma_set_coherent_mask() + * as the DMA API guarantees that the coherent DMA mask can be set to + * the same or smaller than the streaming DMA mask. + */ +static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask) +{ + int rc = dma_set_mask(dev, mask); + if (rc == 0) + dma_set_coherent_mask(dev, mask); + return rc; +} + extern u64 dma_get_required_mask(struct device *dev); static inline unsigned int dma_get_max_seg_size(struct device *dev) From 1487ad49044023a8ce9b7fc5a9cdf6bbec0b8748 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 9 May 2014 15:58:15 +0100 Subject: [PATCH 0895/1368] DMA-API: provide a helper to setup DMA masks Many drivers contain code such as: dev->dma_mask = &dev->coherent_dma_mask; dev->coherent_dma_mask = MASK; Let's move this pattern out of drivers and have the DMA API provide a helper for it. This helper uses dma_set_mask_and_coherent() to allow platform issues to be properly dealt with via dma_set_mask()/ dma_is_supported(). Signed-off-by: Russell King (cherry picked from commit fa6a8d6d65b19ab44e5244ea499bcd553cc72343) Signed-off-by: Ryan Harkin Signed-off-by: Mark Brown --- include/linux/dma-mapping.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 8f7a2e84e527..48ef6f50d86c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -111,6 +111,16 @@ static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask) return rc; } +/* + * Similar to the above, except it deals with the case where the device + * does not have dev->dma_mask appropriately setup. + */ +static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) +{ + dev->dma_mask = &dev->coherent_dma_mask; + return dma_set_mask_and_coherent(dev, mask); +} + extern u64 dma_get_required_mask(struct device *dev); static inline unsigned int dma_get_max_seg_size(struct device *dev) From 03b0d29e0369054e638328b3994b8cfbb9d34a35 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 9 May 2014 15:58:16 +0100 Subject: [PATCH 0896/1368] arm64: Fix DMA range invalidation for cache line unaligned buffers If the buffer needing cache invalidation for inbound DMA does start or end on a cache line aligned address, we need to use the non-destructive clean&invalidate operation. This issue was introduced by commit 7363590d2c46 (arm64: Implement coherent DMA API based on swiotlb). Signed-off-by: Catalin Marinas Reported-by: Jon Medhurst (Tixy) (cherry picked from commit ebf81a938dade3b450eb11c57fa744cfac4b523f) Signed-off-by: Ryan Harkin Signed-off-by: Mark Brown --- arch/arm64/mm/cache.S | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 4726b8209d37..39b6542b138e 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -175,12 +175,19 @@ ENDPROC(__flush_dcache_area) __dma_inv_range: dcache_line_size x2, x3 sub x3, x2, #1 - bic x0, x0, x3 + tst x1, x3 // end cache line aligned? bic x1, x1, x3 -1: dc ivac, x0 // invalidate D / U line - add x0, x0, x2 + b.eq 1f + dc civac, x1 // clean & invalidate D / U line +1: tst x0, x3 // start cache line aligned? + bic x0, x0, x3 + b.eq 2f + dc civac, x0 // clean & invalidate D / U line + b 3f +2: dc ivac, x0 // invalidate D / U line +3: add x0, x0, x2 cmp x0, x1 - b.lo 1b + b.lo 2b dsb sy ret ENDPROC(__dma_inv_range) From f44acdf9c761dbbd932ff7bdb8130c85668d3201 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 9 May 2014 15:58:17 +0100 Subject: [PATCH 0897/1368] arm64: Replace ZONE_DMA32 with ZONE_DMA On arm64 we do not have two DMA zones, so it does not make sense to implement ZONE_DMA32. This patch changes ZONE_DMA32 with ZONE_DMA, the latter covering 32-bit dma address space to honour GFP_DMA allocations. Signed-off-by: Catalin Marinas (cherry picked from commit 19e7640d1f2302c20df2733e3e3df49acb17189e) Signed-off-by: Ryan Harkin Conflicts: arch/arm64/mm/dma-mapping.c Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 2 +- arch/arm64/mm/dma-mapping.c | 4 ++-- arch/arm64/mm/init.c | 31 ++++++++++++++++--------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1c650048a4e3..e418519ec904 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -78,7 +78,7 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y -config ZONE_DMA32 +config ZONE_DMA def_bool y config ARCH_DMA_ADDR_T_64BIT diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 550873ace597..1baca2efb234 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -34,9 +34,9 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) { - if (IS_ENABLED(CONFIG_ZONE_DMA32) && + if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) - flags |= GFP_DMA32; + flags |= GFP_DMA; if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 851d3c137e03..63f679b276b6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -68,22 +69,22 @@ static int __init early_initrd(char *p) } early_param("initrd", early_initrd); -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) - static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma32 = min; + unsigned long max_dma = min; memset(zone_size, 0, sizeof(zone_size)); -#ifdef CONFIG_ZONE_DMA32 /* 4GB maximum for 32-bit only capable devices */ - max_dma32 = max(min, min(max, MAX_DMA32_PFN)); - zone_size[ZONE_DMA32] = max_dma32 - min; -#endif - zone_size[ZONE_NORMAL] = max - max_dma32; + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + unsigned long max_dma_phys = + (unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1); + max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)); + zone_size[ZONE_DMA] = max_dma - min; + } + zone_size[ZONE_NORMAL] = max - max_dma; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -93,15 +94,15 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; -#ifdef CONFIG_ZONE_DMA32 - if (start < max_dma32) { - unsigned long dma_end = min(end, max_dma32); - zhole_size[ZONE_DMA32] -= dma_end - start; + + if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) { + unsigned long dma_end = min(end, max_dma); + zhole_size[ZONE_DMA] -= dma_end - start; } -#endif - if (end > max_dma32) { + + if (end > max_dma) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma32); + unsigned long normal_start = max(start, max_dma); zhole_size[ZONE_NORMAL] -= normal_end - normal_start; } } From dbcc9311db25d7d67bcb1ddc99168a810dc5ff07 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 9 May 2014 15:58:18 +0100 Subject: [PATCH 0898/1368] arm64: Use swiotlb late initialisation Since arm64 does not support ISA, there is no need for early swiotlb initialisation. This patch switches the DMA mapping code to swiotlb_tlb_late_init_with_default_size(). A side effect of this is that GFP_DMA is used for the swiotlb buffer and devices with a 32-bit coherent mask are correctly supported. Signed-off-by: Catalin Marinas (cherry picked from commit 3690951fc6d42f3a0903987677d0e592c49dd8db) Signed-off-by: Ryan Harkin Conflicts: arch/arm64/mm/init.c Signed-off-by: Mark Brown --- arch/arm64/mm/dma-mapping.c | 10 ++++++++-- arch/arm64/mm/init.c | 2 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 1baca2efb234..a48f1da828f6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -248,11 +248,17 @@ struct dma_map_ops coherent_swiotlb_dma_ops = { }; EXPORT_SYMBOL(coherent_swiotlb_dma_ops); -void __init arm64_swiotlb_init(void) +extern int swiotlb_late_init_with_default_size(size_t default_size); + +static int __init swiotlb_late_init(void) { + size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); + dma_ops = &coherent_swiotlb_dma_ops; - swiotlb_init(1); + + return swiotlb_late_init_with_default_size(swiotlb_size); } +subsys_initcall(swiotlb_late_init); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 63f679b276b6..fa3651855334 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -287,8 +287,6 @@ void __init mem_init(void) unsigned long reserved_pages, free_pages; struct memblock_region *reg; - arm64_swiotlb_init(); - max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; #ifndef CONFIG_SPARSEMEM_VMEMMAP From 3abb7f235bacff959dc5a12c4ef77a4f358d2dc5 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Fri, 9 May 2014 15:58:19 +0100 Subject: [PATCH 0899/1368] arm64: Make default dma_ops to be noncoherent Currently arm64 dma_ops is by default made coherent which makes it opposite in default policy from arm. Make default dma_ops to be noncoherent (same as arm), as currently there aren't any dma-capable drivers which assumes coherent ops Signed-off-by: Ritesh Harjani Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c7a4a7658d689f664050c45493d79adf053f226e) Signed-off-by: Jon Medhurst Signed-off-by: Mark Brown --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a48f1da828f6..d3d9bf5884f8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -254,7 +254,7 @@ static int __init swiotlb_late_init(void) { size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); - dma_ops = &coherent_swiotlb_dma_ops; + dma_ops = &noncoherent_swiotlb_dma_ops; return swiotlb_late_init_with_default_size(swiotlb_size); } From bcc37aab3547afa2b1fda11ae525930e0de76b90 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Jul 2013 15:37:12 +0100 Subject: [PATCH 0900/1368] arm64: mm: don't treat user cache maintenance faults as writes commit db6f41063cbdb58b14846e600e6bc3f4e4c2e888 upstream. On arm64, cache maintenance faults appear as data aborts with the CM bit set in the ESR. The WnR bit, usually used to distinguish between faulting loads and stores, always reads as 1 and (slightly confusingly) the instructions are treated as reads by the architecture. This patch fixes our fault handling code to treat cache maintenance faults in the same way as loads. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 88c0a794e5d9bcc29926e636cd1d6eb5c9dcb235) Signed-off-by: Mark Brown --- arch/arm64/mm/fault.c | 46 +++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1426468b77f3..f51d669c8ebd 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -152,25 +152,8 @@ void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) #define ESR_CM (1 << 8) #define ESR_LNX_EXEC (1 << 24) -/* - * Check that the permissions on the VMA allow for the fault which occurred. - * If we encountered a write fault, we must have write permission, otherwise - * we allow any permission. - */ -static inline bool access_error(unsigned int esr, struct vm_area_struct *vma) -{ - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; - - if (esr & ESR_WRITE) - mask = VM_WRITE; - if (esr & ESR_LNX_EXEC) - mask = VM_EXEC; - - return vma->vm_flags & mask ? false : true; -} - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int esr, unsigned int flags, + unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) { struct vm_area_struct *vma; @@ -188,12 +171,17 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, * it. */ good_area: - if (access_error(esr, vma)) { + /* + * Check that the permissions on the VMA allow for the fault which + * occurred. If we encountered a write or exec fault, we must have + * appropriate permissions, otherwise we allow any permission. + */ + if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags); check_stack: if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) @@ -208,9 +196,15 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - bool write = (esr & ESR_WRITE) && !(esr & ESR_CM); - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (esr & ESR_LNX_EXEC) { + vm_flags = VM_EXEC; + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { + vm_flags = VM_WRITE; + mm_flags |= FAULT_FLAG_WRITE; + } tsk = current; mm = tsk->mm; @@ -248,7 +242,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, #endif } - fault = __do_page_fault(mm, addr, esr, flags, tsk); + fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); /* * If we need to retry but a fatal signal is pending, handle the @@ -265,7 +259,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, */ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, @@ -280,7 +274,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of * starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; goto retry; } } From 741dff38f169e49d8731e7362569fe4b1f9482de Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:39 +0100 Subject: [PATCH 0901/1368] arm64: perf: fix array out of bounds access in armpmu_map_hw_event() commit 868f6fea8fa63f09acbfa93256d0d2abdcabff79 upstream. This is a port of d9f966357b14 ("ARM: 7810/1: perf: Fix array out of bounds access in armpmu_map_hw_event()") to arm64, which fixes an oops in the arm64 perf backend found as a result of Vince's fuzzing tool. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 975a3cd4cc0dff06a635cb16b8fb25fd3ae88234) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9ba33c40cdf8..2012646fb46f 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -107,7 +107,12 @@ armpmu_map_cache_event(const unsigned (*cache_map) static int armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) { - int mapping = (*event_map)[config]; + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; } From 4281a11a7e44655baa41241c470b09d4fae4be35 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:40 +0100 Subject: [PATCH 0902/1368] arm64: perf: fix event validation for software group leaders commit ee7538a008a45050c8f706d38b600f55953169f9 upstream. This is a port of c95eb3184ea1 ("ARM: 7809/1: perf: fix event validation for software group leaders") to arm64, which fixes a panic in the arm64 perf backend found as a result of Vince's fuzzing tool. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6c6f6c7166e9ee45e545eab850a2862a92c135b2) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2012646fb46f..12e6ccb88691 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -322,6 +322,9 @@ validate_event(struct pmu_hw_events *hw_events, struct hw_perf_event fake_event = event->hw; struct pmu *leader_pmu = event->group_leader->pmu; + if (is_software_event(event)) + return 1; + if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) return 1; From 0d7707cb914db6188e9041e4a4440687a3f89ddf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:41 +0100 Subject: [PATCH 0903/1368] arm64: perf: fix group validation when using enable_on_exec commit 8455e6ec70f33b0e8c3ffd47067e00481f09f454 upstream. This is a port of cb2d8b342aa0 ("ARM: 7698/1: perf: fix group validation when using enable_on_exec") to arm64, which fixes the event validation checking so that events in the OFF state are still considered when enable_on_exec is true. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1f96d83b38937294584ede9473c2b2961528f287) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 12e6ccb88691..2a1e9163d67a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -325,7 +325,10 @@ validate_event(struct pmu_hw_events *hw_events, if (is_software_event(event)) return 1; - if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; return armpmu->get_event_idx(hw_events, &fake_event) >= 0; From a73eb27dbbbbf1b3f40a4cec380f8d9f238c26aa Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:42 +0100 Subject: [PATCH 0904/1368] arm64: perf: fix ARMv8 EVTYPE_MASK to include NSH bit commit 178cd9ce377232518ec17ff2ecab2e80fa60784c upstream. This is a port of f2fe09b055e2 ("ARM: 7663/1: perf: fix ARMv7 EVTYPE_MASK to include NSH bit") to arm64, which fixes the broken evtype mask to include the NSH bit, allowing profiling at EL2. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0fe9a0dc92a64c088e76fcd3d35b2ba36b4d7f3c) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2a1e9163d67a..cea1594ff933 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -784,7 +784,7 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_EVTYPE_MASK 0xc00000ff /* Mask for writable bits */ +#define ARMV8_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ #define ARMV8_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ /* From ff5ec2d401f7ab457cece4b1c00587e8af26d74c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Dec 2013 17:09:08 +0000 Subject: [PATCH 0905/1368] arm64: ptrace: avoid using HW_BREAKPOINT_EMPTY for disabled events commit cdc27c27843248ae7eb0df5fc261dd004eaa5670 upstream. Commit 8f34a1da35ae ("arm64: ptrace: use HW_BREAKPOINT_EMPTY type for disabled breakpoints") fixed an issue with GDB trying to zero breakpoint control registers. The problem there is that the arch hw_breakpoint code will attempt to create a (disabled), execute breakpoint of length 0. This will fail validation and report unexpected failure to GDB. To avoid this, we treated disabled breakpoints as HW_BREAKPOINT_EMPTY, but that seems to have broken with recent kernels, causing watchpoints to be treated as TYPE_INST in the core code and returning ENOSPC for any further breakpoints. This patch fixes the problem by prioritising the `enable' field of the breakpoint: if it is cleared, we simply update the perf_event_attr to indicate that the thing is disabled and don't bother changing either the type or the length. This reinforces the behaviour that the breakpoint control register is essentially read-only apart from the enable bit when disabling a breakpoint. Reported-by: Aaron Liu Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 11b81802921618b02122855db021a63df7d9fd49) Signed-off-by: Mark Brown --- arch/arm64/kernel/ptrace.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6e1e77f1831c..5341534b6d04 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -236,31 +236,29 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, { int err, len, type, disabled = !ctrl.enabled; - if (disabled) { - len = 0; - type = HW_BREAKPOINT_EMPTY; - } else { - err = arch_bp_generic_fields(ctrl, &len, &type); - if (err) - return err; + attr->disabled = disabled; + if (disabled) + return 0; - switch (note_type) { - case NT_ARM_HW_BREAK: - if ((type & HW_BREAKPOINT_X) != type) - return -EINVAL; - break; - case NT_ARM_HW_WATCH: - if ((type & HW_BREAKPOINT_RW) != type) - return -EINVAL; - break; - default: + err = arch_bp_generic_fields(ctrl, &len, &type); + if (err) + return err; + + switch (note_type) { + case NT_ARM_HW_BREAK: + if ((type & HW_BREAKPOINT_X) != type) return -EINVAL; - } + break; + case NT_ARM_HW_WATCH: + if ((type & HW_BREAKPOINT_RW) != type) + return -EINVAL; + break; + default: + return -EINVAL; } attr->bp_len = len; attr->bp_type = type; - attr->disabled = disabled; return 0; } From ec2a2a7f53125cf8e1c9ec40fa433909521647c2 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 31 May 2013 16:30:58 +0100 Subject: [PATCH 0906/1368] arm64: spinlock: retry trylock operation if strex fails on free lock commit 4ecf7ccb1973fd826456b6ab1e6dfafe9023c753 upstream. An exclusive store instruction may fail for reasons other than lock contention (e.g. a cache eviction during the critical section) so, in line with other architectures using similar exclusive instructions (alpha, mips, powerpc), retry the trylock operation if the lock appears to be free but the strex reported failure. Signed-off-by: Catalin Marinas Reported-by: Tony Thompson Acked-by: Will Deacon Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 42b5bb47a62b7b2d8cd0b9af58f914c5e83ef76e) Signed-off-by: Mark Brown --- arch/arm64/include/asm/spinlock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 7065e920149d..0defa0728a9b 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -59,9 +59,10 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) unsigned int tmp; asm volatile( - " ldaxr %w0, %1\n" + "2: ldaxr %w0, %1\n" " cbnz %w0, 1f\n" " stxr %w0, %w2, %1\n" + " cbnz %w0, 2b\n" "1:\n" : "=&r" (tmp), "+Q" (lock->lock) : "r" (1) From 31cc1d7d724ea7ec31c1d65d3a10e842c0e74b86 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 19 Jul 2013 15:08:15 +0100 Subject: [PATCH 0907/1368] arm64: Only enable local interrupts after the CPU is marked online commit 53ae3acd4390ffeecb3a11dbd5be347b5a3d98f2 upstream. There is a slight chance that (timer) interrupts are triggered before a secondary CPU has been marked online with implications on softirq thread affinity. Signed-off-by: Catalin Marinas Reported-by: Kirill Tkhai Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 81ca15a3e04961b54d4a0b395e681bffb6cfbc68) Signed-off-by: Mark Brown --- arch/arm64/kernel/smp.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5d54e3717bf8..9c93e126328c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -199,13 +199,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) raw_spin_lock(&boot_lock); raw_spin_unlock(&boot_lock); - /* - * Enable local interrupts. - */ - notify_cpu_starting(cpu); - local_irq_enable(); - local_fiq_enable(); - /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online @@ -214,6 +207,14 @@ asmlinkage void __cpuinit secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); + /* + * Enable GIC and timers. + */ + notify_cpu_starting(cpu); + + local_irq_enable(); + local_fiq_enable(); + /* * OK, it's off to the idle thread for us */ From 6a23efcb3808db22be5d2625ac928a5ae998a77c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 9 Jul 2013 15:16:06 +0100 Subject: [PATCH 0908/1368] arm64: virt: ensure visibility of __boot_cpu_mode commit 82b2f495fba338d1e3098dde1df54944a9c19751 upstream. Secondary CPUs write to __boot_cpu_mode with caches disabled, and thus a cached value of __boot_cpu_mode may be incoherent with that in memory. This could lead to a failure to detect mismatched boot modes. This patch adds flushing to ensure that writes by secondaries to __boot_cpu_mode are made visible before we test against it. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5fb08df3dd1f7b8e83936808b042725a8b067562) Signed-off-by: Mark Brown --- arch/arm64/include/asm/virt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 439827271e3d..26e310c54344 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -21,6 +21,7 @@ #define BOOT_CPU_MODE_EL2 (0x0e12b007) #ifndef __ASSEMBLY__ +#include /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -36,9 +37,20 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); phys_addr_t __hyp_get_vectors(void); +static inline void sync_boot_mode(void) +{ + /* + * As secondaries write to __boot_cpu_mode with caches disabled, we + * must flush the corresponding cache entries to ensure the visibility + * of their writes. + */ + __flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode)); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { + sync_boot_mode(); return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); } @@ -46,6 +58,7 @@ static inline bool is_hyp_mode_available(void) /* Check if the bootloader has booted CPUs in different modes */ static inline bool is_hyp_mode_mismatched(void) { + sync_boot_mode(); return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } From 6c0ab8b35469a97968c1482b14106afa8db3d743 Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Tue, 23 Jul 2013 18:52:31 +0100 Subject: [PATCH 0909/1368] arm64: Change kernel stack size to 16K commit 845ad05ec31e0f3872a321e10dbeaf872022632c upstream. Written by Catalin Marinas, tested by APM on storm platform. This is needed because of the failures encountered when running SpecWeb benchmark test. Signed-off-by: Feng Kan Acked-by: Kumar Sankaran Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 79f783f05539479676406d3d42c3d86bd203f083) Signed-off-by: Mark Brown --- arch/arm64/include/asm/thread_info.h | 4 ++-- arch/arm64/kernel/entry.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 3659e460071d..23a3c4791d86 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -24,10 +24,10 @@ #include #ifndef CONFIG_ARM64_64K_PAGES -#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE_ORDER 2 #endif -#define THREAD_SIZE 8192 +#define THREAD_SIZE 16384 #define THREAD_START_SP (THREAD_SIZE - 16) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1d1314280a03..6ad781b21c08 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -121,7 +121,7 @@ .macro get_thread_info, rd mov \rd, sp - and \rd, \rd, #~((1 << 13) - 1) // top of 8K stack + and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack .endm /* From 47b1e20b206944ea097b775e0e4a569f8bdeeb7c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 27 Sep 2013 09:04:41 +0100 Subject: [PATCH 0910/1368] arm64: fix possible invalid FPSIMD initialization state commit 6db83cea1c975b9a102e17def7d2795814e1ae2b upstream. If context switching happens during executing fpsimd_flush_thread(), stale value in FPSIMD registers will be saved into current thread's fpsimd_state by fpsimd_thread_switch(). That may cause invalid initialization state for the new process, so disable preemption when executing fpsimd_flush_thread(). Signed-off-by: Jiang Liu Cc: Jiang Liu Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2bf5861acf602fe6201ac1f82955ac7283e56b6f) Signed-off-by: Mark Brown --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e8b8357aedb4..2fa308e4a1fa 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -79,8 +79,10 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { + preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_load_state(¤t->thread.fpsimd_state); + preempt_enable(); } /* From af68cb6b43a9f32f0143e69e80d17802247818ee Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 3 Oct 2013 06:47:44 +0100 Subject: [PATCH 0911/1368] arm64: check for number of arguments in syscall_get/set_arguments() commit 7b22c03536a539142f931815528d55df455ffe2d upstream. In ftrace_syscall_enter(), syscall_get_arguments(..., 0, n, ...) if (i == 0) { ...; n--;} memcpy(..., n * sizeof(args[0])); If 'number of arguments(n)' is zero and 'argument index(i)' is also zero in syscall_get_arguments(), none of arguments should be copied by memcpy(). Otherwise 'n--' can be a big positive number and unexpected amount of data will be copied. Tracing system calls which take no argument, say sync(void), may hit this case and eventually make the system corrupted. This patch fixes the issue both in syscall_get_arguments() and syscall_set_arguments(). Signed-off-by: AKASHI Takahiro Acked-by: Will Deacon Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e2956ef5b5ddb3ede70962afe9297d8340787fa6) Signed-off-by: Mark Brown --- arch/arm64/include/asm/syscall.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 89c047f9a971..70ba9d4ee978 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -59,6 +59,9 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; @@ -82,6 +85,9 @@ static inline void syscall_set_arguments(struct task_struct *task, unsigned int i, unsigned int n, const unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { pr_warning("%s called with max args %d, handling only %d\n", __func__, i + n, SYSCALL_MAX_ARGS); From 62416c1afeaa92d112ea2b0693a027f6b68f2272 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 14 Nov 2013 15:15:37 +0000 Subject: [PATCH 0912/1368] arm64: dts: Reserve the memory used for secondary CPU release address commit df503ba7f653c590b475ab80bde788edf5af70d5 upstream. With the spin-table SMP booting method, secondary CPUs poll a location passed in the DT. The foundation-v8.dts file doesn't have this memory reserved and there is a risk of Linux using it before secondary CPUs are started. Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 29f37087bb51dfe4ff61daad60bd93f7bcfa3d72) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/foundation-v8.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/foundation-v8.dts b/arch/arm64/boot/dts/foundation-v8.dts index 84fcc5018284..519c4b2c0687 100644 --- a/arch/arm64/boot/dts/foundation-v8.dts +++ b/arch/arm64/boot/dts/foundation-v8.dts @@ -6,6 +6,8 @@ /dts-v1/; +/memreserve/ 0x80000000 0x00010000; + / { model = "Foundation-v8A"; compatible = "arm,foundation-aarch64", "arm,vexpress"; From 833b6f7f66c2343e1b23dd8d35c12324d70a6731 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 2 Sep 2013 16:33:54 +0100 Subject: [PATCH 0913/1368] arm64: Remove unused cpu_name ascii in arch/arm64/mm/proc.S commit f3a1d7d53dccf51959aec16b574617cc6bfeca09 upstream. This string has been moved to arch/arm64/kernel/cputable.c. Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ae5092fadd50cfd0d29e762771d30cbf7f2c2fec) Signed-off-by: Mark Brown --- arch/arm64/mm/proc.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a82ae8868077..f84fcf71f129 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -95,10 +95,6 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) -cpu_name: - .ascii "AArch64 Processor" - .align - .section ".text.init", #alloc, #execinstr /* From 66effa189f8ded4d968dd9c310137c50244477ad Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 30 Jan 2013 17:51:26 +0000 Subject: [PATCH 0914/1368] clocksource: arch_timer: use virtual counters commit 0d651e4e65e96989f72236bf83bd4c6e55eb6ce4 upstream. Switching between reading the virtual or physical counters is problematic, as some core code wants a view of time before we're fully set up. Using a function pointer and switching the source after the first read can make time appear to go backwards, and having a check in the read function is an unfortunate block on what we want to be a fast path. Instead, this patch makes us always use the virtual counters. If we're a guest, or don't have hyp mode, we'll use the virtual timers, and as such don't care about CNTVOFF as long as it doesn't change in such a way as to make time appear to travel backwards. As the guest will use the virtual timers, a (potential) KVM host must use the physical timers (which can wake up the host even if they fire while a guest is executing), and hence a host must have CNTVOFF set to zero so as to have a consistent view of time between the physical timers and virtual counters. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Acked-by: Marc Zyngier Acked-by: Santosh Shilimkar Cc: Rob Herring Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 714c21cb90951905b269870087a99c37f3a7af0c) Signed-off-by: Mark Brown --- arch/arm/include/asm/arch_timer.h | 9 --------- arch/arm64/include/asm/arch_timer.h | 10 ---------- drivers/clocksource/arm_arch_timer.c | 23 +++++------------------ include/clocksource/arm_arch_timer.h | 2 +- 4 files changed, 6 insertions(+), 38 deletions(-) diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 7c1bfc0aea0c..accefe099182 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval)); - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index bf6ab242f047..d56ed11ba9a3 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -110,16 +110,6 @@ static inline void __cpuinit arch_counter_set_user_access(void) asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrs %0, cntpct_el0" : "=r" (cval)); - - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a2b254189782..053d846ab5b1 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -186,27 +186,19 @@ u32 arch_timer_get_rate(void) return arch_timer_rate; } -/* - * Some external users of arch_timer_read_counter (e.g. sched_clock) may try to - * call it before it has been initialised. Rather than incur a performance - * penalty checking for initialisation, provide a default implementation that - * won't lead to time appearing to jump backwards. - */ -static u64 arch_timer_read_zero(void) +u64 arch_timer_read_counter(void) { - return 0; + return arch_counter_get_cntvct(); } -u64 (*arch_timer_read_counter)(void) = arch_timer_read_zero; - static cycle_t arch_counter_read(struct clocksource *cs) { - return arch_timer_read_counter(); + return arch_counter_get_cntvct(); } static cycle_t arch_counter_read_cc(const struct cyclecounter *cc) { - return arch_timer_read_counter(); + return arch_counter_get_cntvct(); } static struct clocksource clocksource_counter = { @@ -287,7 +279,7 @@ static int __init arch_timer_register(void) cyclecounter.mult = clocksource_counter.mult; cyclecounter.shift = clocksource_counter.shift; timecounter_init(&timecounter, &cyclecounter, - arch_counter_get_cntpct()); + arch_counter_get_cntvct()); if (arch_timer_use_virtual) { ppi = arch_timer_ppi[VIRT_PPI]; @@ -376,11 +368,6 @@ static void __init arch_timer_init(struct device_node *np) } } - if (arch_timer_use_virtual) - arch_timer_read_counter = arch_counter_get_cntvct; - else - arch_timer_read_counter = arch_counter_get_cntpct; - arch_timer_register(); arch_timer_arch_init(); } diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index e6c9c4cc9b23..c463ce990c48 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -32,7 +32,7 @@ #ifdef CONFIG_ARM_ARCH_TIMER extern u32 arch_timer_get_rate(void); -extern u64 (*arch_timer_read_counter)(void); +extern u64 arch_timer_read_counter(void); extern struct timecounter *arch_timer_get_timecounter(void); #else From 84ddb8b066d3750c9971b9eed826e8d1dd4f9c53 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 May 2013 12:23:05 +0100 Subject: [PATCH 0915/1368] arm64: Avoid cache flushing in flush_dcache_page() commit b5b6c9e9149d8a7c3f1d7b9d0c046c6184e1dd17 upstream. The flush_dcache_page() function is called when the kernel modified a page cache page. Since the D-cache on AArch64 does not have aliases this function can simply mark the page as dirty for later flushing via set_pte_at()/__sync_icache_dcache() if the page is executable (to ensure the I-D cache coherency). Signed-off-by: Catalin Marinas Reported-by: Will Deacon Acked-by: Will Deacon Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1e616427f20943c9966296dfff9e7a2b825846aa) Signed-off-by: Mark Brown --- arch/arm64/mm/flush.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 88611c3a421a..b9cd7a4deeca 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -94,28 +94,14 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) } /* - * Ensure cache coherency between kernel mapping and userspace mapping of this - * page. + * This function is called when a page has been modified by the kernel. Mark + * it as dirty for later flushing when mapped in user space (if executable, + * see __sync_icache_dcache). */ void flush_dcache_page(struct page *page) { - struct address_space *mapping; - - /* - * The zero page is never written to, so never has any dirty cache - * lines, and therefore never needs to be flushed. - */ - if (page == ZERO_PAGE(0)) - return; - - mapping = page_mapping(page); - if (mapping && mapping_mapped(mapping)) { - __flush_dcache_page(page); - __flush_icache_all(); - set_bit(PG_dcache_clean, &page->flags); - } else { + if (test_bit(PG_dcache_clean, &page->flags)) clear_bit(PG_dcache_clean, &page->flags); - } } EXPORT_SYMBOL(flush_dcache_page); From b67a42ee58fa4e5a2d5a20a9009b160e77e132a7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 May 2013 16:34:22 +0100 Subject: [PATCH 0916/1368] arm64: Do not flush the D-cache for anonymous pages commit 7249b79f6b4cc3c2aa9138dca52e535a4c789107 upstream. The D-cache on AArch64 is VIPT non-aliasing, so there is no need to flush it for anonymous pages. Signed-off-by: Catalin Marinas Reported-by: Will Deacon Acked-by: Will Deacon Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit fc54900e08d840fcfec9e5d2fba2c6f233aa49b9) Signed-off-by: Mark Brown --- arch/arm64/mm/flush.c | 8 +++----- arch/arm64/mm/mmu.c | 1 - 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index b9cd7a4deeca..7c716634a671 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -77,14 +77,12 @@ void __flush_dcache_page(struct page *page) void __sync_icache_dcache(pte_t pte, unsigned long addr) { - unsigned long pfn; - struct page *page; + struct page *page = pte_page(pte); - pfn = pte_pfn(pte); - if (!pfn_valid(pfn)) + /* no flushing needed for anonymous pages */ + if (!page_mapping(page)) return; - page = pfn_to_page(pfn); if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { __flush_dcache_page(page); __flush_icache_all(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index eeecc9c8ed68..80a369eab637 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -339,7 +339,6 @@ void __init paging_init(void) bootmem_init(); empty_zero_page = virt_to_page(zero_page); - __flush_dcache_page(empty_zero_page); /* * TTBR0 is only used for the identity mapping at this stage. Make it From e850da006c7ca131c8aa1ef614bd1bcee4a3e929 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 29 Nov 2013 10:56:14 +0000 Subject: [PATCH 0917/1368] arm64: Use Normal NonCacheable memory for writecombine commit 4f00130b70e5eee813cc7bc298e0f3fdf79673cc upstream. This provides better performance compared to Device GRE and also allows unaligned accesses. Such memory is intended to be used with standard RAM (e.g. framebuffers) and not I/O. Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3655a197b1ea3ce989d34868768c5f4b6205061c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e333a243bfcc..e9a1a1d81892 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -184,7 +184,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define pgprot_noncached(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define pgprot_writecombine(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_GRE)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) #define __HAVE_PHYS_MEM_ACCESS_PROT From 058fe8b82589846921b62de504f65bac8e2f3ecc Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 3 Feb 2014 19:48:52 +0000 Subject: [PATCH 0918/1368] arm64: vdso: update wtm fields for CLOCK_MONOTONIC_COARSE commit d4022a335271a48cce49df35d825897914fbffe3 upstream. Update wall-to-monotonic fields in the VDSO data page unconditionally. These are used to service CLOCK_MONOTONIC_COARSE, which is not guarded by use_syscall. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b666f382900c74f23c78556777041c2ceb7c2b24) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 6a389dc1bd49..0ea7a22bcdf2 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -235,6 +235,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->use_syscall = use_syscall; vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; + vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { vdso_data->cs_cycle_last = tk->clock->cycle_last; @@ -242,8 +244,6 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->cs_mult = tk->mult; vdso_data->cs_shift = tk->shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; } smp_wmb(); From 36d43783007fa8ee9a782802ab28d02032de7740 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 14:41:26 +0000 Subject: [PATCH 0919/1368] arm64: vdso: prevent ld from aligning PT_LOAD segments to 64k commit 40507403485fcb56b83d6ddfc954e9b08305054c upstream. Whilst the text segment for our VDSO is marked as PT_LOAD in the ELF headers, it is mapped by the kernel and not actually subject to demand-paging. ld doesn't realise this, and emits a p_align field of 64k (the maximum supported page size), which conflicts with the load address picked by the kernel on 4k systems, which will be 4k aligned. This causes GDB to fail with "Failed to read a valid object file image from memory" when attempting to load the VDSO. This patch passes the -n option to ld, which prevents it from aligning PT_LOAD segments to the maximum page size. Reported-by: Kyle McMartin Acked-by: Kyle McMartin Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6737eaebff6297e5b6aeb458a4b4ad4b61df8f57) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d8064af42e62..6d20b7d162d8 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -48,7 +48,7 @@ $(obj-vdso): %.o: %.S # Actual build commands quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< From 3a44eb49a2734233b5fc9d98afee53be701f4c4d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Feb 2014 16:01:31 +0000 Subject: [PATCH 0920/1368] arm64: Invalidate the TLB when replacing pmd entries during boot commit a55f9929a9b257f84b6cc7b2397379cabd744a22 upstream. With the 64K page size configuration, __create_page_tables in head.S maps enough memory to get started but using 64K pages rather than 512M sections with a single pgd/pud/pmd entry pointing to a pte table. create_mapping() may override the pgd/pud/pmd table entry with a block (section) one if the RAM size is more than 512MB and aligned correctly. For the end of this block to be accessible, the old TLB entry must be invalidated. Reported-by: Mark Salter Tested-by: Mark Salter Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f35f27e775d6f8aa265fb698975c9c95f2757ef4) Signed-off-by: Mark Brown --- arch/arm64/mm/mmu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 80a369eab637..ba7477efad5c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -203,10 +203,18 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); /* try section mapping first */ - if (((addr | next | phys) & ~SECTION_MASK) == 0) + if (((addr | next | phys) & ~SECTION_MASK) == 0) { + pmd_t old_pmd =*pmd; set_pmd(pmd, __pmd(phys | prot_sect_kernel)); - else + /* + * Check for previous table entries created during + * boot (__create_page_tables) and flush them. + */ + if (!pmd_none(old_pmd)) + flush_tlb_all(); + } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys)); + } phys += next - addr; } while (pmd++, addr = next, addr != end); } From 1a94c46eed86275ee899eae5210dd14d4129a03a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 5 Feb 2014 05:53:04 +0000 Subject: [PATCH 0921/1368] arm64: vdso: fix coarse clock handling commit 069b918623e1510e58dacf178905a72c3baa3ae4 upstream. When __kernel_clock_gettime is called with a CLOCK_MONOTONIC_COARSE or CLOCK_REALTIME_COARSE clock id, it returns incorrectly to whatever the caller has placed in x2 ("ret x2" to return from the fast path). Fix this by saving x30/LR to x2 only in code that will call __do_get_tspec, restoring x30 afterward, and using a plain "ret" to return from the routine. Also: while the resulting tv_nsec value for CLOCK_REALTIME and CLOCK_MONOTONIC must be computed using intermediate values that are left-shifted by cs_shift (x12, set by __do_get_tspec), the results for coarse clocks should be calculated using unshifted values (xtime_coarse_nsec is in units of actual nanoseconds). The current code shifts intermediate values by x12 unconditionally, but x12 is uninitialized when servicing a coarse clock. Fix this by setting x12 to 0 once we know we are dealing with a coarse clock id. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit fb569d15d867a06e89b1be8278404b6fbf6b5bde) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso/gettimeofday.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index f0a6d10b5211..fe652ffd34c2 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -103,6 +103,8 @@ ENTRY(__kernel_clock_gettime) bl __do_get_tspec seqcnt_check w9, 1b + mov x30, x2 + cmp w0, #CLOCK_MONOTONIC b.ne 6f @@ -118,6 +120,9 @@ ENTRY(__kernel_clock_gettime) ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 8f + /* xtime_coarse_nsec is already right-shifted */ + mov x12, #0 + /* Get coarse timespec. */ adr vdso_data, _vdso_data 3: seqcnt_acquire @@ -156,7 +161,7 @@ ENTRY(__kernel_clock_gettime) lsr x11, x11, x12 stp x10, x11, [x1, #TSPEC_TV_SEC] mov x0, xzr - ret x2 + ret 7: mov x30, x2 8: /* Syscall fallback. */ From 891ed08e70ce022069af9b421b689db5b0cb66ae Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 5 Feb 2014 09:34:36 +0000 Subject: [PATCH 0922/1368] arm64: add DSB after icache flush in __flush_icache_all() commit 5044bad43ee573d0b6d90e3ccb7a40c2c7d25eb4 upstream. Add DSB after icache flush to complete the cache maintenance operation. The function __flush_icache_all() is used only for user space mappings and an ISB is not required because of an exception return before executing user instructions. An exception return would behave like an ISB. Signed-off-by: Vinayak Kale Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 02599bad3774a5147eb8d98df7c9362fdc1a50c6) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cacheflush.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 3300cbd18a89..0c13554965b8 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -116,6 +116,7 @@ extern void flush_dcache_page(struct page *); static inline void __flush_icache_all(void) { asm("ic ialluis"); + dsb(); } #define flush_dcache_mmap_lock(mapping) \ From 226b19b9d3d9d3f231ac889fb96b6ff98febf6a6 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 14 Feb 2014 19:35:15 +0000 Subject: [PATCH 0923/1368] ARM64: unwind: Fix PC calculation commit e306dfd06fcb44d21c80acb8e5a88d55f3d1cf63 upstream. The frame PC value in the unwind code used to just take the saved LR value and use that. That's incorrect as a stack trace, since it shows the return path stack, not the call path stack. In particular, it shows faulty information in case the bl is done as the very last instruction of one label, since the return point will be in the next label. That can easily be seen with tail calls to panic(), which is marked __noreturn and thus doesn't have anything useful after it. Easiest here is to just correct the unwind code and do a -4, to get the actual call site for the backtrace instead of the return site. Signed-off-by: Olof Johansson Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c56e0dc1b70f1537659acffc6ac8d8d615be2dae) Signed-off-by: Mark Brown --- arch/arm64/kernel/stacktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d25459ff57fc..048334bb2651 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -48,7 +48,11 @@ int unwind_frame(struct stackframe *frame) frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 8); + /* + * -4 here because we care about the PC at time of bl, + * not where the return will go. + */ + frame->pc = *(unsigned long *)(fp + 8) - 4; return 0; } From 5aab0869c941fc278fa72ff1d05bbbbb70f7d7c0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 12 Mar 2014 16:28:09 +0000 Subject: [PATCH 0924/1368] arm64: Do not synchronise I and D caches for special ptes commit 71fdb6bf61bf0692f004f9daf5650392c0cfe300 upstream. Special pte mappings are not intended to be executable and do not even have an associated struct page. This patch ensures that we do not call __sync_icache_dcache() on such ptes. Signed-off-by: Catalin Marinas Reported-by: Steve Capper Tested-by: Laura Abbott Tested-by: Bharat Bhushan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b1f2bfc9d8bb42b4e5a93470e8a0974cd4c04697) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e9a1a1d81892..1569b2bd91bb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -161,7 +161,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { if (pte_valid_user(pte)) { - if (pte_exec(pte)) + if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); if (!pte_dirty(pte)) pte = pte_wrprotect(pte); From e4cb973f4aa39836b5dc01f35f4c100607229e06 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 12 Mar 2014 16:07:06 +0000 Subject: [PATCH 0925/1368] arm64: Make DMA coherent and strongly ordered mappings not executable commit de2db7432917a82b62d55bb59635586eeca6d1bd upstream. pgprot_{dmacoherent,writecombine,noncached} don't need to generate executable mappings with side-effects like __sync_icache_dcache() being called when the mapping is in user space. Signed-off-by: Catalin Marinas Reported-by: Bharat Bhushan Tested-by: Laura Abbott Tested-by: Bharat Bhushan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 73e8697b71fa956642403304b96442fd4b57ce97) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1569b2bd91bb..3a710d7b14ce 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -182,11 +182,11 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN) #define pgprot_writecombine(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_dmacoherent(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, From f651b7f202cb2f70971db9050be120e437696ccb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 May 2013 16:38:23 +0100 Subject: [PATCH 0926/1368] arm64: Remove __flush_dcache_page() This function is only used in __sync_icache_dcache(), so remove it and call __flush_dcache_area() directly. The flush_icache_user_range() function is not used in the arm64 kernel. Signed-off-by: Catalin Marinas Reported-by: Will Deacon Acked-by: Will Deacon (cherry picked from commit ebd88367de80f9509bd30a09342d0a19c925b23e) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cacheflush.h | 3 --- arch/arm64/mm/flush.c | 7 +------ arch/arm64/mm/mm.h | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 3300cbd18a89..fea9ee327206 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -123,9 +123,6 @@ static inline void __flush_icache_all(void) #define flush_dcache_mmap_unlock(mapping) \ spin_unlock_irq(&(mapping)->tree_lock) -#define flush_icache_user_range(vma,page,addr,len) \ - flush_dcache_page(page) - /* * We don't appear to need to do anything here. In fact, if we did, we'd * duplicate cache flushing elsewhere performed by flush_dcache_page(). diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 88611c3a421a..d3751b5f7f07 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -70,11 +70,6 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, #endif } -void __flush_dcache_page(struct page *page) -{ - __flush_dcache_area(page_address(page), PAGE_SIZE); -} - void __sync_icache_dcache(pte_t pte, unsigned long addr) { unsigned long pfn; @@ -86,7 +81,7 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) page = pfn_to_page(pfn); if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_page(page); + __flush_dcache_area(page_address(page), PAGE_SIZE); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h index 916701e6d040..d519f4f50c8c 100644 --- a/arch/arm64/mm/mm.h +++ b/arch/arm64/mm/mm.h @@ -1,3 +1,2 @@ -extern void __flush_dcache_page(struct page *page); extern void __init bootmem_init(void); extern void __init arm64_swiotlb_init(void); From aee2c0d2d061df8d978c73022b48049de78e6a46 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 23 Apr 2013 12:35:02 +0100 Subject: [PATCH 0927/1368] mm: hugetlb: Copy huge_pmd_share from x86 to mm. Under x86, multiple puds can be made to reference the same bank of huge pmds provided that they represent a full PUD_SIZE of shared huge memory that is aligned to a PUD_SIZE boundary. The code to share pmds does not require any architecture specific knowledge other than the fact that pmds can be indexed, thus can be beneficial to some other architectures. This patch copies the huge pmd sharing (and unsharing) logic from x86/ to mm/ and introduces a new config option to activate it: CONFIG_ARCH_WANTS_HUGE_PMD_SHARE Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++ mm/hugetlb.c | 122 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6b4890fa57e7..981546ad231c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -69,6 +69,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); int dequeue_hwpoisoned_huge_page(struct page *page); void copy_huge_page(struct page *dst, struct page *src); +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +#endif + extern unsigned long hugepages_treat_as_movable; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e2bfbf73a551..15d84e55ceac 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3169,6 +3169,128 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) hugetlb_acct_memory(h, -(chg - freed)); } +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +static unsigned long page_table_shareable(struct vm_area_struct *svma, + struct vm_area_struct *vma, + unsigned long addr, pgoff_t idx) +{ + unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + + svma->vm_start; + unsigned long sbase = saddr & PUD_MASK; + unsigned long s_end = sbase + PUD_SIZE; + + /* Allow segments to share if only one is marked locked */ + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; + + /* + * match the virtual addresses, permission and the alignment of the + * page table page. + */ + if (pmd_index(addr) != pmd_index(saddr) || + vm_flags != svm_flags || + sbase < svma->vm_start || svma->vm_end < s_end) + return 0; + + return saddr; +} + +static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long base = addr & PUD_MASK; + unsigned long end = base + PUD_SIZE; + + /* + * check on proper vm_flags and page table alignment + */ + if (vma->vm_flags & VM_MAYSHARE && + vma->vm_start <= base && end <= vma->vm_end) + return 1; + return 0; +} + +/* + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() + * and returns the corresponding pte. While this is not necessary for the + * !shared pmd case because we can allocate the pmd later as well, it makes the + * code much cleaner. pmd allocation is essential for the shared case because + * pud has to be populated inside the same i_mmap_mutex section - otherwise + * racing tasks could either miss the sharing (see huge_pte_offset) or select a + * bad pmd for sharing. + */ +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + struct vm_area_struct *svma; + unsigned long saddr; + pte_t *spte = NULL; + pte_t *pte; + + if (!vma_shareable(vma, addr)) + return (pte_t *)pmd_alloc(mm, pud, addr); + + mutex_lock(&mapping->i_mmap_mutex); + vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { + if (svma == vma) + continue; + + saddr = page_table_shareable(svma, vma, addr, idx); + if (saddr) { + spte = huge_pte_offset(svma->vm_mm, saddr); + if (spte) { + get_page(virt_to_page(spte)); + break; + } + } + } + + if (!spte) + goto out; + + spin_lock(&mm->page_table_lock); + if (pud_none(*pud)) + pud_populate(mm, pud, + (pmd_t *)((unsigned long)spte & PAGE_MASK)); + else + put_page(virt_to_page(spte)); + spin_unlock(&mm->page_table_lock); +out: + pte = (pte_t *)pmd_alloc(mm, pud, addr); + mutex_unlock(&mapping->i_mmap_mutex); + return pte; +} + +/* + * unmap huge page backed by shared pte. + * + * Hugetlb pte page is ref counted at the time of mapping. If pte is shared + * indicated by page_count > 1, unmap is achieved by clearing pud and + * decrementing the ref count. If count == 1, the pte page is not shared. + * + * called with vma->vm_mm->page_table_lock held. + * + * returns: 1 successfully unmapped a shared pte page + * 0 the underlying pte page is not shared, or it is the last user + */ +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + pgd_t *pgd = pgd_offset(mm, *addr); + pud_t *pud = pud_offset(pgd, *addr); + + BUG_ON(page_count(virt_to_page(ptep)) == 0); + if (page_count(virt_to_page(ptep)) == 1) + return 0; + + pud_clear(pud); + put_page(virt_to_page(ptep)); + *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + return 1; +} +#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ + #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From 2b78f19ada9fc885b16d148163831ae7eb67b739 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Mon, 29 Apr 2013 14:29:48 +0100 Subject: [PATCH 0928/1368] x86: mm: Remove x86 version of huge_pmd_share. The huge_pmd_share code has been copied over to mm/hugetlb.c to make it accessible to other architectures. Remove the x86 copy of the huge_pmd_share code and enable the ARCH_WANT_HUGE_PMD_SHARE config flag. That way we reference the general one. Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- arch/x86/Kconfig | 3 + arch/x86/mm/hugetlbpage.c | 120 -------------------------------------- 2 files changed, 3 insertions(+), 120 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe120da25625..56d606b2497c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -207,6 +207,9 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index ae1aa71d0115..7e522a359972 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,126 +16,6 @@ #include #include -static unsigned long page_table_shareable(struct vm_area_struct *svma, - struct vm_area_struct *vma, - unsigned long addr, pgoff_t idx) -{ - unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + - svma->vm_start; - unsigned long sbase = saddr & PUD_MASK; - unsigned long s_end = sbase + PUD_SIZE; - - /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; - - /* - * match the virtual addresses, permission and the alignment of the - * page table page. - */ - if (pmd_index(addr) != pmd_index(saddr) || - vm_flags != svm_flags || - sbase < svma->vm_start || svma->vm_end < s_end) - return 0; - - return saddr; -} - -static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) -{ - unsigned long base = addr & PUD_MASK; - unsigned long end = base + PUD_SIZE; - - /* - * check on proper vm_flags and page table alignment - */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) - return 1; - return 0; -} - -/* - * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() - * and returns the corresponding pte. While this is not necessary for the - * !shared pmd case because we can allocate the pmd later as well, it makes the - * code much cleaner. pmd allocation is essential for the shared case because - * pud has to be populated inside the same i_mmap_mutex section - otherwise - * racing tasks could either miss the sharing (see huge_pte_offset) or select a - * bad pmd for sharing. - */ -static pte_t * -huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) -{ - struct vm_area_struct *vma = find_vma(mm, addr); - struct address_space *mapping = vma->vm_file->f_mapping; - pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - struct vm_area_struct *svma; - unsigned long saddr; - pte_t *spte = NULL; - pte_t *pte; - - if (!vma_shareable(vma, addr)) - return (pte_t *)pmd_alloc(mm, pud, addr); - - mutex_lock(&mapping->i_mmap_mutex); - vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { - if (svma == vma) - continue; - - saddr = page_table_shareable(svma, vma, addr, idx); - if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr); - if (spte) { - get_page(virt_to_page(spte)); - break; - } - } - } - - if (!spte) - goto out; - - spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) - pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); - else - put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); -out: - pte = (pte_t *)pmd_alloc(mm, pud, addr); - mutex_unlock(&mapping->i_mmap_mutex); - return pte; -} - -/* - * unmap huge page backed by shared pte. - * - * Hugetlb pte page is ref counted at the time of mapping. If pte is shared - * indicated by page_count > 1, unmap is achieved by clearing pud and - * decrementing the ref count. If count == 1, the pte page is not shared. - * - * called with vma->vm_mm->page_table_lock held. - * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user - */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); - - BUG_ON(page_count(virt_to_page(ptep)) == 0); - if (page_count(virt_to_page(ptep)) == 1) - return 0; - - pud_clear(pud); - put_page(virt_to_page(ptep)); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; - return 1; -} - pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { From ba310aa51d8b732c9f6d66d66b9458bc917c19a7 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 08:02:03 +0100 Subject: [PATCH 0929/1368] mm: hugetlb: Copy general hugetlb code from x86 to mm. The huge_pte_alloc, huge_pte_offset and follow_huge_p[mu]d functions in x86/mm/hugetlbpage.c do not rely on any architecture specific knowledge other than the fact that pmds and puds can be treated as huge ptes. To allow other architectures to use this code (and reduce the need for code duplication), this patch copies these functions into mm, replaces the use of pud_large with pud_huge and provides a config flag to activate them: CONFIG_ARCH_WANT_GENERAL_HUGETLB If CONFIG_ARCH_WANT_HUGE_PMD_SHARE is also active then the huge_pmd_share code will be called by huge_pte_alloc (othewise we call pmd_alloc and skip the sharing code). Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- mm/hugetlb.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 15d84e55ceac..dbd3d42ae031 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2931,15 +2931,6 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } -/* Can be overriden by architectures */ -__attribute__((weak)) struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - BUG(); - return NULL; -} - long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, @@ -3289,8 +3280,96 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; return 1; } +#define want_pmd_share() (1) +#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + return NULL; +} +#define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else { + BUG_ON(sz != PMD_SIZE); + if (want_pmd_share() && pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } + } + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) { + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + } + } + return (pte_t *) pmd; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pud); + if (page) + page += ((address & ~PUD_MASK) >> PAGE_SHIFT); + return page; +} + +#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + +/* Can be overriden by architectures */ +__attribute__((weak)) struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + BUG(); + return NULL; +} + +#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From 60061a496643b97b9e36042c5a3af7f286d7f0fa Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 08:03:42 +0100 Subject: [PATCH 0930/1368] x86: mm: Remove general hugetlb code from x86. huge_pte_alloc, huge_pte_offset and follow_huge_p[mu]d have already been copied over to mm. This patch removes the x86 copies of these functions and activates the general ones by enabling: CONFIG_ARCH_WANT_GENERAL_HUGETLB Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- arch/x86/Kconfig | 3 ++ arch/x86/mm/hugetlbpage.c | 67 --------------------------------------- 2 files changed, 3 insertions(+), 67 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 56d606b2497c..787072769a80 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -210,6 +210,9 @@ config ARCH_SUSPEND_POSSIBLE config ARCH_WANT_HUGE_PMD_SHARE def_bool y +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 7e522a359972..7e73e8c69096 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,49 +16,6 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { - if (sz == PUD_SIZE) { - pte = (pte_t *)pud; - } else { - BUG_ON(sz != PMD_SIZE); - if (pud_none(*pud)) - pte = huge_pmd_share(mm, addr, pud); - else - pte = (pte_t *)pmd_alloc(mm, pud, addr); - } - } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - - return pte; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_large(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } - return (pte_t *) pmd; -} - #if 0 /* This is just for testing */ struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) @@ -120,30 +77,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_PSE); } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - #endif /* x86_64 also uses this file */ From 6991fe93fc746ffe73d88e0fc47b674bf4dad4f9 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 7 May 2013 14:46:03 +0100 Subject: [PATCH 0931/1368] mm: thp: Correct the HPAGE_PMD_ORDER check. All Transparent Huge Pages are allocated by the buddy allocator. A compile time check is in place that fails when the order of a transparent huge page is too large to be allocated by the buddy allocator. Unfortunately that compile time check passes when: HPAGE_PMD_ORDER == MAX_ORDER ( which is incorrect as the buddy allocator can only allocate memory of order strictly less than MAX_ORDER. ) This patch updates the compile time check to fail in the above case. Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 528454c2caa9..26ee56c80dc7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma, } while (0) extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, pmd_t *pmd); -#if HPAGE_PMD_ORDER > MAX_ORDER +#if HPAGE_PMD_ORDER >= MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" #endif extern int hugepage_madvise(struct vm_area_struct *vma, From bf966f2702f43577b7f7b93db5c4ef2a0bc875a1 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 11:00:33 +0100 Subject: [PATCH 0932/1368] ARM64: mm: Restore memblock limit when map_mem finished. In paging_init the memblock limit is set to restrict any addresses returned by early_alloc to fit within the initial direct kernel mapping in swapper_pg_dir. This allows map_mem to allocate puds, pmds and ptes from the initial direct kernel mapping. The limit stays low after paging_init() though, meaning any bootmem allocations will be from a restricted subset of memory. Gigabyte huge pages, for instance, are normally allocated from bootmem as their order (18) is too large for the default buddy allocator (MAX_ORDER = 11). This patch restores the memblock limit when map_mem has finished, allowing gigabyte huge pages (and other objects) to be allocated from all of bootmem. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ba7477efad5c..49961d1fa033 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -305,6 +305,16 @@ static void __init map_mem(void) { struct memblock_region *reg; + /* + * Temporarily limit the memblock range. We need to do this as + * create_mapping requires puds, pmds and ptes to be allocated from + * memory addressable from the initial direct kernel mapping. + * + * The initial direct kernel mapping, located at swapper_pg_dir, + * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned). + */ + memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); + /* map all the memory banks */ for_each_memblock(memory, reg) { phys_addr_t start = reg->base; @@ -315,6 +325,9 @@ static void __init map_mem(void) create_mapping(start, __phys_to_virt(start), end - start); } + + /* Limit no longer required. */ + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } /* @@ -325,12 +338,6 @@ void __init paging_init(void) { void *zero_page; - /* - * Maximum PGDIR_SIZE addressable via the initial direct kernel - * mapping in swapper_pg_dir. - */ - memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); - init_mem_pgprot(); map_mem(); From 9bdb6ff441dabb89d736317de07c7b943e31052d Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 2 May 2013 16:25:42 +0100 Subject: [PATCH 0933/1368] ARM64: mm: Make PAGE_NONE pages read only and no-execute. If we consider the following code sequence: my_pte = pte_modify(entry, myprot); x = pte_write(my_pte); y = pte_exec(my_pte); If myprot comes from a PROT_NONE page, then x and y will both be true which is undesireable behaviour. This patch sets the no-execute and read-only bits for PAGE_NONE such that the code above will return false for both x and y. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3a710d7b14ce..31d4b1e5d44b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -66,7 +66,7 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) #define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) @@ -76,7 +76,7 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE) +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) From d2ddf5c8634f83ee20f26b7b911a48e77e77912c Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 28 May 2013 13:35:51 +0100 Subject: [PATCH 0934/1368] ARM64: mm: Move PTE_PROT_NONE bit. Under ARM64, PTEs can be broadly categorised as follows: - Present and valid: Bit #0 is set. The PTE is valid and memory access to the region may fault. - Present and invalid: Bit #0 is clear and bit #1 is set. Represents present memory with PROT_NONE protection. The PTE is an invalid entry, and the user fault handler will raise a SIGSEGV. - Not present (file or swap): Bits #0 and #1 are clear. Memory represented has been paged out. The PTE is an invalid entry, and the fault handler will try and re-populate the memory where necessary. Huge PTEs are block descriptors that have bit #1 clear. If we wish to represent PROT_NONE huge PTEs we then run into a problem as there is no way to distinguish between regular and huge PTEs if we set bit #1. To resolve this ambiguity this patch moves PTE_PROT_NONE from bit #1 to bit #2 and moves PTE_FILE from bit #2 to bit #3. The number of swap/file bits is reduced by 1 as a consequence, leaving 60 bits for file and swap entries. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 31d4b1e5d44b..f1a357894c3c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,8 +25,8 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ +#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) @@ -281,12 +281,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-8: swap type + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-8: swap type * bits 9-63: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 4 #define __SWP_TYPE_BITS 6 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) @@ -306,15 +306,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-63: file offset / PAGE_SIZE + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-63: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 3) -#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 4) +#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) -#define PTE_FILE_MAX_BITS 61 +#define PTE_FILE_MAX_BITS 60 extern int kern_addr_valid(unsigned long addr); From 3b5a3a6a3716d107d0c5854eab70a4eac40534a9 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 10 Apr 2013 13:48:00 +0100 Subject: [PATCH 0935/1368] ARM64: mm: HugeTLB support. Add huge page support to ARM64, different huge page sizes are supported depending on the size of normal pages: PAGE_SIZE is 4KB: 2MB - (pmds) these can be allocated at any time. 1024MB - (puds) usually allocated on bootup with the command line with something like: hugepagesz=1G hugepages=6 PAGE_SIZE is 64KB: 512MB - (pmds) usually allocated on bootup via command line. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 9 ++ arch/arm64/include/asm/hugetlb.h | 117 +++++++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 8 ++ arch/arm64/include/asm/pgtable.h | 13 ++- arch/arm64/mm/Makefile | 1 + arch/arm64/mm/fault.c | 19 +--- arch/arm64/mm/hugetlbpage.c | 70 +++++++++++++++ 7 files changed, 220 insertions(+), 17 deletions(-) create mode 100644 arch/arm64/include/asm/hugetlb.h create mode 100644 arch/arm64/mm/hugetlbpage.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..cd6eca84a21c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -180,6 +180,15 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y if !ARM64_64K_PAGES + source "mm/Kconfig" endmenu diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h new file mode 100644 index 000000000000..5b7ca8ace95f --- /dev/null +++ b/arch/arm64/include/asm/hugetlb.h @@ -0,0 +1,117 @@ +/* + * arch/arm64/include/asm/hugetlb.h + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_HUGETLB_H +#define __ASM_HUGETLB_H + +#include +#include + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 75fd13d289b9..e6e0a0d4cf9a 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -25,12 +25,19 @@ /* * Hardware page table definitions. * + * Level 1 descriptor (PUD). + */ + +#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) + +/* * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) /* * Section @@ -53,6 +60,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f1a357894c3c..b7ef981f912d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -173,8 +173,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Huge pte definitions. */ -#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE) -#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE)) +#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + +/* + * Hugetlb definitions. + */ +#define HUGE_MAX_HSTATE 2 +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define __HAVE_ARCH_PTE_SPECIAL diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 3140a2abcdc2..b51d36401d83 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o tlb.o proc.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f51d669c8ebd..6c8ba25bf6bb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -358,17 +358,6 @@ static int __kprobes do_translation_fault(unsigned long addr, return 0; } -/* - * Some section permission faults need to be handled gracefully. They can - * happen due to a __{get,put}_user during an oops. - */ -static int do_sect_fault(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - do_bad_area(addr, esr, regs); - return 0; -} - /* * This abort handler always returns "fault". */ @@ -392,12 +381,12 @@ static struct fault_info { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c new file mode 100644 index 000000000000..2fc8258bab2d --- /dev/null +++ b/arch/arm64/mm/hugetlbpage.c @@ -0,0 +1,70 @@ +/* + * arch/arm64/mm/hugetlbpage.c + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/mm/hugetlbpage.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} +#endif + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !(pmd_val(pmd) & PMD_TABLE_BIT); +} + +int pud_huge(pud_t pud) +{ + return !(pud_val(pud) & PUD_TABLE_BIT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); From 00d9acb13773b654361ea92da5e915e2804e544f Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 25 Apr 2013 15:19:21 +0100 Subject: [PATCH 0936/1368] ARM64: mm: Raise MAX_ORDER for 64KB pages and THP. The buddy allocator has a default MAX_ORDER of 11, which is too low to allocate enough memory for 512MB Transparent HugePages if our base page size is 64KB. This patch introduces MAX_ZONE_ORDER and sets it to 14 when 64KB pages are used in conjuction with THP, otherwise the default value of 11 is used. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cd6eca84a21c..10607d63b945 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -191,6 +191,11 @@ config ARCH_WANT_HUGE_PMD_SHARE source "mm/Kconfig" +config FORCE_MAX_ZONEORDER + int + default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "11" + endmenu menu "Boot options" From d9ccf6b1647eae73f79d45c12474ae73e29dc784 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 19 Apr 2013 16:23:57 +0100 Subject: [PATCH 0937/1368] ARM64: mm: THP support. Bring Transparent HugePage support to ARM. The size of a transparent huge page depends on the normal page size. A transparent huge page is always represented as a pmd. If PAGE_SIZE is 4KB, THPs are 2MB. If PAGE_SIZE is 64KB, THPs are 512MB. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 3 ++ arch/arm64/include/asm/pgtable-hwdef.h | 4 ++ arch/arm64/include/asm/pgtable.h | 55 ++++++++++++++++++++++++++ arch/arm64/include/asm/tlb.h | 6 +++ arch/arm64/include/asm/tlbflush.h | 2 + 5 files changed, 70 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 10607d63b945..308a55636f76 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -189,6 +189,9 @@ config ARCH_WANT_GENERAL_HUGETLB config ARCH_WANT_HUGE_PMD_SHARE def_bool y if !ARM64_64K_PAGES +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + source "mm/Kconfig" config FORCE_MAX_ZONEORDER diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e6e0a0d4cf9a..63c9d0de05bb 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -42,6 +42,10 @@ /* * Section */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b7ef981f912d..c9a5f264cab8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -187,6 +187,61 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SPECIAL +/* + * Software PMD bits for THP + */ + +#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57) + +/* + * THP definitions. + */ +#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#endif + +#define PMD_BIT_FUNC(fn,op) \ +static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } + +PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); +PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); +PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN | + PMD_SECT_RDONLY | PMD_SECT_PROT_NONE | + PMD_SECT_VALID; + pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); + return pmd; +} + +#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 654f0968030b..46b3beb4b773 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #define tlb_migrate_finish(mm) do { } while (0) +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 122d6320f745..8b482035cfc2 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, dsb(); } +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif #endif From 6d9c4ff8c2fc63a1d79453c1f68323cca4386e7d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 27 Nov 2013 16:59:27 +0000 Subject: [PATCH 0938/1368] arm64: Move PTE_PROT_NONE higher up PTE_PROT_NONE means that a pte is present but does not have any read/write attributes. However, setting the memory type like pgprot_writecombine() is allowed and such bits overlap with PTE_PROT_NONE. This causes mmap/munmap issues in drivers that change the vma->vm_pg_prot on PROT_NONE mappings. This patch reverts the PTE_FILE/PTE_PROT_NONE shift in commit 59911ca4325d (ARM64: mm: Move PTE_PROT_NONE bit) and moves PTE_PROT_NONE together with the other software bits. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas Tested-by: Steve Capper Cc: # 3.11+ --- arch/arm64/include/asm/pgtable.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9a5f264cab8..6c85e51324a7 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,10 +25,11 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ +#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) + /* bit 57 for PMD_SECT_SPLITTING */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. @@ -345,18 +346,20 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0, 2: present (must both be zero) - * bit 3: PTE_FILE - * bits 4-8: swap type - * bits 9-63: swap offset + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-8: swap type + * bits 9-57: swap offset */ -#define __SWP_TYPE_SHIFT 4 +#define __SWP_TYPE_SHIFT 3 #define __SWP_TYPE_BITS 6 +#define __SWP_OFFSET_BITS 49 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) -#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT) +#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) @@ -370,15 +373,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0, 2: present (must both be zero) - * bit 3: PTE_FILE - * bits 4-63: file offset / PAGE_SIZE + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-57: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 4) -#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 3) +#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) -#define PTE_FILE_MAX_BITS 60 +#define PTE_FILE_MAX_BITS 55 extern int kern_addr_valid(unsigned long addr); From ce30e7d5f7d20ab0df99c6f3d7f45600f0deeecc Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 5 Dec 2013 12:04:51 +0000 Subject: [PATCH 0939/1368] arm64: mm: Fix PMD_SECT_PROT_NONE definition Modify the value of PMD_SECT_PROT_NONE to match that of PTE_NONE. This should have been in commit 3676f9ef5481 (Move PTE_PROT_NONE higher up). Signed-off-by: Steve Capper Cc: # 3.11+: 3676f9ef5481: arm64: Move PTE_PROT_NONE higher up Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-hwdef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 63c9d0de05bb..2294f2330960 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -43,7 +43,7 @@ * Section */ #define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) -#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) From d5edd6d254097253b9feb3707c7405777a91630b Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Tue, 21 Jan 2014 15:49:09 -0800 Subject: [PATCH 0940/1368] mm/hugetlb.c: call MMU notifiers when copying a hugetlb page range When copy_hugetlb_page_range() is called to copy a range of hugetlb mappings, the secondary MMUs are not notified if there is a protection downgrade, which breaks COW semantics in KVM. This patch adds the necessary MMU notifier calls. Signed-off-by: Andreas Sandberg Acked-by: Steve Capper Acked-by: Marc Zyngier Cc: Mel Gorman Cc: Rik van Riel Cc: Hugh Dickins Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dbd3d42ae031..a7482d1d2131 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2295,16 +2295,26 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, int cow; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); + unsigned long mmun_start; /* For mmu_notifiers */ + unsigned long mmun_end; /* For mmu_notifiers */ + int ret = 0; cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + mmun_start = vma->vm_start; + mmun_end = vma->vm_end; + if (cow) + mmu_notifier_invalidate_range_start(src, mmun_start, mmun_end); + for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { src_pte = huge_pte_offset(src, addr); if (!src_pte) continue; dst_pte = huge_pte_alloc(dst, addr, sz); - if (!dst_pte) - goto nomem; + if (!dst_pte) { + ret = -ENOMEM; + break; + } /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) @@ -2324,10 +2334,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, spin_unlock(&src->page_table_lock); spin_unlock(&dst->page_table_lock); } - return 0; -nomem: - return -ENOMEM; + if (cow) + mmu_notifier_invalidate_range_end(src, mmun_start, mmun_end); + + return ret; } static int is_hugetlb_entry_migration(pte_t pte) From 9370103e565eebf905ae0ab6b437b4131b2c2e73 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 15 Jan 2014 14:07:12 +0000 Subject: [PATCH 0941/1368] arm64: mm: Remove PTE_BIT_FUNC macro Expand out the pte manipulation functions. This makes our life easier when using things like tags and cscope. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 49 ++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6c85e51324a7..ac14abf80afd 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -140,16 +140,47 @@ extern struct page *empty_zero_page; #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) -#define PTE_BIT_FUNC(fn,op) \ -static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) |= PTE_RDONLY; + return pte; +} -PTE_BIT_FUNC(wrprotect, |= PTE_RDONLY); -PTE_BIT_FUNC(mkwrite, &= ~PTE_RDONLY); -PTE_BIT_FUNC(mkclean, &= ~PTE_DIRTY); -PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); -PTE_BIT_FUNC(mkold, &= ~PTE_AF); -PTE_BIT_FUNC(mkyoung, |= PTE_AF); -PTE_BIT_FUNC(mkspecial, |= PTE_SPECIAL); +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) &= ~PTE_RDONLY; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~PTE_AF; + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= PTE_AF; + return pte; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= PTE_SPECIAL; + return pte; +} static inline void set_pte(pte_t *ptep, pte_t pte) { From 2f4620f7ab52b501c56125278225dce92e21bf1f Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 15 Jan 2014 14:07:13 +0000 Subject: [PATCH 0942/1368] arm64: mm: Introduce PTE_WRITE We have the following means for encoding writable or dirty ptes: PTE_DIRTY PTE_RDONLY !pte_dirty && !pte_write 0 1 !pte_dirty && pte_write 0 1 pte_dirty && !pte_write 1 1 pte_dirty && pte_write 1 0 So we can't distinguish between writable clean ptes and read only ptes. This can cause problems with ptes being incorrectly flagged as read only when they are writable but not dirty. This patch introduces a new software bit PTE_WRITE which allows us to correctly identify writable ptes. PTE_RDONLY is now only clear for valid ptes where a page is both writable and dirty. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas Conflicts: arch/arm64/include/asm/pgtable.h --- arch/arm64/include/asm/pgtable.h | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ac14abf80afd..7deb0105921d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -28,7 +28,7 @@ #define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) - /* bit 57 for PMD_SECT_SPLITTING */ +#define PTE_WRITE (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* @@ -67,23 +67,23 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) -#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) -#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) +define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #endif /* __ASSEMBLY__ */ @@ -134,7 +134,7 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & PTE_AF) #define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_write(pte) (pte_val(pte) & PTE_WRITE) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ @@ -142,13 +142,13 @@ extern struct page *empty_zero_page; static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) |= PTE_RDONLY; + pte_val(pte) &= ~PTE_WRITE; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) &= ~PTE_RDONLY; + pte_val(pte) |= PTE_WRITE; return pte; } @@ -195,8 +195,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_valid_user(pte)) { if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); - if (!pte_dirty(pte)) - pte = pte_wrprotect(pte); + if (pte_dirty(pte) && pte_write(pte)) + pte_val(pte) &= ~PTE_RDONLY; + else + pte_val(pte) |= PTE_RDONLY; } set_pte(ptep, pte); @@ -364,7 +366,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } From 2457c2704c8b2bbd1caa254cb91ba787c8ec2a4d Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 25 Feb 2014 11:38:53 +0000 Subject: [PATCH 0943/1368] arm64: mm: Add double logical invert to pte accessors Page table entries on ARM64 are 64 bits, and some pte functions such as pte_dirty return a bitwise-and of a flag with the pte value. If the flag to be tested resides in the upper 32 bits of the pte, then we run into the danger of the result being dropped if downcast. For example: gather_stats(page, md, pte_dirty(*pte), 1); where pte_dirty(*pte) is downcast to an int. This patch adds a double logical invert to all the pte_ accessors to ensure predictable downcasting. Signed-off-by: Steve Capper Cc: Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7deb0105921d..154590ef6646 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -130,11 +130,11 @@ extern struct page *empty_zero_page; /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) -#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & PTE_AF) -#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (pte_val(pte) & PTE_WRITE) +#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) +#define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) +#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) +#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ From ae41944bea69a143f6e464f02784a9e1f5161e07 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 14 Apr 2014 18:25:05 +0100 Subject: [PATCH 0944/1368] arm64: Fix build for __PAGE_NONE define Simple typo. Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 154590ef6646..b8940733ee30 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -77,7 +77,7 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE) -define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) From f81685d08038409c7f46741372a7cbe7299d5f87 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Jun 2013 12:40:34 +0100 Subject: [PATCH 0945/1368] arm64: mm: don't bother invalidating the icache in switch_mm We don't support software broadcast of cache maintenance operations, so this flush is not required (__sync_icache_dcache will always affect all CPUs). Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 737c16dffc458e58ee7840556d43b874cb8e16a0) Signed-off-by: Mark Brown --- arch/arm64/include/asm/mmu_context.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index e2bc385adb6b..a9eee33dfa62 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -151,12 +151,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); -#ifdef CONFIG_SMP - /* check for possible thread migration */ - if (!cpumask_empty(mm_cpumask(next)) && - !cpumask_test_cpu(cpu, mm_cpumask(next))) - __flush_icache_all(); -#endif if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, tsk); } From b1793a187423385f424f1d71264ca768dae79e6a Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Tue, 21 May 2013 10:46:05 +0100 Subject: [PATCH 0946/1368] arm64: kernel: compiling issue, need delete read_current_timer() Under arm64, we will calibrate the delay loop statically using a known timer frequency, so delete read_current_timer(), or it will cause compiling issue with allmodconfig. The related error: ERROR: "read_current_timer" [lib/rbtree_test.ko] undefined! ERROR: "read_current_timer" [lib/interval_tree_test.ko] undefined! ERROR: "read_current_timer" [fs/ext4/ext4.ko] undefined! ERROR: "read_current_timer" [crypto/tcrypt.ko] undefined! Signed-off-by: Chen Gang Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 6916b14ea140ff5c915895eefe9431888a39a84d) Signed-off-by: Mark Brown --- arch/arm64/include/asm/timex.h | 6 +++--- arch/arm64/kernel/time.c | 6 ------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h index b24a31a7e2c9..81a076eb37fa 100644 --- a/arch/arm64/include/asm/timex.h +++ b/arch/arm64/include/asm/timex.h @@ -16,14 +16,14 @@ #ifndef __ASM_TIMEX_H #define __ASM_TIMEX_H +#include + /* * Use the current timer as a cycle counter since this is what we use for * the delay loop. */ -#define get_cycles() ({ cycles_t c; read_current_timer(&c); c; }) +#define get_cycles() arch_counter_get_cntvct() #include -#define ARCH_HAS_READ_CURRENT_TIMER - #endif diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a551f88ae2c1..03dc3718eb13 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -68,12 +68,6 @@ unsigned long long notrace sched_clock(void) return arch_timer_read_counter() * sched_clock_mult; } -int read_current_timer(unsigned long *timer_value) -{ - *timer_value = arch_timer_read_counter(); - return 0; -} - void __init time_init(void) { u32 arch_timer_rate; From 712bda65b9c4206dd4f0aed78474c7dfdde3697f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Jun 2013 19:34:41 +0100 Subject: [PATCH 0947/1368] arm64: pgtable: use pte_index instead of __pte_index pte_index is a useful helper outside of arch/arm64, for things like the ARM SMMU driver, so rename __pte_index to pte_index to be consistent with both arch/arm/ and also the definitions of pmd_index and pgd_index. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 9ab6d02fddc6831b166812956ff387d7112ff626) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b8940733ee30..508ee304fde0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -120,7 +120,7 @@ extern struct page *empty_zero_page; #define pte_none(pte) (!pte_val(pte)) #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -361,7 +361,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #endif /* Find an entry in the third-level page table.. */ -#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { From 179e79828c8daac4e902fb8716fbedc275befc34 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Jun 2013 19:34:42 +0100 Subject: [PATCH 0948/1368] arm64: device: add iommu pointer to device archdata When using an IOMMU for device mappings, it is necessary to keep a pointer between the device and the IOMMU to which it is attached in order to obtain the correct IOMMU when attaching the device to a domain. This patch adds an iommu pointer to the dev_archdata structure, in a similar manner to other architectures (ARM, PowerPC, x86, ...). Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 73150c983ac1f9b7653cfd3823b1ad4a44aad3bf) Signed-off-by: Mark Brown --- arch/arm64/include/asm/device.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 0d8453c755a8..cf98b362094b 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -18,6 +18,9 @@ struct dev_archdata { struct dma_map_ops *dma_ops; +#ifdef CONFIG_IOMMU_API + void *iommu; /* private IOMMU data */ +#endif }; struct pdev_archdata { From dc1307d2947cef80a10f53f1aa136b72accec79f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 May 2013 17:29:24 +0100 Subject: [PATCH 0949/1368] arm64: extable: sort the exception table at build time As is done for other architectures, sort the exception table at build-time rather than during boot. Since sortextable appears to be a standalone C program relying on the host elf.h to provide EM_AARCH64, I've had to add a conditional check in order to allow cross-compilation on machines that aren't running a bleeding-edge libc-dev. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit adace89562c7a9645b8dc84f6e1ac7ba8756094e) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/vmlinux.lds.S | 10 +--------- scripts/sortextable.c | 5 +++++ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 308a55636f76..7f553338d109 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -7,6 +7,7 @@ config ARM64 select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be8b016..5e06a1786e26 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -56,7 +56,7 @@ SECTIONS } RO_DATA(PAGE_SIZE) - + EXCEPTION_TABLE(8) _etext = .; /* End of text and rodata section */ . = ALIGN(PAGE_SIZE); @@ -98,14 +98,6 @@ SECTIONS CACHELINE_ALIGNED_DATA(64) READ_MOSTLY_DATA(64) - /* - * The exception fixup table (might need resorting at runtime) - */ - . = ALIGN(32); - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - /* * and the usual data section */ diff --git a/scripts/sortextable.c b/scripts/sortextable.c index 1f10e89d15b4..f9ce1160419b 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -31,6 +31,10 @@ #include #include +#ifndef EM_AARCH64 +#define EM_AARCH64 183 +#endif + static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */ @@ -249,6 +253,7 @@ do_file(char const *const fname) custom_sort = sort_relative_table; break; case EM_ARM: + case EM_AARCH64: case EM_MIPS: break; } /* end switch */ From 9377422c5c84f693385907fd61d8e7fb19596530 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 16 Mar 2013 08:48:13 +0000 Subject: [PATCH 0950/1368] arm64: debug: consolidate software breakpoint handlers The software breakpoint handlers are hooked in directly from ptrace, which makes it difficult to add additional handlers for things like kprobes and kgdb. This patch moves the handling code into debug-monitors.c, where we can dispatch to different debug subsystems more easily. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 1442b6ed249d2b3d2cfcf45b65ac64393495c96c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/debug-monitors.h | 9 ++++ arch/arm64/include/asm/ptrace.h | 2 - arch/arm64/kernel/debug-monitors.c | 66 ++++++++++++++++++++++++- arch/arm64/kernel/ptrace.c | 59 ---------------------- arch/arm64/kernel/traps.c | 5 +- 5 files changed, 75 insertions(+), 66 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7eaa0b302493..ef8235c68c09 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -83,6 +83,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif +#ifdef CONFIG_COMPAT +int aarch32_break_handler(struct pt_regs *regs); +#else +static int aarch32_break_handler(struct pt_regs *regs) +{ + return -EFAULT; +} +#endif + #endif /* __ASSEMBLY */ #endif /* __KERNEL__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41a71ee4c3df..0dacbbf9458b 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -171,7 +171,5 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif -extern int aarch32_break_trap(struct pt_regs *regs); - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4726dc054b3..08018e3df580 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -226,13 +227,74 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } -static int __init single_step_init(void) +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (!user_mode(regs)) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); return 0; } -arch_initcall(single_step_init); +arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 5341534b6d04..c484d5625ffb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -53,28 +53,6 @@ void ptrace_disable(struct task_struct *child) { } -/* - * Handle hitting a breakpoint. - */ -static int ptrace_break(struct pt_regs *regs) -{ - siginfo_t info = { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); - return 0; -} - -static int arm64_break_trap(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - return ptrace_break(regs); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Handle hitting a HW-breakpoint. @@ -815,33 +793,6 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; -int aarch32_break_trap(struct pt_regs *regs) -{ - unsigned int instr; - bool bp = false; - void __user *pc = (void __user *)instruction_pointer(regs); - - if (compat_thumb_mode(regs)) { - /* get 16-bit Thumb instruction */ - get_user(instr, (u16 __user *)pc); - if (instr == AARCH32_BREAK_THUMB2_LO) { - /* get second half of 32-bit Thumb-2 instruction */ - get_user(instr, (u16 __user *)(pc + 2)); - bp = instr == AARCH32_BREAK_THUMB2_HI; - } else { - bp = instr == AARCH32_BREAK_THUMB; - } - } else { - /* 32-bit ARM instruction */ - get_user(instr, (u32 __user *)pc); - bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; - } - - if (bp) - return ptrace_break(regs); - return 1; -} - static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t __user *ret) { @@ -1109,16 +1060,6 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } - -static int __init ptrace_break_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); - return 0; -} -core_initcall(ptrace_break_init); - - asmlinkage int syscall_trace(int dir, struct pt_regs *regs) { unsigned long saved_reg; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f30852d28590..7ffadddb645d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); -#ifdef CONFIG_COMPAT /* check for AArch32 breakpoint instructions */ - if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + if (!aarch32_break_handler(regs)) return; -#endif if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { From a8ed08a5daf8292f9feacf9aaea2ec49def9be5f Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sun, 16 Jun 2013 20:32:44 +0100 Subject: [PATCH 0951/1368] arm64/Makefile: provide vdso_install target Provide a vdso_install target in the arm64 Makefile, as other architectures with a vdso do. Signed-off-by: Kyle McMartin Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 3c01742a8ac93a3abf9b099758db970410427afd) Signed-off-by: Mark Brown --- arch/arm64/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb212fd..b6ccf8a36e2d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -60,6 +60,10 @@ zinstall install: vmlinux dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts dtbs +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) From a9439270f923281fae2213f13963ee64e767bafe Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 24 Apr 2013 10:06:57 +0100 Subject: [PATCH 0952/1368] arm64: Add Kconfig option for APM X-Gene SOC family This patch adds arm64/Kconfig option for APM X-Gene SOC family. Signed-off-by: Kumar Sankaran Signed-off-by: Loc Ho Signed-off-by: Feng Kan Signed-off-by: Catalin Marinas (cherry picked from commit 159428538323188158a6058956c16c199909d844) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7f553338d109..52077610cb7e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -112,6 +112,11 @@ config ARCH_VEXPRESS This enables support for the ARMv8 software model (Versatile Express). +config ARCH_XGENE + bool "AppliedMicro X-Gene SOC Family" + help + This enables support for AppliedMicro X-Gene SOC Family + endmenu menu "Bus support" @@ -149,6 +154,8 @@ config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 depends on SMP + # These have to remain sorted largest to smallest + default "8" if ARCH_XGENE default "4" source kernel/Kconfig.preempt From 4e7b7c9c2a3a9779156890cc69d32731948c1106 Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 24 Apr 2013 10:06:58 +0100 Subject: [PATCH 0953/1368] arm64: Enable APM X-Gene SOC family in the defconfig This patch enables APM X-Gene SOC family in the defconfig. It also enables 8250 serial driver needed by X-Gene SOC family. Signed-off-by: Kumar Sankaran Signed-off-by: Loc Ho Signed-off-by: Feng Kan Signed-off-by: Catalin Marinas (cherry picked from commit c1db16dc9e74addba4ef8c954702f13e457f0c7e) Signed-off-by: Mark Brown --- arch/arm64/configs/defconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 8d9696adb440..5b3e83217b03 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -24,6 +24,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_XGENE=y CONFIG_SMP=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_CMDLINE="console=ttyAMA0" @@ -54,6 +55,9 @@ CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y # CONFIG_HW_RANDOM is not set From 63442c002f0090d38ba036c63b9e4f4595d46ccc Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 24 Apr 2013 10:06:59 +0100 Subject: [PATCH 0954/1368] arm64: Add defines for APM ARMv8 implementation This patch adds defines for APM CPU implementer ID and APM CPU part numbers in asm/cputype.h Signed-off-by: Kumar Sankaran Signed-off-by: Loc Ho Signed-off-by: Feng Kan Signed-off-by: Catalin Marinas (cherry picked from commit 4ad637a452d5683ca7ff9e9eb994ac4b7a517073) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cputype.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf2749488cd4..5fe138e0b828 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -37,11 +37,14 @@ }) #define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_PART_AEM_V8 0xD0F0 #define ARM_CPU_PART_FOUNDATION 0xD000 #define ARM_CPU_PART_CORTEX_A57 0xD070 +#define APM_CPU_PART_POTENZA 0x0000 + #ifndef __ASSEMBLY__ /* From eadf099dfab2335c8f975a8242b53fd5be4f890f Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 24 Apr 2013 10:07:00 +0100 Subject: [PATCH 0955/1368] arm64: Add initial DTS for APM X-Gene Storm SOC and APM Mustang board This patch adds initial DTS files required for APM Mustang board. Signed-off-by: Kumar Sankaran Signed-off-by: Loc Ho Signed-off-by: Feng Kan Signed-off-by: Catalin Marinas (cherry picked from commit ee877b5321c4dfee9dc9f2a12b19ddcd33149f6a) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/Makefile | 1 + arch/arm64/boot/dts/apm-mustang.dts | 26 +++++++ arch/arm64/boot/dts/apm-storm.dtsi | 116 ++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 arch/arm64/boot/dts/apm-mustang.dts create mode 100644 arch/arm64/boot/dts/apm-storm.dtsi diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index 68457e9e0975..c52bdb051f66 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -1,4 +1,5 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb +dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb targets += dtbs targets += $(dtb-y) diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts new file mode 100644 index 000000000000..1247ca1200b1 --- /dev/null +++ b/arch/arm64/boot/dts/apm-mustang.dts @@ -0,0 +1,26 @@ +/* + * dts file for AppliedMicro (APM) Mustang Board + * + * Copyright (C) 2013, Applied Micro Circuits Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +/dts-v1/; + +/include/ "apm-storm.dtsi" + +/ { + model = "APM X-Gene Mustang board"; + compatible = "apm,mustang", "apm,xgene-storm"; + + chosen { }; + + memory { + device_type = "memory"; + reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */ + }; +}; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi new file mode 100644 index 000000000000..bfdc57834929 --- /dev/null +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -0,0 +1,116 @@ +/* + * dts file for AppliedMicro (APM) X-Gene Storm SOC + * + * Copyright (C) 2013, Applied Micro Circuits Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +/ { + compatible = "apm,xgene-storm"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@000 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x000>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@001 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x001>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@100 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x100>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@101 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x101>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@200 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x200>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@201 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x201>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@300 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x300>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@301 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x301>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + }; + + gic: interrupt-controller@78010000 { + compatible = "arm,cortex-a15-gic"; + #interrupt-cells = <3>; + interrupt-controller; + reg = <0x0 0x78010000 0x0 0x1000>, /* GIC Dist */ + <0x0 0x78020000 0x0 0x1000>, /* GIC CPU */ + <0x0 0x78040000 0x0 0x2000>, /* GIC VCPU Control */ + <0x0 0x78060000 0x0 0x2000>; /* GIC VCPU */ + interrupts = <1 9 0xf04>; /* GIC Maintenence IRQ */ + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 0 0xff01>, /* Secure Phys IRQ */ + <1 13 0xff01>, /* Non-secure Phys IRQ */ + <1 14 0xff01>, /* Virt IRQ */ + <1 15 0xff01>; /* Hyp IRQ */ + clock-frequency = <50000000>; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + serial0: serial@1c020000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0 0x1c020000 0x0 0x1000>; + reg-shift = <2>; + clock-frequency = <10000000>; /* Updated by bootloader */ + interrupt-parent = <&gic>; + interrupts = <0x0 0x4c 0x4>; + }; + }; +}; From c0d0b7ddd13cad209c8211a4da862c9e56096493 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 24 Jun 2013 10:27:49 +0100 Subject: [PATCH 0956/1368] arm64: add '#ifdef CONFIG_COMPAT' for aarch32_break_handler() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If 'COMPAT' not defined, aarch32_break_handler() cannot pass compiling, and it can work independent with 'COMPAT', so remove dummy definition. The related error: arch/arm64/kernel/debug-monitors.c:249:5: error: redefinition of ‘aarch32_break_handler’ In file included from arch/arm64/kernel/debug-monitors.c:29:0: /root/linux-next/arch/arm64/include/asm/debug-monitors.h:89:12: note: previous definition of ‘aarch32_break_handler’ was here Signed-off-by: Chen Gang Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c783c2815e13bbb0c0b99997cc240bd7e91b6bb8) Signed-off-by: Mark Brown --- arch/arm64/include/asm/debug-monitors.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index ef8235c68c09..a2232d07be9d 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -83,14 +83,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif -#ifdef CONFIG_COMPAT int aarch32_break_handler(struct pt_regs *regs); -#else -static int aarch32_break_handler(struct pt_regs *regs) -{ - return -EFAULT; -} -#endif #endif /* __ASSEMBLY */ #endif /* __KERNEL__ */ From 9a87b8d3cd27f298d18b2f1812031dae7520fd74 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 1 Jul 2013 14:20:35 -0400 Subject: [PATCH 0957/1368] of: Specify initrd location using 64-bit On some PAE architectures, the entire range of physical memory could reside outside the 32-bit limit. These systems need the ability to specify the initrd location using 64-bit numbers. This patch globally modifies the early_init_dt_setup_initrd_arch() function to use 64-bit numbers instead of the current unsigned long. There has been quite a bit of debate about whether to use u64 or phys_addr_t. It was concluded to stick to u64 to be consistent with rest of the device tree code. As summarized by Geert, "The address to load the initrd is decided by the bootloader/user and set at that point later in time. The dtb should not be tied to the kernel you are booting" More details on the discussion can be found here: https://lkml.org/lkml/2013/6/20/690 https://lkml.org/lkml/2012/9/13/544 Signed-off-by: Santosh Shilimkar Acked-by: Rob Herring Acked-by: Vineet Gupta Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Grant Likely (cherry picked from commit 374d5c9964c10373ba39bbe934f4262eb87d7114) Signed-off-by: Mark Brown --- arch/arc/mm/init.c | 5 ++--- arch/arm/mm/init.c | 2 +- arch/arm64/mm/init.c | 3 +-- arch/c6x/kernel/devicetree.c | 3 +-- arch/metag/mm/init.c | 5 ++--- arch/microblaze/kernel/prom.c | 3 +-- arch/mips/kernel/prom.c | 3 +-- arch/openrisc/kernel/prom.c | 3 +-- arch/powerpc/kernel/prom.c | 3 +-- arch/x86/kernel/devicetree.c | 3 +-- arch/xtensa/kernel/setup.c | 3 +-- drivers/of/fdt.c | 10 ++++++---- include/linux/of_fdt.h | 3 +-- 13 files changed, 20 insertions(+), 29 deletions(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 4a177365b2c4..7991e08d606b 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -157,9 +157,8 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { - pr_err("%s(%lx, %lx)\n", __func__, start, end); + pr_err("%s(%llx, %llx)\n", __func__, start, end); } #endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9a5cdc01fcdf..afeaef7a8ffc 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -76,7 +76,7 @@ static int __init parse_tag_initrd2(const struct tag *tag) __tagtable(ATAG_INITRD2, parse_tag_initrd2); #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { phys_initrd_start = start; phys_initrd_size = end - start; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca77925a..704770891d06 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -44,8 +44,7 @@ static unsigned long phys_initrd_size __initdata = 0; phys_addr_t memstart_addr __read_mostly = 0; -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { phys_initrd_start = start; phys_initrd_size = end - start; diff --git a/arch/c6x/kernel/devicetree.c b/arch/c6x/kernel/devicetree.c index bdb56f09d0ac..287d0e64dfba 100644 --- a/arch/c6x/kernel/devicetree.c +++ b/arch/c6x/kernel/devicetree.c @@ -33,8 +33,7 @@ void __init early_init_devtree(void *params) #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index d05b8455c44c..bdc48111f0df 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -419,10 +419,9 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { - pr_err("%s(%lx, %lx)\n", + pr_err("%s(%llx, %llx)\n", __func__, start, end); } #endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 0a2c68f9f9b0..62e2e8f2c5d6 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -136,8 +136,7 @@ void __init early_init_devtree(void *params) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 5712bb532245..32b87882ac87 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -58,8 +58,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c index 5869e3fa5dd3..150215a91711 100644 --- a/arch/openrisc/kernel/prom.c +++ b/arch/openrisc/kernel/prom.c @@ -96,8 +96,7 @@ void __init early_init_devtree(void *params) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce2..2f3e25225158 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -550,8 +550,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index b1581527a236..2fbad6b9f23c 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -52,8 +52,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 6dd25ecde3f5..d45e602f4328 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -170,8 +170,7 @@ static int __init parse_tag_fdt(const bp_tag_t *tag) __tagtable(BP_TAG_FDT, parse_tag_fdt); -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (void *)__va(start); initrd_end = (void *)__va(end); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 808be06bb67e..21123b8ee4d4 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -550,7 +550,8 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat) */ void __init early_init_dt_check_for_initrd(unsigned long node) { - unsigned long start, end, len; + u64 start, end; + unsigned long len; __be32 *prop; pr_debug("Looking for initrd properties... "); @@ -558,15 +559,16 @@ void __init early_init_dt_check_for_initrd(unsigned long node) prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len); if (!prop) return; - start = of_read_ulong(prop, len/4); + start = of_read_number(prop, len/4); prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len); if (!prop) return; - end = of_read_ulong(prop, len/4); + end = of_read_number(prop, len/4); early_init_dt_setup_initrd_arch(start, end); - pr_debug("initrd_start=0x%lx initrd_end=0x%lx\n", start, end); + pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", + (unsigned long long)start, (unsigned long long)end); } #else inline void early_init_dt_check_for_initrd(unsigned long node) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index ed136ad698ce..4a17939b95cc 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -106,8 +106,7 @@ extern u64 dt_mem_next_cell(int s, __be32 **cellp); * physical addresses. */ #ifdef CONFIG_BLK_DEV_INITRD -extern void early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end); +extern void early_init_dt_setup_initrd_arch(u64 start, u64 end); #endif /* Early flat tree scan hooks */ From 52e83d9043b1699156b3a5f50356c23026ef17ad Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 21 Aug 2013 10:50:17 +0100 Subject: [PATCH 0958/1368] ARM64: include: asm: include "asm/types.h" in "pgtable-2level-types.h" and "pgtable-3level-types.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need include "asm/types.h", just like arm has done, or can not pass compiling, the related error: In file included from arch/arm64/include/asm/page.h:37:0, from drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h:42, from drivers/staging/lustre/include/linux/lnet/lib-lnet.h:44, from drivers/staging/lustre/lnet/lnet/api-ni.c:38: arch/arm64/include/asm/pgtable-2level-types.h:19:1: error: unknown type name ‘u64 arch/arm64/include/asm/pgtable-2level-types.h:20:1: error: unknown type name ‘u64’ Signed-off-by: Chen Gang Signed-off-by: Catalin Marinas (cherry picked from commit 360b35a874f130305715b1b854b67dc40826fc91) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable-2level-types.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/pgtable-2level-types.h b/arch/arm64/include/asm/pgtable-2level-types.h index 3c3ca7d361e4..5f101e63dfc1 100644 --- a/arch/arm64/include/asm/pgtable-2level-types.h +++ b/arch/arm64/include/asm/pgtable-2level-types.h @@ -16,6 +16,8 @@ #ifndef __ASM_PGTABLE_2LEVEL_TYPES_H #define __ASM_PGTABLE_2LEVEL_TYPES_H +#include + typedef u64 pteval_t; typedef u64 pgdval_t; typedef pgdval_t pmdval_t; From 3033aae67ae55a86e2a0b73199984ff060effa7b Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Thu, 15 Aug 2013 00:10:00 +0100 Subject: [PATCH 0959/1368] arm64: Expand arm64 image header Expand the arm64 image header to allow for co-existance with PE/COFF header required by the EFI stub. The PE/COFF format requires the "MZ" header to be at offset 0, and the offset to the PE/COFF header to be at offset 0x3c. The image header is expanded to allow 2 instructions at the beginning to accommodate a benign intruction at offset 0 that includes the "MZ" header, a magic number, and the offset to the PE/COFF header. Signed-off-by: Roy Franz Signed-off-by: Catalin Marinas (cherry picked from commit 4370eec05a887b0cd4392cd5dc5b2713174745c0) Signed-off-by: Mark Brown --- Documentation/arm64/booting.txt | 16 +++++++++++++--- arch/arm64/kernel/head.S | 8 ++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 9c4d388daddc..5273c4d60e65 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -68,13 +68,23 @@ Image target is available instead. Requirement: MANDATORY -The decompressed kernel image contains a 32-byte header as follows: +The decompressed kernel image contains a 64-byte header as follows: - u32 magic = 0x14000008; /* branch to stext, little-endian */ - u32 res0 = 0; /* reserved */ + u32 code0; /* Executable code */ + u32 code1; /* Executable code */ u64 text_offset; /* Image load offset */ + u64 res0 = 0; /* reserved */ u64 res1 = 0; /* reserved */ u64 res2 = 0; /* reserved */ + u64 res3 = 0; /* reserved */ + u64 res4 = 0; /* reserved */ + u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */ + u32 res5 = 0; /* reserved */ + + +Header notes: + +- code0/code1 are responsible for branching to stext. The image must be placed at the specified offset (currently 0x80000) from the start of the system RAM and called there. The start of the diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 53dcae49e729..7090c126797c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -112,6 +112,14 @@ .quad TEXT_OFFSET // Image load offset from start of RAM .quad 0 // reserved .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .byte 0x41 // Magic number, "ARM\x64" + .byte 0x52 + .byte 0x4d + .byte 0x64 + .word 0 // reserved ENTRY(stext) mov x21, x0 // x21=FDT From 0195e1c67cd4b47ed4c2dbd0a94e69543121e0f0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Aug 2013 11:47:37 +0100 Subject: [PATCH 0960/1368] arm64: Enable interrupts in the EL0 undef handler do_undefinstr() has to be called with interrupts disabled since it may read the instruction from the user address space which could lead to a data abort and subsequent might_sleep() warning in do_page_fault(). Signed-off-by: Catalin Marinas (cherry picked from commit 2600e130b3c90c8d6c13229d3d3a14dcb898a87b) Signed-off-by: Mark Brown --- arch/arm64/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6ad781b21c08..57640df9d70c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -476,6 +476,8 @@ el0_undef: * Undefined instruction */ mov x0, sp + // enable interrupts before calling the main handler + enable_irq b do_undefinstr el0_dbg: /* From 60d914ababfa53c35bf92b6be1921f265e535ab6 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 23 Aug 2013 16:16:42 +0100 Subject: [PATCH 0961/1368] arm64: move elf notes into readonly segment The current vmlinux.lds.S places the notes sections between the end of rw data and start of bss. This means that _edata doesn't really point to the end of data. Since notes are read-only, this patch moves them to the read-only segment so that _edata does point to the end of initialized rw data. Signed-off-by: Mark Salter Signed-off-by: Catalin Marinas (cherry picked from commit c80b7ee8520606f77fbc8ced870c96659053269e) Signed-off-by: Mark Brown --- arch/arm64/kernel/vmlinux.lds.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 5e06a1786e26..1e8e6be0241c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -57,6 +57,7 @@ SECTIONS RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) + NOTES _etext = .; /* End of text and rodata section */ . = ALIGN(PAGE_SIZE); @@ -108,8 +109,6 @@ SECTIONS } _edata_loc = __data_loc + SIZEOF(.data); - NOTES - BSS_SECTION(0, 0, 0) _end = .; From 6d5002154f8dad3866240e5368c4341a1af6aafc Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 23 Aug 2013 18:04:44 +0100 Subject: [PATCH 0962/1368] arm64: Fix mapping of memory banks not ending on a PMD_SIZE boundary The map_mem() function limits the current memblock limit to PGDIR_SIZE (the initial swapper_pg_dir mapping) to avoid create_mapping() allocating memory from unmapped areas. However, if the first block is within PGDIR_SIZE and not ending on a PMD_SIZE boundary, when 4K page configuration is enabled, create_mapping() will try to allocate a pte page. Such page may be returned by memblock_alloc() from the end of such bank (or any subsequent bank within PGDIR_SIZE) which is not mapped yet. The patch limits the current memblock limit to the aligned end of the first bank and gradually increases it as more memory is mapped. It also ensures that the start of the first bank is aligned to PMD_SIZE to avoid pte page allocation for this mapping. Signed-off-by: Catalin Marinas Reported-by: "Leizhen (ThunderTown, Euler)" Tested-by: "Leizhen (ThunderTown, Euler)" (cherry picked from commit e25208f77c2dad5a9f2ab3d3df61252a90b71afa) Signed-off-by: Mark Brown --- arch/arm64/mm/mmu.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 49961d1fa033..f8dc7e8fce6f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -304,6 +304,7 @@ void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt) static void __init map_mem(void) { struct memblock_region *reg; + phys_addr_t limit; /* * Temporarily limit the memblock range. We need to do this as @@ -311,9 +312,11 @@ static void __init map_mem(void) * memory addressable from the initial direct kernel mapping. * * The initial direct kernel mapping, located at swapper_pg_dir, - * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned). + * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be + * aligned to 2MB as per Documentation/arm64/booting.txt). */ - memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); + limit = PHYS_OFFSET + PGDIR_SIZE; + memblock_set_current_limit(limit); /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -323,6 +326,22 @@ static void __init map_mem(void) if (start >= end) break; +#ifndef CONFIG_ARM64_64K_PAGES + /* + * For the first memory bank align the start address and + * current memblock limit to prevent create_mapping() from + * allocating pte page tables from unmapped memory. + * When 64K pages are enabled, the pte page table for the + * first PGDIR_SIZE is already present in swapper_pg_dir. + */ + if (start < limit) + start = ALIGN(start, PMD_SIZE); + if (end < limit) { + limit = end & PMD_MASK; + memblock_set_current_limit(limit); + } +#endif + create_mapping(start, __phys_to_virt(start), end - start); } From 0fa5264c3f0ce5625640fee0b7e82fe4ab9f7f17 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2013 18:06:48 +0100 Subject: [PATCH 0963/1368] arm64: delay: don't bother reporting bogomips in /proc/cpuinfo We always use a timer-backed delay loop for arm64, so don't bother reporting a bogomips value which appears to confuse some people. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 326b16db9f69fd0d279be873c6c00f88c0a4aad5) Signed-off-by: Mark Brown --- arch/arm64/kernel/setup.c | 3 --- arch/arm64/kernel/smp.c | 6 +----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index add6ea616843..bca4c1c2052a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -328,9 +328,6 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", - loops_per_jiffy / (500000UL/HZ), - loops_per_jiffy / (5000UL/HZ) % 100); } /* dump out the processor features */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 9c93e126328c..edbfe241eacb 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -223,11 +223,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) void __init smp_cpus_done(unsigned int max_cpus) { - unsigned long bogosum = loops_per_jiffy * num_online_cpus(); - - pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - num_online_cpus(), bogosum / (500000/HZ), - (bogosum / (5000/HZ)) % 100); + pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); } void __init smp_prepare_boot_cpu(void) From 760b97f0952ff32d3856241ac6847c82d1477ffd Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Wed, 28 Aug 2013 14:24:53 +0100 Subject: [PATCH 0964/1368] Move the EM_ARM and EM_AARCH64 definitions to uapi/linux/elf-em.h Signed-off-by: Dan Aloni Signed-off-by: Catalin Marinas (cherry picked from commit 909e3ee4119f87b85c6e1b8534b2287ed1ea3ca2) Signed-off-by: Mark Brown --- arch/arm/include/asm/elf.h | 2 -- arch/arm64/include/asm/elf.h | 3 --- include/uapi/linux/elf-em.h | 2 ++ 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 38050b1c4800..e110a672e160 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -19,8 +19,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_fp elf_fpregset_t; -#define EM_ARM 40 - #define EF_ARM_EABI_MASK 0xff000000 #define EF_ARM_EABI_UNKNOWN 0x00000000 #define EF_ARM_EABI_VER1 0x01000000 diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index fe32c0e4ac01..e7fa87f9201b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -33,8 +33,6 @@ typedef unsigned long elf_greg_t; typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_fpsimd_state elf_fpregset_t; -#define EM_AARCH64 183 - /* * AArch64 static relocation types. */ @@ -151,7 +149,6 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk #ifdef CONFIG_COMPAT -#define EM_ARM 40 #define COMPAT_ELF_PLATFORM ("v8l") #define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3)) diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 8e2b7bac4378..59c17a2d38ad 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -22,6 +22,7 @@ #define EM_PPC 20 /* PowerPC */ #define EM_PPC64 21 /* PowerPC64 */ #define EM_SPU 23 /* Cell BE SPU */ +#define EM_ARM 40 /* ARM 32 bit */ #define EM_SH 42 /* SuperH */ #define EM_SPARCV9 43 /* SPARC v9 64-bit */ #define EM_IA_64 50 /* HP/Intel IA-64 */ @@ -34,6 +35,7 @@ #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ +#define EM_AARCH64 183 /* ARM 64 bit */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ From f164a6c211ef728f8a0f2fa8fa9f06549b138034 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 12 Jun 2013 16:28:04 +0100 Subject: [PATCH 0965/1368] arm64: mm: permit use of tagged pointers at EL0 TCR.TBI0 can be used to cause hardware address translation to ignore the top byte of userspace virtual addresses. Whilst not especially useful in standard C programs, this can be used by JITs to `tag' pointers with various pieces of metadata. This patch enables this bit for AArch64 Linux, and adds a new file to Documentation/arm64/ which describes some potential caveats when using tagged virtual addresses. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit d50240a5f6ceaf690a77b0fccb17be51cfa151c2) Signed-off-by: Mark Brown --- Documentation/arm64/tagged-pointers.txt | 34 +++++++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/kernel/entry.S | 1 + arch/arm64/mm/proc.S | 2 +- 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 Documentation/arm64/tagged-pointers.txt diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt new file mode 100644 index 000000000000..264e9841563a --- /dev/null +++ b/Documentation/arm64/tagged-pointers.txt @@ -0,0 +1,34 @@ + Tagged virtual addresses in AArch64 Linux + ========================================= + +Author: Will Deacon +Date : 12 June 2013 + +This document briefly describes the provision of tagged virtual +addresses in the AArch64 translation system and their potential uses +in AArch64 Linux. + +The kernel configures the translation tables so that translations made +via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of +the virtual address ignored by the translation hardware. This frees up +this byte for application use, with the following caveats: + + (1) The kernel requires that all user addresses passed to EL1 + are tagged with tag 0x00. This means that any syscall + parameters containing user virtual addresses *must* have + their top byte cleared before trapping to the kernel. + + (2) Tags are not guaranteed to be preserved when delivering + signals. This means that signal handlers in applications + making use of tags cannot rely on the tag information for + user virtual addresses being maintained for fields inside + siginfo_t. One exception to this rule is for signals raised + in response to debug exceptions, where the tag information + will be preserved. + + (3) Special care should be taken when using tagged pointers, + since it is likely that C compilers will not hazard two + addresses differing only in the upper bits. + +The architecture prevents the use of a tagged PC, so the upper byte will +be set to a sign-extension of bit 55 on exception return. diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 2294f2330960..d25991747650 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -104,5 +104,6 @@ #define TCR_TG1_64K (UL(1) << 30) #define TCR_IPS_40BIT (UL(2) << 32) #define TCR_ASID16 (UL(1) << 36) +#define TCR_TBI0 (UL(1) << 37) #endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 57640df9d70c..3881fd115ebb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -423,6 +423,7 @@ el0_da: * Data abort handling */ mrs x0, far_el1 + bic x0, x0, #(0xff << 56) disable_step x1 isb enable_dbg diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index f84fcf71f129..b1b31bbc967b 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -147,7 +147,7 @@ ENTRY(__cpu_setup) * both user and kernel. */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \ - TCR_ASID16 | (1 << 31) + TCR_ASID16 | TCR_TBI0 | (1 << 31) #ifdef CONFIG_ARM64_64K_PAGES orr x10, x10, TCR_TG0_64K orr x10, x10, TCR_TG1_64K From 08868ba57e4ec7729db37199e65480dadd9f5d30 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 16 Sep 2013 15:18:28 +0100 Subject: [PATCH 0966/1368] arm64: Make do_bad_area() function static This function is only called from arch/arm64/mm/fault.c. Signed-off-by: Catalin Marinas (cherry picked from commit 59f67e16e6b79697241c3fd030e3da300377893e) Signed-off-by: Mark Brown --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6c8ba25bf6bb..df4f2fd187c3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -130,7 +130,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, force_sig_info(sig, &si, tsk); } -void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->active_mm; From 37f03baee485e2031b2226d77e4ae4c6f28ff390 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 17 Sep 2013 18:49:46 +0100 Subject: [PATCH 0967/1368] arm64: Correctly report LR and SP for compat tasks When a task crashes and we print debugging information, ensure that compat tasks show the actual AArch32 LR and SP registers rather than the AArch64 ones. Signed-off-by: Catalin Marinas (cherry picked from commit 6ca68e802612c87c31aa83d50c37ed0d88774a46) Signed-off-by: Mark Brown --- arch/arm64/kernel/process.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 46f02c3b5015..2090389b95b0 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -143,15 +143,26 @@ void machine_restart(char *cmd) void __show_regs(struct pt_regs *regs) { - int i; + int i, top_reg; + u64 lr, sp; + + if (compat_user_mode(regs)) { + lr = regs->compat_lr; + sp = regs->compat_sp; + top_reg = 12; + } else { + lr = regs->regs[30]; + sp = regs->sp; + top_reg = 29; + } show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->regs[30]); + print_symbol("LR is at %s\n", lr); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, regs->regs[30], regs->pstate); - printk("sp : %016llx\n", regs->sp); - for (i = 29; i >= 0; i--) { + regs->pc, lr, regs->pstate); + printk("sp : %016llx\n", sp); + for (i = top_reg; i >= 0; i--) { printk("x%-2d: %016llx ", i, regs->regs[i]); if (i % 2 == 0) printk("\n"); From 9d6eaf2f9176f131889d41801da4148f9b6718a3 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 18 Sep 2013 16:14:28 +0100 Subject: [PATCH 0968/1368] arm64: Widen hwcap to be 64 bit Under arm64 elf_hwcap is a 32 bit quantity, but it is stored in a 64 bit auxiliary ELF field and glibc reads hwcap as 64 bit. This patch widens elf_hwcap to be 64 bit. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas (cherry picked from commit 25804e6a96681d5d2142058948e218999e4f547c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/hwcap.h | 2 +- arch/arm64/kernel/setup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d4482fa35bc..e2950b098e76 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -43,6 +43,6 @@ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) -extern unsigned int elf_hwcap; +extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index bca4c1c2052a..40ae3ced37a1 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -57,7 +57,7 @@ unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); static const char *cpu_name; From e9b67be1af51a366ae73c8893987cdafb081b935 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Wed, 26 Jun 2013 11:56:10 -0600 Subject: [PATCH 0969/1368] clk: arm64: Add DTS clock entry for APM X-Gene Storm SoC clk: arm64: Add DTS clock entry for APM X-Gene Storm SoC with reference to reference, PCP PLL, SoC PLL, and Ethernet clocks. Signed-off-by: Loc Ho Signed-off-by: Kumar Sankaran Signed-off-by: Vinayak Kale Signed-off-by: Feng Kan Signed-off-by: Mike Turquette (cherry picked from commit 3eb15d84e355f4ea1acf0eaba11ca173de342107) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/apm-storm.dtsi | 75 ++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index bfdc57834929..d37d7369e260 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -103,6 +103,81 @@ soc { #size-cells = <2>; ranges; + clocks { + #address-cells = <2>; + #size-cells = <2>; + ranges; + refclk: refclk { + compatible = "fixed-clock"; + #clock-cells = <1>; + clock-frequency = <100000000>; + clock-output-names = "refclk"; + }; + + pcppll: pcppll@17000100 { + compatible = "apm,xgene-pcppll-clock"; + #clock-cells = <1>; + clocks = <&refclk 0>; + clock-names = "pcppll"; + reg = <0x0 0x17000100 0x0 0x1000>; + clock-output-names = "pcppll"; + type = <0>; + }; + + socpll: socpll@17000120 { + compatible = "apm,xgene-socpll-clock"; + #clock-cells = <1>; + clocks = <&refclk 0>; + clock-names = "socpll"; + reg = <0x0 0x17000120 0x0 0x1000>; + clock-output-names = "socpll"; + type = <1>; + }; + + socplldiv2: socplldiv2 { + compatible = "fixed-factor-clock"; + #clock-cells = <1>; + clocks = <&socpll 0>; + clock-names = "socplldiv2"; + clock-mult = <1>; + clock-div = <2>; + clock-output-names = "socplldiv2"; + }; + + qmlclk: qmlclk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + clock-names = "qmlclk"; + reg = <0x0 0x1703C000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "qmlclk"; + }; + + ethclk: ethclk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + clock-names = "ethclk"; + reg = <0x0 0x17000000 0x0 0x1000>; + reg-names = "div-reg"; + divider-offset = <0x238>; + divider-width = <0x9>; + divider-shift = <0x0>; + clock-output-names = "ethclk"; + }; + + eth8clk: eth8clk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <ðclk 0>; + clock-names = "eth8clk"; + reg = <0x0 0x1702C000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "eth8clk"; + }; + }; + serial0: serial@1c020000 { device_type = "serial"; compatible = "ns16550"; From 5aa580e940c4ba9027b348729a53ee3cd4032cce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:26 +0100 Subject: [PATCH 0970/1368] arm64: locks: introduce ticket-based spinlock implementation This patch introduces a ticket lock implementation for arm64, along the same lines as the implementation for arch/arm/. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 52ea2a560a9dba57fe5fd6b4726b1089751accf2) Signed-off-by: Mark Brown --- arch/arm64/include/asm/spinlock.h | 79 +++++++++++++++++-------- arch/arm64/include/asm/spinlock_types.h | 10 ++-- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 0defa0728a9b..525dd535443e 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -22,17 +22,10 @@ /* * Spinlock implementation. * - * The old value is read exclusively and the new one, if unlocked, is written - * exclusively. In case of failure, the loop is restarted. - * * The memory barriers are implicit with the load-acquire and store-release * instructions. - * - * Unlocked value: 0 - * Locked value: 1 */ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -41,32 +34,51 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp; + arch_spinlock_t lockval, newval; asm volatile( - " sevl\n" - "1: wfe\n" - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1b\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - : "=&r" (tmp), "+Q" (lock->lock) - : "r" (1) - : "cc", "memory"); + /* Atomically increment the next ticket. */ +" prfm pstl1strm, %3\n" +"1: ldaxr %w0, %3\n" +" add %w1, %w0, %w5\n" +" stxr %w2, %w1, %3\n" +" cbnz %w2, 1b\n" + /* Did we get the lock? */ +" eor %w1, %w0, %w0, ror #16\n" +" cbz %w1, 3f\n" + /* + * No: spin on the owner. Send a local event to avoid missing an + * unlock before the exclusive load. + */ +" sevl\n" +"2: wfe\n" +" ldaxrh %w2, %4\n" +" eor %w1, %w2, %w0, lsr #16\n" +" cbnz %w1, 2b\n" + /* We got the lock. Critical section starts here. */ +"3:" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) + : "Q" (lock->owner), "I" (1 << TICKET_SHIFT) + : "memory"); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp; + arch_spinlock_t lockval; asm volatile( - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1f\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - "1:\n" - : "=&r" (tmp), "+Q" (lock->lock) - : "r" (1) - : "cc", "memory"); +" prfm pstl1strm, %2\n" +"1: ldaxr %w0, %2\n" +" eor %w1, %w0, %w0, ror #16\n" +" cbnz %w1, 2f\n" +" add %w0, %w0, %3\n" +" stxr %w1, %w0, %2\n" +" cbnz %w1, 1b\n" +"2:" + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : "I" (1 << TICKET_SHIFT) + : "memory"); return !tmp; } @@ -74,10 +86,25 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { asm volatile( - " stlr %w1, %0\n" - : "=Q" (lock->lock) : "r" (0) : "memory"); +" stlrh %w1, %0\n" + : "=Q" (lock->owner) + : "r" (lock->owner + 1) + : "memory"); } +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + arch_spinlock_t lockval = ACCESS_ONCE(*lock); + return lockval.owner != lockval.next; +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + arch_spinlock_t lockval = ACCESS_ONCE(*lock); + return (lockval.next - lockval.owner) > 1; +} +#define arch_spin_is_contended arch_spin_is_contended + /* * Write lock implementation. * diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 9a494346efed..87692750ed94 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -20,14 +20,14 @@ # error "please don't include this file directly" #endif -/* We only require natural alignment for exclusive accesses. */ -#define __lock_aligned +#define TICKET_SHIFT 16 typedef struct { - volatile unsigned int lock; -} arch_spinlock_t; + u16 owner; + u16 next; +} __aligned(4) arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 } typedef struct { volatile unsigned int lock; From 9f89c68bf6d720eae561dc5a796f727a57e8daea Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:27 +0100 Subject: [PATCH 0971/1368] arm64: lockref: add support for lockless lockrefs using cmpxchg Our spinlocks are only 32-bit (2x16-bit tickets) and our cmpxchg can deal with 8-bytes (as one would hope!). This patch wires up the cmpxchg-based lockless lockref implementation for arm64. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 5686b06cea34e31ec0a549d9b5ac00776e8e8d6d) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/spinlock.h | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 52077610cb7e..9115252e5425 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 525dd535443e..3d5cf064d7a1 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -92,10 +92,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) : "memory"); } +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner == lock.next; +} + static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - arch_spinlock_t lockval = ACCESS_ONCE(*lock); - return lockval.owner != lockval.next; + return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) From 9e7ced5a249da2bec86d53d0e11e90b6a8e1afe2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:28 +0100 Subject: [PATCH 0972/1368] arm64: cmpxchg: implement cmpxchg64_relaxed This patch introduces cmpxchg64_relaxed for arm64 using the existing cmpxchg_local macro, which performs a cmpxchg operation (up to 64 bits) without barrier semantics. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit cf10b79a7d88edc689479af989b3a88e9adf07ff) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cmpxchg.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 8a8ce0e73a38..3914c0dcd09c 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -173,4 +173,6 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) +#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n)) + #endif /* __ASM_CMPXCHG_H */ From 9109f9215e9e238f0ba4d34b048f0d729a954460 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 7 Oct 2013 16:42:05 +0100 Subject: [PATCH 0973/1368] arm64: Export __copy_in_user() to modules This function may be called from loadable modules, so it needs exporting. Signed-off-by: Catalin Marinas Reported-by: Loc Ho (cherry picked from commit 2a3f912c782f2364f5e5813ab66ca6c92fb43acb) Signed-off-by: Mark Brown --- arch/arm64/kernel/arm64ksyms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 41b4f626d554..e7ee770c0697 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -39,6 +39,7 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); From b357e64d1fb045568adeacc7e504788401d9db9b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 7 Oct 2013 18:30:34 +0100 Subject: [PATCH 0974/1368] arm64: update 32-bit kuser helpers to ARMv8 This patch updates the barrier semantics in the kuser helper functions to take advantage of the ARMv8 additions to AArch32, which are guaranteed to be available in situations where these functions will be called. Note that this slightly changes the cmpxchg functions in that they are no longer necessarily full barriers if they return 1. However, the documentation only states they include their own barriers "as needed", not that they are obligated to act as a full barrier for the caller. Signed-off-by: Robin Murphy Acked-by: Will Deacon CC: Matthew Leach CC: Dave Martin CC: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit d0f38f9130b7683e39611c5a661349e301ee43c8) Signed-off-by: Mark Brown --- arch/arm64/kernel/kuser32.S | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb1d8bc..f2754710f5e9 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -35,33 +35,30 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xf57ff05f // dmb sy - .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] + .inst 0xe1b20e9f // 1: ldaexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 - .inst 0x01a23f96 // strexdeq r3, r6, [r2] + .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b - .inst 0xf57ff05f // dmb sy .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr .align 5 __kuser_memory_barrier: // 0xffff0fa0 - .inst 0xf57ff05f // dmb sy + .inst 0xf57ff05b // dmb ish .inst 0xe12fff1e // bx lr .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xf57ff05f // dmb sy - .inst 0xe1923f9f // 1: ldrex r3, [r2] + .inst 0xe1923e9f // 1: ldaex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 - .inst 0x01823f91 // strexeq r3, r1, [r2] + .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xeaffffef // b <__kuser_memory_barrier> + .inst 0xe12fff1e // bx lr .align 5 __kuser_get_tls: // 0xffff0fe0 From b35f594dc7939d9be5fcae354738a419cc0c081e Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 4 Nov 2013 16:38:47 +0000 Subject: [PATCH 0975/1368] arm64: use generic RW_DATA_SECTION macro in linker script The .data section in the arm64 linker script currently lacks a definition for page-aligned data. This leads to a .page_aligned section being placed between the end of data and start of bss. This patch corrects that by using the generic RW_DATA_SECTION macro which includes support for page-aligned data. Signed-off-by: Mark Salter Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 3c620626c0cd4cfca856d70a846398275b48a768) Signed-off-by: Mark Brown --- arch/arm64/kernel/vmlinux.lds.S | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 1e8e6be0241c..047feef69828 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -83,30 +83,13 @@ SECTIONS PERCPU_SECTION(64) __init_end = .; - . = ALIGN(THREAD_SIZE); - __data_loc = .; - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(64) - READ_MOSTLY_DATA(64) - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } + . = ALIGN(PAGE_SIZE); + _data = .; + __data_loc = _data - LOAD_OFFSET; + _sdata = .; + RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + _edata = .; _edata_loc = __data_loc + SIZEOF(.data); BSS_SECTION(0, 0, 0) From d5c5e6a15e4786dc21a08937821b6365466d6e34 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Nov 2013 20:14:58 +0000 Subject: [PATCH 0976/1368] arm64: fix access to preempt_count from assembly code preempt_count is defined as an int. Oddly enough, we access it as a 64bit value. Things become interesting when running a BE kernel, and looking at the current CPU number, which is stored as an int next to preempt_count. Like in a per-cpu interrupt handler, for example... Using a 32bit access fixes the issue for good. Cc: Matthew Leach Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 717321fcb58ed95169bf344ae47ac6098ba5dfbe) Signed-off-by: Mark Brown --- arch/arm64/kernel/entry.S | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3881fd115ebb..e1166145ca29 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -311,14 +311,14 @@ el1_irq: #endif #ifdef CONFIG_PREEMPT get_thread_info tsk - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x0, x24, #1 // increment it - str x0, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w0, w24, #1 // increment it + str w0, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - str x24, [tsk, #TI_PREEMPT] // restore preempt count - cbnz x24, 1f // preempt count != 0 + str w24, [tsk, #TI_PREEMPT] // restore preempt count + cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt @@ -509,15 +509,15 @@ el0_irq_naked: #endif get_thread_info tsk #ifdef CONFIG_PREEMPT - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x23, x24, #1 // increment it - str x23, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w23, w24, #1 // increment it + str w23, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - ldr x0, [tsk, #TI_PREEMPT] - str x24, [tsk, #TI_PREEMPT] - cmp x0, x23 + ldr w0, [tsk, #TI_PREEMPT] + str w24, [tsk, #TI_PREEMPT] + cmp w0, w23 b.eq 1f mov x1, #0 str x1, [x1] // BUG From 4b905a8fdae8797cebe274d0150b4a1799b98a3e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 23 Oct 2013 16:50:07 +0100 Subject: [PATCH 0977/1368] arm64: Use 42-bit address space with 64K pages This patch expands the VA_BITS to 42 when the 64K page configuration is enabled allowing 2TB kernel linear mapping. Linux still uses 2 levels of page tables in this configuration with pgd now being a full page. Signed-off-by: Catalin Marinas Acked-by: Will Deacon Acked-by: Marc Zyngier (cherry picked from commit 847264fb7e73ade5b5e4b6eea3daa243a1f5217e) Signed-off-by: Mark Brown --- Documentation/arm64/memory.txt | 27 ++++++++++++++++++- arch/arm64/include/asm/memory.h | 11 +++++--- arch/arm64/include/asm/pgtable-2level-hwdef.h | 4 +-- arch/arm64/include/asm/pgtable.h | 2 +- 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 5f583af0a6e1..a776d7a7c3cc 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -21,7 +21,7 @@ The swapper_pgd_dir address is written to TTBR1 and never written to TTBR0. -AArch64 Linux memory layout: +AArch64 Linux memory layout with 4KB pages: Start End Size Use ----------------------------------------------------------------------- @@ -46,6 +46,31 @@ ffffffbffc000000 ffffffbfffffffff 64MB modules ffffffc000000000 ffffffffffffffff 256GB kernel logical memory map +AArch64 Linux memory layout with 64KB pages: + +Start End Size Use +----------------------------------------------------------------------- +0000000000000000 000003ffffffffff 4TB user + +fffffc0000000000 fffffdfbfffeffff ~2TB vmalloc + +fffffdfbffff0000 fffffdfbffffffff 64KB [guard page] + +fffffdfc00000000 fffffdfdffffffff 8GB vmemmap + +fffffdfe00000000 fffffdfffbbfffff ~8GB [guard, future vmmemap] + +fffffdfffbc00000 fffffdfffbdfffff 2MB earlyprintk device + +fffffdfffbe00000 fffffdfffbe0ffff 64KB PCI I/O space + +fffffdfffbe10000 fffffdfffbffffff ~2MB [guard] + +fffffdfffc000000 fffffdffffffffff 64MB modules + +fffffe0000000000 ffffffffffffffff 2TB kernel logical memory map + + Translation table lookup with 4KB pages: +--------+--------+--------+--------+--------+--------+--------+--------+ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 381f556b664e..9abb57d743b9 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -33,18 +33,23 @@ #define UL(x) _AC(x, UL) /* - * PAGE_OFFSET - the virtual address of the start of the kernel image. + * PAGE_OFFSET - the virtual address of the start of the kernel image (top + * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ -#define PAGE_OFFSET UL(0xffffffc000000000) +#ifdef CONFIG_ARM64_64K_PAGES +#define VA_BITS (42) +#else +#define VA_BITS (39) +#endif +#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) #define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M) -#define VA_BITS (39) #define TASK_SIZE_64 (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h index 0a8ed3f94e93..2593b490c56a 100644 --- a/arch/arm64/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm64/include/asm/pgtable-2level-hwdef.h @@ -21,10 +21,10 @@ * 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not * used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each * entry representing 512MB. The user and kernel address spaces are limited to - * 512GB and therefore we only use 1024 entries in the PGD. + * 4TB in the 64KB page configuration. */ #define PTRS_PER_PTE 8192 -#define PTRS_PER_PGD 1024 +#define PTRS_PER_PGD 8192 /* * PGDIR_SHIFT determines the size a top-level page table entry can map. diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 508ee304fde0..608ec24f2f52 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -34,7 +34,7 @@ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. */ -#define VMALLOC_START UL(0xffffff8000000000) +#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) From 3cf0e16070078af9afdd9a067af9ab64f322ebb8 Mon Sep 17 00:00:00 2001 From: "T.J. Purtell" Date: Tue, 5 Nov 2013 17:07:18 +0000 Subject: [PATCH 0978/1368] arm64: compat: Clear the IT state independent of the 32-bit ARM or Thumb-2 mode The ARM architecture reference specifies that the IT state bits in the PSR must be all zeros in ARM mode or behavior is unspecified. If an ARM function is registered as a signal handler, and that signal is delivered inside a block of instructions following an IT instruction, some of the instructions at the beginning of the signal handler may be skipped if the IT state bits of the Program Status Register are not cleared by the kernel. Signed-off-by: T.J. Purtell [catalin.marinas@arm.com: code comment and commit log updated] Signed-off-by: Catalin Marinas (cherry picked from commit aa62c2091129af81a172350b718eb35d5448cebc) Signed-off-by: Mark Brown --- arch/arm64/kernel/signal32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e393174fe859..3edf7f48c54b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -474,12 +474,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; - if (thumb) { + if (thumb) spsr |= COMPAT_PSR_T_BIT; - spsr &= ~COMPAT_PSR_IT_MASK; - } else { + else spsr &= ~COMPAT_PSR_T_BIT; - } + + /* The IT state must be cleared for both ARM and Thumb-2 */ + spsr &= ~COMPAT_PSR_IT_MASK; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); From ff3986a150bbea5ef6c34b0cecdc23e230ef19ab Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 6 Nov 2013 11:42:41 +0000 Subject: [PATCH 0979/1368] arm64: locks: Remove CONFIG_GENERIC_LOCKBREAK Commit 52ea2a560a9d (arm64: locks: introduce ticket-based spinlock implementation) introduces the arch_spin_is_contended() function making CONFIG_GENERIC_LOCKBREAK unnecessary. Signed-off-by: Catalin Marinas Acked-by: Will Deacon (cherry picked from commit 61c77e0802719efce8966619cdc4234de7f252c1) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9115252e5425..0f24a8515841 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -63,10 +63,6 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config GENERIC_LOCKBREAK - def_bool y - depends on SMP && PREEMPT - config RWSEM_GENERIC_SPINLOCK def_bool y From 3b1bf3873e56409bbb5b2ec29d6ba418fd69b9fe Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Thu, 7 Nov 2013 15:25:44 +0000 Subject: [PATCH 0980/1368] ARM64: /proc/interrupts: display IPIs of online CPUs only The non-IPI interrupts are displayed only for the online cpus from show_interrupts in kernel/irq/proc.c before calling arch_show_interrupts(). As a result, the column headers and the IPI count don't match if any CPU is offline. This patch fixes show_ipi_list to display IPIs for online CPUs only. Signed-off-by: Sudeep KarkadaNagesha Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 67317c2689567c24d18e0dd43ab6d409fd42dc6e) Signed-off-by: Mark Brown --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index edbfe241eacb..97eaf79319d5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -451,7 +451,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE, prec >= 4 ? " " : ""); - for_each_present_cpu(cpu) + for_each_online_cpu(cpu) seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs[i])); seq_printf(p, " %s\n", ipi_types[i]); From 555fdaf51b5022858eb79151af4c780b3f4e6b59 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Dec 2013 16:11:00 +0000 Subject: [PATCH 0981/1368] arm64: ensure completion of TLB invalidatation Currently there is no dsb between the tlbi in __cpu_setup and the write to SCTLR_EL1 which enables the MMU in __turn_mmu_on. This means that the TLB invalidation is not guaranteed to have completed at the point address translation is enabled, leading to a number of possible issues including incorrect translations and TLB conflict faults. This patch moves the tlbi in __cpu_setup above an existing dsb used to synchronise I-cache invalidation, ensuring that the TLBs have been invalidated at the point the MMU is enabled. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 3cea71bc6b470372ae407881b87128aadf0afec0) Signed-off-by: Mark Brown --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b1b31bbc967b..2fbd93e9be15 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -111,12 +111,12 @@ ENTRY(__cpu_setup) bl __flush_dcache_all mov lr, x28 ic iallu // I+BTB cache invalidate + tlbi vmalle1is // invalidate I + D TLBs dsb sy mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD msr mdscr_el1, xzr // Reset mdscr_el1 - tlbi vmalle1is // invalidate I + D TLBs /* * Memory region attributes for LPAE: * From 8751cf9a6c5f5bf02a2de8a4c025e09b38556df4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 22 Nov 2013 21:07:31 +0000 Subject: [PATCH 0982/1368] arm64: make default NR_CPUS 8 Rather than continue to add per platform defaults, make the default a likely common core count. 8 is also the default for x86. Signed-off-by: Rob Herring Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 62aceb8ff4b3f442575eb7e23629da36020dca77) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f24a8515841..dba7308041ad 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -152,8 +152,7 @@ config NR_CPUS range 2 32 depends on SMP # These have to remain sorted largest to smallest - default "8" if ARCH_XGENE - default "4" + default "8" source kernel/Kconfig.preempt From edf0cf19ba3b6c750871b391c5edcb679c02daad Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 Nov 2013 18:10:47 +0000 Subject: [PATCH 0983/1368] arm64: percpu: implement optimised pcpu access using tpidr_el1 This patch implements optimised percpu variable accesses using the el1 r/w thread register (tpidr_el1) along the same lines as arch/arm/. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 7158627686f02319c50c8d9d78f75d4c8d126ff2) Signed-off-by: Mark Brown --- arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/percpu.h | 41 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 10 ++++++++ arch/arm64/kernel/smp.c | 6 +++-- 4 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/include/asm/percpu.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 79a642d199f2..ae0612c4fa45 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -26,7 +26,6 @@ generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h generic-y += pci.h -generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h generic-y += resource.h diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h new file mode 100644 index 000000000000..13fb0b3efc5f --- /dev/null +++ b/arch/arm64/include/asm/percpu.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_PERCPU_H +#define __ASM_PERCPU_H + +static inline void set_my_cpu_offset(unsigned long off) +{ + asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); +} + +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long off; + register unsigned long *sp asm ("sp"); + + /* + * We want to allow caching the value, so avoid using volatile and + * instead use a fake stack read to hazard against barrier(). + */ + asm("mrs %0, tpidr_el1" : "=r" (off) : "Q" (*sp)); + + return off; +} +#define __my_cpu_offset __my_cpu_offset() + +#include + +#endif /* __ASM_PERCPU_H */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3bab6766eed5..43c89cbb30fc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -98,6 +98,16 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +void __init smp_setup_processor_id(void) +{ + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); +} + bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == cpu_logical_map(cpu); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 53ed158ec31c..08030da7f3c2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -122,8 +122,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); - printk("CPU%u: Booted secondary processor\n", cpu); - /* * All kernel threads share the same mm context; grab a * reference and switch to it. @@ -132,6 +130,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void) current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + printk("CPU%u: Booted secondary processor\n", cpu); + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. @@ -274,6 +275,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } static void (*smp_cross_call)(const struct cpumask *, unsigned int); From 18ce3c2a82fb1a7461b8d63cd54a40a47067be47 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Nov 2013 17:20:22 +0000 Subject: [PATCH 0984/1368] arm64: use generic strnlen_user and strncpy_from_user functions This patch implements the word-at-a-time interface for arm64 using the same algorithm as ARM. We use the fls64 macro, which expands to a clz instruction via a compiler builtin. Big-endian configurations make use of the implementation from asm-generic. With this implemented, we can replace our byte-at-a-time strnlen_user and strncpy_from_user functions with the optimised generic versions. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 12a0ef7b0ac38677bd2d85f33df5ca0a57868819) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 2 + arch/arm64/include/asm/uaccess.h | 25 ++---------- arch/arm64/include/asm/word-at-a-time.h | 54 +++++++++++++++++++++++++ arch/arm64/kernel/arm64ksyms.c | 5 +-- arch/arm64/lib/Makefile | 8 ++-- arch/arm64/lib/strncpy_from_user.S | 50 ----------------------- arch/arm64/lib/strnlen_user.S | 47 --------------------- 7 files changed, 64 insertions(+), 127 deletions(-) create mode 100644 arch/arm64/include/asm/word-at-a-time.h delete mode 100644 arch/arm64/lib/strncpy_from_user.S delete mode 100644 arch/arm64/lib/strnlen_user.S diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 40bbfe560551..8d555ca7cdfa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -16,6 +16,8 @@ config ARM64 select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND select HAVE_ARCH_TRACEHOOK diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 008f8481da65..8b181415e843 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs) }) #define access_ok(type, addr, size) __range_ok(addr, size) +#define user_addr_max get_fs /* * The "__xxx" versions of the user access functions do not verify the address @@ -238,9 +239,6 @@ extern unsigned long __must_check __copy_to_user(void __user *to, const void *fr extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); -extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count); -extern unsigned long __must_check __strnlen_user(const char __user *s, long n); - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) @@ -274,24 +272,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } -static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - res = __strncpy_from_user(dst, src, count); - return res; -} +extern long strncpy_from_user(char *dest, const char __user *src, long count); -#define strlen_user(s) strnlen_user(s, ~0UL >> 1) - -static inline long __must_check strnlen_user(const char __user *s, long n) -{ - unsigned long res = 0; - - if (__addr_ok(s)) - res = __strnlen_user(s, n); - - return res; -} +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..27a167d044d9 --- /dev/null +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_WORD_AT_A_TIME_H +#define __ASM_WORD_AT_A_TIME_H + +#ifndef __AARCH64EB__ + +#include + +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, + const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +#define prep_zero_mask(a, bits, c) (bits) + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + return fls64(mask) >> 3; +} + +#else /* __AARCH64EB__ */ +#include +#endif + +#endif /* __ASM_WORD_AT_A_TIME_H */ diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index e7ee770c0697..338b568cd8ae 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -29,13 +29,10 @@ #include - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); + /* user mem (segment) */ EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 59acc0ef0462..328ce1a99daa 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,6 +1,4 @@ -lib-y := bitops.o delay.o \ - strncpy_from_user.o strnlen_user.o clear_user.o \ - copy_from_user.o copy_to_user.o copy_in_user.o \ - copy_page.o clear_page.o \ - memchr.o memcpy.o memmove.o memset.o \ +lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ + copy_to_user.o copy_in_user.o copy_page.o \ + clear_page.o memchr.o memcpy.o memmove.o memset.o \ strchr.o strrchr.o diff --git a/arch/arm64/lib/strncpy_from_user.S b/arch/arm64/lib/strncpy_from_user.S deleted file mode 100644 index 56e448a831a0..000000000000 --- a/arch/arm64/lib/strncpy_from_user.S +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Based on arch/arm/lib/strncpy_from_user.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - - .text - .align 5 - -/* - * Copy a string from user space to kernel space. - * x0 = dst, x1 = src, x2 = byte length - * returns the number of characters copied (strlen of copied string), - * -EFAULT on exception, or "len" if we fill the whole buffer - */ -ENTRY(__strncpy_from_user) - mov x4, x1 -1: subs x2, x2, #1 - bmi 2f -USER(9f, ldrb w3, [x1], #1 ) - strb w3, [x0], #1 - cbnz w3, 1b - sub x1, x1, #1 // take NUL character out of count -2: sub x0, x1, x4 - ret -ENDPROC(__strncpy_from_user) - - .section .fixup,"ax" - .align 0 -9: strb wzr, [x0] // null terminate - mov x0, #-EFAULT - ret - .previous diff --git a/arch/arm64/lib/strnlen_user.S b/arch/arm64/lib/strnlen_user.S deleted file mode 100644 index 7f7b176a5646..000000000000 --- a/arch/arm64/lib/strnlen_user.S +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Based on arch/arm/lib/strnlen_user.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - - .text - .align 5 - -/* Prototype: unsigned long __strnlen_user(const char *str, long n) - * Purpose : get length of a string in user memory - * Params : str - address of string in user memory - * Returns : length of string *including terminator* - * or zero on exception, or n if too long - */ -ENTRY(__strnlen_user) - mov x2, x0 -1: subs x1, x1, #1 - b.mi 2f -USER(9f, ldrb w3, [x0], #1 ) - cbnz w3, 1b -2: sub x0, x0, x2 - ret -ENDPROC(__strnlen_user) - - .section .fixup,"ax" - .align 0 -9: mov x0, #0 - ret - .previous From a9e7bd3bdf50acc92e9e04c56d06026c70f515ef Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Nov 2013 19:31:24 +0000 Subject: [PATCH 0985/1368] arm64: futex: ensure .fixup entries are sufficiently aligned AArch64 instructions must be 4-byte aligned, so make sure this is true for the futex .fixup section. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 4da7a56c59f28e27e8dcff61b5d7b05f6e203606) Signed-off-by: Mark Brown --- arch/arm64/include/asm/futex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index c582fa316366..78cc3aba5d69 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,6 +30,7 @@ " cbnz %w3, 1b\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ +" .align 2\n" \ "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection\n" \ From 43296ae8c72a377a4af761de90de4de7cde90ce9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Nov 2013 19:32:13 +0000 Subject: [PATCH 0986/1368] arm64: dcache: select DCACHE_WORD_ACCESS for little-endian CPUs DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string comparisons in the vfs layer. This patch implements support for load_unaligned_zeropad in much the same way as has been done for ARM, although big-endian systems are also supported. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 7bc13fd33adb9536bd73965cd46bbf7377df097c) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/word-at-a-time.h | 40 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8d555ca7cdfa..075075f9d247 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,7 @@ config ARM64 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select DCACHE_WORD_ACCESS select GENERIC_CLOCKEVENTS select GENERIC_IOMAP select GENERIC_IRQ_PROBE diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 27a167d044d9..aab5bf09e9d9 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -47,8 +47,48 @@ static inline unsigned long find_zero(unsigned long mask) return fls64(mask) >> 3; } +#define zero_bytemask(mask) (mask) + #else /* __AARCH64EB__ */ #include #endif +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset; + + /* Load word from unaligned pointer addr */ + asm( + "1: ldr %0, %3\n" + "2:\n" + " .pushsection .fixup,\"ax\"\n" + " .align 2\n" + "3: and %1, %2, #0x7\n" + " bic %2, %2, #0x7\n" + " ldr %0, [%2]\n" + " lsl %1, %1, #0x3\n" +#ifndef __AARCH64EB__ + " lsr %0, %0, %1\n" +#else + " lsl %0, %0, %1\n" +#endif + " b 2b\n" + " .popsection\n" + " .pushsection __ex_table,\"a\"\n" + " .align 3\n" + " .quad 1b, 3b\n" + " .popsection" + : "=&r" (ret), "=&r" (offset) + : "r" (addr), "Q" (*(unsigned long *)addr)); + + return ret; +} + #endif /* __ASM_WORD_AT_A_TIME_H */ From e342d91c37452299405e70e80e4615890dec1c10 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 16 Dec 2013 17:50:08 +0000 Subject: [PATCH 0987/1368] arm64: kconfig: select HAVE_EFFICIENT_UNALIGNED_ACCESS ARMv8 CPUs can perform efficient unaligned memory accesses in hardware and this feature is relied up on by code such as the dcache word-at-a-time name hashing. This patch selects HAVE_EFFICIENT_UNALIGNED_ACCESS for arm64. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 50afc33a90e710c02d9bbf2f3673936365f0e690) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 075075f9d247..61ef3f0de6ee 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,7 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS + select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_GENERIC_DMA_COHERENT select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS From a62c1e2a0a220b20d942e0cc0ca204347b7b7aa6 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 5 Dec 2013 13:30:10 +0000 Subject: [PATCH 0988/1368] ARM64: check stack pointer in get_wchan get_wchan() is lockless. Task may wakeup at any time and change its own stack, thus each next stack frame may be overwritten and filled with random stuff. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Catalin Marinas (cherry picked from commit 408c3658b0d49315974ce8b5aed385c8e1527595) Signed-off-by: Mark Brown --- arch/arm64/kernel/process.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index adc68bf21d8b..02a41bba165d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -308,6 +308,7 @@ struct task_struct *__switch_to(struct task_struct *prev, unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; + unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; @@ -315,9 +316,11 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); + stack_page = (unsigned long)task_stack_page(p); do { - int ret = unwind_frame(&frame); - if (ret < 0) + if (frame.sp < stack_page || + frame.sp >= stack_page + THREAD_SIZE || + unwind_frame(&frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; From 75448bec59b915885b3e84bca0d38acf34b5db61 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 5 Dec 2013 13:30:16 +0000 Subject: [PATCH 0989/1368] ARM64: fix framepointer check in unwind_frame We need at least 24 bytes above frame pointer. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Catalin Marinas (cherry picked from commit 26920dd2da79a3207803da9453c0e6c82ac968ca) Signed-off-by: Mark Brown --- arch/arm64/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 048334bb2651..38f0558f0c0a 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -43,7 +43,7 @@ int unwind_frame(struct stackframe *frame) low = frame->sp; high = ALIGN(low, THREAD_SIZE); - if (fp < low || fp > high || fp & 0xf) + if (fp < low || fp > high - 0x18 || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; From eec87ea4cbd500f024960831946d083cad1885f1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 16 Mar 2013 08:48:13 +0000 Subject: [PATCH 0990/1368] arm64: debug: consolidate software breakpoint handlers The software breakpoint handlers are hooked in directly from ptrace, which makes it difficult to add additional handlers for things like kprobes and kgdb. This patch moves the handling code into debug-monitors.c, where we can dispatch to different debug subsystems more easily. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 1442b6ed249d2b3d2cfcf45b65ac64393495c96c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/debug-monitors.h | 9 ++++ arch/arm64/include/asm/ptrace.h | 2 - arch/arm64/kernel/debug-monitors.c | 66 ++++++++++++++++++++++++- arch/arm64/kernel/ptrace.c | 59 ---------------------- arch/arm64/kernel/traps.c | 5 +- 5 files changed, 75 insertions(+), 66 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7eaa0b302493..ef8235c68c09 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -83,6 +83,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif +#ifdef CONFIG_COMPAT +int aarch32_break_handler(struct pt_regs *regs); +#else +static int aarch32_break_handler(struct pt_regs *regs) +{ + return -EFAULT; +} +#endif + #endif /* __ASSEMBLY */ #endif /* __KERNEL__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41a71ee4c3df..0dacbbf9458b 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -171,7 +171,5 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif -extern int aarch32_break_trap(struct pt_regs *regs); - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4726dc054b3..08018e3df580 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -226,13 +227,74 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } -static int __init single_step_init(void) +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (!user_mode(regs)) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); return 0; } -arch_initcall(single_step_init); +arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6e1e77f1831c..fecdbf7de82e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -53,28 +53,6 @@ void ptrace_disable(struct task_struct *child) { } -/* - * Handle hitting a breakpoint. - */ -static int ptrace_break(struct pt_regs *regs) -{ - siginfo_t info = { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); - return 0; -} - -static int arm64_break_trap(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - return ptrace_break(regs); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Handle hitting a HW-breakpoint. @@ -817,33 +795,6 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; -int aarch32_break_trap(struct pt_regs *regs) -{ - unsigned int instr; - bool bp = false; - void __user *pc = (void __user *)instruction_pointer(regs); - - if (compat_thumb_mode(regs)) { - /* get 16-bit Thumb instruction */ - get_user(instr, (u16 __user *)pc); - if (instr == AARCH32_BREAK_THUMB2_LO) { - /* get second half of 32-bit Thumb-2 instruction */ - get_user(instr, (u16 __user *)(pc + 2)); - bp = instr == AARCH32_BREAK_THUMB2_HI; - } else { - bp = instr == AARCH32_BREAK_THUMB; - } - } else { - /* 32-bit ARM instruction */ - get_user(instr, (u32 __user *)pc); - bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; - } - - if (bp) - return ptrace_break(regs); - return 1; -} - static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t __user *ret) { @@ -1111,16 +1062,6 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } - -static int __init ptrace_break_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); - return 0; -} -core_initcall(ptrace_break_init); - - asmlinkage int syscall_trace(int dir, struct pt_regs *regs) { unsigned long saved_reg; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f30852d28590..7ffadddb645d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); -#ifdef CONFIG_COMPAT /* check for AArch32 breakpoint instructions */ - if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + if (!aarch32_break_handler(regs)) return; -#endif if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { From a6e15359d5c2aa6dd7f1eb0830a341c15e7c55b3 Mon Sep 17 00:00:00 2001 From: Sandeepa Prabhu Date: Wed, 4 Dec 2013 05:50:20 +0000 Subject: [PATCH 0991/1368] arm64: support single-step and breakpoint handler hooks AArch64 Single Steping and Breakpoint debug exceptions will be used by multiple debug framworks like kprobes & kgdb. This patch implements the hooks for those frameworks to register their own handlers for handling breakpoint and single step events. Reworked the debug exception handler in entry.S: do_dbg to route software breakpoint (BRK64) exception to do_debug_exception() Signed-off-by: Sandeepa Prabhu Signed-off-by: Deepak Saxena Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit ee6214cec7818867f368c35843ea1f3dffcbb57c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/debug-monitors.h | 21 ++++++ arch/arm64/kernel/debug-monitors.c | 88 ++++++++++++++++++++++++- arch/arm64/kernel/entry.S | 2 + 3 files changed, 110 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index ef8235c68c09..1bc1c4f15c14 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -62,6 +62,27 @@ struct task_struct; #define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */ +#define DBG_HOOK_HANDLED 0 +#define DBG_HOOK_ERROR 1 + +struct step_hook { + struct list_head node; + int (*fn)(struct pt_regs *regs, unsigned int esr); +}; + +void register_step_hook(struct step_hook *hook); +void unregister_step_hook(struct step_hook *hook); + +struct break_hook { + struct list_head node; + u32 esr_val; + u32 esr_mask; + int (*fn)(struct pt_regs *regs, unsigned int esr); +}; + +void register_break_hook(struct break_hook *hook); +void unregister_break_hook(struct break_hook *hook); + u8 debug_monitors_arch(void); void enable_debug_monitors(enum debug_el el); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 08018e3df580..2867749b56be 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -188,6 +188,48 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) regs->pstate = spsr; } +/* EL1 Single Step Handler hooks */ +static LIST_HEAD(step_hook); +DEFINE_RWLOCK(step_hook_lock); + +void register_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_add(&hook->node, &step_hook); + write_unlock(&step_hook_lock); +} + +void unregister_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_del(&hook->node); + write_unlock(&step_hook_lock); +} + +/* + * Call registered single step handers + * There is no Syndrome info to check for determining the handler. + * So we call all the registered handlers, until the right handler is + * found which returns zero. + */ +static int call_step_hook(struct pt_regs *regs, unsigned int esr) +{ + struct step_hook *hook; + int retval = DBG_HOOK_ERROR; + + read_lock(&step_hook_lock); + + list_for_each_entry(hook, &step_hook, node) { + retval = hook->fn(regs, esr); + if (retval == DBG_HOOK_HANDLED) + break; + } + + read_unlock(&step_hook_lock); + + return retval; +} + static int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -215,7 +257,9 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { - /* TODO: route to KGDB */ + if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; + pr_warning("Unexpected kernel single-step exception at EL1\n"); /* * Re-enable stepping since we know that we will be @@ -227,11 +271,53 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } +/* + * Breakpoint handler is re-entrant as another breakpoint can + * hit within breakpoint handler, especically in kprobes. + * Use reader/writer locks instead of plain spinlock. + */ +static LIST_HEAD(break_hook); +DEFINE_RWLOCK(break_hook_lock); + +void register_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_add(&hook->node, &break_hook); + write_unlock(&break_hook_lock); +} + +void unregister_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_del(&hook->node); + write_unlock(&break_hook_lock); +} + +static int call_break_hook(struct pt_regs *regs, unsigned int esr) +{ + struct break_hook *hook; + int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + + read_lock(&break_hook_lock); + list_for_each_entry(hook, &break_hook, node) + if ((esr & hook->esr_mask) == hook->esr_val) + fn = hook->fn; + read_unlock(&break_hook_lock); + + return fn ? fn(regs, esr) : DBG_HOOK_ERROR; +} + static int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { siginfo_t info; + if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; + + pr_warn("unexpected brk exception at %lx, esr=0x%x\n", + (long)instruction_pointer(regs), esr); + if (!user_mode(regs)) return -EFAULT; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1d1314280a03..6d5f1cb01b11 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -288,6 +288,8 @@ el1_dbg: /* * Debug exception handling */ + cmp x24, #ESR_EL1_EC_BRK64 // if BRK64 + cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only mrs x0, far_el1 mov x2, sp // struct pt_regs From c2f82992879739219a04d36de1cd6bbea1753cfa Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Tue, 17 Dec 2013 18:19:46 +0000 Subject: [PATCH 0992/1368] arm64: Remove outdated comment Code referenced in the comment has moved to arch/arm64/kernel/cputable.c Signed-off-by: Liviu Dudau Signed-off-by: Catalin Marinas (cherry picked from commit 81cac699440fc3707fd80f16bf34a7e506d41487) Signed-off-by: Mark Brown --- arch/arm64/kernel/setup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 43c89cbb30fc..bb99e3a5ac56 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -117,11 +117,6 @@ static void __init setup_processor(void) { struct cpu_info *cpu_info; - /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc.S - */ cpu_info = lookup_processor_type(read_cpuid_id()); if (!cpu_info) { printk("CPU configuration botched (ID %08x), unable to continue.\n", From 5c1bcd9ba5ff1db5eeac2de987945fcf7e77677a Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Sat, 14 Dec 2013 00:20:13 +0000 Subject: [PATCH 0993/1368] arm64: Remove unused __data_loc variable The __data_loc variable is an unused left over from the 32 bit arm implementation. Remove that variable and adjust the __mmap_switched startup routine accordingly. Signed-off-by: Geoff Levand for Huawei, Linaro Signed-off-by: Catalin Marinas (cherry picked from commit b22cf637bbaf99d4caf9908997a32f91cdcfae52) Signed-off-by: Mark Brown --- arch/arm64/kernel/head.S | 10 ---------- arch/arm64/kernel/vmlinux.lds.S | 2 -- 2 files changed, 12 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index bf7efdf2d7e7..a0c10594f628 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -454,8 +454,6 @@ ENDPROC(__create_page_tables) .type __switch_data, %object __switch_data: .quad __mmap_switched - .quad __data_loc // x4 - .quad _data // x5 .quad __bss_start // x6 .quad _end // x7 .quad processor_id // x4 @@ -470,15 +468,7 @@ __switch_data: __mmap_switched: adr x3, __switch_data + 8 - ldp x4, x5, [x3], #16 ldp x6, x7, [x3], #16 - cmp x4, x5 // Copy data segment if needed -1: ccmp x5, x6, #4, ne - b.eq 2f - ldr x16, [x4], #8 - str x16, [x5], #8 - b 1b -2: 1: cmp x6, x7 b.hs 2f str xzr, [x6], #8 // Clear BSS diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index af46c8e13737..18a08e10357f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -85,11 +85,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); _data = .; - __data_loc = _data - LOAD_OFFSET; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) _edata = .; - _edata_loc = __data_loc + SIZEOF(.data); BSS_SECTION(0, 0, 0) _end = .; From a0d25707b775aa7f3875bf0aed7e092a68d3500a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 20 Jan 2014 05:00:21 +0000 Subject: [PATCH 0994/1368] arm64: mm: use ubfm for dcache_line_size Use 'ubfm' for the bitfield move instruction; thus, single instruction can be used instead of two instructions, when getting the minimum D-cache line size from CTR_EL0 register. Signed-off-by: Jingoo Han Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit bd5f6dc304a054ccdc8dab43bef5e41d9a575b61) Signed-off-by: Mark Brown --- arch/arm64/mm/proc-macros.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 8957b822010b..005d29e2977d 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -38,8 +38,7 @@ */ .macro dcache_line_size, reg, tmp mrs \tmp, ctr_el0 // read CTR - lsr \tmp, \tmp, #16 - and \tmp, \tmp, #0xf // cache line size encoding + ubfm \tmp, \tmp, #16, #19 // cache line size encoding mov \reg, #4 // bytes per word lsl \reg, \reg, \tmp // actual cache line size .endm From 2090a8e22be4075b6b827010d5325666bb93ce18 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Tue, 21 Jan 2014 01:17:47 +0000 Subject: [PATCH 0995/1368] arm64: mm: fix the function name in comment of __flush_dcache_area Fix the function name of comment of __flush_dcache_area, because __flush_dcache_area is the correct name. Also, the missing variable 'size' is added to the comment. Signed-off-by: Jingoo Han Signed-off-by: Catalin Marinas (cherry picked from commit 03324e6e6e66ebd171d9b4b90fd6a2655980dc13) Signed-off-by: Mark Brown --- arch/arm64/mm/cache.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 48a386094fa3..1ea9f26d1b70 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -146,7 +146,7 @@ ENDPROC(flush_icache_range) ENDPROC(__flush_cache_user_range) /* - * __flush_kern_dcache_page(kaddr) + * __flush_dcache_area(kaddr, size) * * Ensure that the data held in the page kaddr is written back to the * page in question. From 0f4a77d8b060c5a95fb823c4d205be845126c544 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 27 Jan 2014 07:19:32 +0000 Subject: [PATCH 0996/1368] arm64: mm: fix the function name in comment of cpu_do_switch_mm Fix the function name of comment of cpu_do_switch_mm, because cpu_do_switch_mm is the correct name. Signed-off-by: Jingoo Han Signed-off-by: Catalin Marinas (cherry picked from commit 812944e91dbbfeadaeeb4443a5560a7f45648f0b) Signed-off-by: Mark Brown --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 2fbd93e9be15..9ef7633a8eb9 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -81,7 +81,7 @@ ENTRY(cpu_do_idle) ENDPROC(cpu_do_idle) /* - * cpu_switch_mm(pgd_phys, tsk) + * cpu_do_switch_mm(pgd_phys, tsk) * * Set the translation table base pointer to be pgd_phys. * From 94464a71aadc3a3801fe0ca6b9a7f3ca566e3a83 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:12 +0000 Subject: [PATCH 0997/1368] arm64: fix typo: s/SERRROR/SERROR/ Somehow SERROR has acquired an additional 'R' in a couple of headers. This patch removes them before they spread further. As neither instance is in use yet, no other sites need to be fixed up. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit bfb67a5606376bb32cb6f93dc05cda2e8c2038a5) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/kvm_arm.h --- arch/arm64/include/asm/esr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 78834123a32e..c4a7f940b387 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -42,7 +42,7 @@ #define ESR_EL1_EC_SP_ALIGN (0x26) #define ESR_EL1_EC_FP_EXC32 (0x28) #define ESR_EL1_EC_FP_EXC64 (0x2C) -#define ESR_EL1_EC_SERRROR (0x2F) +#define ESR_EL1_EC_SERROR (0x2F) #define ESR_EL1_EC_BREAKPT_EL0 (0x30) #define ESR_EL1_EC_BREAKPT_EL1 (0x31) #define ESR_EL1_EC_SOFTSTP_EL0 (0x32) From 007f5253a50c554755465c7e586947b8baf6f18a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:13 +0000 Subject: [PATCH 0998/1368] arm64: simplify pgd_alloc Currently pgd_alloc has a redundant NULL check in its return path that can be removed with no ill effects. With that removed it's also possible to return early and eliminate the new_pgd temporary variable. This patch applies said modifications, making the logic of pgd_alloc correspond 1-1 with that of pgd_free. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 883d50a0ed403446437444a495356ce31e1197a3) Signed-off-by: Mark Brown --- arch/arm64/mm/pgd.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 7083cdada657..62c6101df260 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -32,17 +32,10 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *new_pgd; - if (PGD_SIZE == PAGE_SIZE) - new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)get_zeroed_page(GFP_KERNEL); else - new_pgd = kzalloc(PGD_SIZE, GFP_KERNEL); - - if (!new_pgd) - return NULL; - - return new_pgd; + return kzalloc(PGD_SIZE, GFP_KERNEL); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) From a268cb19ce04497bd27da6e8e8fd02ebc5d4c347 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Feb 2014 11:30:48 +0000 Subject: [PATCH 0999/1368] arm64: barriers: allow dsb macro to take option parameter The dsb instruction takes an option specifying both the target access types and shareability domain. This patch allows such an option to be passed to the dsb macro, resulting in potentially more efficient code. Currently the option is ignored until all callers are updated (unlike ARM, the option is mandated by the assembler). Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 4a7ac12eedd190cdf071e61145defa73df1675c0) Signed-off-by: Mark Brown --- arch/arm64/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index d4a63338a53c..60ac8661bee3 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,7 +25,7 @@ #define wfi() asm volatile("wfi" : : : "memory") #define isb() asm volatile("isb" : : : "memory") -#define dsb() asm volatile("dsb sy" : : : "memory") +#define dsb(opt) asm volatile("dsb sy" : : : "memory") #define mb() dsb() #define rmb() asm volatile("dsb ld" : : : "memory") From 8d0390d596f62f74c324aa627f4df56c2481f757 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 12:29:13 +0000 Subject: [PATCH 1000/1368] arm64: asm: remove redundant "cc" clobbers cbnz/tbnz don't update the condition flags, so remove the "cc" clobbers from inline asm blocks that only use these instructions to implement conditional branches. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 95c4189689f92fba7ecf9097173404d4928c6e9b) Signed-off-by: Mark Brown --- arch/arm64/include/asm/atomic.h | 24 ++++++++++-------------- arch/arm64/include/asm/cmpxchg.h | 8 ++++---- arch/arm64/include/asm/futex.h | 4 ++-- arch/arm64/include/asm/spinlock.h | 10 +++++----- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 836364468571..52c2b6a44c15 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -54,8 +54,7 @@ static inline void atomic_add(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_add_return(int i, atomic_t *v) @@ -70,7 +69,7 @@ static inline int atomic_add_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); return result; } @@ -86,8 +85,7 @@ static inline void atomic_sub(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_sub_return(int i, atomic_t *v) @@ -102,7 +100,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); return result; } @@ -121,7 +119,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); return oldval; } @@ -187,8 +185,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_add_return(long i, atomic64_t *v) @@ -203,7 +200,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); return result; } @@ -219,8 +216,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_sub_return(long i, atomic64_t *v) @@ -235,7 +231,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); return result; } @@ -254,7 +250,7 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); return oldval; } diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 3914c0dcd09c..9f85665be748 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -34,7 +34,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 2: asm volatile("// __xchg2\n" @@ -43,7 +43,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 4: asm volatile("// __xchg4\n" @@ -52,7 +52,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 8: asm volatile("// __xchg8\n" @@ -61,7 +61,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; default: BUILD_BUG(); diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 78cc3aba5d69..6230baba7869 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -40,7 +40,7 @@ " .popsection\n" \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "memory") static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -127,7 +127,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .popsection\n" : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) - : "cc", "memory"); + : "memory"); *uval = val; return ret; diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 3d5cf064d7a1..c45b7b1b7197 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -132,7 +132,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) " cbnz %w0, 2b\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); } static inline int arch_write_trylock(arch_rwlock_t *rw) @@ -146,7 +146,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); return !tmp; } @@ -187,7 +187,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) " cbnz %w1, 2b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline void arch_read_unlock(arch_rwlock_t *rw) @@ -201,7 +201,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) " cbnz %w1, 1b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline int arch_read_trylock(arch_rwlock_t *rw) @@ -216,7 +216,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); return !tmp2; } From e2a137cc656127ef5c7694a0e01d9dc28b8f072f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 7 Feb 2014 17:12:45 +0000 Subject: [PATCH 1001/1368] arm64: defconfig: Expand default enabled features FPGA implementations of the Cortex-A57 and Cortex-A53 are now available in the form of the SMM-A57 and SMM-A53 Soft Macrocell Models (SMMs) for Versatile Express. As these attach to a Motherboard Express V2M-P1 it would be useful to have support for some V2M-P1 peripherals enabled by default. Additionally a couple of of features have been introduced since the last defconfig update (CMA, jump labels) that would be good to have enabled by default to ensure they are build and boot tested. This patch updates the arm64 defconfig to enable support for these devices and features. The arm64 Kconfig is modified to select HAVE_PATA_PLATFORM, which is required to enable support for the CompactFlash controller on the V2M-P1. A few options which don't need to appear in defconfig are trimmed: * BLK_DEV - selected by default * EXPERIMENTAL - otherwise gone from the kernel * MII - selected by drivers which require it * USB_SUPPORT - selected by default Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 55834a773fe343912b705bef8114ec93fd337188) Signed-off-by: Mark Brown Conflicts: arch/arm64/configs/defconfig --- arch/arm64/Kconfig | 1 + arch/arm64/configs/defconfig | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 61ef3f0de6ee..62a2b7243b79 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -31,6 +31,7 @@ config ARM64 select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_MEMBLOCK + select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select IRQ_DOMAIN select MODULES_USE_ELF_RELA diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5b3e83217b03..6d48a72419b4 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -19,6 +18,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -26,7 +26,9 @@ CONFIG_MODULE_UNLOAD=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_XGENE=y CONFIG_SMP=y +CONFIG_PREEMPT=y CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_CMA=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y @@ -43,13 +45,17 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y # CONFIG_BLK_DEV is not set +CONFIG_DMA_CMA=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_PATA_PLATFORM=y +CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y -CONFIG_MII=y CONFIG_SMC91X=y +CONFIG_SMSC911X=y # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set @@ -62,13 +68,19 @@ CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_FB=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB=y +CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y From 92d246de32b84126e1a2974458608eeeec41e214 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 11:20:17 +0000 Subject: [PATCH 1002/1368] arm64: Add macros to manage processor debug state Add macros to enable and disable to manage PSTATE.D for debugging. The macros local_dbg_save and local_dbg_restore are moved to irqflags.h file KGDB boot tests fail because of PSTATE.D is masked. unmask it for debugging support Signed-off-by: Vijaya Kumar K Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c7db4ff5d2b459a579d348532a92fd5885520ce6) Signed-off-by: Mark Brown --- arch/arm64/include/asm/debug-monitors.h | 17 ----------------- arch/arm64/include/asm/irqflags.h | 23 +++++++++++++++++++++++ arch/arm64/kernel/debug-monitors.c | 1 + 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 1bc1c4f15c14..9beb51e5a1a6 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -43,23 +43,6 @@ enum debug_el { #ifndef __ASSEMBLY__ struct task_struct; -#define local_dbg_save(flags) \ - do { \ - typecheck(unsigned long, flags); \ - asm volatile( \ - "mrs %0, daif // local_dbg_save\n" \ - "msr daifset, #8" \ - : "=r" (flags) : : "memory"); \ - } while (0) - -#define local_dbg_restore(flags) \ - do { \ - typecheck(unsigned long, flags); \ - asm volatile( \ - "msr daif, %0 // local_dbg_restore\n" \ - : : "r" (flags) : "memory"); \ - } while (0) - #define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */ #define DBG_HOOK_HANDLED 0 diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index aa11943b8502..0ed52c691868 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -87,5 +87,28 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) return flags & PSR_I_BIT; } +/* + * save and restore debug state + */ +#define local_dbg_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + asm volatile( \ + "mrs %0, daif // local_dbg_save\n" \ + "msr daifset, #8" \ + : "=r" (flags) : : "memory"); \ + } while (0) + +#define local_dbg_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + asm volatile( \ + "msr daif, %0 // local_dbg_restore\n" \ + : : "r" (flags) : "memory"); \ + } while (0) + +#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory") +#define local_dbg_disable() asm("msr daifset, #8" : : : "memory") + #endif #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 2867749b56be..f092fdbf5479 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -139,6 +139,7 @@ static void clear_os_lock(void *unused) { asm volatile("msr oslar_el1, %0" : : "r" (0)); isb(); + local_dbg_enable(); } static int __cpuinit os_lock_notify(struct notifier_block *self, From 412cfd6f06511aaf8c0ee956a39fed14d089f7e0 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 16:50:18 +0530 Subject: [PATCH 1003/1368] arm64: KGDB: Add Basic KGDB support Add KGDB debug support for kernel debugging. With this patch, basic KGDB debugging is possible.GDB register layout is updated and GDB tool can establish connection with target and can set/clear breakpoints. Signed-off-by: Vijaya Kumar K Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit bcf5763b0d58d20e288ac52f96cbd7788e262cac) Signed-off-by: Mark Brown Conflicts: arch/arm64/kernel/Makefile --- arch/arm64/include/asm/debug-monitors.h | 47 ++++ arch/arm64/include/asm/kgdb.h | 84 +++++++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/kgdb.c | 288 ++++++++++++++++++++++++ 4 files changed, 420 insertions(+) create mode 100644 arch/arm64/include/asm/kgdb.h create mode 100644 arch/arm64/kernel/kgdb.c diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 9beb51e5a1a6..7c951a510b54 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -26,6 +26,53 @@ #define DBG_ESR_EVT_HWWP 0x2 #define DBG_ESR_EVT_BRK 0x6 +/* + * Break point instruction encoding + */ +#define BREAK_INSTR_SIZE 4 + +/* + * ESR values expected for dynamic and compile time BRK instruction + */ +#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xfffff)) + +/* + * #imm16 values used for BRK instruction generation + * Allowed values for kgbd are 0x400 - 0x7ff + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + */ +#define KGDB_DYN_DGB_BRK_IMM 0x400 +#define KDBG_COMPILED_DBG_BRK_IMM 0x401 + +/* + * BRK instruction encoding + * The #imm16 value should be placed at bits[20:5] within BRK ins + */ +#define AARCH64_BREAK_MON 0xd4200000 + +/* + * Extract byte from BRK instruction + */ +#define KGDB_DYN_DGB_BRK_INS_BYTE(x) \ + ((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff) + +/* + * Extract byte from BRK #imm16 + */ +#define KGBD_DYN_DGB_BRK_IMM_BYTE(x) \ + (((((KGDB_DYN_DGB_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff) + +#define KGDB_DYN_DGB_BRK_BYTE(x) \ + (KGDB_DYN_DGB_BRK_INS_BYTE(x) | KGBD_DYN_DGB_BRK_IMM_BYTE(x)) + +#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DGB_BRK_BYTE(0) +#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DGB_BRK_BYTE(1) +#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DGB_BRK_BYTE(2) +#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DGB_BRK_BYTE(3) + +#define CACHE_FLUSH_IS_SAFE 1 + enum debug_el { DBG_ACTIVE_EL0 = 0, DBG_ACTIVE_EL1, diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h new file mode 100644 index 000000000000..3c8aafc1082f --- /dev/null +++ b/arch/arm64/include/asm/kgdb.h @@ -0,0 +1,84 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/include/kgdb.h + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM_KGDB_H +#define __ARM_KGDB_H + +#include +#include + +#ifndef __ASSEMBLY__ + +static inline void arch_kgdb_breakpoint(void) +{ + asm ("brk %0" : : "I" (KDBG_COMPILED_DBG_BRK_IMM)); +} + +extern void kgdb_handle_bus_error(void); +extern int kgdb_fault_expected; + +#endif /* !__ASSEMBLY__ */ + +/* + * gdb is expecting the following registers layout. + * + * General purpose regs: + * r0-r30: 64 bit + * sp,pc : 64 bit + * pstate : 64 bit + * Total: 34 + * FPU regs: + * f0-f31: 128 bit + * Total: 32 + * Extra regs + * fpsr & fpcr: 32 bit + * Total: 2 + * + */ + +#define _GP_REGS 34 +#define _FP_REGS 32 +#define _EXTRA_REGS 2 +/* + * general purpose registers size in bytes. + * pstate is only 4 bytes. subtract 4 bytes + */ +#define GP_REG_BYTES (_GP_REGS * 8) +#define DBG_MAX_REG_NUM (_GP_REGS + _FP_REGS + _EXTRA_REGS) + +/* + * Size of I/O buffer for gdb packet. + * considering to hold all register contents, size is set + */ + +#define BUFMAX 2048 + +/* + * Number of bytes required for gdb_regs buffer. + * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each + * GDB fails to connect for size beyond this with error + * "'g' packet reply is too long" + */ + +#define NUMREGBYTES ((_GP_REGS * 8) + (_FP_REGS * 16) + \ + (_EXTRA_REGS * 4)) + +#endif /* __ASM_KGDB_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564961d4..69613bbe4345 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_KGDB) += kgdb.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c new file mode 100644 index 000000000000..4b7a5695175e --- /dev/null +++ b/arch/arm64/kernel/kgdb.c @@ -0,0 +1,288 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/kernel/kgdb.c + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "x0", 8, offsetof(struct pt_regs, regs[0])}, + { "x1", 8, offsetof(struct pt_regs, regs[1])}, + { "x2", 8, offsetof(struct pt_regs, regs[2])}, + { "x3", 8, offsetof(struct pt_regs, regs[3])}, + { "x4", 8, offsetof(struct pt_regs, regs[4])}, + { "x5", 8, offsetof(struct pt_regs, regs[5])}, + { "x6", 8, offsetof(struct pt_regs, regs[6])}, + { "x7", 8, offsetof(struct pt_regs, regs[7])}, + { "x8", 8, offsetof(struct pt_regs, regs[8])}, + { "x9", 8, offsetof(struct pt_regs, regs[9])}, + { "x10", 8, offsetof(struct pt_regs, regs[10])}, + { "x11", 8, offsetof(struct pt_regs, regs[11])}, + { "x12", 8, offsetof(struct pt_regs, regs[12])}, + { "x13", 8, offsetof(struct pt_regs, regs[13])}, + { "x14", 8, offsetof(struct pt_regs, regs[14])}, + { "x15", 8, offsetof(struct pt_regs, regs[15])}, + { "x16", 8, offsetof(struct pt_regs, regs[16])}, + { "x17", 8, offsetof(struct pt_regs, regs[17])}, + { "x18", 8, offsetof(struct pt_regs, regs[18])}, + { "x19", 8, offsetof(struct pt_regs, regs[19])}, + { "x20", 8, offsetof(struct pt_regs, regs[20])}, + { "x21", 8, offsetof(struct pt_regs, regs[21])}, + { "x22", 8, offsetof(struct pt_regs, regs[22])}, + { "x23", 8, offsetof(struct pt_regs, regs[23])}, + { "x24", 8, offsetof(struct pt_regs, regs[24])}, + { "x25", 8, offsetof(struct pt_regs, regs[25])}, + { "x26", 8, offsetof(struct pt_regs, regs[26])}, + { "x27", 8, offsetof(struct pt_regs, regs[27])}, + { "x28", 8, offsetof(struct pt_regs, regs[28])}, + { "x29", 8, offsetof(struct pt_regs, regs[29])}, + { "x30", 8, offsetof(struct pt_regs, regs[30])}, + { "sp", 8, offsetof(struct pt_regs, sp)}, + { "pc", 8, offsetof(struct pt_regs, pc)}, + { "pstate", 8, offsetof(struct pt_regs, pstate)}, + { "v0", 16, -1 }, + { "v1", 16, -1 }, + { "v2", 16, -1 }, + { "v3", 16, -1 }, + { "v4", 16, -1 }, + { "v5", 16, -1 }, + { "v6", 16, -1 }, + { "v7", 16, -1 }, + { "v8", 16, -1 }, + { "v9", 16, -1 }, + { "v10", 16, -1 }, + { "v11", 16, -1 }, + { "v12", 16, -1 }, + { "v13", 16, -1 }, + { "v14", 16, -1 }, + { "v15", 16, -1 }, + { "v16", 16, -1 }, + { "v17", 16, -1 }, + { "v18", 16, -1 }, + { "v19", 16, -1 }, + { "v20", 16, -1 }, + { "v21", 16, -1 }, + { "v22", 16, -1 }, + { "v23", 16, -1 }, + { "v24", 16, -1 }, + { "v25", 16, -1 }, + { "v26", 16, -1 }, + { "v27", 16, -1 }, + { "v28", 16, -1 }, + { "v29", 16, -1 }, + { "v30", 16, -1 }, + { "v31", 16, -1 }, + { "fpsr", 4, -1 }, + { "fpcr", 4, -1 }, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + struct pt_regs *thread_regs; + + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + thread_regs = task_pt_regs(task); + memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->pc = pc; +} + +static int compiled_break; + +int kgdb_arch_handle_exception(int exception_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + unsigned long addr; + char *ptr; + int err; + + switch (remcom_in_buffer[0]) { + case 'D': + case 'k': + /* + * Packet D (Detach), k (kill). No special handling + * is required here. Handle same as c packet. + */ + case 'c': + /* + * Packet c (Continue) to continue executing. + * Set pc to required address. + * Try to read optional parameter and set pc. + * If this was a compiled breakpoint, we need to move + * to the next instruction else we will just breakpoint + * over and over again. + */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + kgdb_arch_set_pc(linux_regs, addr); + else if (compiled_break == 1) + kgdb_arch_set_pc(linux_regs, linux_regs->pc + 4); + + compiled_break = 0; + err = 0; + break; + default: + err = -1; + } + return err; +} + +static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + compiled_break = 1; + kgdb_handle_exception(1, SIGTRAP, 0, regs); + + return 0; +} + +static struct break_hook kgdb_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), + .fn = kgdb_brk_fn +}; + +static struct break_hook kgdb_compiled_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM), + .fn = kgdb_compiled_brk_fn +}; + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + if (kgdb_handle_exception(1, args->signr, cmd, regs)) + return NOTIFY_DONE; + return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + /* + * Want to be lowest priority + */ + .priority = -INT_MAX, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + int ret = register_die_notifier(&kgdb_notifier); + + if (ret != 0) + return ret; + + register_break_hook(&kgdb_brkpt_hook); + register_break_hook(&kgdb_compiled_brkpt_hook); + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_break_hook(&kgdb_brkpt_hook); + unregister_break_hook(&kgdb_compiled_brkpt_hook); + unregister_die_notifier(&kgdb_notifier); +} + +/* + * ARM instructions are always in LE. + * Break instruction is encoded in LE format + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = { + KGDB_DYN_BRK_INS_BYTE0, + KGDB_DYN_BRK_INS_BYTE1, + KGDB_DYN_BRK_INS_BYTE2, + KGDB_DYN_BRK_INS_BYTE3, + } +}; From 2d694c96ec2d0d2c4d9a125ecf7c6c96d0f19a69 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 11:20:19 +0000 Subject: [PATCH 1004/1368] arm64: KGDB: Add step debugging support Add KGDB software step debugging support for EL1 debug in AArch64 mode. KGDB registers step debug handler with debug monitor. On receiving 'step' command from GDB tool, target enables software step debugging and step address is updated in ELR. Software Step debugging is disabled when 'continue' command is received Signed-off-by: Vijaya Kumar K Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 44679a4f142b69ae0c68ed815a48bbd164827281) Signed-off-by: Mark Brown --- arch/arm64/kernel/kgdb.c | 64 +++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 4b7a5695175e..75c9cf1aafee 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -137,13 +137,26 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) static int compiled_break; +static void kgdb_arch_update_addr(struct pt_regs *regs, + char *remcom_in_buffer) +{ + unsigned long addr; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + kgdb_arch_set_pc(regs, addr); + else if (compiled_break == 1) + kgdb_arch_set_pc(regs, regs->pc + 4); + + compiled_break = 0; +} + int kgdb_arch_handle_exception(int exception_vector, int signo, int err_code, char *remcom_in_buffer, char *remcom_out_buffer, struct pt_regs *linux_regs) { - unsigned long addr; - char *ptr; int err; switch (remcom_in_buffer[0]) { @@ -162,13 +175,36 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, * to the next instruction else we will just breakpoint * over and over again. */ - ptr = &remcom_in_buffer[1]; - if (kgdb_hex2long(&ptr, &addr)) - kgdb_arch_set_pc(linux_regs, addr); - else if (compiled_break == 1) - kgdb_arch_set_pc(linux_regs, linux_regs->pc + 4); + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, -1); + kgdb_single_step = 0; - compiled_break = 0; + /* + * Received continue command, disable single step + */ + if (kernel_active_single_step()) + kernel_disable_single_step(); + + err = 0; + break; + case 's': + /* + * Update step address value with address passed + * with step packet. + * On debug exception return PC is copied to ELR + * So just update PC. + * If no step address is passed, resume from the address + * pointed by PC. Do not update PC + */ + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); + kgdb_single_step = 1; + + /* + * Enable single step handling + */ + if (!kernel_active_single_step()) + kernel_enable_single_step(linux_regs); err = 0; break; default: @@ -191,6 +227,12 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) return 0; } +static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), @@ -203,6 +245,10 @@ static struct break_hook kgdb_compiled_brkpt_hook = { .fn = kgdb_compiled_brk_fn }; +static struct step_hook kgdb_step_hook = { + .fn = kgdb_step_brk_fn +}; + static void kgdb_call_nmi_hook(void *ignored) { kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); @@ -259,6 +305,7 @@ int kgdb_arch_init(void) register_break_hook(&kgdb_brkpt_hook); register_break_hook(&kgdb_compiled_brkpt_hook); + register_step_hook(&kgdb_step_hook); return 0; } @@ -271,6 +318,7 @@ void kgdb_arch_exit(void) { unregister_break_hook(&kgdb_brkpt_hook); unregister_break_hook(&kgdb_compiled_brkpt_hook); + unregister_step_hook(&kgdb_step_hook); unregister_die_notifier(&kgdb_notifier); } From c607b2e09c18d62bd5f54cd779a37c752e7b4f0a Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 16:50:21 +0530 Subject: [PATCH 1005/1368] misc: debug: remove compilation warnings typecast instruction_pointer macro to unsigned long to resolve following compiler warnings like warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'u64' [-Wformat] Signed-off-by: Vijaya Kumar K Signed-off-by: Catalin Marinas (cherry picked from commit 58dcc204f18af2821f683b235bb376f9db2557f5) Signed-off-by: Mark Brown --- arch/arm64/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 0dacbbf9458b..2e9ddd71b24a 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -163,7 +163,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs) return 0; } -#define instruction_pointer(regs) (regs)->pc +#define instruction_pointer(regs) ((unsigned long)(regs)->pc) #ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); From 9da2fb2070585c138ccd041f0c4f9e72886312e6 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 11:20:22 +0000 Subject: [PATCH 1006/1368] arm64: KGDB: Add KGDB config Add HAVE_ARCH_KGDB for arm64 Kconfig Signed-off-by: Vijaya Kumar K Signed-off-by: Catalin Marinas (cherry picked from commit 9529247db9ecfc5a723e17093614e7437ab0d5bd) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..20f25572218e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -16,6 +16,7 @@ config ARM64 select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND + select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK From eeae892aeac2f66e7d1a6baaee4cf8108c3456b7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 7 Jan 2014 22:17:12 +0800 Subject: [PATCH 1007/1368] arm64, jump label: detect %c support for ARM64 As commit a9468f30b5eac6 "ARM: 7333/2: jump label: detect %c support for ARM", this patch detects the same thing for ARM64 because some ARM64 GCC versions have the same issue. Some versions of ARM64 GCC which do support asm goto, do not support the %c specifier. Since we need the %c to support jump labels on ARM64, detect that too in the asm goto detection script to avoid build errors with these versions. Acked-by: Will Deacon Signed-off-by: Jiang Liu Signed-off-by: Catalin Marinas (cherry picked from commit f3c003f72dfb2497056bcbb864885837a1968ed5) Signed-off-by: Mark Brown --- scripts/gcc-goto.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh index a2af2e88daf3..c9469d34ecc6 100644 --- a/scripts/gcc-goto.sh +++ b/scripts/gcc-goto.sh @@ -5,7 +5,7 @@ cat << "END" | $@ -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y" int main(void) { -#ifdef __arm__ +#if defined(__arm__) || defined(__aarch64__) /* * Not related to asm goto, but used by jump label * and broken on some ARM GCC versions (see GCC Bug 48637). From 8a411cf1c24488b9be28cbf8047259dfa785f8ce Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Feb 2014 16:37:59 +0000 Subject: [PATCH 1008/1368] arm64: Extend the PCI I/O space to 16MB The patch moves the PCI I/O space (currently at 64K) before the earlyprintk mapping and extends it to 16MB. Signed-off-by: Catalin Marinas (cherry picked from commit 22bd1c91fe13d59cff734b69b6757adcfbd8dee9) Signed-off-by: Mark Brown Conflicts: Documentation/arm64/memory.txt --- Documentation/arm64/memory.txt | 16 ++++++++++------ arch/arm64/include/asm/io.h | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index a776d7a7c3cc..7835a54b55b0 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -35,11 +35,13 @@ ffffffbc00000000 ffffffbdffffffff 8GB vmemmap ffffffbe00000000 ffffffbffbbfffff ~8GB [guard, future vmmemap] +ffffffbffa000000 ffffffbffaffffff 16MB PCI I/O space + +ffffffbffb000000 ffffffbffbbfffff 12MB [guard] + ffffffbffbc00000 ffffffbffbdfffff 2MB earlyprintk device -ffffffbffbe00000 ffffffbffbe0ffff 64KB PCI I/O space - -ffffffbbffff0000 ffffffbcffffffff ~2MB [guard] +ffffffbffbe00000 ffffffbffbffffff 2MB [guard] ffffffbffc000000 ffffffbfffffffff 64MB modules @@ -60,11 +62,13 @@ fffffdfc00000000 fffffdfdffffffff 8GB vmemmap fffffdfe00000000 fffffdfffbbfffff ~8GB [guard, future vmmemap] +fffffdfffa000000 fffffdfffaffffff 16MB PCI I/O space + +fffffdfffb000000 fffffdfffbbfffff 12MB [guard] + fffffdfffbc00000 fffffdfffbdfffff 2MB earlyprintk device -fffffdfffbe00000 fffffdfffbe0ffff 64KB PCI I/O space - -fffffdfffbe10000 fffffdfffbffffff ~2MB [guard] +fffffdfffbe00000 fffffdfffbffffff 2MB [guard] fffffdfffc000000 fffffdffffffffff 64MB modules diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 2e12258aa7e4..ad80e05825df 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -118,7 +118,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * I/O port access primitives. */ #define IO_SPACE_LIMIT 0xffff -#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_2M)) +#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_32M)) static inline u8 inb(unsigned long addr) { From dabf5d44b1a7fca13cef1b4ee4ec2c02e394899e Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Fri, 21 Feb 2014 05:13:49 +0000 Subject: [PATCH 1009/1368] arm64: enable processor debug state for secondary cpus processor debug state PSTATE.D is unmasked in smp call clear_os_lock for secondary cpus. So debug state is still masked in normal kernel context. With this patch, unmask debug state on secondary boot for the cpus in normal kernel context. Now kgdb tests passed with multicore. Signed-off-by: Vijaya Kumar K Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit d8ed442a009ecfe155b57d58f231db3d6084633d) Signed-off-by: Mark Brown Conflicts: arch/arm64/kernel/smp.c --- arch/arm64/kernel/debug-monitors.c | 7 +++---- arch/arm64/kernel/smp.c | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f092fdbf5479..90777ba7b661 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -138,8 +138,6 @@ void disable_debug_monitors(enum debug_el el) static void clear_os_lock(void *unused) { asm volatile("msr oslar_el1, %0" : : "r" (0)); - isb(); - local_dbg_enable(); } static int __cpuinit os_lock_notify(struct notifier_block *self, @@ -158,8 +156,9 @@ static struct notifier_block __cpuinitdata os_lock_nb = { static int __cpuinit debug_monitors_init(void) { /* Clear the OS lock. */ - smp_call_function(clear_os_lock, NULL, 1); - clear_os_lock(NULL); + on_each_cpu(clear_os_lock, NULL, 1); + isb(); + local_dbg_enable(); /* Register hotplug handler. */ register_cpu_notifier(&os_lock_nb); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 08030da7f3c2..6555060f9e97 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -159,6 +159,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) */ notify_cpu_starting(cpu); + local_dbg_enable(); local_irq_enable(); local_fiq_enable(); From 25bf912c54b9085d3a856af9b0f7c0316b947f61 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 17 Feb 2014 12:03:25 +0000 Subject: [PATCH 1010/1368] arm64: Extend the idmap to the whole kernel image This patch changes the idmap page table creation during boot to cover the whole kernel image, allowing functions like cpu_reset() to be safely called with the physical address. This patch also simplifies the create_block_map asm macro to no longer take an idmap argument and always use the phys/virt/end parameters. For the idmap case, phys == virt. Signed-off-by: Catalin Marinas (cherry picked from commit ea8c2e1124457f266f82effc3e6558552527943a) Signed-off-by: Mark Brown --- arch/arm64/kernel/head.S | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a0c10594f628..6fc9fc2b7220 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -356,26 +356,18 @@ ENDPROC(__calc_phys_offset) * Preserves: tbl, flags * Corrupts: phys, start, end, pstate */ - .macro create_block_map, tbl, flags, phys, start, end, idmap=0 + .macro create_block_map, tbl, flags, phys, start, end lsr \phys, \phys, #BLOCK_SHIFT - .if \idmap - and \start, \phys, #PTRS_PER_PTE - 1 // table index - .else lsr \start, \start, #BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - .endif orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - .ifnc \start,\end lsr \end, \end, #BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index - .endif 9999: str \phys, [\tbl, \start, lsl #3] // store the entry - .ifnc \start,\end add \start, \start, #1 // next entry add \phys, \phys, #BLOCK_SIZE // next block cmp \start, \end b.ls 9999b - .endif .endm /* @@ -407,9 +399,13 @@ __create_page_tables: * Create the identity mapping. */ add x0, x25, #PAGE_SIZE // section table address - adr x3, __turn_mmu_on // virtual/physical address + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) create_pgd_entry x25, x0, x3, x5, x6 - create_block_map x0, x7, x3, x5, x5, idmap=1 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 /* * Map the kernel image (starting with PHYS_OFFSET). @@ -417,7 +413,7 @@ __create_page_tables: add x0, x26, #PAGE_SIZE // section table address mov x5, #PAGE_OFFSET create_pgd_entry x26, x0, x5, x3, x6 - ldr x6, =KERNEL_END - 1 + ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 From 4a64e4b612733d71f7d86ff9e60fd095e46d2e96 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 17 Dec 2013 00:19:29 +0000 Subject: [PATCH 1011/1368] arm64: Fix the soft_restart routine Change the soft_restart() routine to call cpu_reset() at its identity mapped physical address. The cpu_reset() routine must be called at its identity mapped physical address so that when the MMU is turned off the instruction pointer will be at the correct location in physical memory. Signed-off-by: Geoff Levand for Huawei, Linaro Signed-off-by: Catalin Marinas (cherry picked from commit 09024aa61e1bc994404683e2e5b363484a15dd12) Signed-off-by: Mark Brown --- arch/arm64/kernel/process.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 02a41bba165d..75af46648f26 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -71,8 +71,17 @@ static void setup_restart(void) void soft_restart(unsigned long addr) { + typedef void (*phys_reset_t)(unsigned long); + phys_reset_t phys_reset; + setup_restart(); - cpu_reset(addr); + + /* Switch to the identity mapping */ + phys_reset = (phys_reset_t)virt_to_phys(cpu_reset); + phys_reset(addr); + + /* Should never get here */ + BUG(); } /* From ce13b8ed049445435a9af9fdfcbcd84c04451301 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Thu, 6 Feb 2014 17:21:51 +0530 Subject: [PATCH 1012/1368] arm64: Change misleading function names in dma-mapping arm64_swiotlb_alloc/free_coherent name can be misleading somtimes with CMA support being enabled after this patch (c2104debc235b745265b64d610237a6833fd53) Change this name to be more generic: __dma_alloc/free_coherent Signed-off-by: Ritesh Harjani [catalin.marinas@arm.com: renamed arm64_swiotlb_dma_ops to coherent_swiotlb_dma_ops] Signed-off-by: Catalin Marinas (cherry picked from commit bb10eb7b4d176f408d45fb492df28bed2981a1f3) Signed-off-by: Mark Brown --- arch/arm64/mm/dma-mapping.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4bd7579ec9e6..65ce60fb3746 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -29,9 +29,9 @@ struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) +static void *__dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) @@ -39,16 +39,16 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, return swiotlb_alloc_coherent(dev, size, dma_handle, flags); } -static void arm64_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) +static void __dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { swiotlb_free_coherent(dev, size, vaddr, dma_handle); } -static struct dma_map_ops arm64_swiotlb_dma_ops = { - .alloc = arm64_swiotlb_alloc_coherent, - .free = arm64_swiotlb_free_coherent, +static struct dma_map_ops coherent_swiotlb_dma_ops = { + .alloc = __dma_alloc_coherent, + .free = __dma_free_coherent, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .map_sg = swiotlb_map_sg_attrs, @@ -63,7 +63,7 @@ static struct dma_map_ops arm64_swiotlb_dma_ops = { void __init arm64_swiotlb_init(void) { - dma_ops = &arm64_swiotlb_dma_ops; + dma_ops = &coherent_swiotlb_dma_ops; swiotlb_init(1); } From 17ef801d93fada1828f553e362b8029e19a6735d Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 11 Feb 2014 22:28:42 +0000 Subject: [PATCH 1013/1368] arm64: vdso: clean up vdso_pagelist initialization Remove some unnecessary bits that were apparently carried over from another architecture's implementation: - No need to get_page() the vdso text/data - these are part of the kernel image. - No need for ClearPageReserved on the vdso text. - No need to vmap the first text page to check the ELF header - this can be done through &vdso_start. Also some minor cleanup: - Use kcalloc for vdso_pagelist array allocation. - Don't print on allocation failure, slab/slub will do that for us. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 16fb1a9bec6126162560f159df449e4781560807) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso.c | 42 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 0ea7a22bcdf2..10c27f5f6d23 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -103,49 +103,31 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) static int __init vdso_init(void) { - struct page *pg; - char *vbase; - int i, ret = 0; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", vdso_pages + 1, vdso_pages, 1L, &vdso_start); /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1), + vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); - if (vdso_pagelist == NULL) { - pr_err("Failed to allocate vDSO pagelist!\n"); + if (vdso_pagelist == NULL) return -ENOMEM; - } /* Grab the vDSO code pages. */ - for (i = 0; i < vdso_pages; i++) { - pg = virt_to_page(&vdso_start + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - vdso_pagelist[i] = pg; - } - - /* Sanity check the shared object header. */ - vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL); - if (vbase == NULL) { - pr_err("Failed to map vDSO pagelist!\n"); - return -ENOMEM; - } else if (memcmp(vbase, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - ret = -EINVAL; - goto unmap; - } + for (i = 0; i < vdso_pages; i++) + vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); /* Grab the vDSO data page. */ - pg = virt_to_page(vdso_data); - get_page(pg); - vdso_pagelist[i] = pg; + vdso_pagelist[i] = virt_to_page(vdso_data); -unmap: - vunmap(vbase); - return ret; + return 0; } arch_initcall(vdso_init); From 8c3d47b311844601dc6f6c0647a9763155b137ce Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Fri, 28 Feb 2014 09:57:47 +0000 Subject: [PATCH 1014/1368] arm64: remove redundant "psci:" prefixes Since 652af899799354049b273af897b798b8f03fdd88 "arm64: factor out spin-table boot method" psci prefix's been introduced. We have a common pr_fmt, so clean them up. Signed-off-by: Vladimir Murzin Acked-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 288ac26cc2334e5e6ecad6416e9bf750691afd84) Signed-off-by: Mark Brown --- arch/arm64/kernel/psci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 4f97db3d7363..83ebee880d19 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -251,7 +251,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu) { int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); if (err) - pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); + pr_err("failed to boot CPU%d (%d)\n", cpu, err); return err; } @@ -278,7 +278,7 @@ static void cpu_psci_cpu_die(unsigned int cpu) ret = psci_ops.cpu_off(state); - pr_crit("psci: unable to power off CPU%u (%d)\n", cpu, ret); + pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); } #endif From 0306b0fc8f9ea2acdc43f73e694836cac5b72c4a Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Fri, 28 Feb 2014 09:57:33 +0000 Subject: [PATCH 1015/1368] arm64: remove return value form psci_init() psci_init() is written to return err code if something goes wrong. However, the single user, setup_arch(), doesn't care about it. Moreover, every error path is supplied with a clear message which is enough for pleasant debugging. Signed-off-by: Vladimir Murzin Acked-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 64b4f60f497058f1c6ba118a0260249ee5c091a6) Signed-off-by: Mark Brown --- arch/arm64/include/asm/psci.h | 2 +- arch/arm64/kernel/psci.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h index e5312ea0ec1a..d15ab8b46336 100644 --- a/arch/arm64/include/asm/psci.h +++ b/arch/arm64/include/asm/psci.h @@ -14,6 +14,6 @@ #ifndef __ASM_PSCI_H #define __ASM_PSCI_H -int psci_init(void); +void psci_init(void); #endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 83ebee880d19..ea4828a4aa96 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -176,22 +176,20 @@ static const struct of_device_id psci_of_match[] __initconst = { {}, }; -int __init psci_init(void) +void __init psci_init(void) { struct device_node *np; const char *method; u32 id; - int err = 0; np = of_find_matching_node(NULL, psci_of_match); if (!np) - return -ENODEV; + return; pr_info("probing function IDs from device-tree\n"); if (of_property_read_string(np, "method", &method)) { pr_warning("missing \"method\" property\n"); - err = -ENXIO; goto out_put_node; } @@ -201,7 +199,6 @@ int __init psci_init(void) invoke_psci_fn = __invoke_psci_fn_smc; } else { pr_warning("invalid \"method\" property: %s\n", method); - err = -EINVAL; goto out_put_node; } @@ -227,7 +224,7 @@ int __init psci_init(void) out_put_node: of_node_put(np); - return err; + return; } #ifdef CONFIG_SMP From 581d750b3b9ddb7ad81d7ae980ddd0fddb79692e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 28 Feb 2014 16:12:25 +0000 Subject: [PATCH 1016/1368] arm64: Fix !CONFIG_SMP kernel build Commit fb4a96029c8a (arm64: kernel: fix per-cpu offset restore on resume) uses per_cpu_offset() unconditionally during CPU wakeup, however, this is only defined for the SMP case. Signed-off-by: Catalin Marinas Reported-by: Dave P Martin (cherry picked from commit b57fc9e80692043e2a3a74e1d2c047eb700dcd0c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/percpu.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 13fb0b3efc5f..453a179469a3 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,6 +16,8 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#ifdef CONFIG_SMP + static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); @@ -36,6 +38,12 @@ static inline unsigned long __my_cpu_offset(void) } #define __my_cpu_offset __my_cpu_offset() +#else /* !CONFIG_SMP */ + +#define set_my_cpu_offset(x) do { } while (0) + +#endif /* CONFIG_SMP */ + #include #endif /* __ASM_PERCPU_H */ From 0fc36c022860cf5344d1921d981ef450315e7d0f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 14 Aug 2013 09:54:54 +0100 Subject: [PATCH 1017/1368] arm64: remove unnecessary cache flush at boot Currently we flush the entire dcache at boot within __cpu_setup, but this is unnecessary as the booting protocol demands that the dcache is invalid and off upon entering the kernel. The presence of the cache flush only serves to hide bugs in bootloaders, and is not safe in the presence of SMP. In an SMP boot scenario the CPUs enter coherency outside of the kernel, and the primary CPU enables its caches before bringing up secondary CPUs. Therefore if any secondary CPU has an entry in its cache (in violation of the boot protocol), the primary CPU might snoop it even if the secondary CPU's cache is disabled. The boot-time cache flush only serves to hide a firmware bug, and slows down a cpu boot unnecessarily. This patch removes the unnecessary boot-time cache flush. Signed-off-by: Mark Rutland Acked-by: Will Deacon [catalin.marinas@arm.com: make __flush_dcache_all local only] Signed-off-by: Catalin Marinas (cherry picked from commit bff705950e2cdcf35641dee35eb14bad9ed49e8f) Signed-off-by: Mark Brown --- arch/arm64/mm/cache.S | 2 +- arch/arm64/mm/proc.S | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 1ea9f26d1b70..6a3c378f3292 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -30,7 +30,7 @@ * * Corrupted registers: x0-x7, x9-x11 */ -ENTRY(__flush_dcache_all) +__flush_dcache_all: dsb sy // ensure ordering with previous memory accesses mrs x0, clidr_el1 // read clidr and x3, x0, #0x7000000 // extract loc from clidr diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9ef7633a8eb9..8e0158f198d7 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -104,12 +104,6 @@ ENDPROC(cpu_do_switch_mm) * value of the SCTLR_EL1 register. */ ENTRY(__cpu_setup) - /* - * Preserve the link register across the function call. - */ - mov x28, lr - bl __flush_dcache_all - mov lr, x28 ic iallu // I+BTB cache invalidate tlbi vmalle1is // invalidate I + D TLBs dsb sy From c83b7c920a1f7ee9d5cd30a25dbe314ea0fbc7ed Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Mar 2014 10:36:52 +0000 Subject: [PATCH 1018/1368] arm64: barriers: add dmb barrier Commit 8adbf57fc429 ("irqchip: gic: use dmb ishst instead of dsb when raising a softirq") added an explicit dmb(...) call to the GIC driver. This patch adds a simple dmb() macro to arm64, which expands to a DMB SY instruction. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit d152d22a18c240286c19997a6249ee76ea055926) Signed-off-by: Mark Brown --- arch/arm64/include/asm/barrier.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 60ac8661bee3..2306cd195055 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,6 +25,7 @@ #define wfi() asm volatile("wfi" : : : "memory") #define isb() asm volatile("isb" : : : "memory") +#define dmb(opt) asm volatile("dmb sy" : : : "memory") #define dsb(opt) asm volatile("dsb sy" : : : "memory") #define mb() dsb() From aa6a76713515af0b64331e1538fbe084c29eaf51 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 5 Mar 2014 05:34:32 +0000 Subject: [PATCH 1019/1368] arm64: debug: make local symbols static Make local symbols static, because these are used only in this file. Signed-off-by: Jingoo Han Signed-off-by: Catalin Marinas (cherry picked from commit 242c04bc4be959ae28618772e439c27e87a7d880) Signed-off-by: Mark Brown --- arch/arm64/kernel/debug-monitors.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 90777ba7b661..553a120fc838 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -190,7 +190,7 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); -DEFINE_RWLOCK(step_hook_lock); +static DEFINE_RWLOCK(step_hook_lock); void register_step_hook(struct step_hook *hook) { @@ -277,7 +277,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, * Use reader/writer locks instead of plain spinlock. */ static LIST_HEAD(break_hook); -DEFINE_RWLOCK(break_hook_lock); +static DEFINE_RWLOCK(break_hook_lock); void register_break_hook(struct break_hook *hook) { From 1e7a212cb995f66800bb01d90806df02529d24fb Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 5 Mar 2014 05:35:45 +0000 Subject: [PATCH 1020/1368] arm64: smp: make local symbol static Make smp_spin_table_cpu_postboot() static, because this function is used only in this file. Signed-off-by: Jingoo Han Signed-off-by: Catalin Marinas (cherry picked from commit 7184659bed3090248e382d98a49a3c1bcfe11174) Signed-off-by: Mark Brown --- arch/arm64/kernel/smp_spin_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 27f08367a6e7..03c0843e379c 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -119,7 +119,7 @@ static int smp_spin_table_cpu_boot(unsigned int cpu) return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; } -void smp_spin_table_cpu_postboot(void) +static void smp_spin_table_cpu_postboot(void) { /* * Let the primary processor know we're out of the pen. From 83a6783cbe03e736f3c66e1d1de2a0d360e82821 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Mar 2014 17:47:04 +0000 Subject: [PATCH 1021/1368] asm-generic: rwsem: de-PPCify rwsem.h asm-generic/rwsem.h used to live under arch/powerpc. During its liberation to common code, a few references to its former home where preserved, in particular the definition of RWSEM_ACTIVE_MASK is predicated on CONFIG_PPC64. This patch updates the ifdefs and comments to architecturally neutral versions. Acked-by: Arnd Bergmann Acked-by: Richard Kuo Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit e172800e5d3162f97d332b3745e3743ce150ec48) Signed-off-by: Mark Brown --- include/asm-generic/rwsem.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index bb1e2cdeb9bf..d48bf5a95cc1 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_RWSEM_H -#define _ASM_POWERPC_RWSEM_H +#ifndef _ASM_GENERIC_RWSEM_H +#define _ASM_GENERIC_RWSEM_H #ifndef _LINUX_RWSEM_H #error "Please don't include directly, use instead." @@ -8,7 +8,7 @@ #ifdef __KERNEL__ /* - * R/W semaphores for PPC using the stuff in lib/rwsem.c. + * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. * Adapted largely from include/asm-i386/rwsem.h * by Paul Mackerras . */ @@ -16,7 +16,7 @@ /* * the semaphore definition */ -#ifdef CONFIG_PPC64 +#ifdef CONFIG_64BIT # define RWSEM_ACTIVE_MASK 0xffffffffL #else # define RWSEM_ACTIVE_MASK 0x0000ffffL @@ -129,4 +129,4 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) } #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_RWSEM_H */ +#endif /* _ASM_GENERIC_RWSEM_H */ From af6b38abb48ee39ae743dcbd0c3425af0bbaa2ee Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Mar 2014 17:47:05 +0000 Subject: [PATCH 1022/1368] arm64: rwsem: use asm-generic rwsem implementation asm-generic offers an atomic-add based rwsem implementation, which can avoid the need for heavier, spinlock-based synchronisation on the fast path. This patch makes use of the optimised implementation for arm64 CPUs. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c209f79940ac0c75ae8d2f503a2b9d86255e266c) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/Kbuild | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d5b813ed86a1..8efcec96de72 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -69,7 +69,7 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config RWSEM_GENERIC_SPINLOCK +config RWSEM_XCHGADD_ALGORITHM def_bool y config GENERIC_HWEIGHT diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index ae0612c4fa45..53c4a2946444 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -29,6 +29,7 @@ generic-y += pci.h generic-y += poll.h generic-y += posix_types.h generic-y += resource.h +generic-y += rwsem.h generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h From 7605e331e064ce4885c25022f0c78aeda2047d75 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Fri, 14 Mar 2014 17:53:18 -0600 Subject: [PATCH 1023/1368] arm64: Add APM X-Gene SoC 15Gbps Multi-purpose PHY DTS entries This patch adds the DTS entries for the APM X-Gene SoC 15Gbps Multi-purpose PHY driver. The PHY for SATA controller 2 and 3 are enabled by default. Signed-off-by: Loc Ho Signed-off-by: Tuan Phan Signed-off-by: Suman Tripathi Acked-by: Arnd Bergmann Signed-off-by: Tejun Heo (cherry picked from commit 71b70ee9350f239ea021bbb737771ebd5d02c020) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/apm-storm.dtsi | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index d37d7369e260..6d4f493aac9a 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -176,6 +176,48 @@ eth8clk: eth8clk { reg-names = "csr-reg"; clock-output-names = "eth8clk"; }; + + sataphy1clk: sataphy1clk@1f21c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f21c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy1clk"; + status = "disabled"; + csr-offset = <0x4>; + csr-mask = <0x00>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; + + sataphy2clk: sataphy1clk@1f22c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f22c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy2clk"; + status = "ok"; + csr-offset = <0x4>; + csr-mask = <0x3a>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; + + sataphy3clk: sataphy1clk@1f23c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f23c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy3clk"; + status = "ok"; + csr-offset = <0x4>; + csr-mask = <0x3a>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; }; serial0: serial@1c020000 { @@ -187,5 +229,35 @@ serial0: serial@1c020000 { interrupt-parent = <&gic>; interrupts = <0x0 0x4c 0x4>; }; + + phy1: phy@1f21a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f21a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy1clk 0>; + status = "disabled"; + apm,tx-boost-gain = <30 30 30 30 30 30>; + apm,tx-eye-tuning = <2 10 10 2 10 10>; + }; + + phy2: phy@1f22a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f22a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy2clk 0>; + status = "ok"; + apm,tx-boost-gain = <30 30 30 30 30 30>; + apm,tx-eye-tuning = <1 10 10 2 10 10>; + }; + + phy3: phy@1f23a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f23a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy3clk 0>; + status = "ok"; + apm,tx-boost-gain = <31 31 31 31 31 31>; + apm,tx-eye-tuning = <2 10 10 2 10 10>; + }; }; }; From 1af7205a961f33b011f9a7f1e6df3c1d0ffa0a90 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Fri, 14 Mar 2014 17:53:21 -0600 Subject: [PATCH 1024/1368] arm64: Add APM X-Gene SoC AHCI SATA host controller DTS entries This patch adds APM X-Gene SoC AHCI SATA host controller DTS entries. Signed-off-by: Loc Ho Signed-off-by: Tuan Phan Signed-off-by: Suman Tripathi Acked-by: Arnd Bergmann Signed-off-by: Tejun Heo (cherry picked from commit db8c0286d18c2d3eaec2c4da34767db0f4f6ffaa) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/apm-storm.dtsi | 80 ++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index 6d4f493aac9a..93f4b2dd9248 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -218,6 +218,45 @@ sataphy3clk: sataphy1clk@1f23c000 { enable-offset = <0x0>; enable-mask = <0x06>; }; + + sata01clk: sata01clk@1f21c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f21c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata01clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; + + sata23clk: sata23clk@1f22c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f22c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata23clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; + + sata45clk: sata45clk@1f23c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f23c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata45clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; }; serial0: serial@1c020000 { @@ -259,5 +298,46 @@ phy3: phy@1f23a000 { apm,tx-boost-gain = <31 31 31 31 31 31>; apm,tx-eye-tuning = <2 10 10 2 10 10>; }; + + sata1: sata@1a000000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a000000 0x0 0x1000>, + <0x0 0x1f210000 0x0 0x1000>, + <0x0 0x1f21d000 0x0 0x1000>, + <0x0 0x1f21e000 0x0 0x1000>, + <0x0 0x1f217000 0x0 0x1000>; + interrupts = <0x0 0x86 0x4>; + status = "disabled"; + clocks = <&sata01clk 0>; + phys = <&phy1 0>; + phy-names = "sata-phy"; + }; + + sata2: sata@1a400000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a400000 0x0 0x1000>, + <0x0 0x1f220000 0x0 0x1000>, + <0x0 0x1f22d000 0x0 0x1000>, + <0x0 0x1f22e000 0x0 0x1000>, + <0x0 0x1f227000 0x0 0x1000>; + interrupts = <0x0 0x87 0x4>; + status = "ok"; + clocks = <&sata23clk 0>; + phys = <&phy2 0>; + phy-names = "sata-phy"; + }; + + sata3: sata@1a800000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a800000 0x0 0x1000>, + <0x0 0x1f230000 0x0 0x1000>, + <0x0 0x1f23d000 0x0 0x1000>, + <0x0 0x1f23e000 0x0 0x1000>; + interrupts = <0x0 0x88 0x4>; + status = "ok"; + clocks = <&sata45clk 0>; + phys = <&phy3 0>; + phy-names = "sata-phy"; + }; }; }; From edd542927515fbdd8e5341aabada596d620b69c6 Mon Sep 17 00:00:00 2001 From: Christopher Covington Date: Wed, 19 Mar 2014 16:29:37 +0000 Subject: [PATCH 1025/1368] arm64: Fix __range_ok macro Without this, the following scenario is incorrectly determined to be invalid. addr 0x7f_ffffe000 size 8192 addr_limit 0x80_00000000 This behavior was observed while trying to vmsplice the stack as part of a CRIU dump of a process on a system started with the norandmaps kernel parameter. Signed-off-by: Christopher Covington Signed-off-by: Catalin Marinas (cherry picked from commit 31b1e940c5d47ee1a01baeccfb1b2b8890822d1a) Signed-off-by: Mark Brown --- arch/arm64/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8b181415e843..8e8df0135f41 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -83,7 +83,7 @@ static inline void set_fs(mm_segment_t fs) * Returns 1 if the range is valid, 0 otherwise. * * This is equivalent to the following test: - * (u65)addr + (u65)size < (u65)current->addr_limit + * (u65)addr + (u65)size <= current->addr_limit * * This needs 65-bit arithmetic. */ @@ -91,7 +91,7 @@ static inline void set_fs(mm_segment_t fs) ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ - asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, cc" \ + asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ : "=&r" (flag), "=&r" (roksum) \ : "1" (addr), "Ir" (size), \ "r" (current_thread_info()->addr_limit) \ From 0d2f4e91d89ece1556224ddef344867a572c69f9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 14 Mar 2014 19:52:23 +0000 Subject: [PATCH 1026/1368] arm64: Implement custom mmap functions for dma mapping The current dma_ops do not specify an mmap function so maping falls back to the default implementation. There are at least two issues with using the default implementation: 1) The pgprot is always pgprot_noncached (strongly ordered) memory even with coherent operations 2) dma_common_mmap calls virt_to_page on the remapped non-coherent address which leads to invalid memory being mapped. Fix both these issue by implementing a custom mmap function which correctly accounts for remapped addresses and sets vm_pg_prot appropriately. Signed-off-by: Laura Abbott [catalin.marinas@arm.com: replaced "arm64_" with "__" prefix for consistency] Signed-off-by: Catalin Marinas (cherry picked from commit 6e8d7968e92f7668a2a615773ad3940f0219dcbd) Signed-off-by: Mark Brown --- arch/arm64/mm/dma-mapping.c | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index d3d9bf5884f8..18b83177f4b6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -216,9 +216,52 @@ static void __swiotlb_sync_sg_for_device(struct device *dev, sg->length, dir); } +/* vma->vm_page_prot must be set appropriately before calling this function */ +static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + int ret = -ENXIO; + unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> + PAGE_SHIFT; + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } + + return ret; +} + +static int __swiotlb_mmap_noncoherent(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); + return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + +static int __swiotlb_mmap_coherent(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + /* Just use whatever page_prot attributes were specified */ + return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + struct dma_map_ops noncoherent_swiotlb_dma_ops = { .alloc = __dma_alloc_noncoherent, .free = __dma_free_noncoherent, + .mmap = __swiotlb_mmap_noncoherent, .map_page = __swiotlb_map_page, .unmap_page = __swiotlb_unmap_page, .map_sg = __swiotlb_map_sg_attrs, @@ -235,6 +278,7 @@ EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops); struct dma_map_ops coherent_swiotlb_dma_ops = { .alloc = __dma_alloc_coherent, .free = __dma_free_coherent, + .mmap = __swiotlb_mmap_coherent, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .map_sg = swiotlb_map_sg_attrs, From 6b185a7d18246ecd52ce3da955fa589f6b8c99ec Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 14 Mar 2014 19:52:24 +0000 Subject: [PATCH 1027/1368] arm64: Support DMA_ATTR_WRITE_COMBINE DMA_ATTR_WRITE_COMBINE is currently ignored. Set the pgprot appropriately for non coherent opperations. Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit 214fdbe74a096c3aeb7af81d7900e2ab966b10d6) Signed-off-by: Mark Brown --- arch/arm64/mm/dma-mapping.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 18b83177f4b6..e5eef09808b6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -30,6 +30,16 @@ struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); +static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, + bool coherent) +{ + if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) + return pgprot_writecombine(prot); + else if (!coherent) + return pgprot_dmacoherent(prot); + return prot; +} + static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) @@ -99,7 +109,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, for (i = 0; i < (size >> PAGE_SHIFT); i++) map[i] = page + i; coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP, - pgprot_dmacoherent(pgprot_default)); + __get_dma_pgprot(attrs, pgprot_default, false)); kfree(map); if (!coherent_ptr) goto no_map; @@ -245,7 +255,7 @@ static int __swiotlb_mmap_noncoherent(struct device *dev, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { - vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false); return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); } From 3162b0e7e2725f7b17ba94fa0d1cbb3defd59697 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 28 Mar 2014 09:49:13 +0000 Subject: [PATCH 1028/1368] Revert "arm64: virt: ensure visibility of __boot_cpu_mode" This reverts commit 82b2f495fba338d1e3098dde1df54944a9c19751. The __boot_cpu_mode variable is flushed in head.S after being written, therefore the additional cache flushing is no longer required. Signed-off-by: Catalin Marinas (cherry picked from commit 0a997ecc08e0b551119c56d52a591d9e5b38a7cd) Signed-off-by: Mark Brown --- arch/arm64/include/asm/virt.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 26e310c54344..439827271e3d 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -21,7 +21,6 @@ #define BOOT_CPU_MODE_EL2 (0x0e12b007) #ifndef __ASSEMBLY__ -#include /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -37,20 +36,9 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); phys_addr_t __hyp_get_vectors(void); -static inline void sync_boot_mode(void) -{ - /* - * As secondaries write to __boot_cpu_mode with caches disabled, we - * must flush the corresponding cache entries to ensure the visibility - * of their writes. - */ - __flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode)); -} - /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { - sync_boot_mode(); return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); } @@ -58,7 +46,6 @@ static inline bool is_hyp_mode_available(void) /* Check if the bootloader has booted CPUs in different modes */ static inline bool is_hyp_mode_mismatched(void) { - sync_boot_mode(); return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } From 0ee87a5c8fb9ce32d3bc7da08e5ff198ad450e64 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Sat, 5 Apr 2014 01:30:50 +0100 Subject: [PATCH 1029/1368] arm64: Add missing Kconfig for CONFIG_STRICT_DEVMEM The Kconfig for CONFIG_STRICT_DEVMEM is missing despite being used in mmap.c. Add it. Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit d253b4406df69fa7a74231769d6f6ad80dc33063) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig.debug --- arch/arm64/Kconfig.debug | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 1a6bfe954d49..e1b0c4601b3e 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -13,6 +13,20 @@ config DEBUG_STACK_USAGE Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T output. +config STRICT_DEVMEM + bool "Filter access to /dev/mem" + depends on MMU + help + If this option is disabled, you allow userspace (root) access to all + of memory, including kernel and userspace memory. Accidental + access to this is obviously disastrous, but specific access can + be used by people debugging the kernel. + + If this option is switched on, the /dev/mem file only allows + userspace access to memory mapped peripherals. + + If in doubt, say Y. + config EARLY_PRINTK bool "Early printk support" default y From a5bc41402eb320904e65c5e87c4ef2f7eacd2525 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 7 Apr 2014 15:39:51 -0700 Subject: [PATCH 1030/1368] arm64: initialize pgprot info earlier in boot Presently, paging_init() calls init_mem_pgprot() to initialize pgprot values used by macros such as PAGE_KERNEL, PAGE_KERNEL_EXEC, etc. The new fixmap and early_ioremap support also needs to use these macros before paging_init() is called. This patch moves the init_mem_pgprot() call out of paging_init() and into setup_arch() so that pgprot_default gets initialized in time for fixmap and early_ioremap. Signed-off-by: Mark Salter Acked-by: Catalin Marinas Cc: Will Deacon Cc: Borislav Petkov Cc: Dave Young Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 0bf757c73d6612d3d279de3f61b35062aa9c8b1d) Signed-off-by: Mark Brown --- arch/arm64/include/asm/mmu.h | 1 + arch/arm64/kernel/setup.c | 2 ++ arch/arm64/mm/mmu.c | 3 +-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 2494fc01896a..f600d400c07d 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -27,5 +27,6 @@ typedef struct { extern void paging_init(void); extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); +extern void init_mem_pgprot(void); #endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index bb99e3a5ac56..b04b6118602a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -268,6 +268,8 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + init_mem_pgprot(); + parse_early_param(); arm64_memblock_init(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f8dc7e8fce6f..ba259a0e385e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -125,7 +125,7 @@ early_param("cachepolicy", early_cachepolicy); /* * Adjust the PMD section entries according to the CPU in use. */ -static void __init init_mem_pgprot(void) +void __init init_mem_pgprot(void) { pteval_t default_pgprot; int i; @@ -357,7 +357,6 @@ void __init paging_init(void) { void *zero_page; - init_mem_pgprot(); map_mem(); /* From 6d08693da9e9512a6739c4a873c9f42d0c8d0f26 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 25 Apr 2014 15:31:45 +0100 Subject: [PATCH 1031/1368] arm64: Use bus notifiers to set per-device coherent DMA ops Recently, the default DMA ops have been changed to non-coherent for alignment with 32-bit ARM platforms (and DT files). This patch adds bus notifiers to be able to set the coherent DMA ops (with no cache maintenance) for devices explicitly marked as coherent via the "dma-coherent" DT property. Signed-off-by: Catalin Marinas (cherry picked from commit 6ecba8eb51b7d23fda66388a5420be7d8688b186) Signed-off-by: Mark Brown --- arch/arm64/kernel/setup.c | 2 +- arch/arm64/mm/dma-mapping.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index add6ea616843..a13856e29485 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -288,7 +288,7 @@ static int __init arm64_device_init(void) of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } -arch_initcall(arm64_device_init); +arch_initcall_sync(arm64_device_init); static DEFINE_PER_CPU(struct cpu, cpu_data); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e5eef09808b6..86f0b9ef45ad 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -22,8 +22,11 @@ #include #include #include +#include +#include #include #include +#include #include @@ -302,17 +305,45 @@ struct dma_map_ops coherent_swiotlb_dma_ops = { }; EXPORT_SYMBOL(coherent_swiotlb_dma_ops); +static int dma_bus_notifier(struct notifier_block *nb, + unsigned long event, void *_dev) +{ + struct device *dev = _dev; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + if (of_property_read_bool(dev->of_node, "dma-coherent")) + set_dma_ops(dev, &coherent_swiotlb_dma_ops); + + return NOTIFY_OK; +} + +static struct notifier_block platform_bus_nb = { + .notifier_call = dma_bus_notifier, +}; + +static struct notifier_block amba_bus_nb = { + .notifier_call = dma_bus_notifier, +}; + extern int swiotlb_late_init_with_default_size(size_t default_size); static int __init swiotlb_late_init(void) { size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); + /* + * These must be registered before of_platform_populate(). + */ + bus_register_notifier(&platform_bus_type, &platform_bus_nb); + bus_register_notifier(&amba_bustype, &amba_bus_nb); + dma_ops = &noncoherent_swiotlb_dma_ops; return swiotlb_late_init_with_default_size(swiotlb_size); } -subsys_initcall(swiotlb_late_init); +arch_initcall(swiotlb_late_init); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 From 2cf50c98683628f23a6152e3c0d58c80974c8452 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 18 Apr 2014 17:19:59 -0500 Subject: [PATCH 1032/1368] arm64: enable FIX_EARLYCON_MEM kconfig In order to support earlycon on arm64, we need to enable earlycon fixmap support. Signed-off-by: Rob Herring Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 92cc15fcb543a8ab9af5682a2011944e6f48fd4c) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig --- arch/arm64/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8efcec96de72..8540eef3db8b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -99,6 +99,9 @@ config SWIOTLB config IOMMU_HELPER def_bool SWIOTLB +config FIX_EARLYCON_MEM + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" From 55fc9aa7e46ea98bcc9c5c445a817811f11579a6 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 16 Apr 2014 13:26:35 +0100 Subject: [PATCH 1033/1368] arm64: initialize spinlock for init_mm's context ARM64 has defined the spinlock for init_mm's context, so need initialize the spinlock structure; otherwise during the suspend flow it will dump the info for spinlock's bad magic warning as below: [ 39.084394] Disabling non-boot CPUs ... [ 39.092871] BUG: spinlock bad magic on CPU#1, swapper/1/0 [ 39.092896] lock: init_mm+0x338/0x3e0, .magic: 00000000, .owner: /-1, .owner_cpu: 0 [ 39.092907] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G O 3.10.33 #125 [ 39.092912] Call trace: [ 39.092927] [] dump_backtrace+0x0/0x16c [ 39.092934] [] show_stack+0x10/0x1c [ 39.092947] [] dump_stack+0x1c/0x28 [ 39.092953] [] spin_dump+0x78/0x88 [ 39.092960] [] spin_bug+0x24/0x34 [ 39.092971] [] do_raw_spin_lock+0x98/0x17c [ 39.092979] [] _raw_spin_lock_irqsave+0x4c/0x60 [ 39.092990] [] set_mm_context+0x1c/0x6c [ 39.092996] [] __new_context+0x94/0x10c [ 39.093007] [] idle_task_exit+0x104/0x1b0 [ 39.093014] [] cpu_die+0x14/0x74 [ 39.093021] [] arch_cpu_idle_dead+0x8/0x14 [ 39.093030] [] cpu_startup_entry+0x1ec/0x258 [ 39.093036] [] secondary_start_kernel+0x114/0x124 Signed-off-by: Leo Yan Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 8f0712037b4ed63dfce844939ac9866054f15ca0) Signed-off-by: Mark Brown --- arch/arm64/include/asm/mmu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index f600d400c07d..aff0292c8f4d 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -22,6 +22,9 @@ typedef struct { void *vdso; } mm_context_t; +#define INIT_MM_CONTEXT(name) \ + .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), + #define ASID(mm) ((mm)->context.id & 0xffff) extern void paging_init(void); From 638b6642b041f83802ea5d7ca68b45ce508bbc5c Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Mon, 14 Apr 2014 08:38:53 +0100 Subject: [PATCH 1034/1368] arm64: init: Move of_clk_init to time_init Clock providers should be initialized before clocksource_of_init. If not, Clock source initialization can be fail to get the clock. Acked-by: Will Deacon Signed-off-by: Chanho Min Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit bc3ee18a7a57243721ecfd879319e3d2e882f289) Signed-off-by: Mark Brown --- arch/arm64/kernel/setup.c | 1 - arch/arm64/kernel/time.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b04b6118602a..6c87fe0f6857 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -298,7 +298,6 @@ void __init setup_arch(char **cmdline_p) static int __init arm64_device_init(void) { - of_clk_init(NULL); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 03dc3718eb13..dbe1c8d8e90f 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -72,6 +73,7 @@ void __init time_init(void) { u32 arch_timer_rate; + of_clk_init(NULL); clocksource_of_init(); arch_timer_rate = arch_timer_get_rate(); From 7d7a38f930c760ca03a9fc9ef9b90eef0d07e379 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Mar 2014 11:23:39 +0100 Subject: [PATCH 1035/1368] arm64: mm: Remove superfluous "the" in comment Signed-off-by: Geert Uytterhoeven Cc: Catalin Marinas Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Jiri Kosina (cherry picked from commit aad9061bf37e05d29a2a94ae8fe1e12d8808a0dd) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 608ec24f2f52..c5d5286ee8ad 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -400,7 +400,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Ensure that there are not more swap files than can be encoded in the kernel - * the PTEs. + * PTEs. */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) From 4dfb441fe4ed5de576daca2c1fc4e79246d07991 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 4 Apr 2014 11:49:05 +0100 Subject: [PATCH 1036/1368] arm64: Remove boot thread synchronisation for spin-table release method The synchronisation with the boot thread already happens in __cpu_up() via wait_for_completion_timeout(). In addition, __cpu_up() calls are protected by the cpu_add_remove_lock mutex and already serialised. Signed-off-by: Catalin Marinas (cherry picked from commit 6400111399e16a535231ebd76389c894ea1837ff) Signed-off-by: Mark Brown --- arch/arm64/kernel/smp_spin_table.c | 39 +----------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 03c0843e379c..e3e5755f61bb 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -30,7 +30,6 @@ extern void secondary_holding_pen(void); volatile unsigned long secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; -static DEFINE_RAW_SPINLOCK(boot_lock); /* * Write secondary_holding_pen_release in a way that is guaranteed to be @@ -85,14 +84,6 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) static int smp_spin_table_cpu_boot(unsigned int cpu) { - unsigned long timeout; - - /* - * Set synchronisation state between this boot processor - * and the secondary one - */ - raw_spin_lock(&boot_lock); - /* * Update the pen release flag. */ @@ -103,34 +94,7 @@ static int smp_spin_table_cpu_boot(unsigned int cpu) */ sev(); - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (secondary_holding_pen_release == INVALID_HWID) - break; - udelay(10); - } - - /* - * Now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - raw_spin_unlock(&boot_lock); - - return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; -} - -static void smp_spin_table_cpu_postboot(void) -{ - /* - * Let the primary processor know we're out of the pen. - */ - write_pen_release(INVALID_HWID); - - /* - * Synchronise with the boot thread. - */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + return 0; } const struct cpu_operations smp_spin_table_ops = { @@ -138,5 +102,4 @@ const struct cpu_operations smp_spin_table_ops = { .cpu_init = smp_spin_table_cpu_init, .cpu_prepare = smp_spin_table_cpu_prepare, .cpu_boot = smp_spin_table_cpu_boot, - .cpu_postboot = smp_spin_table_cpu_postboot, }; From 7fea0941e63f119a57a3a0d4048120e8a3ebc58f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 4 Apr 2014 15:42:16 +0100 Subject: [PATCH 1037/1368] arm64: Remove the aux_context structure This patch removes the aux_context structure (and the containing file) to allow the placement of the _aarch64_ctx end magic based on the context stored on the signal stack. Signed-off-by: Catalin Marinas (cherry picked from commit 0e0276d1e1dd063cd14ce377707970d0417a0792) Signed-off-by: Mark Brown --- arch/arm64/include/asm/sigcontext.h | 31 ----------------------------- arch/arm64/kernel/signal.c | 27 +++++++++++++++---------- 2 files changed, 17 insertions(+), 41 deletions(-) delete mode 100644 arch/arm64/include/asm/sigcontext.h diff --git a/arch/arm64/include/asm/sigcontext.h b/arch/arm64/include/asm/sigcontext.h deleted file mode 100644 index dca1094acc74..000000000000 --- a/arch/arm64/include/asm/sigcontext.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_SIGCONTEXT_H -#define __ASM_SIGCONTEXT_H - -#include - -/* - * Auxiliary context saved in the sigcontext.__reserved array. Not exported to - * user space as it will change with the addition of new context. User space - * should check the magic/size information. - */ -struct aux_context { - struct fpsimd_context fpsimd; - /* additional context to be added before "end" */ - struct _aarch64_ctx end; -}; -#endif diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 890a591f75dd..7ff2eee96c6b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -100,8 +100,7 @@ static int restore_sigframe(struct pt_regs *regs, { sigset_t set; int i, err; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -121,8 +120,11 @@ static int restore_sigframe(struct pt_regs *regs, err |= !valid_user_regs(®s->user_regs); - if (err == 0) - err |= restore_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= restore_fpsimd_context(fpsimd_ctx); + } return err; } @@ -167,8 +169,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int i, err = 0; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; + struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ __put_user_error(regs->regs[29], &sf->fp, err); @@ -185,12 +187,17 @@ static int setup_sigframe(struct rt_sigframe __user *sf, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); - if (err == 0) - err |= preserve_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= preserve_fpsimd_context(fpsimd_ctx); + aux += sizeof(*fpsimd_ctx); + } /* set the "end" magic */ - __put_user_error(0, &aux->end.magic, err); - __put_user_error(0, &aux->end.size, err); + end = aux; + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); return err; } From 2d89dd023e91acb648876b73f67435a13811c632 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Apr 2014 16:17:32 +0100 Subject: [PATCH 1038/1368] arm64: Introduce execute-only page access permissions The ARMv8 architecture allows execute-only user permissions by clearing the PTE_UXN and PTE_USER bits. The kernel, however, can still access such page, so execute-only page permission does not protect against read(2)/write(2) etc. accesses. Systems requiring such protection must implement/enable features like SECCOMP. This patch changes the arm64 __P100 and __S100 protection_map[] macros to the new __PAGE_EXECONLY attributes. A side effect is that pte_valid_user() no longer triggers for __PAGE_EXECONLY since PTE_USER isn't set. To work around this, the check is done on the PTE_NG bit via the pte_valid_ng() macro. VM_READ is also checked now for page faults. Signed-off-by: Catalin Marinas (cherry picked from commit bc07c2c6e9ed125d362af0214b6313dca180cb08) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 11 ++++++----- arch/arm64/mm/fault.c | 5 ++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c5d5286ee8ad..0359c2335b9c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -84,6 +84,7 @@ extern pgprot_t pgprot_default; #define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define __PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) #endif /* __ASSEMBLY__ */ @@ -91,7 +92,7 @@ extern pgprot_t pgprot_default; #define __P001 __PAGE_READONLY #define __P010 __PAGE_COPY #define __P011 __PAGE_COPY -#define __P100 __PAGE_READONLY_EXEC +#define __P100 __PAGE_EXECONLY #define __P101 __PAGE_READONLY_EXEC #define __P110 __PAGE_COPY_EXEC #define __P111 __PAGE_COPY_EXEC @@ -100,7 +101,7 @@ extern pgprot_t pgprot_default; #define __S001 __PAGE_READONLY #define __S010 __PAGE_SHARED #define __S011 __PAGE_SHARED -#define __S100 __PAGE_READONLY_EXEC +#define __S100 __PAGE_EXECONLY #define __S101 __PAGE_READONLY_EXEC #define __S110 __PAGE_SHARED_EXEC #define __S111 __PAGE_SHARED_EXEC @@ -137,8 +138,8 @@ extern struct page *empty_zero_page; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) +#define pte_valid_ng(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_NG)) == (PTE_VALID | PTE_NG)) static inline pte_t pte_wrprotect(pte_t pte) { @@ -192,7 +193,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_valid_user(pte)) { + if (pte_valid_ng(pte)) { if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); if (pte_dirty(pte) && pte_write(pte)) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index df4f2fd187c3..c1b42273c8fa 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -173,8 +173,7 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, good_area: /* * Check that the permissions on the VMA allow for the fault which - * occurred. If we encountered a write or exec fault, we must have - * appropriate permissions, otherwise we allow any permission. + * occurred. */ if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; @@ -196,7 +195,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (esr & ESR_LNX_EXEC) { From 045d86c293258ef52458ca8fc7990444ef8f4d55 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 May 2014 22:13:47 +0100 Subject: [PATCH 1039/1368] arm64: Make atomic64_t() return "long", not "long long" arm64 sets CONFIG_64BIT=y and hence uses the "long counter" atomic64_t definition from include/linux/types.h. Make atomic64_read() return "long", not "long long". Signed-off-by: Bjorn Helgaas Signed-off-by: Catalin Marinas (cherry picked from commit ba6bf8c85cb0d263ca9a98ef6a76ab651a97c60b) Signed-off-by: Mark Brown --- arch/arm64/include/asm/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 52c2b6a44c15..736c5916d367 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -171,7 +171,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) */ #define ATOMIC64_INIT(i) { (i) } -#define atomic64_read(v) (*(volatile long long *)&(v)->counter) +#define atomic64_read(v) (*(volatile long *)&(v)->counter) #define atomic64_set(v,i) (((v)->counter) = (i)) static inline void atomic64_add(u64 i, atomic64_t *v) From f706043a1bbb304424643db83d21630bbc08888b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 30 Apr 2014 16:23:06 +0100 Subject: [PATCH 1040/1368] arm64: xchg: prevent warning if return value is unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some users of xchg() don't bother using the return value, which results in a compiler warning like the following (from kgdb): In file included from linux/arch/arm64/include/asm/atomic.h:27:0, from include/linux/atomic.h:4, from include/linux/spinlock.h:402, from include/linux/seqlock.h:35, from include/linux/time.h:5, from include/uapi/linux/timex.h:56, from include/linux/timex.h:56, from include/linux/sched.h:19, from include/linux/pid_namespace.h:4, from kernel/debug/debug_core.c:30: kernel/debug/debug_core.c: In function ‘kgdb_cpu_enter’: linux/arch/arm64/include/asm/cmpxchg.h:75:3: warning: value computed is not used [-Wunused-value] ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) ^ linux/arch/arm64/include/asm/atomic.h:132:30: note: in expansion of macro ‘xchg’ #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) kernel/debug/debug_core.c:504:4: note: in expansion of macro ‘atomic_xchg’ atomic_xchg(&kgdb_active, cpu); ^ This patch makes use of the same trick as we do for cmpxchg, by assigning the return value to a dummy variable in the xchg() macro itself. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit e1dfda9ced9bea1413a736f0d578f8218a7788ec) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cmpxchg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 9f85665be748..0a234d0a41d0 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -71,7 +71,12 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size } #define xchg(ptr,x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ +}) static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) From 2b3a92c809be30cc706deaf808f31f98d2d917db Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 May 2014 16:24:10 +0100 Subject: [PATCH 1041/1368] arm64: barriers: make use of barrier options with explicit barriers When calling our low-level barrier macros directly, we can often suffice with more relaxed behaviour than the default "all accesses, full system" option. This patch updates the users of dsb() to specify the option which they actually require. Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 98f7685ee69f871ba991089cb9685f0da07517ea) Signed-off-by: Mark Brown Conflicts: arch/arm64/kvm/sys_regs.c --- arch/arm64/include/asm/barrier.h | 2 +- arch/arm64/include/asm/cacheflush.h | 4 ++-- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/include/asm/tlbflush.h | 14 +++++++------- arch/arm64/kernel/process.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 2306cd195055..c98d0a88916a 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -28,7 +28,7 @@ #define dmb(opt) asm volatile("dmb sy" : : : "memory") #define dsb(opt) asm volatile("dsb sy" : : : "memory") -#define mb() dsb() +#define mb() dsb(sy) #define rmb() asm volatile("dsb ld" : : : "memory") #define wmb() asm volatile("dsb st" : : : "memory") diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 889324981aa4..24bc1d2ebdb9 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -116,7 +116,7 @@ extern void flush_dcache_page(struct page *); static inline void __flush_icache_all(void) { asm("ic ialluis"); - dsb(); + dsb(ish); } #define flush_dcache_mmap_lock(mapping) \ @@ -143,7 +143,7 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) * set_pte_at() called from vmap_pte_range() does not * have a DSB after cleaning the cache line. */ - dsb(); + dsb(ish); } static inline void flush_cache_vunmap(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0359c2335b9c..8af30ab8d3c2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -299,7 +299,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; - dsb(); + dsb(ishst); } static inline void pmd_clear(pmd_t *pmdp) @@ -329,7 +329,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) static inline void set_pud(pud_t *pudp, pud_t pud) { *pudp = pud; - dsb(); + dsb(ishst); } static inline void pud_clear(pud_t *pudp) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 8b482035cfc2..3083a08f9622 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb; */ static inline void flush_tlb_all(void) { - dsb(); + dsb(ishst); asm("tlbi vmalle1is"); - dsb(); + dsb(ish); isb(); } @@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm) { unsigned long asid = (unsigned long)ASID(mm) << 48; - dsb(); + dsb(ishst); asm("tlbi aside1is, %0" : : "r" (asid)); - dsb(); + dsb(ish); } static inline void flush_tlb_page(struct vm_area_struct *vma, @@ -93,9 +93,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(vma->vm_mm) << 48); - dsb(); + dsb(ishst); asm("tlbi vae1is, %0" : : "r" (addr)); - dsb(); + dsb(ish); } /* @@ -114,7 +114,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * set_pte() does not have a DSB, so make sure that the page table * write is visible. */ - dsb(); + dsb(ishst); } #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 75af46648f26..57bd961f2917 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -306,7 +306,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Complete any pending TLB or cache maintenance on this CPU in case * the thread migrates to a different CPU. */ - dsb(); + dsb(ish); /* the actual thread switch */ last = cpu_switch_to(prev, next); From a99a4e55f652f72dc736935a795936b65c47a17f Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Wed, 14 May 2014 10:02:37 +1000 Subject: [PATCH 1042/1368] arm64: add APM X-Gene SoC RTC DTS entry This patch adds APM X-Gene SoC RTC DTS entry Signed-off-by: Rameshwar Prasad Sahu Signed-off-by: Loc Ho Cc: Jon Masters Cc: Alessandro Zummo Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton (cherry picked from commit 7fe2f8776216e25ad7fdb22f3966177777c5022c) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/apm-storm.dtsi | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index 93f4b2dd9248..4917f3b81a44 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -257,6 +257,19 @@ sata45clk: sata45clk@1f23c000 { enable-offset = <0x0>; enable-mask = <0x39>; }; + + rtcclk: rtcclk@17000000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x17000000 0x0 0x2000>; + reg-names = "csr-reg"; + csr-offset = <0xc>; + csr-mask = <0x2>; + enable-offset = <0x10>; + enable-mask = <0x2>; + clock-output-names = "rtcclk"; + }; }; serial0: serial@1c020000 { @@ -339,5 +352,13 @@ sata3: sata@1a800000 { phys = <&phy3 0>; phy-names = "sata-phy"; }; + + rtc: rtc@10510000 { + compatible = "apm,xgene-rtc"; + reg = <0x0 0x10510000 0x0 0x400>; + interrupts = <0x0 0x46 0x4>; + #clock-cells = <1>; + clocks = <&rtcclk 0>; + }; }; }; From b029d457835437482947aaedd542617909e46694 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Thu, 23 Jan 2014 15:53:48 -0800 Subject: [PATCH 1043/1368] add generic fixmap.h Many architectures provide an asm/fixmap.h which defines support for compile-time 'special' virtual mappings which need to be made before paging_init() has run. This support is also used for early ioremap on x86. Much of this support is identical across the architectures. This patch consolidates all of the common bits into asm-generic/fixmap.h which is intended to be included from arch/*/include/asm/fixmap.h. Signed-off-by: Mark Salter Acked-by: Arnd Bergmann Acked-by: Ralf Baechle Cc: Russell King Cc: Richard Kuo Cc: James Hogan Cc: Michal Simek Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "H. Peter Anvin" Cc: Chris Metcalf Cc: Ingo Molnar Cc: Jeff Dike Cc: Paul Mundt Cc: Richard Weinberger Cc: Thomas Gleixner Cc: Jonas Bonn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit d57c33c5daa4efa9e4d303bd0faf868080b532be) Signed-off-by: Mark Brown --- include/asm-generic/fixmap.h | 97 ++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 include/asm-generic/fixmap.h diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h new file mode 100644 index 000000000000..5a64ca4621f3 --- /dev/null +++ b/include/asm-generic/fixmap.h @@ -0,0 +1,97 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 + * Break out common bits to asm-generic by Mark Salter, November 2013 + */ + +#ifndef __ASM_GENERIC_FIXMAP_H +#define __ASM_GENERIC_FIXMAP_H + +#include + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + +#ifndef __ASSEMBLY__ +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without translation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static __always_inline unsigned long fix_to_virt(const unsigned int idx) +{ + BUILD_BUG_ON(idx >= __end_of_fixed_addresses); + return __fix_to_virt(idx); +} + +static inline unsigned long virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} + +/* + * Provide some reasonable defaults for page flags. + * Not all architectures use all of these different types and some + * architectures use different names. + */ +#ifndef FIXMAP_PAGE_NORMAL +#define FIXMAP_PAGE_NORMAL PAGE_KERNEL +#endif +#ifndef FIXMAP_PAGE_NOCACHE +#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE +#endif +#ifndef FIXMAP_PAGE_IO +#define FIXMAP_PAGE_IO PAGE_KERNEL_IO +#endif +#ifndef FIXMAP_PAGE_CLEAR +#define FIXMAP_PAGE_CLEAR __pgprot(0) +#endif + +#ifndef set_fixmap +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, FIXMAP_PAGE_NORMAL) +#endif + +#ifndef clear_fixmap +#define clear_fixmap(idx) \ + __set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR) +#endif + +/* Return a pointer with offset calculated */ +#define __set_fixmap_offset(idx, phys, flags) \ +({ \ + unsigned long addr; \ + __set_fixmap(idx, phys, flags); \ + addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ + addr; \ +}) + +#define set_fixmap_offset(idx, phys) \ + __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NORMAL) + +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, phys, FIXMAP_PAGE_NOCACHE) + +#define set_fixmap_offset_nocache(idx, phys) \ + __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NOCACHE) + +/* + * Some fixmaps are for IO + */ +#define set_fixmap_io(idx, phys) \ + __set_fixmap(idx, phys, FIXMAP_PAGE_IO) + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_GENERIC_FIXMAP_H */ From fbc223483b269e451312552f5daf5ecba6c87df8 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 7 Apr 2014 15:39:48 -0700 Subject: [PATCH 1044/1368] mm: create generic early_ioremap() support This patch creates a generic implementation of early_ioremap() support based on the existing x86 implementation. early_ioremp() is useful for early boot code which needs to temporarily map I/O or memory regions before normal mapping functions such as ioremap() are available. Some architectures have optional MMU. In the no-MMU case, the remap functions simply return the passed in physical address and the unmap functions do nothing. Signed-off-by: Mark Salter Acked-by: Catalin Marinas Acked-by: H. Peter Anvin Cc: Borislav Petkov Cc: Dave Young Cc: Will Deacon Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9e5c33d7aeeef62e5fa7e74f94432685bd03026b) Signed-off-by: Mark Brown Conflicts: mm/Kconfig mm/Makefile --- include/asm-generic/early_ioremap.h | 42 +++++ mm/Kconfig | 3 + mm/Makefile | 1 + mm/early_ioremap.c | 245 ++++++++++++++++++++++++++++ 4 files changed, 291 insertions(+) create mode 100644 include/asm-generic/early_ioremap.h create mode 100644 mm/early_ioremap.c diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h new file mode 100644 index 000000000000..a5de55c04fb2 --- /dev/null +++ b/include/asm-generic/early_ioremap.h @@ -0,0 +1,42 @@ +#ifndef _ASM_EARLY_IOREMAP_H_ +#define _ASM_EARLY_IOREMAP_H_ + +#include + +/* + * early_ioremap() and early_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. + */ +extern void __iomem *early_ioremap(resource_size_t phys_addr, + unsigned long size); +extern void *early_memremap(resource_size_t phys_addr, + unsigned long size); +extern void early_iounmap(void __iomem *addr, unsigned long size); +extern void early_memunmap(void *addr, unsigned long size); + +/* + * Weak function called by early_ioremap_reset(). It does nothing, but + * architectures may provide their own version to do any needed cleanups. + */ +extern void early_ioremap_shutdown(void); + +#if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU) +/* Arch-specific initialization */ +extern void early_ioremap_init(void); + +/* Generic initialization called by architecture code */ +extern void early_ioremap_setup(void); + +/* + * Called as last step in paging_init() so library can act + * accordingly for subsequent map/unmap requests. + */ +extern void early_ioremap_reset(void); + +#else +static inline void early_ioremap_init(void) { } +static inline void early_ioremap_setup(void) { } +static inline void early_ioremap_reset(void) { } +#endif + +#endif /* _ASM_EARLY_IOREMAP_H_ */ diff --git a/mm/Kconfig b/mm/Kconfig index e742d06285b7..19b2bd2f6a61 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -477,3 +477,6 @@ config FRONTSWAP and swap data is stored as normal on the matching swap device. If unsure, say Y to enable frontswap. + +config GENERIC_EARLY_IOREMAP + bool diff --git a/mm/Makefile b/mm/Makefile index 72c5acb9345f..89244cb96221 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -58,3 +58,4 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o +obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c new file mode 100644 index 000000000000..e10ccd299d66 --- /dev/null +++ b/mm/early_ioremap.c @@ -0,0 +1,245 @@ +/* + * Provide common bits of early_ioremap() support for architectures needing + * temporary mappings during boot before ioremap() is available. + * + * This is mostly a direct copy of the x86 early_ioremap implementation. + * + * (C) Copyright 1995 1996, 2014 Linus Torvalds + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_MMU +static int early_ioremap_debug __initdata; + +static int __init early_ioremap_debug_setup(char *str) +{ + early_ioremap_debug = 1; + + return 0; +} +early_param("early_ioremap_debug", early_ioremap_debug_setup); + +static int after_paging_init __initdata; + +void __init __weak early_ioremap_shutdown(void) +{ +} + +void __init early_ioremap_reset(void) +{ + early_ioremap_shutdown(); + after_paging_init = 1; +} + +/* + * Generally, ioremap() is available after paging_init() has been called. + * Architectures wanting to allow early_ioremap after paging_init() can + * define __late_set_fixmap and __late_clear_fixmap to do the right thing. + */ +#ifndef __late_set_fixmap +static inline void __init __late_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t prot) +{ + BUG(); +} +#endif + +#ifndef __late_clear_fixmap +static inline void __init __late_clear_fixmap(enum fixed_addresses idx) +{ + BUG(); +} +#endif + +static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; +static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; +static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; + +void __init early_ioremap_setup(void) +{ + int i; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + if (WARN_ON(prev_map[i])) + break; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); +} + +static int __init check_early_ioremap_leak(void) +{ + int count = 0; + int i; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + if (prev_map[i]) + count++; + + if (WARN(count, KERN_WARNING + "Debug warning: early ioremap leak of %d areas detected.\n" + "please boot with early_ioremap_debug and report the dmesg.\n", + count)) + return 1; + return 0; +} +late_initcall(check_early_ioremap_leak); + +static void __init __iomem * +__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) +{ + unsigned long offset; + resource_size_t last_addr; + unsigned int nrpages; + enum fixed_addresses idx; + int i, slot; + + WARN_ON(system_state != SYSTEM_BOOTING); + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (!prev_map[i]) { + slot = i; + break; + } + } + + if (WARN(slot < 0, "%s(%08llx, %08lx) not found slot\n", + __func__, (u64)phys_addr, size)) + return NULL; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (WARN_ON(!size || last_addr < phys_addr)) + return NULL; + + prev_size[slot] = size; + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + /* + * Mappings have to fit in the FIX_BTMAP area. + */ + nrpages = size >> PAGE_SHIFT; + if (WARN_ON(nrpages > NR_FIX_BTMAPS)) + return NULL; + + /* + * Ok, go for it.. + */ + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + if (after_paging_init) + __late_set_fixmap(idx, phys_addr, prot); + else + __early_set_fixmap(idx, phys_addr, prot); + phys_addr += PAGE_SIZE; + --idx; + --nrpages; + } + WARN(early_ioremap_debug, "%s(%08llx, %08lx) [%d] => %08lx + %08lx\n", + __func__, (u64)phys_addr, size, slot, offset, slot_virt[slot]); + + prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); + return prev_map[slot]; +} + +void __init early_iounmap(void __iomem *addr, unsigned long size) +{ + unsigned long virt_addr; + unsigned long offset; + unsigned int nrpages; + enum fixed_addresses idx; + int i, slot; + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (prev_map[i] == addr) { + slot = i; + break; + } + } + + if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n", + addr, size)) + return; + + if (WARN(prev_size[slot] != size, + "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", + addr, size, slot, prev_size[slot])) + return; + + WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n", + addr, size, slot); + + virt_addr = (unsigned long)addr; + if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))) + return; + + offset = virt_addr & ~PAGE_MASK; + nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; + + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + if (after_paging_init) + __late_clear_fixmap(idx); + else + __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); + --idx; + --nrpages; + } + prev_map[slot] = NULL; +} + +/* Remap an IO device */ +void __init __iomem * +early_ioremap(resource_size_t phys_addr, unsigned long size) +{ + return __early_ioremap(phys_addr, size, FIXMAP_PAGE_IO); +} + +/* Remap memory */ +void __init * +early_memremap(resource_size_t phys_addr, unsigned long size) +{ + return (__force void *)__early_ioremap(phys_addr, size, + FIXMAP_PAGE_NORMAL); +} +#else /* CONFIG_MMU */ + +void __init __iomem * +early_ioremap(resource_size_t phys_addr, unsigned long size) +{ + return (__force void __iomem *)phys_addr; +} + +/* Remap memory */ +void __init * +early_memremap(resource_size_t phys_addr, unsigned long size) +{ + return (void *)phys_addr; +} + +void __init early_iounmap(void __iomem *addr, unsigned long size) +{ +} + +#endif /* CONFIG_MMU */ + + +void __init early_memunmap(void *addr, unsigned long size) +{ + early_iounmap((__force void __iomem *)addr, size); +} From 2036aef6cd62206f0092b4b8af8206201fc08c10 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 7 Apr 2014 15:39:52 -0700 Subject: [PATCH 1045/1368] arm64: add early_ioremap support Add support for early IO or memory mappings which are needed before the normal ioremap() is usable. This also adds fixmap support for permanent fixed mappings such as that used by the earlyprintk device register region. Signed-off-by: Mark Salter Acked-by: Catalin Marinas Cc: Borislav Petkov Cc: Dave Young Cc: H. Peter Anvin Cc: Will Deacon Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit bf4b558eba920a38f91beb5ee62a8ce2628c92f7) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig arch/arm64/mm/ioremap.c --- Documentation/arm64/memory.txt | 4 +- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/arm64/include/asm/fixmap.h | 67 ++++++++++++++++++++++ arch/arm64/include/asm/io.h | 1 + arch/arm64/include/asm/memory.h | 2 +- arch/arm64/kernel/early_printk.c | 8 ++- arch/arm64/kernel/head.S | 9 +-- arch/arm64/kernel/setup.c | 2 + arch/arm64/mm/ioremap.c | 96 ++++++++++++++++++++++++++++++++ arch/arm64/mm/mmu.c | 41 -------------- 11 files changed, 180 insertions(+), 52 deletions(-) create mode 100644 arch/arm64/include/asm/fixmap.h diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 7835a54b55b0..c6941f815f15 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -39,7 +39,7 @@ ffffffbffa000000 ffffffbffaffffff 16MB PCI I/O space ffffffbffb000000 ffffffbffbbfffff 12MB [guard] -ffffffbffbc00000 ffffffbffbdfffff 2MB earlyprintk device +ffffffbffbc00000 ffffffbffbdfffff 2MB fixed mappings ffffffbffbe00000 ffffffbffbffffff 2MB [guard] @@ -66,7 +66,7 @@ fffffdfffa000000 fffffdfffaffffff 16MB PCI I/O space fffffdfffb000000 fffffdfffbbfffff 12MB [guard] -fffffdfffbc00000 fffffdfffbdfffff 2MB earlyprintk device +fffffdfffbc00000 fffffdfffbdfffff 2MB fixed mappings fffffdfffbe00000 fffffdfffbffffff 2MB [guard] diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8540eef3db8b..1eded479dfd7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -13,6 +13,7 @@ config ARM64 select COMMON_CLK select DCACHE_WORD_ACCESS select GENERIC_CLOCKEVENTS + select GENERIC_EARLY_IOREMAP select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 53c4a2946444..bc5da00f8d84 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -10,6 +10,7 @@ generic-y += delay.h generic-y += div64.h generic-y += dma.h generic-y += emergency-restart.h +generic-y += early_ioremap.h generic-y += errno.h generic-y += ftrace.h generic-y += hw_irq.h diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h new file mode 100644 index 000000000000..5f7bfe6df723 --- /dev/null +++ b/arch/arm64/include/asm/fixmap.h @@ -0,0 +1,67 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * Copyright (C) 2013 Mark Salter + * + * Adapted from arch/x86_64 version. + * + */ + +#ifndef _ASM_ARM64_FIXMAP_H +#define _ASM_ARM64_FIXMAP_H + +#ifndef __ASSEMBLY__ +#include +#include + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. + * + * These 'compile-time allocated' memory buffers are + * page-sized. Use set_fixmap(idx,phys) to associate + * physical memory with fixmap indices. + * + */ +enum fixed_addresses { + FIX_EARLYCON_MEM_BASE, + __end_of_permanent_fixed_addresses, + + /* + * Temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + */ +#ifdef CONFIG_ARM64_64K_PAGES +#define NR_FIX_BTMAPS 4 +#else +#define NR_FIX_BTMAPS 64 +#endif +#define FIX_BTMAPS_SLOTS 7 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + __end_of_fixed_addresses +}; + +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) + +extern void __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + +#define __set_fixmap __early_set_fixmap + +#include + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_ARM64_FIXMAP_H */ diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index ad80e05825df..b1ee8f0958ee 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -26,6 +26,7 @@ #include #include #include +#include /* * Generic IO read/write. These perform native-endian accesses. diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 9abb57d743b9..83138d2fb90d 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -49,7 +49,7 @@ #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) -#define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M) +#define FIXADDR_TOP (MODULES_VADDR - SZ_2M - PAGE_SIZE) #define TASK_SIZE_64 (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c index fbb6e1843659..ffbbdde7aba1 100644 --- a/arch/arm64/kernel/early_printk.c +++ b/arch/arm64/kernel/early_printk.c @@ -26,6 +26,8 @@ #include #include +#include + static void __iomem *early_base; static void (*printch)(char ch); @@ -141,8 +143,10 @@ static int __init setup_early_printk(char *buf) } /* no options parsing yet */ - if (paddr) - early_base = early_io_map(paddr, EARLYCON_IOBASE); + if (paddr) { + set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr); + early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE); + } printch = match->printch; early_console = &early_console_dev; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6fc9fc2b7220..1445f7869cf3 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -376,7 +376,7 @@ ENDPROC(__calc_phys_offset) * - identity mapping to enable the MMU (low address, TTBR0) * - first few MB of the kernel linear mapping to jump to once the MMU has * been enabled, including the FDT blob (TTBR1) - * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1) + * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses @@ -433,15 +433,12 @@ __create_page_tables: sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 1: -#ifdef CONFIG_EARLY_PRINTK /* - * Create the pgd entry for the UART mapping. The full mapping is done - * later based earlyprintk kernel parameter. + * Create the pgd entry for the fixed mappings. */ - ldr x5, =EARLYCON_IOBASE // UART virtual address + ldr x5, =FIXADDR_TOP // Fixed mapping virtual address add x0, x26, #2 * PAGE_SIZE // section table address create_pgd_entry x26, x0, x5, x6, x7 -#endif ret ENDPROC(__create_page_tables) .ltorg diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 6c87fe0f6857..5f2e5f2a1763 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -269,6 +270,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; init_mem_pgprot(); + early_ioremap_init(); parse_early_param(); diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 1725cd6db37a..00d315ae1de9 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -25,6 +25,10 @@ #include #include +#include +#include +#include + static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, pgprot_t prot, void *caller) { @@ -82,3 +86,95 @@ void __iounmap(volatile void __iomem *io_addr) vunmap(addr); } EXPORT_SYMBOL(__iounmap); + +void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) +{ + /* For normal memory we already have a cacheable mapping. */ + if (pfn_valid(__phys_to_pfn(phys_addr))) + return (void __iomem *)__phys_to_virt(phys_addr); + + return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +#ifndef CONFIG_ARM64_64K_PAGES +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#endif + +static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + + pgd = pgd_offset_k(addr); + BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + + pud = pud_offset(pgd, addr); + BUG_ON(pud_none(*pud) || pud_bad(*pud)); + + return pmd_offset(pud, addr); +} + +static inline pte_t * __init early_ioremap_pte(unsigned long addr) +{ + pmd_t *pmd = early_ioremap_pmd(addr); + + BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); + + return pte_offset_kernel(pmd, addr); +} + +void __init early_ioremap_init(void) +{ + pmd_t *pmd; + + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); +#ifndef CONFIG_ARM64_64K_PAGES + /* need to populate pmd for 4k pagesize only */ + pmd_populate_kernel(&init_mm, pmd, bm_pte); +#endif + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmd %p != %p\n", + pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", + FIX_BTMAP_BEGIN); + } + + early_ioremap_setup(); +} + +void __init __early_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + pte = early_ioremap_pte(addr); + + if (pgprot_val(flags)) + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + else { + pte_clear(&init_mm, addr, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ba259a0e385e..6b7e89569a3a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -260,47 +260,6 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, } while (pgd++, addr = next, addr != end); } -#ifdef CONFIG_EARLY_PRINTK -/* - * Create an early I/O mapping using the pgd/pmd entries already populated - * in head.S as this function is called too early to allocated any memory. The - * mapping size is 2MB with 4KB pages or 64KB or 64KB pages. - */ -void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt) -{ - unsigned long size, mask; - bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES); - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - /* - * No early pte entries with !ARM64_64K_PAGES configuration, so using - * sections (pmd). - */ - size = page64k ? PAGE_SIZE : SECTION_SIZE; - mask = ~(size - 1); - - pgd = pgd_offset_k(virt); - pud = pud_offset(pgd, virt); - if (pud_none(*pud)) - return NULL; - pmd = pmd_offset(pud, virt); - - if (page64k) { - if (pmd_none(*pmd)) - return NULL; - pte = pte_offset_kernel(pmd, virt); - set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE)); - } else { - set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE)); - } - - return (void __iomem *)((virt & mask) + (phys & ~mask)); -} -#endif - static void __init map_mem(void) { struct memblock_region *reg; From 8b343c860c2eb637c24ca77dec244d1f1a7560a6 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Apr 2014 15:57:15 +0100 Subject: [PATCH 1046/1368] arm64: Clean up the default pgprot setting The primary aim of this patchset is to remove the pgprot_default and prot_sect_default global variables and rely strictly on predefined values. The original goal was to be able to run SMP kernels on UP hardware by not setting the Shareability bit. However, it is unlikely to see UP ARMv8 hardware and even if we do, the Shareability bit is no longer assumed to disable cacheable accesses. A side effect is that the device mappings now have the Shareability attribute set. The hardware, however, should ignore it since Device accesses are always Outer Shareable. Following the removal of the two global variables, there is some PROT_* macro reshuffling and cleanup, including the __PAGE_* macros (replaced by PAGE_*). Signed-off-by: Catalin Marinas Acked-by: Will Deacon (cherry picked from commit a501e32430d4232012ab708b8f0ce841f29e0f02) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/io.h arch/arm64/include/asm/pgtable.h --- arch/arm64/include/asm/io.h | 7 --- arch/arm64/include/asm/pgtable.h | 94 ++++++++++++++++---------------- arch/arm64/kernel/setup.c | 1 - arch/arm64/mm/dma-mapping.c | 2 +- arch/arm64/mm/mmu.c | 36 +----------- 5 files changed, 51 insertions(+), 89 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index b1ee8f0958ee..e1018b75c954 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -226,18 +226,11 @@ extern void __memset_io(volatile void __iomem *, int, size_t); extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot); extern void __iounmap(volatile void __iomem *addr); -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC)) - #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) #define iounmap __iounmap -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) -#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PTE_PXN | PTE_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) - #define ARCH_HAS_IOREMAP_WC #include diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 8af30ab8d3c2..ff13c70ea585 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -52,61 +52,60 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #endif #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) -/* - * The pgprot_* and protection_map entries will be fixed up at runtime to - * include the cachable and bufferable bits based on memory policy, as well as - * any architecture dependent bits like global/ASID and SMP shared mapping - * bits. - */ -#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF +#ifdef CONFIG_SMP +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#else +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF) +#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) +#endif -extern pgprot_t pgprot_default; +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) -#define __pgprot_modify(prot,mask,bits) \ - __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) +#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) +#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) -#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) +#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN) -#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) -#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) -#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define __PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) +#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) -#endif /* __ASSEMBLY__ */ +#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) +#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN) -#define __P000 __PAGE_NONE -#define __P001 __PAGE_READONLY -#define __P010 __PAGE_COPY -#define __P011 __PAGE_COPY -#define __P100 __PAGE_EXECONLY -#define __P101 __PAGE_READONLY_EXEC -#define __P110 __PAGE_COPY_EXEC -#define __P111 __PAGE_COPY_EXEC +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) -#define __S000 __PAGE_NONE -#define __S001 __PAGE_READONLY -#define __S010 __PAGE_SHARED -#define __S011 __PAGE_SHARED -#define __S100 __PAGE_EXECONLY -#define __S101 __PAGE_READONLY_EXEC -#define __S110 __PAGE_SHARED_EXEC -#define __S111 __PAGE_SHARED_EXEC +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_EXECONLY +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_EXECONLY +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC -#ifndef __ASSEMBLY__ /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. @@ -277,6 +276,9 @@ static inline int has_transparent_hugepage(void) return 1; } +#define __pgprot_modify(prot,mask,bits) \ + __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) + /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 329dc6607539..1e05fc993331 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -269,7 +269,6 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; - init_mem_pgprot(); early_ioremap_init(); parse_early_param(); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 86f0b9ef45ad..f39a55d58918 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -112,7 +112,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, for (i = 0; i < (size >> PAGE_SHIFT); i++) map[i] = page + i; coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP, - __get_dma_pgprot(attrs, pgprot_default, false)); + __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false)); kfree(map); if (!coherent_ptr) goto no_map; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6b7e89569a3a..28e56293ad4f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -43,11 +43,6 @@ struct page *empty_zero_page; EXPORT_SYMBOL(empty_zero_page); -pgprot_t pgprot_default; -EXPORT_SYMBOL(pgprot_default); - -static pmdval_t prot_sect_kernel; - struct cachepolicy { const char policy[16]; u64 mair; @@ -122,33 +117,6 @@ static int __init early_cachepolicy(char *p) } early_param("cachepolicy", early_cachepolicy); -/* - * Adjust the PMD section entries according to the CPU in use. - */ -void __init init_mem_pgprot(void) -{ - pteval_t default_pgprot; - int i; - - default_pgprot = PTE_ATTRINDX(MT_NORMAL); - prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | PMD_ATTRINDX(MT_NORMAL); - -#ifdef CONFIG_SMP - /* - * Mark memory with the "shared" attribute for SMP systems - */ - default_pgprot |= PTE_SHARED; - prot_sect_kernel |= PMD_SECT_S; -#endif - - for (i = 0; i < 16; i++) { - unsigned long v = pgprot_val(protection_map[i]); - protection_map[i] = __pgprot(v | default_pgprot); - } - - pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot); -} - pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -205,7 +173,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | prot_sect_kernel)); + set_pmd(pmd, __pmd(phys | PROT_SECT_NORMAL_EXEC)); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -414,7 +382,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!p) return -ENOMEM; - set_pmd(pmd, __pmd(__pa(p) | prot_sect_kernel)); + set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); } else vmemmap_verify((pte_t *)pmd, node, addr, next); } while (addr = next, addr != end); From 3698e46b6e5bcd43d7b2faad9e77a015a3495011 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Mon, 26 May 2014 16:31:57 +0800 Subject: [PATCH 1047/1368] Revert "arm64: init: Move of_clk_init to time_init" This reverts commit 638b6642b041f83802ea5d7ca68b45ce508bbc5c. Since time is close to 14.05 release, we revert this commit for a quick fix to clock missing bug on armv8: [ 0.000000] Hierarchical RCU implementation. [ 0.000000] NR_IRQS:64 nr_irqs:64 0 [ 0.000000] vexpress-osc: Failed to obtain config func for node '/smb/motherboard/mcc/osc@1'! Signed-off-by: Alex Shi --- arch/arm64/kernel/setup.c | 1 + arch/arm64/kernel/time.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 1e05fc993331..5fdfc0255953 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -299,6 +299,7 @@ void __init setup_arch(char **cmdline_p) static int __init arm64_device_init(void) { + of_clk_init(NULL); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index dbe1c8d8e90f..03dc3718eb13 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -33,7 +33,6 @@ #include #include #include -#include #include @@ -73,7 +72,6 @@ void __init time_init(void) { u32 arch_timer_rate; - of_clk_init(NULL); clocksource_of_init(); arch_timer_rate = arch_timer_get_rate(); From 5af70c99ecd544f98f84c4d369d8545fe221dca7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 16 May 2014 16:44:32 +0100 Subject: [PATCH 1048/1368] Revert "arm64: Introduce execute-only page access permissions" This reverts commit bc07c2c6e9ed125d362af0214b6313dca180cb08. While the aim is increased security for --x memory maps, it does not protect against kernel level reads. Until SECCOMP is implemented for arm64, revert this patch to avoid giving a false idea of execute-only mappings. Signed-off-by: Catalin Marinas (cherry picked from commit 5a0fdfada3a2aa50d7b947a2e958bf00cbe0d830) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 11 +++++------ arch/arm64/mm/fault.c | 5 +++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ff13c70ea585..b3f6193036a8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -86,13 +86,12 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_COPY #define __P011 PAGE_COPY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_COPY_EXEC #define __P111 PAGE_COPY_EXEC @@ -101,7 +100,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC @@ -137,8 +136,8 @@ extern struct page *empty_zero_page; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) -#define pte_valid_ng(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_NG)) == (PTE_VALID | PTE_NG)) +#define pte_valid_user(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) static inline pte_t pte_wrprotect(pte_t pte) { @@ -192,7 +191,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_valid_ng(pte)) { + if (pte_valid_user(pte)) { if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); if (pte_dirty(pte) && pte_write(pte)) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c1b42273c8fa..df4f2fd187c3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -173,7 +173,8 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, good_area: /* * Check that the permissions on the VMA allow for the fault which - * occurred. + * occurred. If we encountered a write or exec fault, we must have + * appropriate permissions, otherwise we allow any permission. */ if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; @@ -195,7 +196,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (esr & ESR_LNX_EXEC) { From 8c800e8ee95a1396901f694a8d9dcf880efdd43c Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 8 May 2014 15:48:13 +0000 Subject: [PATCH 1049/1368] arm64: introduce virt_to_pfn virt_to_pfn has been defined in arch/arm/include/asm/memory.h by commit e26a9e0 "ARM: Better virt_to_page() handling" and Xen has come to rely on it. Introduce virt_to_pfn on arm64 too. Signed-off-by: Stefano Stabellini Acked-by: Catalin Marinas (cherry picked from commit 1f53ba6e81749a420226e5502c49ab83ba85c81d) Signed-off-by: Mark Brown --- arch/arm64/include/asm/memory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 83138d2fb90d..212ded1662bf 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -132,6 +132,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define __pa(x) __virt_to_phys((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys(x)) /* * virt_to_page(k) convert a _valid_ virtual address to struct page * From 4d1e99cd1602a17035f53d6e420e4c18f4125a3b Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Tue, 24 Sep 2013 10:00:50 +0100 Subject: [PATCH 1050/1368] arm64: avoid multiple evaluation of ptr in get_user/put_user() get_user() is defined as a function macro in arm64, and trace_get_user() calls it as followed: get_user(ch, ptr++); Since the second parameter occurs twice in the definition, 'ptr++' is unexpectedly evaluated twice and trace_get_user() will generate a bogus string from user-provided one. As a result, some ftrace sysfs operations, like "echo FUNCNAME > set_ftrace_filter," hit this case and eventually fail. This patch fixes the issue both in get_user() and put_user(). Signed-off-by: AKASHI Takahiro [catalin.marinas@arm.com: added __user type annotation and s/optr/__p/] Signed-off-by: Catalin Marinas (cherry picked from commit 1f65c13efef69b6dc908e588f91a133641d8475c) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/uaccess.h --- arch/arm64/include/asm/uaccess.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8e8df0135f41..3bf8f4e99a51 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -167,9 +167,10 @@ do { \ #define get_user(x, ptr) \ ({ \ - might_sleep(); \ - access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ? \ - __get_user((x), (ptr)) : \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ + __get_user((x), __p) : \ ((x) = 0, -EFAULT); \ }) @@ -228,9 +229,10 @@ do { \ #define put_user(x, ptr) \ ({ \ - might_sleep(); \ - access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \ - __put_user((x), (ptr)) : \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \ + __put_user((x), __p) : \ -EFAULT; \ }) From 454c83cd38dfab90b5892e10e67a7a206827ac64 Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 26 Mar 2014 12:19:06 +0000 Subject: [PATCH 1051/1368] arm64: dts: Add more serial port nodes in APM X-Gene device tree APM X-Gene Storm SoC supports 4 serial ports. This patch adds device nodes for serial ports 1 to 3 (a device node for serial port 0 is already present in the dts file). This patch also sets the compatible property of serial nodes to "ns16550a". Signed-off-by: Vinayak Kale Acked-by: Rob Herring Signed-off-by: Catalin Marinas (cherry picked from commit 457ced8458605f1935214289d44aabb80bf75756) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/apm-mustang.dts | 4 ++++ arch/arm64/boot/dts/apm-storm.dtsi | 36 ++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts index 1247ca1200b1..6541962f5d70 100644 --- a/arch/arm64/boot/dts/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm-mustang.dts @@ -24,3 +24,7 @@ memory { reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */ }; }; + +&serial0 { + status = "ok"; +}; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index 4917f3b81a44..42edf193d084 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -273,8 +273,9 @@ rtcclk: rtcclk@17000000 { }; serial0: serial@1c020000 { + status = "disabled"; device_type = "serial"; - compatible = "ns16550"; + compatible = "ns16550a"; reg = <0 0x1c020000 0x0 0x1000>; reg-shift = <2>; clock-frequency = <10000000>; /* Updated by bootloader */ @@ -282,6 +283,39 @@ serial0: serial@1c020000 { interrupts = <0x0 0x4c 0x4>; }; + serial1: serial@1c021000 { + status = "disabled"; + device_type = "serial"; + compatible = "ns16550a"; + reg = <0 0x1c021000 0x0 0x1000>; + reg-shift = <2>; + clock-frequency = <10000000>; /* Updated by bootloader */ + interrupt-parent = <&gic>; + interrupts = <0x0 0x4d 0x4>; + }; + + serial2: serial@1c022000 { + status = "disabled"; + device_type = "serial"; + compatible = "ns16550a"; + reg = <0 0x1c022000 0x0 0x1000>; + reg-shift = <2>; + clock-frequency = <10000000>; /* Updated by bootloader */ + interrupt-parent = <&gic>; + interrupts = <0x0 0x4e 0x4>; + }; + + serial3: serial@1c023000 { + status = "disabled"; + device_type = "serial"; + compatible = "ns16550a"; + reg = <0 0x1c023000 0x0 0x1000>; + reg-shift = <2>; + clock-frequency = <10000000>; /* Updated by bootloader */ + interrupt-parent = <&gic>; + interrupts = <0x0 0x4f 0x4>; + }; + phy1: phy@1f21a000 { compatible = "apm,xgene-phy"; reg = <0x0 0x1f21a000 0x0 0x100>; From 8a0ac656b3e2f6a69dc737f6f77b948868bc0efb Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 17 Jun 2014 18:14:26 +0100 Subject: [PATCH 1052/1368] arm64: export __cpu_{clear,copy}_user_page functions The __cpu_clear_user_page() and __cpu_copy_user_page() functions are not currently exported. This prevents modules from using clear_user_page() and copy_user_page(). Signed-off-by: Mark Salter Signed-off-by: Catalin Marinas (cherry picked from commit bec7cedc8a92bfe96d32febe72634b30c63896bd) Signed-off-by: Mark Brown --- arch/arm64/mm/copypage.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 9aecbace4128..13bbc3be6f5a 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -27,8 +27,10 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) copy_page(kto, kfrom); __flush_dcache_area(kto, PAGE_SIZE); } +EXPORT_SYMBOL_GPL(__cpu_copy_user_page); void __cpu_clear_user_page(void *kaddr, unsigned long vaddr) { clear_page(kaddr); } +EXPORT_SYMBOL_GPL(__cpu_clear_user_page); From 55ae4c39fcb908a8350809606cd1fb6831abc27e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 24 Jun 2014 16:51:34 +0100 Subject: [PATCH 1053/1368] arm64: head.S: remove unnecessary function alignment Currently __turn_mmu_on is aligned to 64 bytes to ensure that it doesn't span any page boundary, which simplifies the idmap and spares us requiring an additional page table to map half of the function. In keeping with other important requirements in architecture code, this fact is undocumented. Additionally, as the function consists of three instructions totalling 12 bytes with no literal pool data, a smaller alignment of 16 bytes would be sufficient. This patch reduces the alignment to 16 bytes and documents the underlying reason for the alignment. This reduces the required alignment of the entire .head.text section from 64 bytes to 16 bytes, though it may still be aligned to a larger value depending on TEXT_OFFSET. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 909a4069da65a5cfca8c968edf9f0d99f694d2f3) Signed-off-by: Mark Brown --- arch/arm64/kernel/head.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 1445f7869cf3..751ccd0a9105 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -312,8 +312,13 @@ ENDPROC(__enable_mmu) * x27 = *virtual* address to jump to upon completion * * other registers depend on the function called upon completion + * + * We align the entire function to the smallest power of two larger than it to + * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET + * close to the end of a 512MB or 1GB block we might require an additional + * table to map the entire function. */ - .align 6 + .align 4 __turn_mmu_on: msr sctlr_el1, x0 isb From a39a796dd388d14a65d26753a3430be66a100265 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 15 Jul 2014 15:46:02 +0100 Subject: [PATCH 1054/1368] arm64: Remove duplicate (SWAPPER|IDMAP)_DIR_SIZE definitions Just keep the asm/page.h definition as this is included in vmlinux.lds.S as well. Signed-off-by: Catalin Marinas Acked-by: Mark Rutland (cherry picked from commit b2f8c07bcb7d1a3575f41444d2d8048d0c922762) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b3f6193036a8..d786b8940bbe 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -376,9 +376,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) From b44e79e0d891a097da4328538f4722c8fb581c3b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Jul 2014 19:22:11 +0100 Subject: [PATCH 1055/1368] arm64: vdso: put vdso datapage in a separate vma The VDSO datapage doesn't need to be executable (no code there) or CoW-able (the kernel writes the page, so a private copy is totally useless). This patch moves the datapage into its own VMA, identified as "[vvar]" in /proc//maps. Cc: Andy Lutomirski Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 8715493852783358ef8656a0054a14bf822509cf) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 10c27f5f6d23..40ba5dea2ed7 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -135,11 +135,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_mapping_len; + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; int ret; + vdso_text_len = vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ - vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT; + vdso_mapping_len = vdso_text_len + PAGE_SIZE; down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); @@ -149,35 +150,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, } mm->context.vdso = (void *)vdso_base; - ret = install_special_mapping(mm, vdso_base, vdso_mapping_len, + ret = install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); - if (ret) { - mm->context.vdso = NULL; + if (ret) goto up_fail; - } + + vdso_base += vdso_text_len; + ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + vdso_pagelist + vdso_pages); + if (ret) + goto up_fail; + + up_write(&mm->mmap_sem); + return 0; up_fail: + mm->context.vdso = NULL; up_write(&mm->mmap_sem); - return ret; } const char *arch_vma_name(struct vm_area_struct *vma) { + unsigned long vdso_text; + + if (!vma->vm_mm) + return NULL; + + vdso_text = (unsigned long)vma->vm_mm->context.vdso; + /* * We can re-use the vdso pointer in mm_context_t for identifying * the vectors page for compat applications. The vDSO will always * sit above TASK_UNMAPPED_BASE and so we don't need to worry about * it conflicting with the vectors base. */ - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) { + if (vma->vm_start == vdso_text) { #ifdef CONFIG_COMPAT if (vma->vm_start == AARCH32_VECTORS_BASE) return "[vectors]"; #endif return "[vdso]"; + } else if (vma->vm_start == (vdso_text + (vdso_pages << PAGE_SHIFT))) { + return "[vvar]"; } return NULL; From 099776d20c5a1801cdf413fdec34e5ee29b761f3 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 15 Jul 2014 08:38:08 +0100 Subject: [PATCH 1056/1368] arm64: Align the kbuild output for VDSOL and VDSOA Signed-off-by: Ian Campbell Cc: Catalin Marinas Cc: Will Deacon Cc: Michal Marek Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kbuild@vger.kernel.org Signed-off-by: Catalin Marinas (cherry picked from commit ad789ba5f7086138461420d2156478d33fb61077) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 6d20b7d162d8..84b942612051 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -47,9 +47,9 @@ $(obj-vdso): %.o: %.S $(call if_changed_dep,vdsoas) # Actual build commands -quiet_cmd_vdsold = VDSOL $@ +quiet_cmd_vdsold = VDSOL $@ cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ -quiet_cmd_vdsoas = VDSOA $@ +quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< # Install commands for the unstripped file From c4885474b72e7b4034156eb9e6868fa6f471efbf Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:16 +0100 Subject: [PATCH 1057/1368] arm64: head: create a new function for setting the boot_cpu_mode flag Currently, the code for setting the __cpu_boot_mode flag is munged in with el2_setup. This makes things difficult on a BE bringup as a memory access has to have occurred before el2_setup which is the place that we'd like to set the endianess on the current EL. Create a new function for setting __cpu_boot_mode and have el2_setup return the mode the CPU. Also define a new constant in virt.h, BOOT_CPU_MODE_EL1, for readability. Acked-by: Marc Zyngier Acked-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit 828e9834e9a5b7e61046aa3c5f603a4fecba2fb4) Signed-off-by: Mark Brown --- arch/arm64/include/asm/virt.h | 3 ++- arch/arm64/kernel/head.S | 34 +++++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 439827271e3d..215ad4649dd7 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -18,7 +18,8 @@ #ifndef __ASM__VIRT_H #define __ASM__VIRT_H -#define BOOT_CPU_MODE_EL2 (0x0e12b007) +#define BOOT_CPU_MODE_EL1 (0xe11) +#define BOOT_CPU_MODE_EL2 (0xe12) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 751ccd0a9105..669027367e7a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -123,8 +123,9 @@ ENTRY(stext) mov x21, x0 // x21=FDT + bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET - bl el2_setup // Drop to EL1 + bl set_cpu_boot_mode_flag mrs x22, midr_el1 // x22=cpuid mov x0, x22 bl lookup_processor_type @@ -150,21 +151,20 @@ ENDPROC(stext) /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode - add x0, x0, x28 b.eq 1f - str wzr, [x0] // Remember we don't have EL2... + mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 ret /* Hyp configuration. */ -1: ldr w1, =BOOT_CPU_MODE_EL2 - str w1, [x0, #4] // This CPU has EL2 - mov x0, #(1 << 31) // 64-bit EL1 +1: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -204,9 +204,24 @@ ENTRY(el2_setup) PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) +/* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) + ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode + add x1, x1, x28 + cmp w20, #BOOT_CPU_MODE_EL2 + b.ne 1f + add x1, x1, #4 +1: str w20, [x1] // This CPU has booted in EL1 + ret +ENDPROC(set_cpu_boot_mode_flag) + /* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. @@ -234,8 +249,9 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl __calc_phys_offset // x24=phys offset - bl el2_setup // Drop to EL1 + bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 From 83c5fe50e10a327e7f460f0c145aa65d9fa96b73 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 26 Mar 2014 18:25:55 +0000 Subject: [PATCH 1058/1368] arm64: Relax the kernel cache requirements for boot With system caches for the host OS or architected caches for guest OS we cannot easily guarantee that there are no dirty or stale cache lines for the areas of memory written by the kernel during boot with the MMU off (therefore non-cacheable accesses). This patch adds the necessary cache maintenance during boot and relaxes the booting requirements. Signed-off-by: Catalin Marinas (cherry picked from commit c218bca74eeafa2f8528b6bbb34d112075fcf40a) Signed-off-by: Mark Brown Conflicts: arch/arm64/kernel/head.S --- Documentation/arm64/booting.txt | 10 ++++++++-- arch/arm64/kernel/head.S | 30 ++++++++++++++++++++++++++++-- arch/arm64/mm/cache.S | 9 +++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 5273c4d60e65..1b0c968098aa 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -111,8 +111,14 @@ Before jumping into the kernel, the following conditions must be met: - Caches, MMUs The MMU must be off. Instruction cache may be on or off. - Data cache must be off and invalidated. - External caches (if present) must be configured and disabled. + The address range corresponding to the loaded kernel image must be + cleaned to the PoC. In the presence of a system cache or other + coherent masters with caches enabled, this will typically require + cache maintenance by VA rather than set/way operations. + System caches which respect the architected cache maintenance by VA + operations must be configured and may be enabled. + System caches which do not respect architected cache maintenance by VA + operations (not recommended) must be configured and disabled. - Architected timers CNTFRQ must be programmed with the timer frequency. diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 669027367e7a..30746fc16f81 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -218,7 +219,11 @@ ENTRY(set_cpu_boot_mode_flag) cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 -1: str w20, [x1] // This CPU has booted in EL1 +1: dc cvac, x1 // Clean potentially dirty cache line + dsb sy + str w20, [x1] // This CPU has booted in EL1 + dc civac, x1 // Clean&invalidate potentially stale cache line + dsb sy ret ENDPROC(set_cpu_boot_mode_flag) @@ -229,8 +234,9 @@ ENDPROC(set_cpu_boot_mode_flag) * This is not in .bss, because we set it sufficiently early that the boot-time * zeroing of .bss would clobber it. */ - .pushsection .data + .pushsection .data..cacheline_aligned ENTRY(__boot_cpu_mode) + .align L1_CACHE_SHIFT .long BOOT_CPU_MODE_EL2 .long 0 .popsection @@ -401,6 +407,15 @@ ENDPROC(__calc_phys_offset) */ __create_page_tables: pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range /* * Clear the idmap and swapper page tables. @@ -460,6 +475,17 @@ __create_page_tables: ldr x5, =FIXADDR_TOP // Fixed mapping virtual address add x0, x26, #2 * PAGE_SIZE // section table address create_pgd_entry x26, x0, x5, x6, x7 + + /* + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 ret ENDPROC(__create_page_tables) .ltorg diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 0e379c44544b..fda756875fa6 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -167,6 +167,14 @@ ENTRY(__flush_dcache_area) ret ENDPROC(__flush_dcache_area) +/* + * __inval_cache_range(start, end) + * - start - start address of region + * - end - end address of region + */ +ENTRY(__inval_cache_range) + /* FALLTHROUGH */ + /* * __dma_inv_range(start, end) * - start - virtual start address of region @@ -190,6 +198,7 @@ __dma_inv_range: b.lo 2b dsb sy ret +ENDPROC(__inval_cache_range) ENDPROC(__dma_inv_range) /* From 0c0ec28f520fb65ab9439d458cd4f2fab8753063 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 24 Jun 2014 16:51:35 +0100 Subject: [PATCH 1059/1368] arm64: place initial page tables above the kernel Currently we place swapper_pg_dir and idmap_pg_dir below the kernel image, between PHYS_OFFSET and (PHYS_OFFSET + TEXT_OFFSET). However, bootloaders may use portions of this memory below the kernel and we do not parse the memory reservation list until after the MMU has been enabled. As such we may clobber some memory a bootloader wishes to have preserved. To enable the use of all of this memory by bootloaders (when the required memory reservations are communicated to the kernel) it is necessary to move our initial page tables elsewhere. As we currently have an effectively unbound requirement for memory at the end of the kernel image for .bss, we can place the page tables here. This patch moves the initial page table to the end of the kernel image, after the BSS. As they do not consist of any initialised data they will be stripped from the kernel Image as with the BSS. The BSS clearing routine is updated to stop at __bss_stop rather than _end so as to not clobber the page tables, and memory reservations made redundant by the new organisation are removed. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit bd00cd5f8c8c3c282bb1e1eac6a6679a4f808091) Signed-off-by: Mark Brown Conflicts: arch/arm64/mm/init.c --- arch/arm64/include/asm/page.h | 9 +++++++++ arch/arm64/kernel/head.S | 28 ++++++++-------------------- arch/arm64/kernel/vmlinux.lds.S | 7 +++++++ arch/arm64/mm/init.c | 12 ++++-------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 46bf66628b6a..a6331e6a92b5 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -31,6 +31,15 @@ /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ #define __HAVE_ARCH_GATE_AREA 1 +/* + * The idmap and swapper page tables need some space reserved in the kernel + * image. The idmap only requires a pgd and a next level table to (section) map + * the kernel, while the swapper also maps the FDT and requires an additional + * table to map an early UART. See __create_page_tables for more information. + */ +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) + #ifndef __ASSEMBLY__ #ifdef CONFIG_ARM64_64K_PAGES diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 30746fc16f81..39a8a83f1883 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -35,29 +35,17 @@ #include #include -/* - * swapper_pg_dir is the virtual address of the initial page table. We place - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has - * 2 pages and is placed below swapper_pg_dir. - */ #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) #if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 #error KERNEL_RAM_VADDR must start at 0xXXX80000 #endif -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE - - .globl idmap_pg_dir - .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE - - .macro pgtbl, ttb0, ttb1, phys - add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE - sub \ttb0, \ttb1, #IDMAP_DIR_SIZE + .macro pgtbl, ttb0, ttb1, virt_to_phys + ldr \ttb1, =swapper_pg_dir + ldr \ttb0, =idmap_pg_dir + add \ttb1, \ttb1, \virt_to_phys + add \ttb0, \ttb0, \virt_to_phys .endm #ifdef CONFIG_ARM64_64K_PAGES @@ -292,7 +280,7 @@ ENTRY(secondary_startup) mov x23, x0 // x23=current cpu_table cbz x23, __error_p // invalid processor (x23=0)? - pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1 + pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys blr x12 // initialise processor @@ -406,7 +394,7 @@ ENDPROC(__calc_phys_offset) * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses mov x27, lr /* @@ -495,7 +483,7 @@ ENDPROC(__create_page_tables) __switch_data: .quad __mmap_switched .quad __bss_start // x6 - .quad _end // x7 + .quad __bss_stop // x7 .quad processor_id // x4 .quad __fdt_pointer // x5 .quad memstart_addr // x6 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 18a08e10357f..ce2d97255ba9 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -90,6 +90,13 @@ SECTIONS _edata = .; BSS_SECTION(0, 0, 0) + + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + swapper_pg_dir = .; + . += SWAPPER_DIR_SIZE; + _end = .; STABS_DEBUG diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 5c47534fe47d..52806427e15d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -136,7 +136,10 @@ void __init arm64_memblock_init(void) { u64 *reserve_map, base, size; - /* Register the kernel text, kernel data and initrd with memblock */ + /* + * Register the kernel text, kernel data, initrd, and initial + * pagetables with memblock. + */ memblock_reserve(__pa(_text), _end - _text); #ifdef CONFIG_BLK_DEV_INITRD if (phys_initrd_size) { @@ -148,13 +151,6 @@ void __init arm64_memblock_init(void) } #endif - /* - * Reserve the page tables. These are already in use, - * and can only be in node 0. - */ - memblock_reserve(__pa(swapper_pg_dir), SWAPPER_DIR_SIZE); - memblock_reserve(__pa(idmap_pg_dir), IDMAP_DIR_SIZE); - /* Reserve the dtb region */ memblock_reserve(virt_to_phys(initial_boot_params), be32_to_cpu(initial_boot_params->totalsize)); From 61c80b44d5fac78b1c319b3dd138fb0e0dc80907 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Wed, 12 Mar 2014 12:28:06 -0400 Subject: [PATCH 1060/1368] arm64: Add function to create identity mappings At boot time, before switching to a virtual UEFI memory map, firmware expects UEFI memory and IO regions to be identity mapped whenever kernel makes runtime services calls. The existing early boot code creates an identity map of kernel text/data but this is not sufficient for UEFI. This patch adds a create_id_mapping() function which reuses the core code of the existing create_mapping(). Signed-off-by: Mark Salter [ Fixed error message formatting (%pa). ] Signed-off-by: Mark Rutland Signed-off-by: Leif Lindholm Acked-by: Catalin Marinas Signed-off-by: Matt Fleming (cherry picked from commit d7ecbddf4caefbac1b99478dd2b679f83dfc2545) Signed-off-by: Mark Brown Conflicts: arch/arm64/mm/mmu.c --- arch/arm64/include/asm/mmu.h | 2 ++ arch/arm64/mm/mmu.c | 65 ++++++++++++++++++++++++++---------- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index aff0292c8f4d..c2f006c48bdb 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -31,5 +31,7 @@ extern void paging_init(void); extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); +/* create an identity mapping for memory (or io if map_io is true) */ +extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io); #endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 28e56293ad4f..37d02a863c0e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -136,7 +136,8 @@ static void __init *early_alloc(unsigned long sz) } static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long pfn) + unsigned long end, unsigned long pfn, + pgprot_t prot) { pte_t *pte; @@ -148,16 +149,28 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, pte = pte_offset_kernel(pmd, addr); do { - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + set_pte(pte, pfn_pte(pfn, prot)); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); } static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys) + unsigned long end, phys_addr_t phys, + int map_io) { pmd_t *pmd; unsigned long next; + pmdval_t prot_sect; + pgprot_t prot_pte; + + if (map_io) { + prot_sect = PMD_TYPE_SECT | PMD_SECT_AF | + PMD_ATTRINDX(MT_DEVICE_nGnRE); + prot_pte = __pgprot(PROT_DEVICE_nGnRE); + } else { + prot_sect = prot_sect_kernel; + prot_pte = PAGE_KERNEL_EXEC; + } /* * Check for initial section mappings in the pgd/pud and remove them. @@ -173,7 +186,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | PROT_SECT_NORMAL_EXEC)); + set_pmd(pmd, __pmd(phys | prot_sect)); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -181,21 +194,23 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, if (!pmd_none(old_pmd)) flush_tlb_all(); } else { - alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys)); + alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), + prot_pte); } phys += next - addr; } while (pmd++, addr = next, addr != end); } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys) + unsigned long end, unsigned long phys, + int map_io) { pud_t *pud = pud_offset(pgd, addr); unsigned long next; do { next = pud_addr_end(addr, end); - alloc_init_pmd(pud, addr, next, phys); + alloc_init_pmd(pud, addr, next, phys, map_io); phys += next - addr; } while (pud++, addr = next, addr != end); } @@ -204,30 +219,44 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __init create_mapping(phys_addr_t phys, unsigned long virt, - phys_addr_t size) +static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + int map_io) { unsigned long addr, length, end, next; - pgd_t *pgd; - - if (virt < VMALLOC_START) { - pr_warning("BUG: not creating mapping for 0x%016llx at 0x%016lx - outside kernel range\n", - phys, virt); - return; - } addr = virt & PAGE_MASK; length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); - pgd = pgd_offset_k(addr); end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys); + alloc_init_pud(pgd, addr, next, phys, map_io); phys += next - addr; } while (pgd++, addr = next, addr != end); } +static void __init create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size) +{ + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0); +} + +void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) +{ + if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) { + pr_warn("BUG: not creating id mapping for %pa\n", &addr); + return; + } + __create_mapping(&idmap_pg_dir[pgd_index(addr)], + addr, addr, size, map_io); +} + static void __init map_mem(void) { struct memblock_region *reg; From 21b549b0dd52728a10687a233ef8266aa4d4c8a1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Apr 2014 15:57:15 +0100 Subject: [PATCH 1061/1368] arm64: Clean up the default pgprot setting The primary aim of this patchset is to remove the pgprot_default and prot_sect_default global variables and rely strictly on predefined values. The original goal was to be able to run SMP kernels on UP hardware by not setting the Shareability bit. However, it is unlikely to see UP ARMv8 hardware and even if we do, the Shareability bit is no longer assumed to disable cacheable accesses. A side effect is that the device mappings now have the Shareability attribute set. The hardware, however, should ignore it since Device accesses are always Outer Shareable. Following the removal of the two global variables, there is some PROT_* macro reshuffling and cleanup, including the __PAGE_* macros (replaced by PAGE_*). Signed-off-by: Catalin Marinas Acked-by: Will Deacon (cherry picked from commit a501e32430d4232012ab708b8f0ce841f29e0f02) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/io.h arch/arm64/include/asm/pgtable.h arch/arm64/mm/mmu.c --- arch/arm64/include/asm/io.h | 1 + arch/arm64/include/asm/pgtable.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index e1018b75c954..732e3d51c2cb 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -225,6 +225,7 @@ extern void __memset_io(volatile void __iomem *, int, size_t); */ extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot); extern void __iounmap(volatile void __iomem *addr); +extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d786b8940bbe..56336f19b60d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -86,12 +86,14 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_COPY #define __P011 PAGE_COPY #define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_COPY_EXEC #define __P111 PAGE_COPY_EXEC @@ -101,6 +103,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED #define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC From 39cbf100c68a8eb833aa164eb925460955617b73 Mon Sep 17 00:00:00 2001 From: Dave Anderson Date: Tue, 15 Apr 2014 18:53:24 +0100 Subject: [PATCH 1062/1368] arm64: Fix for the arm64 kern_addr_valid() function Fix for the arm64 kern_addr_valid() function to recognize virtual addresses in the kernel logical memory map. The function fails as written because it does not check whether the addresses in that region are mapped at the pmd level to 2MB or 512MB pages, continues the page table walk to the pte level, and issues a garbage value to pfn_valid(). Tested on 4K-page and 64K-page kernels. Signed-off-by: Dave Anderson Signed-off-by: Catalin Marinas (cherry picked from commit da6e4cb67c6dd1f72257c0a4a97c26dc4e80d3a7) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 3 --- arch/arm64/mm/mmu.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 56336f19b60d..d786b8940bbe 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -86,14 +86,12 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_COPY #define __P011 PAGE_COPY #define __P100 PAGE_READONLY_EXEC -#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_COPY_EXEC #define __P111 PAGE_COPY_EXEC @@ -103,7 +101,6 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED #define __S100 PAGE_READONLY_EXEC -#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 37d02a863c0e..639dca61ad7a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -371,6 +371,9 @@ int kern_addr_valid(unsigned long addr) if (pmd_none(*pmd)) return 0; + if (pmd_sect(*pmd)) + return pfn_valid(pmd_pfn(*pmd)); + pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) return 0; From 20f2b5cafcd9af698faa789daccc55c7062dcc68 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Dec 2012 18:35:41 +0000 Subject: [PATCH 1063/1368] arm64: KVM: define HYP and Stage-2 translation page flags Add HYP and S2 page flags, for both normal and device memory. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 363116073a26dbc2903d8417047597eebcc05273) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/pgtable-hwdef.h arch/arm64/include/asm/pgtable.h --- arch/arm64/include/asm/memory.h | 6 ++++++ arch/arm64/include/asm/pgtable-hwdef.h | 18 ++++++++++++++++++ arch/arm64/include/asm/pgtable.h | 6 ++++++ 3 files changed, 30 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 212ded1662bf..8b656af942c6 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -95,6 +95,12 @@ #define MT_NORMAL_NC 3 #define MT_NORMAL 4 +/* + * Memory types for Stage-2 translation + */ +#define MT_S2_NORMAL 0xf +#define MT_S2_DEVICE_nGnRE 0x1 + #ifndef __ASSEMBLY__ extern phys_addr_t memstart_addr; diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d25991747650..2e9d83673ef6 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -79,6 +79,24 @@ #define PTE_ATTRINDX(t) (_AT(pteval_t, (t)) << 2) #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) +/* + * 2nd stage PTE definitions + */ +#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ +#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ + +/* + * Memory Attribute override for Stage-2 (MemAttr[3:0]) + */ +#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) +#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2) + +/* + * EL2/HYP PTE/PMD definitions + */ +#define PMD_HYP PMD_SECT_USER +#define PTE_HYP PTE_USER + /* * 40-bit physical address supported. */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d786b8940bbe..b2e7867e3290 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -297,6 +297,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) +#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ + PMD_TYPE_TABLE) +#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ + PMD_TYPE_SECT) + + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; From fb0399cdbf68d79fe31c32ac6169bff0f048d2e9 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 1 Jul 2013 14:20:35 -0400 Subject: [PATCH 1064/1368] of: Specify initrd location using 64-bit On some PAE architectures, the entire range of physical memory could reside outside the 32-bit limit. These systems need the ability to specify the initrd location using 64-bit numbers. This patch globally modifies the early_init_dt_setup_initrd_arch() function to use 64-bit numbers instead of the current unsigned long. There has been quite a bit of debate about whether to use u64 or phys_addr_t. It was concluded to stick to u64 to be consistent with rest of the device tree code. As summarized by Geert, "The address to load the initrd is decided by the bootloader/user and set at that point later in time. The dtb should not be tied to the kernel you are booting" More details on the discussion can be found here: https://lkml.org/lkml/2013/6/20/690 https://lkml.org/lkml/2012/9/13/544 Signed-off-by: Santosh Shilimkar Acked-by: Rob Herring Acked-by: Vineet Gupta Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Grant Likely (cherry picked from commit 374d5c9964c10373ba39bbe934f4262eb87d7114) Signed-off-by: Mark Brown --- arch/arc/mm/init.c | 5 ++--- arch/arm/mm/init.c | 2 +- arch/arm64/mm/init.c | 3 +-- arch/c6x/kernel/devicetree.c | 3 +-- arch/metag/mm/init.c | 5 ++--- arch/microblaze/kernel/prom.c | 3 +-- arch/mips/kernel/prom.c | 3 +-- arch/openrisc/kernel/prom.c | 3 +-- arch/powerpc/kernel/prom.c | 3 +-- arch/x86/kernel/devicetree.c | 3 +-- arch/xtensa/kernel/setup.c | 3 +-- drivers/of/fdt.c | 10 ++++++---- include/linux/of_fdt.h | 3 +-- 13 files changed, 20 insertions(+), 29 deletions(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 4a177365b2c4..7991e08d606b 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -157,9 +157,8 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { - pr_err("%s(%lx, %lx)\n", __func__, start, end); + pr_err("%s(%llx, %llx)\n", __func__, start, end); } #endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9a5cdc01fcdf..afeaef7a8ffc 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -76,7 +76,7 @@ static int __init parse_tag_initrd2(const struct tag *tag) __tagtable(ATAG_INITRD2, parse_tag_initrd2); #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { phys_initrd_start = start; phys_initrd_size = end - start; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca77925a..704770891d06 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -44,8 +44,7 @@ static unsigned long phys_initrd_size __initdata = 0; phys_addr_t memstart_addr __read_mostly = 0; -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { phys_initrd_start = start; phys_initrd_size = end - start; diff --git a/arch/c6x/kernel/devicetree.c b/arch/c6x/kernel/devicetree.c index bdb56f09d0ac..287d0e64dfba 100644 --- a/arch/c6x/kernel/devicetree.c +++ b/arch/c6x/kernel/devicetree.c @@ -33,8 +33,7 @@ void __init early_init_devtree(void *params) #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index d05b8455c44c..bdc48111f0df 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -419,10 +419,9 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { - pr_err("%s(%lx, %lx)\n", + pr_err("%s(%llx, %llx)\n", __func__, start, end); } #endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 0a2c68f9f9b0..62e2e8f2c5d6 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -136,8 +136,7 @@ void __init early_init_devtree(void *params) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 5712bb532245..32b87882ac87 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -58,8 +58,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c index 5869e3fa5dd3..150215a91711 100644 --- a/arch/openrisc/kernel/prom.c +++ b/arch/openrisc/kernel/prom.c @@ -96,8 +96,7 @@ void __init early_init_devtree(void *params) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce2..2f3e25225158 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -550,8 +550,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index b1581527a236..2fbad6b9f23c 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -52,8 +52,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 6dd25ecde3f5..d45e602f4328 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -170,8 +170,7 @@ static int __init parse_tag_fdt(const bp_tag_t *tag) __tagtable(BP_TAG_FDT, parse_tag_fdt); -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (void *)__va(start); initrd_end = (void *)__va(end); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 808be06bb67e..21123b8ee4d4 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -550,7 +550,8 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat) */ void __init early_init_dt_check_for_initrd(unsigned long node) { - unsigned long start, end, len; + u64 start, end; + unsigned long len; __be32 *prop; pr_debug("Looking for initrd properties... "); @@ -558,15 +559,16 @@ void __init early_init_dt_check_for_initrd(unsigned long node) prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len); if (!prop) return; - start = of_read_ulong(prop, len/4); + start = of_read_number(prop, len/4); prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len); if (!prop) return; - end = of_read_ulong(prop, len/4); + end = of_read_number(prop, len/4); early_init_dt_setup_initrd_arch(start, end); - pr_debug("initrd_start=0x%lx initrd_end=0x%lx\n", start, end); + pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", + (unsigned long long)start, (unsigned long long)end); } #else inline void early_init_dt_check_for_initrd(unsigned long node) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index ed136ad698ce..4a17939b95cc 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -106,8 +106,7 @@ extern u64 dt_mem_next_cell(int s, __be32 **cellp); * physical addresses. */ #ifdef CONFIG_BLK_DEV_INITRD -extern void early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end); +extern void early_init_dt_setup_initrd_arch(u64 start, u64 end); #endif /* Early flat tree scan hooks */ From 88a8104df9b0f987f1a2995e1ae108662689ba61 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 5 Aug 2013 14:40:44 +0200 Subject: [PATCH 1065/1368] OF: Add helper for matching against linux,stdout-path devicetrees may have a linux,stdout-path property in the chosen node describing the console device. This adds a helper function to match a device against this property so a driver can call add_preferred_console for a matching device. Signed-off-by: Sascha Hauer Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5c19e95216b93b0d29c6a4887e69a980edc6fc81) Signed-off-by: Mark Brown --- drivers/of/base.c | 26 ++++++++++++++++++++++++++ include/linux/of.h | 7 +++++++ 2 files changed, 33 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index a6f584a7f4a1..ffb84d506030 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -32,6 +32,7 @@ struct device_node *of_allnodes; EXPORT_SYMBOL(of_allnodes); struct device_node *of_chosen; struct device_node *of_aliases; +static struct device_node *of_stdout; DEFINE_MUTEX(of_aliases_mutex); @@ -1595,6 +1596,15 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) of_chosen = of_find_node_by_path("/chosen"); if (of_chosen == NULL) of_chosen = of_find_node_by_path("/chosen@0"); + + if (of_chosen) { + const char *name; + + name = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (name) + of_stdout = of_find_node_by_path(name); + } + of_aliases = of_find_node_by_path("/aliases"); if (!of_aliases) return; @@ -1703,3 +1713,19 @@ const char *of_prop_next_string(struct property *prop, const char *cur) return curv; } EXPORT_SYMBOL_GPL(of_prop_next_string); + +/** + * of_device_is_stdout_path - check if a device node matches the + * linux,stdout-path property + * + * Check if this device node matches the linux,stdout-path property + * in the chosen node. return true if yes, false otherwise. + */ +int of_device_is_stdout_path(struct device_node *dn) +{ + if (!of_stdout) + return false; + + return of_stdout == dn; +} +EXPORT_SYMBOL_GPL(of_device_is_stdout_path); diff --git a/include/linux/of.h b/include/linux/of.h index 1fd08ca23106..429e16801858 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -343,6 +343,8 @@ const char *of_prop_next_string(struct property *prop, const char *cur); s; \ s = of_prop_next_string(prop, s)) +int of_device_is_stdout_path(struct device_node *dn); + #else /* CONFIG_OF */ static inline const char* of_node_full_name(struct device_node *np) @@ -505,6 +507,11 @@ static inline int of_machine_is_compatible(const char *compat) return 0; } +static inline int of_device_is_stdout_path(struct device_node *dn) +{ + return 0; +} + #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #define of_property_for_each_u32(np, propname, prop, p, u) \ From 3e1298883243f952731db48d9642ddb75dc003c8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 26 Aug 2013 14:41:56 +0200 Subject: [PATCH 1066/1368] drivers: of: add function to scan fdt nodes given by path Add a function to scan the flattened device-tree starting from the node given by the path. It is used to extract information (like reserved memory), which is required on early boot before we can unflatten the tree. Signed-off-by: Marek Szyprowski Acked-by: Michal Nazarewicz Acked-by: Tomasz Figa Reviewed-by: Rob Herring (cherry picked from commit 57d74bcf3072b65bde5aa540cedc976a75c48e5c) Signed-off-by: Mark Brown --- drivers/of/fdt.c | 76 ++++++++++++++++++++++++++++++++++++++++++ include/linux/of_fdt.h | 3 ++ 2 files changed, 79 insertions(+) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 21123b8ee4d4..e7ae94a7c42f 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -543,6 +543,82 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat) return of_fdt_match(initial_boot_params, node, compat); } +struct fdt_scan_status { + const char *name; + int namelen; + int depth; + int found; + int (*iterator)(unsigned long node, const char *uname, int depth, void *data); + void *data; +}; + +/** + * fdt_scan_node_by_path - iterator for of_scan_flat_dt_by_path function + */ +static int __init fdt_scan_node_by_path(unsigned long node, const char *uname, + int depth, void *data) +{ + struct fdt_scan_status *st = data; + + /* + * if scan at the requested fdt node has been completed, + * return -ENXIO to abort further scanning + */ + if (depth <= st->depth) + return -ENXIO; + + /* requested fdt node has been found, so call iterator function */ + if (st->found) + return st->iterator(node, uname, depth, st->data); + + /* check if scanning automata is entering next level of fdt nodes */ + if (depth == st->depth + 1 && + strncmp(st->name, uname, st->namelen) == 0 && + uname[st->namelen] == 0) { + st->depth += 1; + if (st->name[st->namelen] == 0) { + st->found = 1; + } else { + const char *next = st->name + st->namelen + 1; + st->name = next; + st->namelen = strcspn(next, "/"); + } + return 0; + } + + /* scan next fdt node */ + return 0; +} + +/** + * of_scan_flat_dt_by_path - scan flattened tree blob and call callback on each + * child of the given path. + * @path: path to start searching for children + * @it: callback function + * @data: context data pointer + * + * This function is used to scan the flattened device-tree starting from the + * node given by path. It is used to extract information (like reserved + * memory), which is required on ealy boot before we can unflatten the tree. + */ +int __init of_scan_flat_dt_by_path(const char *path, + int (*it)(unsigned long node, const char *name, int depth, void *data), + void *data) +{ + struct fdt_scan_status st = {path, 0, -1, 0, it, data}; + int ret = 0; + + if (initial_boot_params) + ret = of_scan_flat_dt(fdt_scan_node_by_path, &st); + + if (!st.found) + return -ENOENT; + else if (ret == -ENXIO) /* scan has been completed */ + return 0; + else + return ret; +} + #ifdef CONFIG_BLK_DEV_INITRD /** * early_init_dt_check_for_initrd - Decode initrd location from flat tree diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 4a17939b95cc..a478c62a2aab 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -90,6 +90,9 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name, extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern int of_flat_dt_match(unsigned long node, const char *const *matches); extern unsigned long of_get_flat_dt_root(void); +extern int of_scan_flat_dt_by_path(const char *path, + int (*it)(unsigned long node, const char *name, int depth, void *data), + void *data); extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); From 73eebf3ecfd418b26f0d3536cbdecb41717dfe31 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Feb 2014 14:42:47 +0100 Subject: [PATCH 1067/1368] drivers: of: add initialization code for static reserved memory This patch adds support for static (defined by 'reg' property) reserved memory regions declared in device tree. Memory blocks can be reliably reserved only during early boot. This must happen before the whole memory management subsystem is initialized, because we need to ensure that the given contiguous blocks are not yet allocated by kernel. Also it must happen before kernel mappings for the whole low memory are created, to ensure that there will be no mappings (for reserved blocks). Typically, all this happens before device tree structures are unflattened, so we need to get reserved memory layout directly from fdt. Based on previous code provided by Josh Cartwright Signed-off-by: Marek Szyprowski Signed-off-by: Grant Likely (cherry picked from commit e8d9d1f5485b52ec3c4d7af839e6914438f6c285) Signed-off-by: Mark Brown Conflicts: drivers/of/fdt.c include/linux/of_fdt.h --- drivers/of/fdt.c | 188 +++++++++++++++++++++++++++++++++++++++++ include/linux/of_fdt.h | 5 ++ 2 files changed, 193 insertions(+) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index e7ae94a7c42f..79ab36714580 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -14,9 +14,11 @@ #include #include #include +#include #include #include #include +#include #include /* for COMMAND_LINE_SIZE */ #ifdef CONFIG_PPC @@ -440,6 +442,118 @@ struct boot_param_header *initial_boot_params; #ifdef CONFIG_OF_EARLY_FLATTREE +/** + * res_mem_reserve_reg() - reserve all memory described in 'reg' property + */ +static int __init __reserved_mem_reserve_reg(unsigned long node, + const char *uname) +{ + int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32); + phys_addr_t base, size; + unsigned long len; + __be32 *prop; + int nomap; + + prop = of_get_flat_dt_prop(node, "reg", &len); + if (!prop) + return -ENOENT; + + if (len && len % t_len != 0) { + pr_err("Reserved memory: invalid reg property in '%s', skipping node.\n", + uname); + return -EINVAL; + } + + nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; + + while (len >= t_len) { + base = dt_mem_next_cell(dt_root_addr_cells, &prop); + size = dt_mem_next_cell(dt_root_size_cells, &prop); + + if (base && size && + early_init_dt_reserve_memory_arch(base, size, nomap) == 0) + pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %ld MiB\n", + uname, &base, (unsigned long)size / SZ_1M); + else + pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %ld MiB\n", + uname, &base, (unsigned long)size / SZ_1M); + + len -= t_len; + } + return 0; +} + +/** + * __reserved_mem_check_root() - check if #size-cells, #address-cells provided + * in /reserved-memory matches the values supported by the current implementation, + * also check if ranges property has been provided + */ +static int __reserved_mem_check_root(unsigned long node) +{ + __be32 *prop; + + prop = of_get_flat_dt_prop(node, "#size-cells", NULL); + if (!prop || be32_to_cpup(prop) != dt_root_size_cells) + return -EINVAL; + + prop = of_get_flat_dt_prop(node, "#address-cells", NULL); + if (!prop || be32_to_cpup(prop) != dt_root_addr_cells) + return -EINVAL; + + prop = of_get_flat_dt_prop(node, "ranges", NULL); + if (!prop) + return -EINVAL; + return 0; +} + +/** + * fdt_scan_reserved_mem() - scan a single FDT node for reserved memory + */ +static int __init __fdt_scan_reserved_mem(unsigned long node, const char *uname, + int depth, void *data) +{ + static int found; + const char *status; + + if (!found && depth == 1 && strcmp(uname, "reserved-memory") == 0) { + if (__reserved_mem_check_root(node) != 0) { + pr_err("Reserved memory: unsupported node format, ignoring\n"); + /* break scan */ + return 1; + } + found = 1; + /* scan next node */ + return 0; + } else if (!found) { + /* scan next node */ + return 0; + } else if (found && depth < 2) { + /* scanning of /reserved-memory has been finished */ + return 1; + } + + status = of_get_flat_dt_prop(node, "status", NULL); + if (status && strcmp(status, "okay") != 0 && strcmp(status, "ok") != 0) + return 0; + + __reserved_mem_reserve_reg(node, uname); + + /* scan next node */ + return 0; +} + +/** + * early_init_fdt_scan_reserved_mem() - create reserved memory regions + * + * This function grabs memory from early allocator for device exclusive use + * defined in device tree structures. It should be called by arch specific code + * once the early allocator (i.e. memblock) has been fully activated. + */ +void __init early_init_fdt_scan_reserved_mem(void) +{ + of_scan_flat_dt(__fdt_scan_reserved_mem, NULL); +} + /** * of_scan_flat_dt - scan flattened tree blob and call callback on each. * @it: callback function @@ -774,6 +888,80 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, return 1; } +#ifdef CONFIG_HAVE_MEMBLOCK +void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) +{ + const u64 phys_offset = __pa(PAGE_OFFSET); + base &= PAGE_MASK; + size &= PAGE_MASK; + if (base + size < phys_offset) { + pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", + base, base + size); + return; + } + if (base < phys_offset) { + pr_warning("Ignoring memory range 0x%llx - 0x%llx\n", + base, phys_offset); + size -= phys_offset - base; + base = phys_offset; + } + memblock_add(base, size); +} + +int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, + phys_addr_t size, bool nomap) +{ + if (memblock_is_region_reserved(base, size)) + return -EBUSY; + if (nomap) + return memblock_remove(base, size); + return memblock_reserve(base, size); +} + +/* + * called from unflatten_device_tree() to bootstrap devicetree itself + * Architectures can override this definition if memblock isn't used + */ +void * __init __weak early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return __va(memblock_alloc(size, align)); +} +#else +int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, + phys_addr_t size, bool nomap) +{ + pr_err("Reserved memory not supported, ignoring range 0x%llx - 0x%llx%s\n", + base, size, nomap ? " (nomap)" : ""); + return -ENOSYS; +} +#endif + +bool __init early_init_dt_scan(void *params) +{ + if (!params) + return false; + + /* Setup flat device-tree pointer */ + initial_boot_params = params; + + /* check device tree validity */ + if (be32_to_cpu(initial_boot_params->magic) != OF_DT_HEADER) { + initial_boot_params = NULL; + return false; + } + + /* Retrieve various information from the /chosen node */ + of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); + + /* Initialize {size,address}-cells info */ + of_scan_flat_dt(early_init_dt_scan_root, NULL); + + /* Setup memory, calling early_init_dt_add_memory_arch */ + of_scan_flat_dt(early_init_dt_scan_memory, NULL); + + return true; +} + /** * unflatten_device_tree - create tree of device_nodes from flat blob * diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index a478c62a2aab..a792cb3d5ec8 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -99,7 +99,10 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, extern void early_init_dt_check_for_initrd(unsigned long node); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); +extern void early_init_fdt_scan_reserved_mem(void); extern void early_init_dt_add_memory_arch(u64 base, u64 size); +extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, + bool no_map); extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); extern u64 dt_mem_next_cell(int s, __be32 **cellp); @@ -120,6 +123,8 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname, extern void unflatten_device_tree(void); extern void early_init_devtree(void *); #else /* CONFIG_OF_FLATTREE */ +static inline void early_init_fdt_scan_reserved_mem(void) {} +static inline const char *of_flat_dt_get_machine_name(void) { return NULL; } static inline void unflatten_device_tree(void) {} #endif /* CONFIG_OF_FLATTREE */ From 5b8f828963db94fb7ae67951060fc7fc13516902 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Feb 2014 14:42:48 +0100 Subject: [PATCH 1068/1368] drivers: of: add initialization code for dynamic reserved memory This patch adds support for dynamically allocated reserved memory regions declared in device tree. Such regions are defined by 'size', 'alignment' and 'alloc-ranges' properties. Based on previous code provided by Josh Cartwright Signed-off-by: Marek Szyprowski Signed-off-by: Grant Likely (cherry picked from commit 3f0c8206644836e4f10a6b9fc47cda6a9a372f9b) Signed-off-by: Mark Brown --- drivers/of/Kconfig | 6 + drivers/of/Makefile | 1 + drivers/of/fdt.c | 13 ++- drivers/of/of_reserved_mem.c | 188 ++++++++++++++++++++++++++++++++ include/linux/of_reserved_mem.h | 21 ++++ 5 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 drivers/of/of_reserved_mem.c create mode 100644 include/linux/of_reserved_mem.h diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index d37bfcf5a3a2..bdc7f9a0c500 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -83,4 +83,10 @@ config OF_MTD depends on MTD def_bool y +config OF_RESERVED_MEM + depends on OF_EARLY_FLATTREE + bool + help + Helpers to allow for reservation of memory regions + endmenu # OF diff --git a/drivers/of/Makefile b/drivers/of/Makefile index e027f444d10c..8098b4dd357f 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_OF_MDIO) += of_mdio.o obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o obj-$(CONFIG_OF_MTD) += of_mtd.o +obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 79ab36714580..b6e4d7fcd316 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -452,7 +453,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, phys_addr_t base, size; unsigned long len; __be32 *prop; - int nomap; + int nomap, first = 1; prop = of_get_flat_dt_prop(node, "reg", &len); if (!prop) @@ -479,6 +480,10 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, uname, &base, (unsigned long)size / SZ_1M); len -= t_len; + if (first) { + fdt_reserved_mem_save_node(node, uname, base, size); + first = 0; + } } return 0; } @@ -514,6 +519,7 @@ static int __init __fdt_scan_reserved_mem(unsigned long node, const char *uname, { static int found; const char *status; + int err; if (!found && depth == 1 && strcmp(uname, "reserved-memory") == 0) { if (__reserved_mem_check_root(node) != 0) { @@ -536,7 +542,9 @@ static int __init __fdt_scan_reserved_mem(unsigned long node, const char *uname, if (status && strcmp(status, "okay") != 0 && strcmp(status, "ok") != 0) return 0; - __reserved_mem_reserve_reg(node, uname); + err = __reserved_mem_reserve_reg(node, uname); + if (err == -ENOENT && of_get_flat_dt_prop(node, "size", NULL)) + fdt_reserved_mem_save_node(node, uname, 0, 0); /* scan next node */ return 0; @@ -552,6 +560,7 @@ static int __init __fdt_scan_reserved_mem(unsigned long node, const char *uname, void __init early_init_fdt_scan_reserved_mem(void) { of_scan_flat_dt(__fdt_scan_reserved_mem, NULL); + fdt_init_reserved_mem(); } /** diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c new file mode 100644 index 000000000000..69b811779585 --- /dev/null +++ b/drivers/of/of_reserved_mem.c @@ -0,0 +1,188 @@ +/* + * Device tree based initialization code for reserved memory. + * + * Copyright (c) 2013, The Linux Foundation. All Rights Reserved. + * Copyright (c) 2013,2014 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * Author: Marek Szyprowski + * Author: Josh Cartwright + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define MAX_RESERVED_REGIONS 16 +static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS]; +static int reserved_mem_count; + +#if defined(CONFIG_HAVE_MEMBLOCK) +#include +int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, + phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap, + phys_addr_t *res_base) +{ + /* + * We use __memblock_alloc_base() because memblock_alloc_base() + * panic()s on allocation failure. + */ + phys_addr_t base = __memblock_alloc_base(size, align, end); + if (!base) + return -ENOMEM; + + /* + * Check if the allocated region fits in to start..end window + */ + if (base < start) { + memblock_free(base, size); + return -ENOMEM; + } + + *res_base = base; + if (nomap) + return memblock_remove(base, size); + return 0; +} +#else +int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, + phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap, + phys_addr_t *res_base) +{ + pr_err("Reserved memory not supported, ignoring region 0x%llx%s\n", + size, nomap ? " (nomap)" : ""); + return -ENOSYS; +} +#endif + +/** + * res_mem_save_node() - save fdt node for second pass initialization + */ +void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname, + phys_addr_t base, phys_addr_t size) +{ + struct reserved_mem *rmem = &reserved_mem[reserved_mem_count]; + + if (reserved_mem_count == ARRAY_SIZE(reserved_mem)) { + pr_err("Reserved memory: not enough space all defined regions.\n"); + return; + } + + rmem->fdt_node = node; + rmem->name = uname; + rmem->base = base; + rmem->size = size; + + reserved_mem_count++; + return; +} + +/** + * res_mem_alloc_size() - allocate reserved memory described by 'size', 'align' + * and 'alloc-ranges' properties + */ +static int __init __reserved_mem_alloc_size(unsigned long node, + const char *uname, phys_addr_t *res_base, phys_addr_t *res_size) +{ + int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32); + phys_addr_t start = 0, end = 0; + phys_addr_t base = 0, align = 0, size; + unsigned long len; + __be32 *prop; + int nomap; + int ret; + + prop = of_get_flat_dt_prop(node, "size", &len); + if (!prop) + return -EINVAL; + + if (len != dt_root_size_cells * sizeof(__be32)) { + pr_err("Reserved memory: invalid size property in '%s' node.\n", + uname); + return -EINVAL; + } + size = dt_mem_next_cell(dt_root_size_cells, &prop); + + nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; + + prop = of_get_flat_dt_prop(node, "alignment", &len); + if (prop) { + if (len != dt_root_addr_cells * sizeof(__be32)) { + pr_err("Reserved memory: invalid alignment property in '%s' node.\n", + uname); + return -EINVAL; + } + align = dt_mem_next_cell(dt_root_addr_cells, &prop); + } + + prop = of_get_flat_dt_prop(node, "alloc-ranges", &len); + if (prop) { + + if (len % t_len != 0) { + pr_err("Reserved memory: invalid alloc-ranges property in '%s', skipping node.\n", + uname); + return -EINVAL; + } + + base = 0; + + while (len > 0) { + start = dt_mem_next_cell(dt_root_addr_cells, &prop); + end = start + dt_mem_next_cell(dt_root_size_cells, + &prop); + + ret = early_init_dt_alloc_reserved_memory_arch(size, + align, start, end, nomap, &base); + if (ret == 0) { + pr_debug("Reserved memory: allocated memory for '%s' node: base %pa, size %ld MiB\n", + uname, &base, + (unsigned long)size / SZ_1M); + break; + } + len -= t_len; + } + + } else { + ret = early_init_dt_alloc_reserved_memory_arch(size, align, + 0, 0, nomap, &base); + if (ret == 0) + pr_debug("Reserved memory: allocated memory for '%s' node: base %pa, size %ld MiB\n", + uname, &base, (unsigned long)size / SZ_1M); + } + + if (base == 0) { + pr_info("Reserved memory: failed to allocate memory for node '%s'\n", + uname); + return -ENOMEM; + } + + *res_base = base; + *res_size = size; + + return 0; +} + +/** + * fdt_init_reserved_mem - allocate and init all saved reserved memory regions + */ +void __init fdt_init_reserved_mem(void) +{ + int i; + for (i = 0; i < reserved_mem_count; i++) { + struct reserved_mem *rmem = &reserved_mem[i]; + unsigned long node = rmem->fdt_node; + int err = 0; + + if (rmem->size == 0) + err = __reserved_mem_alloc_size(node, rmem->name, + &rmem->base, &rmem->size); + } +} diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h new file mode 100644 index 000000000000..89226ed7d954 --- /dev/null +++ b/include/linux/of_reserved_mem.h @@ -0,0 +1,21 @@ +#ifndef __OF_RESERVED_MEM_H +#define __OF_RESERVED_MEM_H + +struct reserved_mem { + const char *name; + unsigned long fdt_node; + phys_addr_t base; + phys_addr_t size; +}; + +#ifdef CONFIG_OF_RESERVED_MEM +void fdt_init_reserved_mem(void); +void fdt_reserved_mem_save_node(unsigned long node, const char *uname, + phys_addr_t base, phys_addr_t size); +#else +static inline void fdt_init_reserved_mem(void) { } +static inline void fdt_reserved_mem_save_node(unsigned long node, + const char *uname, phys_addr_t base, phys_addr_t size) { } +#endif + +#endif /* __OF_RESERVED_MEM_H */ From 7c828dcc5ce4d19c7863ec204cac0dea341e347f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Feb 2014 14:42:49 +0100 Subject: [PATCH 1069/1368] drivers: of: add support for custom reserved memory drivers Add support for custom reserved memory drivers. Call their init() function for each reserved region and prepare for using operations provided by them with by the reserved_mem->ops array. Based on previous code provided by Josh Cartwright Signed-off-by: Marek Szyprowski Signed-off-by: Grant Likely (cherry picked from commit f618c4703a14672d27bc2ca5d132a844363d6f5f) Signed-off-by: Mark Brown --- drivers/of/of_reserved_mem.c | 29 ++++++++++++++++++++++++++++ include/asm-generic/vmlinux.lds.h | 11 +++++++++++ include/linux/of_reserved_mem.h | 32 +++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 69b811779585..daaaf935911d 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -170,6 +170,33 @@ static int __init __reserved_mem_alloc_size(unsigned long node, return 0; } +static const struct of_device_id __rmem_of_table_sentinel + __used __section(__reservedmem_of_table_end); + +/** + * res_mem_init_node() - call region specific reserved memory init code + */ +static int __init __reserved_mem_init_node(struct reserved_mem *rmem) +{ + extern const struct of_device_id __reservedmem_of_table[]; + const struct of_device_id *i; + + for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) { + reservedmem_of_init_fn initfn = i->data; + const char *compat = i->compatible; + + if (!of_flat_dt_is_compatible(rmem->fdt_node, compat)) + continue; + + if (initfn(rmem, rmem->fdt_node, rmem->name) == 0) { + pr_info("Reserved memory: initialized node %s, compatible id %s\n", + rmem->name, compat); + return 0; + } + } + return -ENOENT; +} + /** * fdt_init_reserved_mem - allocate and init all saved reserved memory regions */ @@ -184,5 +211,7 @@ void __init fdt_init_reserved_mem(void) if (rmem->size == 0) err = __reserved_mem_alloc_size(node, rmem->name, &rmem->base, &rmem->size); + if (err == 0) + __reserved_mem_init_node(rmem); } } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eb58d2d7d971..7414dd5132e5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -171,6 +171,16 @@ #define CLK_OF_TABLES() #endif +#ifdef CONFIG_OF_RESERVED_MEM +#define RESERVEDMEM_OF_TABLES() \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__reservedmem_of_table) = .; \ + *(__reservedmem_of_table) \ + *(__reservedmem_of_table_end) +#else +#define RESERVEDMEM_OF_TABLES() +#endif + #define KERNEL_DTB() \ STRUCT_ALIGN(); \ VMLINUX_SYMBOL(__dtb_start) = .; \ @@ -515,6 +525,7 @@ CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ + RESERVEDMEM_OF_TABLES() \ CLKSRC_OF_TABLES() \ KERNEL_DTB() \ IRQCHIP_OF_MATCH_TABLE() diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index 89226ed7d954..9b1fbb7f29fc 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -1,21 +1,53 @@ #ifndef __OF_RESERVED_MEM_H #define __OF_RESERVED_MEM_H +struct device; +struct of_phandle_args; +struct reserved_mem_ops; + struct reserved_mem { const char *name; unsigned long fdt_node; + const struct reserved_mem_ops *ops; phys_addr_t base; phys_addr_t size; + void *priv; }; +struct reserved_mem_ops { + void (*device_init)(struct reserved_mem *rmem, + struct device *dev); + void (*device_release)(struct reserved_mem *rmem, + struct device *dev); +}; + +typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem, + unsigned long node, const char *uname); + #ifdef CONFIG_OF_RESERVED_MEM void fdt_init_reserved_mem(void); void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size); + +#define RESERVEDMEM_OF_DECLARE(name, compat, init) \ + static const struct of_device_id __reservedmem_of_table_##name \ + __used __section(__reservedmem_of_table) \ + = { .compatible = compat, \ + .data = (init == (reservedmem_of_init_fn)NULL) ? \ + init : init } + #else static inline void fdt_init_reserved_mem(void) { } static inline void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size) { } + +#define RESERVEDMEM_OF_DECLARE(name, compat, init) \ + static const struct of_device_id __reservedmem_of_table_##name \ + __attribute__((unused)) \ + = { .compatible = compat, \ + .data = (init == (reservedmem_of_init_fn)NULL) ? \ + init : init } + #endif #endif /* __OF_RESERVED_MEM_H */ From b94c8bedffeabb49f57bed9de17030f2addd21a0 Mon Sep 17 00:00:00 2001 From: Josh Cartwright Date: Thu, 13 Mar 2014 16:36:36 -0500 Subject: [PATCH 1070/1368] of: only scan for reserved mem when fdt present When the reserved memory patches hit -next, several legacy (non-DT) boot failures were detected and bisected down to that commit. There needs to be some sanity checking whether a DT is even present before parsing the reserved ranges. Reported-by: Kevin Hilman Signed-off-by: Josh Cartwright Tested-by: Kevin Hilman Signed-off-by: Grant Likely (cherry picked from commit 2040b52768ebab6e7bd73af0dc63703269c62f17) Signed-off-by: Mark Brown --- drivers/of/fdt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index b6e4d7fcd316..e521121ff71e 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -559,6 +559,9 @@ static int __init __fdt_scan_reserved_mem(unsigned long node, const char *uname, */ void __init early_init_fdt_scan_reserved_mem(void) { + if (!initial_boot_params) + return; + of_scan_flat_dt(__fdt_scan_reserved_mem, NULL); fdt_init_reserved_mem(); } From 550c31c90a19bef6a09e7a35b864537ba981faed Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 8 Apr 2014 13:48:07 +0800 Subject: [PATCH 1071/1368] of: Fix the section mismatch warnings. In tag next-20140407, building with CONFIG_DEBUG_SECTION_MISMATCH enabled, the following WARNING is occured: WARNING: drivers/built-in.o(.text.unlikely+0x2220): Section mismatch in reference from the function __reserved_mem_check_root() to the function .init.text:of_get_flat_dt_prop() The function __reserved_mem_check_root() references the function __init of_get_flat_dt_prop(). This is often because __reserved_mem_check_root lacks a __init annotation or the annotation of of_get_flat_dt_prop is wrong. WARNING: vmlinux.o(.text.unlikely+0xb9d0): Section mismatch in reference from the function __reserved_mem_check_root() to the (unknown reference) .init.data:(unknown) The function __reserved_mem_check_root() references the (unknown reference) __initdata (unknown). This is often because __reserved_mem_check_root lacks a __initdata annotation or the annotation of (unknown) is wrong. This is cause by : 'drivers: of: add initialization code for dynamic reserved memory'. Signed-off-by: Xiubo Li Signed-off-by: Rob Herring (cherry picked from commit 5b6241185e2cded07ca3f5f646b55c641928ba4e) Signed-off-by: Mark Brown --- drivers/of/fdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index e521121ff71e..f6241a50cc90 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -493,7 +493,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, * in /reserved-memory matches the values supported by the current implementation, * also check if ranges property has been provided */ -static int __reserved_mem_check_root(unsigned long node) +static int __init __reserved_mem_check_root(unsigned long node) { __be32 *prop; From 7f76e96eff22e87f525f2ba84d2c601aba9be069 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sat, 29 Mar 2014 14:14:17 -0500 Subject: [PATCH 1072/1368] of/fdt: remove unused of_scan_flat_dt_by_path of_scan_flat_dt_by_path is unused anywhere in the kernel, so remove it. Signed-off-by: Rob Herring Tested-by: Michal Simek Tested-by: Grant Likely Tested-by: Stephen Chivers (cherry picked from commit bba04d965d06abbbe10afd3687742389107e198e) Signed-off-by: Mark Brown Conflicts: drivers/of/fdt.c --- drivers/of/fdt.c | 67 ------------------------------------------ include/linux/of_fdt.h | 3 -- 2 files changed, 70 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index f6241a50cc90..776c62feee13 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -678,73 +678,6 @@ struct fdt_scan_status { void *data; }; -/** - * fdt_scan_node_by_path - iterator for of_scan_flat_dt_by_path function - */ -static int __init fdt_scan_node_by_path(unsigned long node, const char *uname, - int depth, void *data) -{ - struct fdt_scan_status *st = data; - - /* - * if scan at the requested fdt node has been completed, - * return -ENXIO to abort further scanning - */ - if (depth <= st->depth) - return -ENXIO; - - /* requested fdt node has been found, so call iterator function */ - if (st->found) - return st->iterator(node, uname, depth, st->data); - - /* check if scanning automata is entering next level of fdt nodes */ - if (depth == st->depth + 1 && - strncmp(st->name, uname, st->namelen) == 0 && - uname[st->namelen] == 0) { - st->depth += 1; - if (st->name[st->namelen] == 0) { - st->found = 1; - } else { - const char *next = st->name + st->namelen + 1; - st->name = next; - st->namelen = strcspn(next, "/"); - } - return 0; - } - - /* scan next fdt node */ - return 0; -} - -/** - * of_scan_flat_dt_by_path - scan flattened tree blob and call callback on each - * child of the given path. - * @path: path to start searching for children - * @it: callback function - * @data: context data pointer - * - * This function is used to scan the flattened device-tree starting from the - * node given by path. It is used to extract information (like reserved - * memory), which is required on ealy boot before we can unflatten the tree. - */ -int __init of_scan_flat_dt_by_path(const char *path, - int (*it)(unsigned long node, const char *name, int depth, void *data), - void *data) -{ - struct fdt_scan_status st = {path, 0, -1, 0, it, data}; - int ret = 0; - - if (initial_boot_params) - ret = of_scan_flat_dt(fdt_scan_node_by_path, &st); - - if (!st.found) - return -ENOENT; - else if (ret == -ENXIO) /* scan has been completed */ - return 0; - else - return ret; -} - #ifdef CONFIG_BLK_DEV_INITRD /** * early_init_dt_check_for_initrd - Decode initrd location from flat tree diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index a792cb3d5ec8..ff627bddb9d3 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -90,9 +90,6 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name, extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern int of_flat_dt_match(unsigned long node, const char *const *matches); extern unsigned long of_get_flat_dt_root(void); -extern int of_scan_flat_dt_by_path(const char *path, - int (*it)(unsigned long node, const char *name, int depth, void *data), - void *data); extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); From d6c2d4f195ac6f41838651c8b04ff0788c0f8adb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 24 Jul 2014 21:06:21 +0100 Subject: [PATCH 1073/1368] of/fdt: update of_get_flat_dt_prop in prep for libfdt Make of_get_flat_dt_prop arguments compatible with libfdt fdt_getprop call in preparation to convert FDT code to use libfdt. Make the return value const and the property length ptr type an int. Signed-off-by: Rob Herring Tested-by: Michal Simek Tested-by: Grant Likely Tested-by: Stephen Chivers (cherry picked from commit 9d0c4dfedd96ee54fc075b16d02f82499c8cc3a6) Signed-off-by: Mark Brown Conflicts: arch/arc/kernel/devtree.c arch/arm/kernel/devtree.c arch/arm/mach-exynos/exynos.c arch/arm/plat-samsung/s5p-dev-mfc.c arch/powerpc/kernel/epapr_paravirt.c arch/powerpc/kernel/prom.c arch/powerpc/mm/hash_utils_64.c arch/powerpc/platforms/powernv/opal.c arch/xtensa/kernel/setup.c drivers/of/fdt.c --- arch/arc/kernel/devtree.c | 2 +- arch/arm/kernel/devtree.c | 2 +- arch/arm/mach-vexpress/platsmp.c | 2 +- arch/arm/plat-samsung/s5p-dev-mfc.c | 4 +-- arch/microblaze/kernel/prom.c | 8 +++--- arch/powerpc/kernel/fadump.c | 4 +-- arch/powerpc/kernel/prom.c | 25 ++++++++--------- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 22 +++++++-------- arch/powerpc/platforms/52xx/efika.c | 4 +-- arch/powerpc/platforms/chrp/setup.c | 4 +-- arch/powerpc/platforms/powernv/opal.c | 12 ++++----- arch/powerpc/platforms/pseries/setup.c | 4 +-- arch/xtensa/kernel/setup.c | 37 ++++++++++++++++++++++++++ drivers/of/fdt.c | 37 +++++++++++++------------- drivers/of/of_reserved_mem.c | 4 +-- include/linux/of_fdt.h | 8 +++--- 17 files changed, 110 insertions(+), 71 deletions(-) diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index bdee3a812052..afdd13cf881c 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -40,7 +40,7 @@ struct machine_desc * __init setup_machine_fdt(void *dt) const char *model, *compat; void *clk; char manufacturer[16]; - unsigned long len; + int len; /* check device tree validity */ if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 5859c8bc727c..a44e7d11ab02 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -212,7 +212,7 @@ struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) } if (!mdesc_best) { const char *prop; - long size; + int size; early_print("\nError: unrecognized/unsupported " "device tree compatible list:\n[ "); diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c index dc1ace55d557..84bd6fde7a7c 100644 --- a/arch/arm/mach-vexpress/platsmp.c +++ b/arch/arm/mach-vexpress/platsmp.c @@ -51,7 +51,7 @@ static int __init vexpress_dt_find_scu(unsigned long node, { if (of_flat_dt_match(node, vexpress_dt_cortex_a9_match)) { phys_addr_t phys_addr; - __be32 *reg = of_get_flat_dt_prop(node, "reg", NULL); + const __be32 *reg = of_get_flat_dt_prop(node, "reg", NULL); if (WARN_ON(!reg)) return -EINVAL; diff --git a/arch/arm/plat-samsung/s5p-dev-mfc.c b/arch/arm/plat-samsung/s5p-dev-mfc.c index a93fb6fb6606..586ca73d1059 100644 --- a/arch/arm/plat-samsung/s5p-dev-mfc.c +++ b/arch/arm/plat-samsung/s5p-dev-mfc.c @@ -116,8 +116,8 @@ device_initcall(s5p_mfc_memory_init); int __init s5p_fdt_find_mfc_mem(unsigned long node, const char *uname, int depth, void *data) { - __be32 *prop; - unsigned long len; + const __be32 *prop; + int len; struct s5p_mfc_dt_meminfo *mfc_mem = data; if (!data) diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 62e2e8f2c5d6..c9c766fe6321 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -52,13 +52,13 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_EARLY_PRINTK -static char *stdout; +static const char *stdout; static int __init early_init_dt_scan_chosen_serial(unsigned long node, const char *uname, int depth, void *data) { - unsigned long l; - char *p; + int l; + const char *p; pr_debug("%s: depth: %d, uname: %s\n", __func__, depth, uname); @@ -89,7 +89,7 @@ static int __init early_init_dt_scan_chosen_serial(unsigned long node, (strncmp(p, "xlnx,opb-uartlite", 17) == 0) || (strncmp(p, "xlnx,axi-uartlite", 17) == 0) || (strncmp(p, "xlnx,mdm", 8) == 0)) { - unsigned int *addrp; + const unsigned int *addrp; *(u32 *)data = UARTLITE; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 2230fd0ca3e4..7213d930918d 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -55,9 +55,9 @@ int crash_mem_ranges; int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) { - __be32 *sections; + const __be32 *sections; int i, num_sections; - unsigned long size; + int size; const int *token; if (depth != 1 || strcmp(uname, "rtas") != 0) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2f3e25225158..2a954ab60839 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -162,7 +162,7 @@ static struct ibm_pa_feature { {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, }; -static void __init scan_features(unsigned long node, unsigned char *ftrs, +static void __init scan_features(unsigned long node, const unsigned char *ftrs, unsigned long tablelen, struct ibm_pa_feature *fp, unsigned long ft_size) @@ -201,8 +201,8 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs, static void __init check_cpu_pa_features(unsigned long node) { - unsigned char *pa_ftrs; - unsigned long tablelen; + const unsigned char *pa_ftrs; + int tablelen; pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen); if (pa_ftrs == NULL) @@ -215,7 +215,7 @@ static void __init check_cpu_pa_features(unsigned long node) #ifdef CONFIG_PPC_STD_MMU_64 static void __init check_cpu_slb_size(unsigned long node) { - u32 *slb_size_ptr; + const __be32 *slb_size_ptr; slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL); if (slb_size_ptr != NULL) { @@ -256,7 +256,7 @@ static struct feature_property { static inline void identical_pvr_fixup(unsigned long node) { unsigned int pvr; - char *model = of_get_flat_dt_prop(node, "model", NULL); + const char *model = of_get_flat_dt_prop(node, "model", NULL); /* * Since 440GR(x)/440EP(x) processors have the same pvr, @@ -294,11 +294,11 @@ static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - const u32 *prop; - const u32 *intserv; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *prop; + const __be32 *intserv; int i, nthreads; - unsigned long len; + int len; int found = -1; int found_thread = 0; @@ -389,7 +389,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, int depth, void *data) { - unsigned long *lprop; + const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ /* Use common scan routine to determine if this is the chosen node */ if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) @@ -440,8 +440,9 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, */ static int __init early_init_dt_scan_drconf_memory(unsigned long node) { - __be32 *dm, *ls, *usm; - unsigned long l, n, flags; + const __be32 *dm, *ls, *usm; + int l; + unsigned long n, flags; u64 base, size, memblock_size; unsigned int is_kexec_kdump = 0, rngs; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 52add6f3e201..2d6f5a8e19e2 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1135,7 +1135,7 @@ void __init rtas_initialize(void) int __init early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, void *data) { - u32 *basep, *entryp, *sizep; + const u32 *basep, *entryp, *sizep; if (depth != 1 || strcmp(uname, "rtas") != 0) return 0; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index e303a6d74e3a..929e3e675868 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -250,9 +250,9 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - u32 *prop; - unsigned long size = 0; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *prop; + int size = 0; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) @@ -306,9 +306,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - u32 *prop; - unsigned long size = 0; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *prop; + int size = 0; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) @@ -389,9 +389,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, static int __init htab_dt_scan_hugepage_blocks(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - unsigned long *addr_prop; - u32 *page_count_prop; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be64 *addr_prop; + const __be32 *page_count_prop; unsigned int expected_pages; long unsigned int phys_addr; long unsigned int block_size; @@ -533,8 +533,8 @@ static int __init htab_dt_scan_pftsize(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - u32 *prop; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *prop; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 18c104820198..6e19b0ad5d26 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -199,8 +199,8 @@ static void __init efika_setup_arch(void) static int __init efika_probe(void) { - char *model = of_get_flat_dt_prop(of_get_flat_dt_root(), - "model", NULL); + const char *model = of_get_flat_dt_prop(of_get_flat_dt_root(), + "model", NULL); if (model == NULL) return 0; diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index c665d7de6c99..7044fd36197b 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -574,8 +574,8 @@ chrp_init2(void) static int __init chrp_probe(void) { - char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), - "device_type", NULL); + const char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), + "device_type", NULL); if (dtype == NULL) return 0; if (strcmp(dtype, "chrp")) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 628c564ceadb..06620ea33a8e 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -35,8 +35,8 @@ static unsigned int opal_irq_count; int __init early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data) { - const void *basep, *entryp; - unsigned long basesz, entrysz; + const void *basep, *entryp, *sizep; + int basesz, entrysz, runtimesz; if (depth != 1 || strcmp(uname, "ibm,opal") != 0) return 0; @@ -50,10 +50,10 @@ int __init early_init_dt_scan_opal(unsigned long node, opal.base = of_read_number(basep, basesz/4); opal.entry = of_read_number(entryp, entrysz/4); - pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%ld)\n", + pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n", opal.base, basep, basesz); - pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%ld)\n", - opal.entry, entryp, entrysz); + pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n", + opal.size, sizep, runtimesz); powerpc_firmware_features |= FW_FEATURE_OPAL; if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { @@ -105,7 +105,7 @@ int opal_get_chars(uint32_t vtermno, char *buf, int count) opal_poll_events(&evt); if ((evt & OPAL_EVENT_CONSOLE_INPUT) == 0) return 0; - len = count; + len = cpu_to_be64(count); rc = opal_console_read(vtermno, &len, buf); if (rc == OPAL_SUCCESS) return len; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c11c8238797c..3afad933593a 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -635,7 +635,7 @@ static int __init pseries_probe_fw_features(unsigned long node, void *data) { const char *prop; - unsigned long len; + int len; static int hypertas_found; static int vec5_found; @@ -668,7 +668,7 @@ static int __init pseries_probe_fw_features(unsigned long node, static int __init pSeries_probe(void) { unsigned long root = of_get_flat_dt_root(); - char *dtype = of_get_flat_dt_prop(root, "device_type", NULL); + const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL); if (dtype == NULL) return 0; diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index d45e602f4328..8f4484acbb08 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -222,6 +222,43 @@ static int __init parse_bootparam(const bp_tag_t* tag) } #ifdef CONFIG_OF +bool __initdata dt_memory_scan = false; + +#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY +unsigned long xtensa_kio_paddr = XCHAL_KIO_DEFAULT_PADDR; +EXPORT_SYMBOL(xtensa_kio_paddr); + +static int __init xtensa_dt_io_area(unsigned long node, const char *uname, + int depth, void *data) +{ + const __be32 *ranges; + int len; + + if (depth > 1) + return 0; + + if (!of_flat_dt_is_compatible(node, "simple-bus")) + return 0; + + ranges = of_get_flat_dt_prop(node, "ranges", &len); + if (!ranges) + return 1; + if (len == 0) + return 1; + + xtensa_kio_paddr = of_read_ulong(ranges+1, 1); + /* round down to nearest 256MB boundary */ + xtensa_kio_paddr &= 0xf0000000; + + return 1; +} +#else +static int __init xtensa_dt_io_area(unsigned long node, const char *uname, + int depth, void *data) +{ + return 1; +} +#endif void __init early_init_dt_add_memory_arch(u64 base, u64 size) { diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 776c62feee13..39ac2961737b 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -40,7 +40,7 @@ char *of_fdt_get_string(struct boot_param_header *blob, u32 offset) */ void *of_fdt_get_property(struct boot_param_header *blob, unsigned long node, const char *name, - unsigned long *size) + int *size) { unsigned long p = node; @@ -90,7 +90,8 @@ int of_fdt_is_compatible(struct boot_param_header *blob, unsigned long node, const char *compat) { const char *cp; - unsigned long cplen, l, score = 0; + int cplen; + unsigned long l, score = 0; cp = of_fdt_get_property(blob, node, "compatible", &cplen); if (cp == NULL) @@ -451,8 +452,8 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, { int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32); phys_addr_t base, size; - unsigned long len; - __be32 *prop; + int len; + const __be32 *prop; int nomap, first = 1; prop = of_get_flat_dt_prop(node, "reg", &len); @@ -495,7 +496,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, */ static int __init __reserved_mem_check_root(unsigned long node) { - __be32 *prop; + const __be32 *prop; prop = of_get_flat_dt_prop(node, "#size-cells", NULL); if (!prop || be32_to_cpup(prop) != dt_root_size_cells) @@ -645,8 +646,8 @@ unsigned long __init of_get_flat_dt_root(void) * This function can be used within scan_flattened_dt callback to get * access to properties */ -void *__init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) +const void *__init of_get_flat_dt_prop(unsigned long node, const char *name, + int *size) { return of_fdt_get_property(initial_boot_params, node, name, size); } @@ -686,8 +687,8 @@ struct fdt_scan_status { void __init early_init_dt_check_for_initrd(unsigned long node) { u64 start, end; - unsigned long len; - __be32 *prop; + int len; + const __be32 *prop; pr_debug("Looking for initrd properties... "); @@ -717,7 +718,7 @@ inline void early_init_dt_check_for_initrd(unsigned long node) int __init early_init_dt_scan_root(unsigned long node, const char *uname, int depth, void *data) { - __be32 *prop; + const __be32 *prop; if (depth != 0) return 0; @@ -739,9 +740,9 @@ int __init early_init_dt_scan_root(unsigned long node, const char *uname, return 1; } -u64 __init dt_mem_next_cell(int s, __be32 **cellp) +u64 __init dt_mem_next_cell(int s, const __be32 **cellp) { - __be32 *p = *cellp; + const __be32 *p = *cellp; *cellp = p + s; return of_read_number(p, s); @@ -753,9 +754,9 @@ u64 __init dt_mem_next_cell(int s, __be32 **cellp) int __init early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - __be32 *reg, *endp; - unsigned long l; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *reg, *endp; + int l; /* We are scanning "memory" nodes only */ if (type == NULL) { @@ -776,7 +777,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname, endp = reg + (l / sizeof(__be32)); - pr_debug("memory scan node %s, reg size %ld, data: %x %x %x %x,\n", + pr_debug("memory scan node %s, reg size %d, data: %x %x %x %x,\n", uname, l, reg[0], reg[1], reg[2], reg[3]); while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { @@ -799,8 +800,8 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname, int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { - unsigned long l; - char *p; + int l; + const char *p; pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index daaaf935911d..e420eb52e5c9 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -95,8 +95,8 @@ static int __init __reserved_mem_alloc_size(unsigned long node, int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32); phys_addr_t start = 0, end = 0; phys_addr_t base = 0, align = 0, size; - unsigned long len; - __be32 *prop; + int len; + const __be32 *prop; int nomap; int ret; diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index ff627bddb9d3..461b5114d9af 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -66,7 +66,7 @@ extern char *of_fdt_get_string(struct boot_param_header *blob, u32 offset); extern void *of_fdt_get_property(struct boot_param_header *blob, unsigned long node, const char *name, - unsigned long *size); + int *size); extern int of_fdt_is_compatible(struct boot_param_header *blob, unsigned long node, const char *compat); @@ -85,8 +85,8 @@ extern char *find_flat_dt_string(u32 offset); extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, void *data), void *data); -extern void *of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size); +extern const void *of_get_flat_dt_prop(unsigned long node, const char *name, + int *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern int of_flat_dt_match(unsigned long node, const char *const *matches); extern unsigned long of_get_flat_dt_root(void); @@ -101,7 +101,7 @@ extern void early_init_dt_add_memory_arch(u64 base, u64 size); extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, bool no_map); extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); -extern u64 dt_mem_next_cell(int s, __be32 **cellp); +extern u64 dt_mem_next_cell(int s, const __be32 **cellp); /* * If BLK_DEV_INITRD, the fdt early init code will call this function, From a0e28c9fb2fbb2234dd0cb0eeb9d3f63160d8a53 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 2 Apr 2014 15:10:14 -0500 Subject: [PATCH 1074/1368] of/fdt: Convert FDT functions to use libfdt The kernel FDT functions predate libfdt and are much more limited in functionality. Also, the kernel functions and libfdt functions are not compatible with each other because they have different definitions of node offsets. To avoid this incompatibility and in preparation to add more FDT parsing functions which will need libfdt, let's first convert the existing code to use libfdt. The FDT unflattening, top-level FDT scanning, and property retrieval functions are converted to use libfdt. The scanning code should be re-worked to be more efficient and understandable by using libfdt to find nodes directly by path or compatible strings. Signed-off-by: Rob Herring Tested-by: Michal Simek Tested-by: Grant Likely Tested-by: Stephen Chivers (cherry picked from commit e6a6928c3ea1d0195ed75a091e345696b916c09b) Signed-off-by: Mark Brown Conflicts: drivers/of/fdt.c --- drivers/of/Kconfig | 1 + drivers/of/Makefile | 2 + drivers/of/fdt.c | 223 ++++++++++++----------------------------- include/linux/of_fdt.h | 1 - 4 files changed, 66 insertions(+), 161 deletions(-) diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index bdc7f9a0c500..a7bb5da6a96b 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -27,6 +27,7 @@ config OF_SELFTEST config OF_FLATTREE bool select DTC + select LIBFDT config OF_EARLY_FLATTREE bool diff --git a/drivers/of/Makefile b/drivers/of/Makefile index 8098b4dd357f..2aaa7b90fc14 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -12,3 +12,5 @@ obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o obj-$(CONFIG_OF_MTD) += of_mtd.o obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o + +CFLAGS_fdt.o = -I$(src)/../../scripts/dtc/libfdt diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 39ac2961737b..10c08e82054b 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* for COMMAND_LINE_SIZE */ #ifdef CONFIG_PPC @@ -28,54 +29,6 @@ #include -char *of_fdt_get_string(struct boot_param_header *blob, u32 offset) -{ - return ((char *)blob) + - be32_to_cpu(blob->off_dt_strings) + offset; -} - -/** - * of_fdt_get_property - Given a node in the given flat blob, return - * the property ptr - */ -void *of_fdt_get_property(struct boot_param_header *blob, - unsigned long node, const char *name, - int *size) -{ - unsigned long p = node; - - do { - u32 tag = be32_to_cpup((__be32 *)p); - u32 sz, noff; - const char *nstr; - - p += 4; - if (tag == OF_DT_NOP) - continue; - if (tag != OF_DT_PROP) - return NULL; - - sz = be32_to_cpup((__be32 *)p); - noff = be32_to_cpup((__be32 *)(p + 4)); - p += 8; - if (be32_to_cpu(blob->version) < 0x10) - p = ALIGN(p, sz >= 8 ? 8 : 4); - - nstr = of_fdt_get_string(blob, noff); - if (nstr == NULL) { - pr_warning("Can't find property index name !\n"); - return NULL; - } - if (strcmp(name, nstr) == 0) { - if (size) - *size = sz; - return (void *)p; - } - p += sz; - p = ALIGN(p, 4); - } while (1); -} - /** * of_fdt_is_compatible - Return true if given node from the given blob has * compat in its compatible list @@ -93,7 +46,7 @@ int of_fdt_is_compatible(struct boot_param_header *blob, int cplen; unsigned long l, score = 0; - cp = of_fdt_get_property(blob, node, "compatible", &cplen); + cp = fdt_getprop(blob, node, "compatible", &cplen); if (cp == NULL) return 0; while (cplen > 0) { @@ -129,12 +82,12 @@ int of_fdt_match(struct boot_param_header *blob, unsigned long node, return score; } -static void *unflatten_dt_alloc(unsigned long *mem, unsigned long size, +static void *unflatten_dt_alloc(void **mem, unsigned long size, unsigned long align) { void *res; - *mem = ALIGN(*mem, align); + *mem = PTR_ALIGN(*mem, align); res = (void *)*mem; *mem += size; @@ -150,30 +103,29 @@ static void *unflatten_dt_alloc(unsigned long *mem, unsigned long size, * @allnextpp: pointer to ->allnext from last allocated device_node * @fpsize: Size of the node path up at the current depth. */ -static unsigned long unflatten_dt_node(struct boot_param_header *blob, - unsigned long mem, - unsigned long *p, +static void * unflatten_dt_node(struct boot_param_header *blob, + void *mem, + int *poffset, struct device_node *dad, struct device_node ***allnextpp, unsigned long fpsize) { + const __be32 *p; struct device_node *np; struct property *pp, **prev_pp = NULL; - char *pathp; - u32 tag; + const char *pathp; unsigned int l, allocl; + static int depth = 0; + int old_depth; + int offset; int has_name = 0; int new_format = 0; - tag = be32_to_cpup((__be32 *)(*p)); - if (tag != OF_DT_BEGIN_NODE) { - pr_err("Weird tag at start of node: %x\n", tag); + pathp = fdt_get_name(blob, *poffset, &l); + if (!pathp) return mem; - } - *p += 4; - pathp = (char *)*p; - l = allocl = strlen(pathp) + 1; - *p = ALIGN(*p + l, 4); + + allocl = l++; /* version 0x10 has a more compact unit name here instead of the full * path. we accumulate the full path size using "fpsize", we'll rebuild @@ -191,7 +143,7 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob, fpsize = 1; allocl = 2; l = 1; - *pathp = '\0'; + pathp = ""; } else { /* account for '/' and path size minus terminal 0 * already in 'l' @@ -239,32 +191,23 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob, kref_init(&np->kref); } /* process properties */ - while (1) { - u32 sz, noff; - char *pname; + for (offset = fdt_first_property_offset(blob, *poffset); + (offset >= 0); + (offset = fdt_next_property_offset(blob, offset))) { + const char *pname; + u32 sz; - tag = be32_to_cpup((__be32 *)(*p)); - if (tag == OF_DT_NOP) { - *p += 4; - continue; - } - if (tag != OF_DT_PROP) + if (!(p = fdt_getprop_by_offset(blob, offset, &pname, &sz))) { + offset = -FDT_ERR_INTERNAL; break; - *p += 4; - sz = be32_to_cpup((__be32 *)(*p)); - noff = be32_to_cpup((__be32 *)((*p) + 4)); - *p += 8; - if (be32_to_cpu(blob->version) < 0x10) - *p = ALIGN(*p, sz >= 8 ? 8 : 4); + } - pname = of_fdt_get_string(blob, noff); if (pname == NULL) { pr_info("Can't find property name in list !\n"); break; } if (strcmp(pname, "name") == 0) has_name = 1; - l = strlen(pname) + 1; pp = unflatten_dt_alloc(&mem, sizeof(struct property), __alignof__(struct property)); if (allnextpp) { @@ -276,26 +219,25 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob, if ((strcmp(pname, "phandle") == 0) || (strcmp(pname, "linux,phandle") == 0)) { if (np->phandle == 0) - np->phandle = be32_to_cpup((__be32*)*p); + np->phandle = be32_to_cpup(p); } /* And we process the "ibm,phandle" property * used in pSeries dynamic device tree * stuff */ if (strcmp(pname, "ibm,phandle") == 0) - np->phandle = be32_to_cpup((__be32 *)*p); - pp->name = pname; + np->phandle = be32_to_cpup(p); + pp->name = (char *)pname; pp->length = sz; - pp->value = (void *)*p; + pp->value = (__be32 *)p; *prev_pp = pp; prev_pp = &pp->next; } - *p = ALIGN((*p) + sz, 4); } /* with version 0x10 we may not have the name property, recreate * it here from the unit name if absent */ if (!has_name) { - char *p1 = pathp, *ps = pathp, *pa = NULL; + const char *p1 = pathp, *ps = pathp, *pa = NULL; int sz; while (*p1) { @@ -332,19 +274,18 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob, if (!np->type) np->type = ""; } - while (tag == OF_DT_BEGIN_NODE || tag == OF_DT_NOP) { - if (tag == OF_DT_NOP) - *p += 4; - else - mem = unflatten_dt_node(blob, mem, p, np, allnextpp, - fpsize); - tag = be32_to_cpup((__be32 *)(*p)); - } - if (tag != OF_DT_END_NODE) { - pr_err("Weird tag at end of node: %x\n", tag); - return mem; - } - *p += 4; + + old_depth = depth; + *poffset = fdt_next_node(blob, *poffset, &depth); + if (depth < 0) + depth = 0; + while (*poffset > 0 && depth > old_depth) + mem = unflatten_dt_node(blob, mem, poffset, np, allnextpp, + fpsize); + + if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND) + pr_err("unflatten: error %d processing FDT\n", *poffset); + return mem; } @@ -364,7 +305,9 @@ static void __unflatten_device_tree(struct boot_param_header *blob, struct device_node **mynodes, void * (*dt_alloc)(u64 size, u64 align)) { - unsigned long start, mem, size; + unsigned long size; + int start; + void *mem; struct device_node **allnextp = mynodes; pr_debug(" -> unflatten_device_tree()\n"); @@ -385,28 +328,23 @@ static void __unflatten_device_tree(struct boot_param_header *blob, } /* First pass, scan for size */ - start = ((unsigned long)blob) + - be32_to_cpu(blob->off_dt_struct); - size = unflatten_dt_node(blob, 0, &start, NULL, NULL, 0); - size = (size | 3) + 1; + start = 0; + size = (unsigned long)unflatten_dt_node(blob, 0, &start, NULL, NULL, 0); + size = ALIGN(size, 4); pr_debug(" size is %lx, allocating...\n", size); /* Allocate memory for the expanded device tree */ - mem = (unsigned long) - dt_alloc(size + 4, __alignof__(struct device_node)); + mem = dt_alloc(size + 4, __alignof__(struct device_node)); ((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef); - pr_debug(" unflattening %lx...\n", mem); + pr_debug(" unflattening %p...\n", mem); /* Second pass, do actual unflattening */ - start = ((unsigned long)blob) + - be32_to_cpu(blob->off_dt_struct); + start = 0; unflatten_dt_node(blob, mem, &start, NULL, &allnextp, 0); - if (be32_to_cpup((__be32 *)start) != OF_DT_END) - pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start)); - if (be32_to_cpu(((__be32 *)mem)[size / 4]) != 0xdeadbeef) + if (be32_to_cpup(mem + size) != 0xdeadbeef) pr_warning("End of tree marker overwritten: %08x\n", be32_to_cpu(((__be32 *)mem)[size / 4])); *allnextp = NULL; @@ -581,47 +519,19 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node, void *data), void *data) { - unsigned long p = ((unsigned long)initial_boot_params) + - be32_to_cpu(initial_boot_params->off_dt_struct); - int rc = 0; - int depth = -1; + const void *blob = initial_boot_params; + const char *pathp; + int offset, rc = 0, depth = -1; - do { - u32 tag = be32_to_cpup((__be32 *)p); - const char *pathp; + for (offset = fdt_next_node(blob, -1, &depth); + offset >= 0 && depth >= 0 && !rc; + offset = fdt_next_node(blob, offset, &depth)) { - p += 4; - if (tag == OF_DT_END_NODE) { - depth--; - continue; - } - if (tag == OF_DT_NOP) - continue; - if (tag == OF_DT_END) - break; - if (tag == OF_DT_PROP) { - u32 sz = be32_to_cpup((__be32 *)p); - p += 8; - if (be32_to_cpu(initial_boot_params->version) < 0x10) - p = ALIGN(p, sz >= 8 ? 8 : 4); - p += sz; - p = ALIGN(p, 4); - continue; - } - if (tag != OF_DT_BEGIN_NODE) { - pr_err("Invalid tag %x in flat device tree!\n", tag); - return -EINVAL; - } - depth++; - pathp = (char *)p; - p = ALIGN(p + strlen(pathp) + 1, 4); + pathp = fdt_get_name(blob, offset, NULL); if (*pathp == '/') pathp = kbasename(pathp); - rc = it(p, pathp, depth, data); - if (rc != 0) - break; - } while (1); - + rc = it(offset, pathp, depth, data); + } return rc; } @@ -630,14 +540,7 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node, */ unsigned long __init of_get_flat_dt_root(void) { - unsigned long p = ((unsigned long)initial_boot_params) + - be32_to_cpu(initial_boot_params->off_dt_struct); - - while (be32_to_cpup((__be32 *)p) == OF_DT_NOP) - p += 4; - BUG_ON(be32_to_cpup((__be32 *)p) != OF_DT_BEGIN_NODE); - p += 4; - return ALIGN(p + strlen((char *)p) + 1, 4); + return 0; } /** @@ -649,7 +552,7 @@ unsigned long __init of_get_flat_dt_root(void) const void *__init of_get_flat_dt_prop(unsigned long node, const char *name, int *size) { - return of_fdt_get_property(initial_boot_params, node, name, size); + return fdt_getprop(initial_boot_params, node, name, size); } /** diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 461b5114d9af..c9722fdf39c5 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -81,7 +81,6 @@ extern int __initdata dt_root_size_cells; extern struct boot_param_header *initial_boot_params; /* For scanning the flat device-tree at boot time */ -extern char *find_flat_dt_string(u32 offset); extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, void *data), void *data); From fc40eed01c76797b6f71c982a782cef2d6ab48a6 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 4 Feb 2014 11:11:10 -0500 Subject: [PATCH 1075/1368] lib: add fdt_empty_tree.c CONFIG_LIBFDT support does not include fdt_empty_tree.c which is needed by arm64 EFI stub. Add it to libfdt_files. Signed-off-by: Mark Salter Signed-off-by: Leif Lindholm Acked-by: Catalin Marinas Signed-off-by: Matt Fleming (cherry picked from commit adaf5687846c25613d58c0a2f5d9e024547cdbec) Signed-off-by: Mark Brown --- lib/Makefile | 3 ++- lib/fdt_empty_tree.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 lib/fdt_empty_tree.c diff --git a/lib/Makefile b/lib/Makefile index c55a037a354e..1dcad1b93284 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -139,7 +139,8 @@ obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o -libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o +libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ + fdt_empty_tree.o $(foreach file, $(libfdt_files), \ $(eval CFLAGS_$(file) = -I$(src)/../scripts/dtc/libfdt)) lib-$(CONFIG_LIBFDT) += $(libfdt_files) diff --git a/lib/fdt_empty_tree.c b/lib/fdt_empty_tree.c new file mode 100644 index 000000000000..5d30c58150ad --- /dev/null +++ b/lib/fdt_empty_tree.c @@ -0,0 +1,2 @@ +#include +#include "../scripts/dtc/libfdt/fdt_empty_tree.c" From e69503831c2f0d7782c0aaa521c3e4d52ea2ae57 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Wed, 25 Sep 2013 16:33:13 +0100 Subject: [PATCH 1076/1368] arm64: use correct register width when retrieving ASID The ASID is represented as an unsigned int in mm_context_t and we currently use the mmid assembler macro to access this element of the struct. This should be accessed with a register of 32-bit width. If the incorrect register width is used the ASID will be returned in bits[32:63] of the register when running under big-endian. Fix a use of the mmid macro in tlb.S to use a 32-bit access. Signed-off-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit fc18047c732f6becba92618a397555927687efd3) Signed-off-by: Mark Brown --- arch/arm64/mm/tlb.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S index 8ae80a18e8ec..19da91e0cd27 100644 --- a/arch/arm64/mm/tlb.S +++ b/arch/arm64/mm/tlb.S @@ -35,7 +35,7 @@ */ ENTRY(__cpu_flush_user_tlb_range) vma_vm_mm x3, x2 // get vma->vm_mm - mmid x3, x3 // get vm_mm->context.id + mmid w3, x3 // get vm_mm->context.id dsb sy lsr x0, x0, #12 // align address lsr x1, x1, #12 From 47672573e09067c14f7e248778acaf6360ec2770 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 2 May 2014 14:49:00 +0100 Subject: [PATCH 1077/1368] arm64: mm: Optimise tlb flush logic where we have >4K granule The tlb maintainence functions: __cpu_flush_user_tlb_range and __cpu_flush_kern_tlb_range do not take into consideration the page granule when looping through the address range, and repeatedly flush tlb entries for the same page when operating with 64K pages. This patch re-works the logic s.t. we instead advance the loop by 1 << (PAGE_SHIFT - 12), so avoid repeating ourselves. Also the routines have been converted from assembler to static inline functions to aid with legibility and potential compiler optimisations. The isb() has been removed from flush_tlb_kernel_range(.) as it is only needed when changing the execute permission of a mapping. If one needs to set an area of the kernel as execute/non-execute an isb() must be inserted after the call to flush_tlb_kernel_range. Cc: Laura Abbott Signed-off-by: Steve Capper Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit fa48e6f780a681cdbc7820e33259edfe1a79b9e3) Signed-off-by: Mark Brown --- arch/arm64/include/asm/tlbflush.h | 30 ++++++++++--- arch/arm64/mm/Makefile | 2 +- arch/arm64/mm/tlb.S | 71 ------------------------------- 3 files changed, 26 insertions(+), 77 deletions(-) delete mode 100644 arch/arm64/mm/tlb.S diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 3083a08f9622..b9349c4513ea 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -98,11 +98,31 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ish); } -/* - * Convert calls to our calling convention. - */ -#define flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma) -#define flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e) +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; + unsigned long addr; + start = asid | (start >> 12); + end = asid | (end >> 12); + + dsb(ishst); + for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) + asm("tlbi vae1is, %0" : : "r"(addr)); + dsb(ish); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long addr; + start >>= 12; + end >>= 12; + + dsb(ishst); + for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) + asm("tlbi vaae1is, %0" : : "r"(addr)); + dsb(ish); +} /* * On AArch64, the cache coherency is handled via the set_pte_at() function. diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index b51d36401d83..3ecb56c624d3 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -1,5 +1,5 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ - context.o tlb.o proc.o + context.o proc.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S deleted file mode 100644 index 19da91e0cd27..000000000000 --- a/arch/arm64/mm/tlb.S +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Based on arch/arm/mm/tlb.S - * - * Copyright (C) 1997-2002 Russell King - * Copyright (C) 2012 ARM Ltd. - * Written by Catalin Marinas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * __cpu_flush_user_tlb_range(start, end, vma) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_struct describing address range - */ -ENTRY(__cpu_flush_user_tlb_range) - vma_vm_mm x3, x2 // get vma->vm_mm - mmid w3, x3 // get vm_mm->context.id - dsb sy - lsr x0, x0, #12 // align address - lsr x1, x1, #12 - bfi x0, x3, #48, #16 // start VA and ASID - bfi x1, x3, #48, #16 // end VA and ASID -1: tlbi vae1is, x0 // TLB invalidate by address and ASID - add x0, x0, #1 - cmp x0, x1 - b.lo 1b - dsb sy - ret -ENDPROC(__cpu_flush_user_tlb_range) - -/* - * __cpu_flush_kern_tlb_range(start,end) - * - * Invalidate a range of kernel TLB entries. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - */ -ENTRY(__cpu_flush_kern_tlb_range) - dsb sy - lsr x0, x0, #12 // align address - lsr x1, x1, #12 -1: tlbi vaae1is, x0 // TLB invalidate by address - add x0, x0, #1 - cmp x0, x1 - b.lo 1b - dsb sy - isb - ret -ENDPROC(__cpu_flush_kern_tlb_range) From 6dca4f12e885d399d7a6fb9ab396981153886f9e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 9 Jun 2014 11:55:03 +0100 Subject: [PATCH 1078/1368] arm64: Fix barriers used for page table modifications The architecture specification states that both DSB and ISB are required between page table modifications and subsequent memory accesses using the corresponding virtual address. When TLB invalidation takes place, the tlb_flush_* functions already have the necessary barriers. However, there are other functions like create_mapping() for which this is not the case. The patch adds the DSB+ISB instructions in the set_pte() function for valid kernel mappings. The invalid pte case is handled by tlb_flush_* and the user mappings in general have a corresponding update_mmu_cache() call containing a DSB. Even when update_mmu_cache() isn't called, the kernel can still cope with an unlikely spurious page fault by re-executing the instruction. In addition, the set_pmd, set_pud() functions gain an ISB for architecture compliance when block mappings are created. Signed-off-by: Catalin Marinas Reported-by: Leif Lindholm Acked-by: Steve Capper Cc: Will Deacon (cherry picked from commit 54d6ba0ede61f12b2a03d74bdbf004719a9cfefc) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cacheflush.h | 11 +---------- arch/arm64/include/asm/pgtable.h | 13 +++++++++++++ arch/arm64/include/asm/tlbflush.h | 5 +++-- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index a5176cf32dad..f2defe1c380c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -138,19 +138,10 @@ static inline void __flush_icache_all(void) #define flush_icache_page(vma,page) do { } while (0) /* - * flush_cache_vmap() is used when creating mappings (eg, via vmap, - * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT - * caches, since the direct-mappings of these pages may contain cached - * data, we need to do a full cache flush to ensure that writebacks - * don't corrupt data placed into these pages via the new mappings. + * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache). */ static inline void flush_cache_vmap(unsigned long start, unsigned long end) { - /* - * set_pte_at() called from vmap_pte_range() does not - * have a DSB after cleaning the cache line. - */ - dsb(ish); } static inline void flush_cache_vunmap(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b2e7867e3290..225f0398b208 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -138,6 +138,8 @@ extern struct page *empty_zero_page; #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) +#define pte_valid_not_user(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) static inline pte_t pte_wrprotect(pte_t pte) { @@ -184,6 +186,15 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; + + /* + * Only if the new pte is valid and kernel, otherwise TLB maintenance + * or update_mmu_cache() have the necessary barriers. + */ + if (pte_valid_not_user(pte)) { + dsb(ishst); + isb(); + } } extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); @@ -307,6 +318,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; dsb(ishst); + isb(); } static inline void pmd_clear(pmd_t *pmdp) @@ -337,6 +349,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud) { *pudp = pud; dsb(ishst); + isb(); } static inline void pud_clear(pud_t *pudp) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index b9349c4513ea..3796ea6bb734 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -122,6 +122,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) asm("tlbi vaae1is, %0" : : "r"(addr)); dsb(ish); + isb(); } /* @@ -131,8 +132,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { /* - * set_pte() does not have a DSB, so make sure that the page table - * write is visible. + * set_pte() does not have a DSB for user mappings, so make sure that + * the page table write is visible. */ dsb(ishst); } From a95eca36ffbf8891d1e1a20d5c4a237f6d8f6a1f Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 2 Jul 2014 11:46:23 +0100 Subject: [PATCH 1079/1368] arm64: mm: Make icache synchronisation logic huge page aware The __sync_icache_dcache routine will only flush the dcache for the first page of a compound page, potentially leading to stale icache data residing further on in a hugetlb page. This patch addresses this issue by taking into consideration the order of the page when flushing the dcache. Reported-by: Mark Brown Tested-by: Mark Brown Signed-off-by: Steve Capper Acked-by: Will Deacon Signed-off-by: Catalin Marinas Cc: # v3.11+ (cherry picked from commit 923b8f5044da753e4985ab15c1374ced2cdf616c) Signed-off-by: Mark Brown --- arch/arm64/mm/flush.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index e4193e3adc7f..0d64089d28b5 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -79,7 +79,8 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) return; if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), PAGE_SIZE); + __flush_dcache_area(page_address(page), + PAGE_SIZE << compound_order(page)); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); From d0d4907e3550caf434e2ffa862676c98d9975b7f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 12:29:12 +0000 Subject: [PATCH 1080/1368] arm64: atomics: fix use of acquire + release for full barrier semantics Linux requires a number of atomic operations to provide full barrier semantics, that is no memory accesses after the operation can be observed before any accesses up to and including the operation in program order. On arm64, these operations have been incorrectly implemented as follows: // A, B, C are independent memory locations // atomic_op (B) 1: ldaxr x0, [B] // Exclusive load with acquire stlxr w1, x0, [B] // Exclusive store with release cbnz w1, 1b The assumption here being that two half barriers are equivalent to a full barrier, so the only permitted ordering would be A -> B -> C (where B is the atomic operation involving both a load and a store). Unfortunately, this is not the case by the letter of the architecture and, in fact, the accesses to A and C are permitted to pass their nearest half barrier resulting in orderings such as Bl -> A -> C -> Bs or Bl -> C -> A -> Bs (where Bl is the load-acquire on B and Bs is the store-release on B). This is a clear violation of the full barrier requirement. The simple way to fix this is to implement the same algorithm as ARMv7 using explicit barriers: // atomic_op (B) dmb ish // Full barrier 1: ldxr x0, [B] // Exclusive load stxr w1, x0, [B] // Exclusive store cbnz w1, 1b dmb ish // Full barrier but this has the undesirable effect of introducing *two* full barrier instructions. A better approach is actually the following, non-intuitive sequence: // atomic_op (B) 1: ldxr x0, [B] // Exclusive load stlxr w1, x0, [B] // Exclusive store with release cbnz w1, 1b dmb ish // Full barrier The simple observations here are: - The dmb ensures that no subsequent accesses (e.g. the access to C) can enter or pass the atomic sequence. - The dmb also ensures that no prior accesses (e.g. the access to A) can pass the atomic sequence. - Therefore, no prior access can pass a subsequent access, or vice-versa (i.e. A is strictly ordered before C). - The stlxr ensures that no prior access can pass the store component of the atomic operation. The only tricky part remaining is the ordering between the ldxr and the access to A, since the absence of the first dmb means that we're now permitting re-ordering between the ldxr and any prior accesses. From an (arbitrary) observer's point of view, there are two scenarios: 1. We have observed the ldxr. This means that if we perform a store to [B], the ldxr will still return older data. If we can observe the ldxr, then we can potentially observe the permitted re-ordering with the access to A, which is clearly an issue when compared to the dmb variant of the code. Thankfully, the exclusive monitor will save us here since it will be cleared as a result of the store and the ldxr will retry. Notice that any use of a later memory observation to imply observation of the ldxr will also imply observation of the access to A, since the stlxr/dmb ensure strict ordering. 2. We have not observed the ldxr. This means we can perform a store and influence the later ldxr. However, that doesn't actually tell us anything about the access to [A], so we've not lost anything here either when compared to the dmb variant. This patch implements this solution for our barriered atomic operations, ensuring that we satisfy the full barrier requirements where they are needed. Cc: Cc: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 8e86f0b409a44193f1587e87b69c5dcf8f65be67) Signed-off-by: Mark Brown --- arch/arm64/include/asm/atomic.h | 29 ++++++++++++++++++++--------- arch/arm64/include/asm/cmpxchg.h | 9 +++++---- arch/arm64/include/asm/futex.h | 6 ++++-- arch/arm64/kernel/kuser32.S | 6 ++++-- arch/arm64/lib/bitops.S | 3 ++- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 736c5916d367..a049bf7f5150 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -63,7 +63,7 @@ static inline int atomic_add_return(int i, atomic_t *v) int result; asm volatile("// atomic_add_return\n" -"1: ldaxr %w0, %2\n" +"1: ldxr %w0, %2\n" " add %w0, %w0, %w3\n" " stlxr %w1, %w0, %2\n" " cbnz %w1, 1b" @@ -71,6 +71,7 @@ static inline int atomic_add_return(int i, atomic_t *v) : "Ir" (i) : "memory"); + smp_mb(); return result; } @@ -94,7 +95,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) int result; asm volatile("// atomic_sub_return\n" -"1: ldaxr %w0, %2\n" +"1: ldxr %w0, %2\n" " sub %w0, %w0, %w3\n" " stlxr %w1, %w0, %2\n" " cbnz %w1, 1b" @@ -102,6 +103,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) : "Ir" (i) : "memory"); + smp_mb(); return result; } @@ -110,17 +112,20 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) unsigned long tmp; int oldval; + smp_mb(); + asm volatile("// atomic_cmpxchg\n" -"1: ldaxr %w1, %2\n" +"1: ldxr %w1, %2\n" " cmp %w1, %w3\n" " b.ne 2f\n" -" stlxr %w0, %w4, %2\n" +" stxr %w0, %w4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) : "cc"); + smp_mb(); return oldval; } @@ -194,7 +199,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_add_return\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " add %0, %0, %3\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b" @@ -202,6 +207,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) : "Ir" (i) : "memory"); + smp_mb(); return result; } @@ -225,7 +231,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_sub_return\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " sub %0, %0, %3\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b" @@ -233,6 +239,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) : "Ir" (i) : "memory"); + smp_mb(); return result; } @@ -241,17 +248,20 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) long oldval; unsigned long res; + smp_mb(); + asm volatile("// atomic64_cmpxchg\n" -"1: ldaxr %1, %2\n" +"1: ldxr %1, %2\n" " cmp %1, %3\n" " b.ne 2f\n" -" stlxr %w0, %4, %2\n" +" stxr %w0, %4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) : "cc"); + smp_mb(); return oldval; } @@ -263,11 +273,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " subs %0, %0, #1\n" " b.mi 2f\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b\n" +" dmb ish\n" "2:" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 0a234d0a41d0..d3d4089d9fc3 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -29,7 +29,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size switch (size) { case 1: asm volatile("// __xchg1\n" - "1: ldaxrb %w0, %2\n" + "1: ldxrb %w0, %2\n" " stlxrb %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) @@ -38,7 +38,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 2: asm volatile("// __xchg2\n" - "1: ldaxrh %w0, %2\n" + "1: ldxrh %w0, %2\n" " stlxrh %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) @@ -47,7 +47,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 4: asm volatile("// __xchg4\n" - "1: ldaxr %w0, %2\n" + "1: ldxr %w0, %2\n" " stlxr %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) @@ -56,7 +56,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 8: asm volatile("// __xchg8\n" - "1: ldaxr %0, %2\n" + "1: ldxr %0, %2\n" " stlxr %w1, %3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) @@ -67,6 +67,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size BUILD_BUG(); } + smp_mb(); return ret; } diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 6230baba7869..5f750dc96e0f 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -24,10 +24,11 @@ #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ asm volatile( \ -"1: ldaxr %w1, %2\n" \ +"1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ " cbnz %w3, 1b\n" \ +" dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -111,11 +112,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" -"1: ldaxr %w1, %2\n" +"1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" " cbnz %w3, 3f\n" "2: stlxr %w3, %w5, %2\n" " cbnz %w3, 1b\n" +" dmb ish\n" "3:\n" " .pushsection .fixup,\"ax\"\n" "4: mov %w0, %w6\n" diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index f2754710f5e9..87542397b3ac 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -35,12 +35,13 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xe1b20e9f // 1: ldaexd r0, r1, [r2] + .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr @@ -52,11 +53,12 @@ __kuser_memory_barrier: // 0xffff0fa0 .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xe1923e9f // 1: ldaex r3, [r2] + .inst 0xe1923f9f // 1: ldrex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe12fff1e // bx lr diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S index e5db797790d3..7dac371cc9a2 100644 --- a/arch/arm64/lib/bitops.S +++ b/arch/arm64/lib/bitops.S @@ -46,11 +46,12 @@ ENTRY( \name ) mov x2, #1 add x1, x1, x0, lsr #3 // Get word offset lsl x4, x2, x3 // Create mask -1: ldaxr x2, [x1] +1: ldxr x2, [x1] lsr x0, x2, x3 // Save old value of bit \instr x2, x2, x4 // toggle bit stlxr w5, x2, [x1] cbnz w5, 1b + dmb ish and x0, x0, #1 3: ret ENDPROC(\name ) From 91e8ffa66ead9f85ff86bdcb03042ada441acf78 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Feb 2014 14:42:55 +0100 Subject: [PATCH 1081/1368] arm64: add support for reserved memory defined by device tree Enable reserved memory initialization from device tree. Signed-off-by: Marek Szyprowski Acked-by: Catalin Marinas Signed-off-by: Grant Likely (cherry picked from commit 9bf14b7c540ae9ca7747af3a0c0d8470ef77b6ce) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + arch/arm64/mm/init.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 21201a0cb3f7..43068cf44c2d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -41,6 +41,7 @@ config ARM64 select NO_BOOTMEM select OF select OF_EARLY_FLATTREE + select OF_RESERVED_MEM select PERF_USE_VMALLOC select POWER_RESET select POWER_SUPPLY diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 52806427e15d..81bdd29df3a4 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -170,6 +170,7 @@ void __init arm64_memblock_init(void) memblock_reserve(base, size); } + early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(0); memblock_allow_resize(); From 5cb100b38a7bf5bc5e892fa46998ebcbab925f37 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 5 May 2013 20:03:40 +0800 Subject: [PATCH 1082/1368] KVM: add missing misc_deregister() on error in kvm_init() Add the missing misc_deregister() before return from kvm_init() in the debugfs init error handling case. Signed-off-by: Wei Yongjun Signed-off-by: Gleb Natapov (cherry picked from commit afc2f792cdcb67f4257f0e68d10ee4a7b7eae57a) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 302681c4aa44..b547a1ceecbc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3181,6 +3181,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, out_undebugfs: unregister_syscore_ops(&kvm_syscore_ops); + misc_deregister(&kvm_dev); out_unreg: kvm_async_pf_deinit(); out_free: From 86882a4950ff8f054d0b89bb36655446a04b89c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 14:31:01 +0100 Subject: [PATCH 1083/1368] ARM: KVM: move GIC/timer code to a common location As KVM/arm64 is looming on the horizon, it makes sense to move some of the common code to a single location in order to reduce duplication. The code could live anywhere. Actually, most of KVM is already built with a bunch of ugly ../../.. hacks in the various Makefiles, so we're not exactly talking about style here. But maybe it is time to start moving into a less ugly direction. The include files must be in a "public" location, as they are accessed from non-KVM files (arch/arm/kernel/asm-offsets.c). For this purpose, introduce two new locations: - virt/kvm/arm/ : x86 and ia64 already share the ioapic code in virt/kvm, so this could be seen as a (very ugly) precedent. - include/kvm/ : there is already an include/xen, and while the intent is slightly different, this seems as good a location as any Eventually, we should probably have independant Makefiles at every levels (just like everywhere else in the kernel), but this is just the first step. Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 7275acdfe29ba03ad2f6e150386900c4e2d43fb1) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 4 ++-- arch/arm/kvm/Makefile | 7 ++++--- .../asm/kvm_arch_timer.h => include/kvm/arm_arch_timer.h | 0 arch/arm/include/asm/kvm_vgic.h => include/kvm/arm_vgic.h | 0 {arch/arm/kvm => virt/kvm/arm}/arch_timer.c | 4 ++-- {arch/arm/kvm => virt/kvm/arm}/vgic.c | 0 6 files changed, 8 insertions(+), 7 deletions(-) rename arch/arm/include/asm/kvm_arch_timer.h => include/kvm/arm_arch_timer.h (100%) rename arch/arm/include/asm/kvm_vgic.h => include/kvm/arm_vgic.h (100%) rename {arch/arm/kvm => virt/kvm/arm}/arch_timer.c (99%) rename {arch/arm/kvm => virt/kvm/arm}/vgic.c (100%) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 57cb786a6203..ff5aaf10e6ec 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #define KVM_USER_MEM_SLOTS 32 @@ -38,7 +38,7 @@ #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) -#include +#include struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 53c5ed83d16f..9184a491d172 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I. AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) -kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +KVM := ../../../virt/kvm +kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o -obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/include/kvm/arm_arch_timer.h similarity index 100% rename from arch/arm/include/asm/kvm_arch_timer.h rename to include/kvm/arm_arch_timer.h diff --git a/arch/arm/include/asm/kvm_vgic.h b/include/kvm/arm_vgic.h similarity index 100% rename from arch/arm/include/asm/kvm_vgic.h rename to include/kvm/arm_vgic.h diff --git a/arch/arm/kvm/arch_timer.c b/virt/kvm/arm/arch_timer.c similarity index 99% rename from arch/arm/kvm/arch_timer.c rename to virt/kvm/arm/arch_timer.c index c55b6089e923..2d00b2925780 100644 --- a/arch/arm/kvm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -25,8 +25,8 @@ #include #include -#include -#include +#include +#include static struct timecounter *timecounter; static struct workqueue_struct *wqueue; diff --git a/arch/arm/kvm/vgic.c b/virt/kvm/arm/vgic.c similarity index 100% rename from arch/arm/kvm/vgic.c rename to virt/kvm/arm/vgic.c From 7ce8a35ca784be99a50520476c1a34aad2755b7a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 14:31:02 +0100 Subject: [PATCH 1084/1368] KVM: get rid of $(addprefix ../../../virt/kvm/, ...) in Makefiles As requested by the KVM maintainers, remove the addprefix used to refer to the main KVM code from the arch code, and replace it with a KVM variable that does the same thing. Tested-by: Christian Borntraeger Cc: Paolo Bonzini Cc: Gleb Natapov Cc: Christoffer Dall Acked-by: Xiantao Zhang Cc: Tony Luck Cc: Fenghua Yu Cc: Alexander Graf Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Cornelia Huck Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 535cf7b3b13c7faed3dfabafb6598417de1129ca) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Makefile | 2 +- arch/ia64/kvm/Makefile | 7 ++++--- arch/powerpc/kvm/Makefile | 13 +++++++------ arch/s390/kvm/Makefile | 3 ++- arch/x86/kvm/Makefile | 13 +++++++------ 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 9184a491d172..d99bee4950e5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) KVM := ../../../virt/kvm -kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o) +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 1a4053789d01..18e45ec49bbf 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file) ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ +KVM := ../../../virt/kvm -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o) +common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \ + $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) -common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) +common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 422de3f4d46c..008cd856c5b5 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -5,9 +5,10 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm +KVM := ../../../virt/kvm -common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ - eventfd.o) +common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ + $(KVM)/eventfd.o CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. @@ -53,7 +54,7 @@ kvm-e500mc-objs := \ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ - ../../../virt/kvm/coalesced_mmio.o \ + $(KVM)/coalesced_mmio.o \ fpu.o \ book3s_paired_singles.o \ book3s_pr.o \ @@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o kvm-book3s_64-module-objs := \ - ../../../virt/kvm/kvm_main.o \ - ../../../virt/kvm/eventfd.o \ + $(KVM)/kvm_main.o \ + $(KVM)/eventfd.o \ powerpc.o \ emulate.o \ book3s.o \ @@ -111,7 +112,7 @@ kvm-book3s_32-objs := \ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o -kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 8fe9d65a4585..40b4c6470f88 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d609e1d84048..bf4fb04d0112 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -5,12 +5,13 @@ CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. CFLAGS_vmx.o := -I. -kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o eventfd.o \ - irqchip.o) -kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ - assigned-dev.o iommu.o) -kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) +KVM := ../../../virt/kvm + +kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ + $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ + $(KVM)/eventfd.o $(KVM)/irqchip.o +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o +kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ i8254.o cpuid.o pmu.o From 9861d210e02d5fb6ffcd5ea992f62043105813b3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 Mar 2013 13:41:35 +0000 Subject: [PATCH 1085/1368] ARM: KVM: arch_timers: zero CNTVOFF upon return to host To use the virtual counters from the host, we need to ensure that CNTVOFF doesn't change unexpectedly. When we change to a guest, we replace the host's CNTVOFF, but we don't restore it when returning to the host. As the host sets CNTVOFF to zero, and never changes it, we can simply zero CNTVOFF when returning to the host. This patch adds said zeroing to the return to host path. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Acked-by: Santosh Shilimkar Acked-by: Christoffer Dall (cherry picked from commit f793c23ebbe5afd1cabf4a42a3a297022213756f) Signed-off-by: Christoffer Dall --- arch/arm/kvm/interrupts_head.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 2b44b95a86dd..6f18695a09cb 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -503,6 +503,10 @@ vcpu .req r0 @ vcpu pointer always in r0 add r5, vcpu, r4 strd r2, r3, [r5] + @ Ensure host CNTVCT == CNTPCT + mov r2, #0 + mcrr p15, 4, r2, r2, c14 @ CNTVOFF + 1: #endif @ Allow physical timer/counter access for the host From 16d32e6b5aad12c2566cd8c6918e1be3ecac06f5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Dec 2012 18:40:43 +0000 Subject: [PATCH 1086/1368] arm64: KVM: HYP mode idmap support Add the necessary infrastructure for identity-mapped HYP page tables. Idmap-ed code must be in the ".hyp.idmap.text" linker section. The rest of the HYP ends up in ".hyp.text". Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 2240bbb697354f5617d95e3ee104ca61bb812507) Signed-off-by: Christoffer Dall --- arch/arm64/kernel/vmlinux.lds.S | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ce2d97255ba9..55d0e035205f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -17,6 +17,19 @@ ENTRY(stext) jiffies = jiffies_64; +#define HYPERVISOR_TEXT \ + /* \ + * Force the alignment to be compatible with \ + * the vectors requirements \ + */ \ + . = ALIGN(2048); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + SECTIONS { /* @@ -48,6 +61,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -102,3 +116,9 @@ SECTIONS STABS_DEBUG .comment 0 : { *(.comment) } } + +/* + * The HYP init code can't be more than a page long. + */ +ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), + "HYP init code too big") From a51ca39794b462370c0b4f46b360370f55303454 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 10:46:47 +0000 Subject: [PATCH 1087/1368] arm64: KVM: EL2 register definitions Define all the useful bitfields for EL2 registers. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 0369f6a34b9facd16eea4236518ca6f9cbc9e5ef) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 245 +++++++++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_arm.h diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h new file mode 100644 index 000000000000..a5f28e2720c7 --- /dev/null +++ b/arch/arm64/include/asm/kvm_arm.h @@ -0,0 +1,245 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_ARM_H__ +#define __ARM64_KVM_ARM_H__ + +#include + +/* Hyp Configuration Register (HCR) bits */ +#define HCR_ID (UL(1) << 33) +#define HCR_CD (UL(1) << 32) +#define HCR_RW_SHIFT 31 +#define HCR_RW (UL(1) << HCR_RW_SHIFT) +#define HCR_TRVM (UL(1) << 30) +#define HCR_HCD (UL(1) << 29) +#define HCR_TDZ (UL(1) << 28) +#define HCR_TGE (UL(1) << 27) +#define HCR_TVM (UL(1) << 26) +#define HCR_TTLB (UL(1) << 25) +#define HCR_TPU (UL(1) << 24) +#define HCR_TPC (UL(1) << 23) +#define HCR_TSW (UL(1) << 22) +#define HCR_TAC (UL(1) << 21) +#define HCR_TIDCP (UL(1) << 20) +#define HCR_TSC (UL(1) << 19) +#define HCR_TID3 (UL(1) << 18) +#define HCR_TID2 (UL(1) << 17) +#define HCR_TID1 (UL(1) << 16) +#define HCR_TID0 (UL(1) << 15) +#define HCR_TWE (UL(1) << 14) +#define HCR_TWI (UL(1) << 13) +#define HCR_DC (UL(1) << 12) +#define HCR_BSU (3 << 10) +#define HCR_BSU_IS (UL(1) << 10) +#define HCR_FB (UL(1) << 9) +#define HCR_VA (UL(1) << 8) +#define HCR_VI (UL(1) << 7) +#define HCR_VF (UL(1) << 6) +#define HCR_AMO (UL(1) << 5) +#define HCR_IMO (UL(1) << 4) +#define HCR_FMO (UL(1) << 3) +#define HCR_PTW (UL(1) << 2) +#define HCR_SWIO (UL(1) << 1) +#define HCR_VM (UL(1) << 0) + +/* + * The bits we set in HCR: + * RW: 64bit by default, can be overriden for 32bit VMs + * TAC: Trap ACTLR + * TSC: Trap SMC + * TSW: Trap cache operations by set/way + * TWI: Trap WFI + * TIDCP: Trap L2CTLR/L2ECTLR + * BSU_IS: Upgrade barriers to the inner shareable domain + * FB: Force broadcast of all maintainance operations + * AMO: Override CPSR.A and enable signaling with VA + * IMO: Override CPSR.I and enable signaling with VI + * FMO: Override CPSR.F and enable signaling with VF + * SWIO: Turn set/way invalidates into set/way clean+invalidate + */ +#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ + HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ + HCR_SWIO | HCR_TIDCP | HCR_RW) +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) + +/* Hyp System Control Register (SCTLR_EL2) bits */ +#define SCTLR_EL2_EE (1 << 25) +#define SCTLR_EL2_WXN (1 << 19) +#define SCTLR_EL2_I (1 << 12) +#define SCTLR_EL2_SA (1 << 3) +#define SCTLR_EL2_C (1 << 2) +#define SCTLR_EL2_A (1 << 1) +#define SCTLR_EL2_M 1 +#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \ + SCTLR_EL2_SA | SCTLR_EL2_I) + +/* TCR_EL2 Registers bits */ +#define TCR_EL2_TBI (1 << 20) +#define TCR_EL2_PS (7 << 16) +#define TCR_EL2_PS_40B (2 << 16) +#define TCR_EL2_TG0 (1 << 14) +#define TCR_EL2_SH0 (3 << 12) +#define TCR_EL2_ORGN0 (3 << 10) +#define TCR_EL2_IRGN0 (3 << 8) +#define TCR_EL2_T0SZ 0x3f +#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ + TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) + +#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) + +/* VTCR_EL2 Registers bits */ +#define VTCR_EL2_PS_MASK (7 << 16) +#define VTCR_EL2_PS_40B (2 << 16) +#define VTCR_EL2_TG0_MASK (1 << 14) +#define VTCR_EL2_TG0_4K (0 << 14) +#define VTCR_EL2_TG0_64K (1 << 14) +#define VTCR_EL2_SH0_MASK (3 << 12) +#define VTCR_EL2_SH0_INNER (3 << 12) +#define VTCR_EL2_ORGN0_MASK (3 << 10) +#define VTCR_EL2_ORGN0_WBWA (1 << 10) +#define VTCR_EL2_IRGN0_MASK (3 << 8) +#define VTCR_EL2_IRGN0_WBWA (1 << 8) +#define VTCR_EL2_SL0_MASK (3 << 6) +#define VTCR_EL2_SL0_LVL1 (1 << 6) +#define VTCR_EL2_T0SZ_MASK 0x3f +#define VTCR_EL2_T0SZ_40B 24 + +#ifdef CONFIG_ARM64_64K_PAGES +/* + * Stage2 translation configuration: + * 40bits output (PS = 2) + * 40bits input (T0SZ = 24) + * 64kB pages (TG0 = 1) + * 2 level page tables (SL = 1) + */ +#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \ + VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ + VTCR_EL2_T0SZ_40B) +#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) +#else +/* + * Stage2 translation configuration: + * 40bits output (PS = 2) + * 40bits input (T0SZ = 24) + * 4kB pages (TG0 = 0) + * 3 level page tables (SL = 1) + */ +#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \ + VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ + VTCR_EL2_T0SZ_40B) +#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) +#endif + +#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) +#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT (48LLU) +#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) + +/* Hyp System Trap Register */ +#define HSTR_EL2_TTEE (1 << 16) +#define HSTR_EL2_T(x) (1 << x) + +/* Hyp Coprocessor Trap Register */ +#define CPTR_EL2_TCPAC (1 << 31) +#define CPTR_EL2_TTA (1 << 20) +#define CPTR_EL2_TFP (1 << 10) + +/* Hyp Debug Configuration Register bits */ +#define MDCR_EL2_TDRA (1 << 11) +#define MDCR_EL2_TDOSA (1 << 10) +#define MDCR_EL2_TDA (1 << 9) +#define MDCR_EL2_TDE (1 << 8) +#define MDCR_EL2_HPME (1 << 7) +#define MDCR_EL2_TPM (1 << 6) +#define MDCR_EL2_TPMCR (1 << 5) +#define MDCR_EL2_HPMN_MASK (0x1F) + +/* Exception Syndrome Register (ESR) bits */ +#define ESR_EL2_EC_SHIFT (26) +#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT) +#define ESR_EL2_IL (1U << 25) +#define ESR_EL2_ISS (ESR_EL2_IL - 1) +#define ESR_EL2_ISV_SHIFT (24) +#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT) +#define ESR_EL2_SAS_SHIFT (22) +#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT) +#define ESR_EL2_SSE (1 << 21) +#define ESR_EL2_SRT_SHIFT (16) +#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT) +#define ESR_EL2_SF (1 << 15) +#define ESR_EL2_AR (1 << 14) +#define ESR_EL2_EA (1 << 9) +#define ESR_EL2_CM (1 << 8) +#define ESR_EL2_S1PTW (1 << 7) +#define ESR_EL2_WNR (1 << 6) +#define ESR_EL2_FSC (0x3f) +#define ESR_EL2_FSC_TYPE (0x3c) + +#define ESR_EL2_CV_SHIFT (24) +#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT) +#define ESR_EL2_COND_SHIFT (20) +#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT) + + +#define FSC_FAULT (0x04) +#define FSC_PERM (0x0c) + +/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ +#define HPFAR_MASK (~0xFUL) + +#define ESR_EL2_EC_UNKNOWN (0x00) +#define ESR_EL2_EC_WFI (0x01) +#define ESR_EL2_EC_CP15_32 (0x03) +#define ESR_EL2_EC_CP15_64 (0x04) +#define ESR_EL2_EC_CP14_MR (0x05) +#define ESR_EL2_EC_CP14_LS (0x06) +#define ESR_EL2_EC_FP_ASIMD (0x07) +#define ESR_EL2_EC_CP10_ID (0x08) +#define ESR_EL2_EC_CP14_64 (0x0C) +#define ESR_EL2_EC_ILL_ISS (0x0E) +#define ESR_EL2_EC_SVC32 (0x11) +#define ESR_EL2_EC_HVC32 (0x12) +#define ESR_EL2_EC_SMC32 (0x13) +#define ESR_EL2_EC_SVC64 (0x15) +#define ESR_EL2_EC_HVC64 (0x16) +#define ESR_EL2_EC_SMC64 (0x17) +#define ESR_EL2_EC_SYS64 (0x18) +#define ESR_EL2_EC_IABT (0x20) +#define ESR_EL2_EC_IABT_HYP (0x21) +#define ESR_EL2_EC_PC_ALIGN (0x22) +#define ESR_EL2_EC_DABT (0x24) +#define ESR_EL2_EC_DABT_HYP (0x25) +#define ESR_EL2_EC_SP_ALIGN (0x26) +#define ESR_EL2_EC_FP_EXC32 (0x28) +#define ESR_EL2_EC_FP_EXC64 (0x2C) +#define ESR_EL2_EC_SERRROR (0x2F) +#define ESR_EL2_EC_BREAKPT (0x30) +#define ESR_EL2_EC_BREAKPT_HYP (0x31) +#define ESR_EL2_EC_SOFTSTP (0x32) +#define ESR_EL2_EC_SOFTSTP_HYP (0x33) +#define ESR_EL2_EC_WATCHPT (0x34) +#define ESR_EL2_EC_WATCHPT_HYP (0x35) +#define ESR_EL2_EC_BKPT32 (0x38) +#define ESR_EL2_EC_VECTOR32 (0x3A) +#define ESR_EL2_EC_BRK64 (0x3C) + +#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 + +#endif /* __ARM64_KVM_ARM_H__ */ From 44848d746b7f29803f7ac9579d594f368a1cad11 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 11:16:40 +0000 Subject: [PATCH 1088/1368] arm64: KVM: system register definitions for 64bit guests Define the saved/restored registers for 64bit guests. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit fd9fc9f73cc2070d2637a7ee082800a817fd45f3) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 68 ++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_asm.h diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h new file mode 100644 index 000000000000..591ac219964a --- /dev/null +++ b/arch/arm64/include/asm/kvm_asm.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM_KVM_ASM_H__ +#define __ARM_KVM_ASM_H__ + +/* + * 0 is reserved as an invalid value. + * Order *must* be kept in sync with the hyp switch code. + */ +#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ +#define CSSELR_EL1 2 /* Cache Size Selection Register */ +#define SCTLR_EL1 3 /* System Control Register */ +#define ACTLR_EL1 4 /* Auxilliary Control Register */ +#define CPACR_EL1 5 /* Coprocessor Access Control */ +#define TTBR0_EL1 6 /* Translation Table Base Register 0 */ +#define TTBR1_EL1 7 /* Translation Table Base Register 1 */ +#define TCR_EL1 8 /* Translation Control Register */ +#define ESR_EL1 9 /* Exception Syndrome Register */ +#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ +#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ +#define FAR_EL1 12 /* Fault Address Register */ +#define MAIR_EL1 13 /* Memory Attribute Indirection Register */ +#define VBAR_EL1 14 /* Vector Base Address Register */ +#define CONTEXTIDR_EL1 15 /* Context ID Register */ +#define TPIDR_EL0 16 /* Thread ID, User R/W */ +#define TPIDRRO_EL0 17 /* Thread ID, User R/O */ +#define TPIDR_EL1 18 /* Thread ID, Privileged */ +#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ +#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ +#define NR_SYS_REGS 21 + +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_TRAP 1 + +#ifndef __ASSEMBLY__ +struct kvm; +struct kvm_vcpu; + +extern char __kvm_hyp_init[]; +extern char __kvm_hyp_init_end[]; + +extern char __kvm_hyp_vector[]; + +extern char __kvm_hyp_code_start[]; +extern char __kvm_hyp_code_end[]; + +extern void __kvm_flush_vm_context(void); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); + +extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +#endif + +#endif /* __ARM_KVM_ASM_H__ */ From b04c4cdfff4b701f01f1dae2ada92b580cc281d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 13:27:52 +0000 Subject: [PATCH 1089/1368] arm64: KVM: Basic ESR_EL2 helpers and vcpu register access Implements helpers for dealing with the EL2 syndrome register as well as accessing the vcpu registers. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 83a4979483c8e597b69d4403794f87fea51fa549) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 158 +++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_emulate.h diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h new file mode 100644 index 000000000000..6c1725e93b0b --- /dev/null +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/include/kvm_emulate.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_EMULATE_H__ +#define __ARM64_KVM_EMULATE_H__ + +#include +#include +#include +#include +#include + +void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); + +static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; +} + +static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; +} + +static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; +} + +static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) +{ + return false; /* 32bit? Bahhh... */ +} + +static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) +{ + return true; /* No conditionals on arm64 */ +} + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + *vcpu_pc(vcpu) += 4; +} + +static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) +{ +} + +static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +} + +/* Get vcpu SPSR for current mode */ +static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; +} + +static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) +{ + u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + + return mode != PSR_MODE_EL0t; +} + +static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.esr_el2; +} + +static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.far_el2; +} + +static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) +{ + return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; +} + +static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV); +} + +static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR); +} + +static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE); +} + +static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) +{ + return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT; +} + +static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA); +} + +static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW); +} + +static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +{ + return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT); +} + +/* This one is not specific to Data Abort */ +static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL); +} + +static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT; +} + +static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT; +} + +static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; +} + +#endif /* __ARM64_KVM_EMULATE_H__ */ From fbd17d89d0b567e7c5c12955648cf728f0c04169 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 17 Dec 2012 12:27:42 +0000 Subject: [PATCH 1090/1368] arm64: KVM: fault injection into a guest Implement the injection of a fault (undefined, data abort or prefetch abort) into a 64bit guest. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit aa8eff9bfbd531e0fcc8e68052f4ac545cd004c5) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/inject_fault.c | 126 ++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 arch/arm64/kvm/inject_fault.c diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c new file mode 100644 index 000000000000..54f656271266 --- /dev/null +++ b/arch/arm64/kvm/inject_fault.c @@ -0,0 +1,126 @@ +/* + * Fault injection for 64bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ + PSR_I_BIT | PSR_D_BIT) +#define EL1_EXCEPT_SYNC_OFFSET 0x200 + +static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_aarch32; + u32 esr = 0; + + is_aarch32 = vcpu_mode_is_32bit(vcpu); + + *vcpu_spsr(vcpu) = cpsr; + *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; + + vcpu_sys_reg(vcpu, FAR_EL1) = addr; + + /* + * Build an {i,d}abort, depending on the level and the + * instruction set. Report an external synchronous abort. + */ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= ESR_EL1_IL; + + /* + * Here, the guest runs in AArch64 mode when in EL1. If we get + * an AArch32 fault, it means we managed to trap an EL0 fault. + */ + if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t) + esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT); + else + esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT); + + if (!is_iabt) + esr |= ESR_EL1_EC_DABT_EL0; + + vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT; +} + +static void inject_undef64(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT); + + *vcpu_spsr(vcpu) = cpsr; + *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; + + /* + * Build an unknown exception, depending on the instruction + * set. + */ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= ESR_EL1_IL; + + vcpu_sys_reg(vcpu, ESR_EL1) = esr; +} + +/** + * kvm_inject_dabt - inject a data abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt64(vcpu, false, addr); +} + +/** + * kvm_inject_pabt - inject a prefetch abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt64(vcpu, true, addr); +} + +/** + * kvm_inject_undefined - inject an undefined instruction into the guest + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_undefined(struct kvm_vcpu *vcpu) +{ + inject_undef64(vcpu); +} From bef26a85389996c6f9c79f09d2ba09da0cd46351 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 15:35:24 +0000 Subject: [PATCH 1091/1368] arm64: KVM: architecture specific MMU backend Define the arm64 specific MMU backend: - HYP/kernel VA offset - S2 4/64kB definitions - S2 page table populating and flushing - icache cleaning Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 37c437532b0126d1df5685080db9cecf3d918175) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_mmu.h | 135 +++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_mmu.h diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h new file mode 100644 index 000000000000..efe609c6a3c9 --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_MMU_H__ +#define __ARM64_KVM_MMU_H__ + +#include +#include + +/* + * As we only have the TTBR0_EL2 register, we cannot express + * "negative" addresses. This makes it impossible to directly share + * mappings with the kernel. + * + * Instead, give the HYP mode its own VA region at a fixed offset from + * the kernel by just masking the top bits (which are all ones for a + * kernel address). + */ +#define HYP_PAGE_OFFSET_SHIFT VA_BITS +#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) +#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK) + +/* + * Our virtual mapping for the idmap-ed MMU-enable code. Must be + * shared across all the page-tables. Conveniently, we use the last + * possible page, where no kernel mapping will ever exist. + */ +#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) + +#ifdef __ASSEMBLY__ + +/* + * Convert a kernel VA into a HYP VA. + * reg: VA to be converted. + */ +.macro kern_hyp_va reg + and \reg, \reg, #HYP_PAGE_OFFSET_MASK +.endm + +#else + +#include +#include + +#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) + +/* + * Align KVM with the kernel's view of physical memory. Should be + * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. + */ +#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT +#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) + +/* Make sure we get the right size, and thus the right alignment */ +#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT)) +#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) + +int create_hyp_mappings(void *from, void *to); +int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +void free_boot_hyp_pgd(void); +void free_hyp_pgds(void); + +int kvm_alloc_stage2_pgd(struct kvm *kvm); +void kvm_free_stage2_pgd(struct kvm *kvm); +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, + phys_addr_t pa, unsigned long size); + +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); + +phys_addr_t kvm_mmu_get_httbr(void); +phys_addr_t kvm_mmu_get_boot_httbr(void); +phys_addr_t kvm_get_idmap_vector(void); +int kvm_mmu_init(void); +void kvm_clear_hyp_idmap(void); + +#define kvm_set_pte(ptep, pte) set_pte(ptep, pte) + +static inline bool kvm_is_write_fault(unsigned long esr) +{ + unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; + + if (esr_ec == ESR_EL2_EC_IABT) + return false; + + if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) + return false; + + return true; +} + +static inline void kvm_clean_dcache_area(void *addr, size_t size) {} +static inline void kvm_clean_pgd(pgd_t *pgd) {} +static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} +static inline void kvm_clean_pte(pte_t *pte) {} +static inline void kvm_clean_pte_entry(pte_t *pte) {} + +static inline void kvm_set_s2pte_writable(pte_t *pte) +{ + pte_val(*pte) |= PTE_S2_RDWR; +} + +struct kvm; + +static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +{ + if (!icache_is_aliasing()) { /* PIPT */ + unsigned long hva = gfn_to_hva(kvm, gfn); + flush_icache_range(hva, hva + PAGE_SIZE); + } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ + /* any kind of VIPT cache */ + __flush_icache_all(); + } +} + +#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) + +#endif /* __ASSEMBLY__ */ +#endif /* __ARM64_KVM_MMU_H__ */ From becdc5b63f089f0664fc1381fa168b7a7dce09dc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:29:28 +0000 Subject: [PATCH 1092/1368] arm64: KVM: user space interface Provide the kvm.h file that defines the user space visible interface. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 54f81d0eb93896da73d1636bca84cf90f52cabdf) Signed-off-by: Christoffer Dall --- arch/arm64/include/uapi/asm/kvm.h | 117 ++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 arch/arm64/include/uapi/asm/kvm.h diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..4e64570a20c9 --- /dev/null +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/include/uapi/asm/kvm.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM_KVM_H__ +#define __ARM_KVM_H__ + +#define KVM_SPSR_EL1 0 +#define KVM_NR_SPSR 1 + +#ifndef __ASSEMBLY__ +#include +#include + +#define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_IRQ_LINE + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +struct kvm_regs { + struct user_pt_regs regs; /* sp = sp_el0 */ + + __u64 sp_el1; + __u64 elr_el1; + + __u64 spsr[KVM_NR_SPSR]; + + struct user_fpsimd_state fp_regs; +}; + +/* Supported Processor Types */ +#define KVM_ARM_TARGET_AEM_V8 0 +#define KVM_ARM_TARGET_FOUNDATION_V8 1 +#define KVM_ARM_TARGET_CORTEX_A57 2 + +#define KVM_ARM_NUM_TARGETS 3 + +/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ +#define KVM_ARM_DEVICE_TYPE_SHIFT 0 +#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_ID_SHIFT 16 +#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) + +/* Supported device IDs */ +#define KVM_ARM_DEVICE_VGIC_V2 0 + +/* Supported VGIC address types */ +#define KVM_VGIC_V2_ADDR_TYPE_DIST 0 +#define KVM_VGIC_V2_ADDR_TYPE_CPU 1 + +#define KVM_VGIC_V2_DIST_SIZE 0x1000 +#define KVM_VGIC_V2_CPU_SIZE 0x2000 + +struct kvm_vcpu_init { + __u32 target; + __u32 features[7]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; + +struct kvm_guest_debug_arch { +}; + +struct kvm_debug_exit_arch { +}; + +struct kvm_sync_regs { +}; + +struct kvm_arch_memory_slot { +}; + +/* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_TYPE_SHIFT 24 +#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_VCPU_SHIFT 16 +#define KVM_ARM_IRQ_VCPU_MASK 0xff +#define KVM_ARM_IRQ_NUM_SHIFT 0 +#define KVM_ARM_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_ARM_IRQ_TYPE_CPU 0 +#define KVM_ARM_IRQ_TYPE_SPI 1 +#define KVM_ARM_IRQ_TYPE_PPI 2 + +/* out-of-kernel GIC cpu interrupt injection irq_number field */ +#define KVM_ARM_IRQ_CPU_IRQ 0 +#define KVM_ARM_IRQ_CPU_FIQ 1 + +/* Highest supported SPI, from VGIC_NR_IRQS */ +#define KVM_ARM_IRQ_GIC_MAX 127 + +#endif + +#endif /* __ARM_KVM_H__ */ From 068d803462e03f87953424ea6f3910578f3f293e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:15:34 +0000 Subject: [PATCH 1093/1368] arm64: KVM: system register handling Provide 64bit system register handling, modeled after the cp15 handling for ARM. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 7c8c5e6a9101ea57a1c2c9faff0917e79251a21e) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_coproc.h | 51 ++ arch/arm64/include/uapi/asm/kvm.h | 29 + arch/arm64/kvm/sys_regs.c | 883 ++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.h | 138 +++++ include/uapi/linux/kvm.h | 1 + 5 files changed, 1102 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_coproc.h create mode 100644 arch/arm64/kvm/sys_regs.c create mode 100644 arch/arm64/kvm/sys_regs.h diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h new file mode 100644 index 000000000000..9b4477acb554 --- /dev/null +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/include/asm/kvm_coproc.h + * Copyright (C) 2012 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_COPROC_H__ +#define __ARM64_KVM_COPROC_H__ + +#include + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +struct kvm_sys_reg_table { + const struct sys_reg_desc *table; + size_t num; +}; + +struct kvm_sys_reg_target_table { + struct kvm_sys_reg_table table64; +}; + +void kvm_register_target_sys_reg_table(unsigned int target, + struct kvm_sys_reg_target_table *table); + +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); + +#define kvm_coproc_table_init kvm_sys_reg_table_init +void kvm_sys_reg_table_init(void); + +struct kvm_one_reg; +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 4e64570a20c9..ebac919dc0ca 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -92,6 +92,35 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 +#define KVM_REG_ARM_COPROC_SHIFT 16 + +/* Normal registers are mapped as coprocessor 16. */ +#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / sizeof(__u32)) + +/* Some registers need more space to represent values. */ +#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00 +#define KVM_REG_ARM_DEMUX_ID_SHIFT 8 +#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT) +#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF +#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0 + +/* AArch64 system registers */ +#define KVM_REG_ARM64_SYSREG (0x0013 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM64_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM64_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM64_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM64_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM64_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM64_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c new file mode 100644 index 000000000000..52fff0ae3442 --- /dev/null +++ b/arch/arm64/kvm/sys_regs.c @@ -0,0 +1,883 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/coproc.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell + * Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sys_regs.h" + +/* + * All of this file is extremly similar to the ARM coproc.c, but the + * types are different. My gut feeling is that it should be pretty + * easy to merge, but that would be an ABI breakage -- again. VFP + * would also need to be abstracted. + */ + +/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ +static u32 cache_levels; + +/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ +#define CSSELR_MAX 12 + +/* Which cache CCSIDR represents depends on CSSELR value. */ +static u32 get_ccsidr(u32 csselr) +{ + u32 ccsidr; + + /* Make sure noone else changes CSSELR during this! */ + local_irq_disable(); + /* Put value into CSSELR */ + asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + isb(); + /* Read result out of CCSIDR */ + asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr)); + local_irq_enable(); + + return ccsidr; +} + +static void do_dc_cisw(u32 val) +{ + asm volatile("dc cisw, %x0" : : "r" (val)); + dsb(); +} + +static void do_dc_csw(u32 val) +{ + asm volatile("dc csw, %x0" : : "r" (val)); + dsb(); +} + +/* See note at ARM ARM B1.14.4 */ +static bool access_dcsw(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned long val; + int cpu; + + if (!p->is_write) + return read_from_write_only(vcpu, p); + + cpu = get_cpu(); + + cpumask_setall(&vcpu->arch.require_dcache_flush); + cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); + + /* If we were already preempted, take the long way around */ + if (cpu != vcpu->arch.last_pcpu) { + flush_cache_all(); + goto done; + } + + val = *vcpu_reg(vcpu, p->Rt); + + switch (p->CRm) { + case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ + case 14: /* DCCISW */ + do_dc_cisw(val); + break; + + case 10: /* DCCSW */ + do_dc_csw(val); + break; + } + +done: + put_cpu(); + + return true; +} + +/* + * We could trap ID_DFR0 and tell the guest we don't support performance + * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was + * NAKed, so it will read the PMCR anyway. + * + * Therefore we tell the guest we have 0 counters. Unfortunately, we + * must always support PMCCNTR (the cycle counter): we just RAZ/WI for + * all PM registers, which doesn't crash the guest kernel at least. + */ +static bool pm_fake(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + else + return read_zero(vcpu, p); +} + +static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 amair; + + asm volatile("mrs %0, amair_el1\n" : "=r" (amair)); + vcpu_sys_reg(vcpu, AMAIR_EL1) = amair; +} + +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + /* + * Simply map the vcpu_id into the Aff0 field of the MPIDR. + */ + vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); +} + +/* + * Architected system registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + */ +static const struct sys_reg_desc sys_reg_descs[] = { + /* DC ISW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010), + access_dcsw }, + /* DC CSW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010), + access_dcsw }, + /* DC CISW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), + access_dcsw }, + + /* MPIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101), + NULL, reset_mpidr, MPIDR_EL1 }, + /* SCTLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), + NULL, reset_val, SCTLR_EL1, 0x00C50078 }, + /* CPACR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), + NULL, reset_val, CPACR_EL1, 0 }, + /* TTBR0_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, TTBR0_EL1 }, + /* TTBR1_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), + NULL, reset_unknown, TTBR1_EL1 }, + /* TCR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), + NULL, reset_val, TCR_EL1, 0 }, + + /* AFSR0_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), + NULL, reset_unknown, AFSR0_EL1 }, + /* AFSR1_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), + NULL, reset_unknown, AFSR1_EL1 }, + /* ESR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), + NULL, reset_unknown, ESR_EL1 }, + /* FAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, FAR_EL1 }, + + /* PMINTENSET_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), + pm_fake }, + /* PMINTENCLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), + pm_fake }, + + /* MAIR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), + NULL, reset_unknown, MAIR_EL1 }, + /* AMAIR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), + NULL, reset_amair_el1, AMAIR_EL1 }, + + /* VBAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), + NULL, reset_val, VBAR_EL1, 0 }, + /* CONTEXTIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), + NULL, reset_val, CONTEXTIDR_EL1, 0 }, + /* TPIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), + NULL, reset_unknown, TPIDR_EL1 }, + + /* CNTKCTL_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000), + NULL, reset_val, CNTKCTL_EL1, 0}, + + /* CSSELR_EL1 */ + { Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, CSSELR_EL1 }, + + /* PMCR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), + pm_fake }, + /* PMCNTENSET_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), + pm_fake }, + /* PMCNTENCLR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), + pm_fake }, + /* PMOVSCLR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), + pm_fake }, + /* PMSWINC_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), + pm_fake }, + /* PMSELR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), + pm_fake }, + /* PMCEID0_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), + pm_fake }, + /* PMCEID1_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), + pm_fake }, + /* PMCCNTR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), + pm_fake }, + /* PMXEVTYPER_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), + pm_fake }, + /* PMXEVCNTR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), + pm_fake }, + /* PMUSERENR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), + pm_fake }, + /* PMOVSSET_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), + pm_fake }, + + /* TPIDR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), + NULL, reset_unknown, TPIDR_EL0 }, + /* TPIDRRO_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), + NULL, reset_unknown, TPIDRRO_EL0 }, +}; + +/* Target specific emulation tables */ +static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; + +void kvm_register_target_sys_reg_table(unsigned int target, + struct kvm_sys_reg_target_table *table) +{ + target_tables[target] = table; +} + +/* Get specific register table for this target. */ +static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num) +{ + struct kvm_sys_reg_target_table *table; + + table = target_tables[target]; + *num = table->table64.num; + return table->table64.table; +} + +static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, + const struct sys_reg_desc table[], + unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + const struct sys_reg_desc *r = &table[i]; + + if (params->Op0 != r->Op0) + continue; + if (params->Op1 != r->Op1) + continue; + if (params->CRn != r->CRn) + continue; + if (params->CRm != r->CRm) + continue; + if (params->Op2 != r->Op2) + continue; + + return r; + } + return NULL; +} + +static int emulate_sys_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + size_t num; + const struct sys_reg_desc *table, *r; + + table = get_target_table(vcpu->arch.target, &num); + + /* Search target-specific then generic table. */ + r = find_reg(params, table, num); + if (!r) + r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + if (likely(r)) { + /* + * Not having an accessor means that we have + * configured a trap that we don't know how to + * handle. This certainly qualifies as a gross bug + * that should be fixed right away. + */ + BUG_ON(!r->access); + + if (likely(r->access(vcpu, params, r))) { + /* Skip instruction, since it was emulated */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + /* If access function fails, it should complain. */ + } else { + kvm_err("Unsupported guest sys_reg access at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + } + kvm_inject_undefined(vcpu); + return 1; +} + +static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *table, size_t num) +{ + unsigned long i; + + for (i = 0; i < num; i++) + if (table[i].reset) + table[i].reset(vcpu, &table[i]); +} + +/** + * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + unsigned long esr = kvm_vcpu_get_hsr(vcpu); + + params.Op0 = (esr >> 20) & 3; + params.Op1 = (esr >> 14) & 0x7; + params.CRn = (esr >> 10) & 0xf; + params.CRm = (esr >> 1) & 0xf; + params.Op2 = (esr >> 17) & 0x7; + params.Rt = (esr >> 5) & 0x1f; + params.is_write = !(esr & 1); + + return emulate_sys_reg(vcpu, ¶ms); +} + +/****************************************************************************** + * Userspace API + *****************************************************************************/ + +static bool index_to_params(u64 id, struct sys_reg_params *params) +{ + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U64: + /* Any unused index bits means it's not valid. */ + if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK + | KVM_REG_ARM_COPROC_MASK + | KVM_REG_ARM64_SYSREG_OP0_MASK + | KVM_REG_ARM64_SYSREG_OP1_MASK + | KVM_REG_ARM64_SYSREG_CRN_MASK + | KVM_REG_ARM64_SYSREG_CRM_MASK + | KVM_REG_ARM64_SYSREG_OP2_MASK)) + return false; + params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK) + >> KVM_REG_ARM64_SYSREG_OP0_SHIFT); + params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK) + >> KVM_REG_ARM64_SYSREG_OP1_SHIFT); + params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK) + >> KVM_REG_ARM64_SYSREG_CRN_SHIFT); + params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK) + >> KVM_REG_ARM64_SYSREG_CRM_SHIFT); + params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK) + >> KVM_REG_ARM64_SYSREG_OP2_SHIFT); + return true; + default: + return false; + } +} + +/* Decode an index value, and find the sys_reg_desc entry. */ +static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, + u64 id) +{ + size_t num; + const struct sys_reg_desc *table, *r; + struct sys_reg_params params; + + /* We only do sys_reg for now. */ + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) + return NULL; + + if (!index_to_params(id, ¶ms)) + return NULL; + + table = get_target_table(vcpu->arch.target, &num); + r = find_reg(¶ms, table, num); + if (!r) + r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + /* Not saved in the sys_reg array? */ + if (r && !r->reg) + r = NULL; + + return r; +} + +/* + * These are the invariant sys_reg registers: we let the guest see the + * host versions of these, so they're part of the guest state. + * + * A future CPU may provide a mechanism to present different values to + * the guest, or a future kvm may trap them. + */ + +#define FUNCTION_INVARIANT(reg) \ + static void get_##reg(struct kvm_vcpu *v, \ + const struct sys_reg_desc *r) \ + { \ + u64 val; \ + \ + asm volatile("mrs %0, " __stringify(reg) "\n" \ + : "=r" (val)); \ + ((struct sys_reg_desc *)r)->val = val; \ + } + +FUNCTION_INVARIANT(midr_el1) +FUNCTION_INVARIANT(ctr_el0) +FUNCTION_INVARIANT(revidr_el1) +FUNCTION_INVARIANT(id_pfr0_el1) +FUNCTION_INVARIANT(id_pfr1_el1) +FUNCTION_INVARIANT(id_dfr0_el1) +FUNCTION_INVARIANT(id_afr0_el1) +FUNCTION_INVARIANT(id_mmfr0_el1) +FUNCTION_INVARIANT(id_mmfr1_el1) +FUNCTION_INVARIANT(id_mmfr2_el1) +FUNCTION_INVARIANT(id_mmfr3_el1) +FUNCTION_INVARIANT(id_isar0_el1) +FUNCTION_INVARIANT(id_isar1_el1) +FUNCTION_INVARIANT(id_isar2_el1) +FUNCTION_INVARIANT(id_isar3_el1) +FUNCTION_INVARIANT(id_isar4_el1) +FUNCTION_INVARIANT(id_isar5_el1) +FUNCTION_INVARIANT(clidr_el1) +FUNCTION_INVARIANT(aidr_el1) + +/* ->val is filled in by kvm_sys_reg_table_init() */ +static struct sys_reg_desc invariant_sys_regs[] = { + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, get_midr_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110), + NULL, get_revidr_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000), + NULL, get_id_pfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001), + NULL, get_id_pfr1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010), + NULL, get_id_dfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011), + NULL, get_id_afr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100), + NULL, get_id_mmfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101), + NULL, get_id_mmfr1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110), + NULL, get_id_mmfr2_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111), + NULL, get_id_mmfr3_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), + NULL, get_id_isar0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001), + NULL, get_id_isar1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), + NULL, get_id_isar2_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011), + NULL, get_id_isar3_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100), + NULL, get_id_isar4_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101), + NULL, get_id_isar5_el1 }, + { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001), + NULL, get_clidr_el1 }, + { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111), + NULL, get_aidr_el1 }, + { Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001), + NULL, get_ctr_el0 }, +}; + +static int reg_from_user(void *val, const void __user *uaddr, u64 id) +{ + /* This Just Works because we are little endian. */ + if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) + return -EFAULT; + return 0; +} + +static int reg_to_user(void __user *uaddr, const void *val, u64 id) +{ + /* This Just Works because we are little endian. */ + if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) + return -EFAULT; + return 0; +} + +static int get_invariant_sys_reg(u64 id, void __user *uaddr) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + + if (!index_to_params(id, ¶ms)) + return -ENOENT; + + r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); + if (!r) + return -ENOENT; + + return reg_to_user(uaddr, &r->val, id); +} + +static int set_invariant_sys_reg(u64 id, void __user *uaddr) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + int err; + u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ + + if (!index_to_params(id, ¶ms)) + return -ENOENT; + r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); + if (!r) + return -ENOENT; + + err = reg_from_user(&val, uaddr, id); + if (err) + return err; + + /* This is what we mean by invariant: you can't change it. */ + if (r->val != val) + return -EINVAL; + + return 0; +} + +static bool is_valid_cache(u32 val) +{ + u32 level, ctype; + + if (val >= CSSELR_MAX) + return -ENOENT; + + /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ + level = (val >> 1); + ctype = (cache_levels >> (level * 3)) & 7; + + switch (ctype) { + case 0: /* No cache */ + return false; + case 1: /* Instruction cache only */ + return (val & 1); + case 2: /* Data cache only */ + case 4: /* Unified cache */ + return !(val & 1); + case 3: /* Separate instruction and data caches */ + return true; + default: /* Reserved: we can't know instruction or data. */ + return false; + } +} + +static int demux_c15_get(u64 id, void __user *uaddr) +{ + u32 val; + u32 __user *uval = uaddr; + + /* Fail if we have unknown bits set. */ + if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK + | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1))) + return -ENOENT; + + switch (id & KVM_REG_ARM_DEMUX_ID_MASK) { + case KVM_REG_ARM_DEMUX_ID_CCSIDR: + if (KVM_REG_SIZE(id) != 4) + return -ENOENT; + val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) + >> KVM_REG_ARM_DEMUX_VAL_SHIFT; + if (!is_valid_cache(val)) + return -ENOENT; + + return put_user(get_ccsidr(val), uval); + default: + return -ENOENT; + } +} + +static int demux_c15_set(u64 id, void __user *uaddr) +{ + u32 val, newval; + u32 __user *uval = uaddr; + + /* Fail if we have unknown bits set. */ + if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK + | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1))) + return -ENOENT; + + switch (id & KVM_REG_ARM_DEMUX_ID_MASK) { + case KVM_REG_ARM_DEMUX_ID_CCSIDR: + if (KVM_REG_SIZE(id) != 4) + return -ENOENT; + val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) + >> KVM_REG_ARM_DEMUX_VAL_SHIFT; + if (!is_valid_cache(val)) + return -ENOENT; + + if (get_user(newval, uval)) + return -EFAULT; + + /* This is also invariant: you can't change it. */ + if (newval != get_ccsidr(val)) + return -EINVAL; + return 0; + default: + return -ENOENT; + } +} + +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + const struct sys_reg_desc *r; + void __user *uaddr = (void __user *)(unsigned long)reg->addr; + + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return demux_c15_get(reg->id, uaddr); + + if (KVM_REG_SIZE(reg->id) != sizeof(__u64)) + return -ENOENT; + + r = index_to_sys_reg_desc(vcpu, reg->id); + if (!r) + return get_invariant_sys_reg(reg->id, uaddr); + + return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); +} + +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + const struct sys_reg_desc *r; + void __user *uaddr = (void __user *)(unsigned long)reg->addr; + + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return demux_c15_set(reg->id, uaddr); + + if (KVM_REG_SIZE(reg->id) != sizeof(__u64)) + return -ENOENT; + + r = index_to_sys_reg_desc(vcpu, reg->id); + if (!r) + return set_invariant_sys_reg(reg->id, uaddr); + + return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); +} + +static unsigned int num_demux_regs(void) +{ + unsigned int i, count = 0; + + for (i = 0; i < CSSELR_MAX; i++) + if (is_valid_cache(i)) + count++; + + return count; +} + +static int write_demux_regids(u64 __user *uindices) +{ + u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; + unsigned int i; + + val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; + for (i = 0; i < CSSELR_MAX; i++) { + if (!is_valid_cache(i)) + continue; + if (put_user(val | i, uindices)) + return -EFAULT; + uindices++; + } + return 0; +} + +static u64 sys_reg_to_index(const struct sys_reg_desc *reg) +{ + return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | + KVM_REG_ARM64_SYSREG | + (reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) | + (reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) | + (reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) | + (reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) | + (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT)); +} + +static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) +{ + if (!*uind) + return true; + + if (put_user(sys_reg_to_index(reg), *uind)) + return false; + + (*uind)++; + return true; +} + +/* Assumed ordered tables, see kvm_sys_reg_table_init. */ +static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) +{ + const struct sys_reg_desc *i1, *i2, *end1, *end2; + unsigned int total = 0; + size_t num; + + /* We check for duplicates here, to allow arch-specific overrides. */ + i1 = get_target_table(vcpu->arch.target, &num); + end1 = i1 + num; + i2 = sys_reg_descs; + end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); + + BUG_ON(i1 == end1 || i2 == end2); + + /* Walk carefully, as both tables may refer to the same register. */ + while (i1 || i2) { + int cmp = cmp_sys_reg(i1, i2); + /* target-specific overrides generic entry. */ + if (cmp <= 0) { + /* Ignore registers we trap but don't save. */ + if (i1->reg) { + if (!copy_reg_to_user(i1, &uind)) + return -EFAULT; + total++; + } + } else { + /* Ignore registers we trap but don't save. */ + if (i2->reg) { + if (!copy_reg_to_user(i2, &uind)) + return -EFAULT; + total++; + } + } + + if (cmp <= 0 && ++i1 == end1) + i1 = NULL; + if (cmp >= 0 && ++i2 == end2) + i2 = NULL; + } + return total; +} + +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu) +{ + return ARRAY_SIZE(invariant_sys_regs) + + num_demux_regs() + + walk_sys_regs(vcpu, (u64 __user *)NULL); +} + +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + int err; + + /* Then give them all the invariant registers' indices. */ + for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) { + if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) + return -EFAULT; + uindices++; + } + + err = walk_sys_regs(vcpu, uindices); + if (err < 0) + return err; + uindices += err; + + return write_demux_regids(uindices); +} + +void kvm_sys_reg_table_init(void) +{ + unsigned int i; + struct sys_reg_desc clidr; + + /* Make sure tables are unique and in order. */ + for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++) + BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0); + + /* We abuse the reset function to overwrite the table itself. */ + for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) + invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]); + + /* + * CLIDR format is awkward, so clean it up. See ARM B4.1.20: + * + * If software reads the Cache Type fields from Ctype1 + * upwards, once it has seen a value of 0b000, no caches + * exist at further-out levels of the hierarchy. So, for + * example, if Ctype3 is the first Cache Type field with a + * value of 0b000, the values of Ctype4 to Ctype7 must be + * ignored. + */ + get_clidr_el1(NULL, &clidr); /* Ugly... */ + cache_levels = clidr.val; + for (i = 0; i < 7; i++) + if (((cache_levels >> (i*3)) & 7) == 0) + break; + /* Clear all higher bits. */ + cache_levels &= (1 << (i*3))-1; +} + +/** + * kvm_reset_sys_regs - sets system registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architecturally defined reset values. + */ +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) +{ + size_t num; + const struct sys_reg_desc *table; + + /* Catch someone adding a register without putting in reset entry. */ + memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs)); + + /* Generic chip reset first (so target could override). */ + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + table = get_target_table(vcpu->arch.target, &num); + reset_sys_reg_descs(vcpu, table, num); + + for (num = 1; num < NR_SYS_REGS; num++) + if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242) + panic("Didn't reset vcpu_sys_reg(%zi)", num); +} diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h new file mode 100644 index 000000000000..d50d3722998e --- /dev/null +++ b/arch/arm64/kvm/sys_regs.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/coproc.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__ +#define __ARM64_KVM_SYS_REGS_LOCAL_H__ + +struct sys_reg_params { + u8 Op0; + u8 Op1; + u8 CRn; + u8 CRm; + u8 Op2; + u8 Rt; + bool is_write; +}; + +struct sys_reg_desc { + /* MRS/MSR instruction which accesses it. */ + u8 Op0; + u8 Op1; + u8 CRn; + u8 CRm; + u8 Op2; + + /* Trapped access from guest, if non-NULL. */ + bool (*access)(struct kvm_vcpu *, + const struct sys_reg_params *, + const struct sys_reg_desc *); + + /* Initialization for vcpu. */ + void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *); + + /* Index into sys_reg[], or 0 if we don't need to save it. */ + int reg; + + /* Value (usually reset value) */ + u64 val; +}; + +static inline void print_sys_reg_instr(const struct sys_reg_params *p) +{ + /* Look, we even formatted it for you to paste into the table! */ + kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", + p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read"); +} + +static inline bool ignore_write(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p) +{ + return true; +} + +static inline bool read_zero(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p) +{ + *vcpu_reg(vcpu, p->Rt) = 0; + return true; +} + +static inline bool write_to_read_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + kvm_debug("sys_reg write to read-only register at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + return false; +} + +static inline bool read_from_write_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + kvm_debug("sys_reg read to write-only register at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + return false; +} + +/* Reset functions */ +static inline void reset_unknown(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; +} + +static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + vcpu_sys_reg(vcpu, r->reg) = r->val; +} + +static inline int cmp_sys_reg(const struct sys_reg_desc *i1, + const struct sys_reg_desc *i2) +{ + BUG_ON(i1 == i2); + if (!i1) + return 1; + else if (!i2) + return -1; + if (i1->Op0 != i2->Op0) + return i1->Op0 - i2->Op0; + if (i1->Op1 != i2->Op1) + return i1->Op1 - i2->Op1; + if (i1->CRn != i2->CRn) + return i1->CRn - i2->CRn; + if (i1->CRm != i2->CRm) + return i1->CRm - i2->CRm; + return i1->Op2 - i2->Op2; +} + + +#define Op0(_x) .Op0 = _x +#define Op1(_x) .Op1 = _x +#define CRn(_x) .CRn = _x +#define CRm(_x) .CRm = _x +#define Op2(_x) .Op2 = _x + +#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d88c8ee00c8b..97277d333e82 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -783,6 +783,7 @@ struct kvm_dirty_tlb { #define KVM_REG_IA64 0x3000000000000000ULL #define KVM_REG_ARM 0x4000000000000000ULL #define KVM_REG_S390 0x5000000000000000ULL +#define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 From 3107348ebf0b00722a27b7a4a32c1f4c34fdb1e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 17:30:48 +0000 Subject: [PATCH 1094/1368] arm64: KVM: CPU specific system registers handling Add the support code for CPU specific system registers. Not much here yet. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b990a9d3152bddca62cc1f8bf80518430b98737b) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs_generic_v8.c | 85 ++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 arch/arm64/kvm/sys_regs_generic_v8.c diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c new file mode 100644 index 000000000000..d4e803907312 --- /dev/null +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Based on arch/arm/kvm/coproc_a15.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell + * Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sys_regs.h" + +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1); + return true; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 actlr; + + asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr)); + vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr; +} + +/* + * Implementation specific sys-reg registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + */ +static const struct sys_reg_desc genericv8_sys_regs[] = { + /* ACTLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), + access_actlr, reset_actlr, ACTLR_EL1 }, +}; + +static struct kvm_sys_reg_target_table genericv8_target_table = { + .table64 = { + .table = genericv8_sys_regs, + .num = ARRAY_SIZE(genericv8_sys_regs), + }, +}; + +static int __init sys_reg_genericv8_init(void) +{ + unsigned int i; + + for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++) + BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], + &genericv8_sys_regs[i]) >= 0); + + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, + &genericv8_target_table); + return 0; +} +late_initcall(sys_reg_genericv8_init); From cac664305a439463255eb7954ecd375b39f4299f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:23:59 +0000 Subject: [PATCH 1095/1368] arm64: KVM: virtual CPU reset Provide the reset code for a virtual CPU booted in 64bit mode. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit f4672752c321ea36ce099cebdd7a082a8f327505) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/reset.c | 76 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 arch/arm64/kvm/reset.c diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c new file mode 100644 index 000000000000..f6536a06231a --- /dev/null +++ b/arch/arm64/kvm/reset.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/reset.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include +#include +#include +#include + +/* + * ARMv8 Reset Values + */ +static const struct kvm_regs default_regs_reset = { + .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | + PSR_F_BIT | PSR_D_BIT), +}; + +int kvm_arch_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + default: + r = 0; + } + + return r; +} + +/** + * kvm_reset_vcpu - sets core registers and sys_regs to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on + * the virtual CPU struct to their architectually defined reset + * values. + */ +int kvm_reset_vcpu(struct kvm_vcpu *vcpu) +{ + const struct kvm_regs *cpu_reset; + + switch (vcpu->arch.target) { + default: + cpu_reset = &default_regs_reset; + break; + } + + /* Reset core registers */ + memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset)); + + /* Reset system registers */ + kvm_reset_sys_regs(vcpu); + + return 0; +} From 913d79134dee46fc407ad2f3819bb5dce2f47e38 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:29:28 +0000 Subject: [PATCH 1096/1368] arm64: KVM: kvm_arch and kvm_vcpu_arch definitions Provide the architecture dependent structures for VM and vcpu abstractions. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 4f8d6632ec71372a3b8dbb4775662c2c9025d173) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 186 ++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_host.h diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h new file mode 100644 index 000000000000..4a2622f5e81f --- /dev/null +++ b/arch/arm64/include/asm/kvm_host.h @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/include/asm/kvm_host.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_HOST_H__ +#define __ARM64_KVM_HOST_H__ + +#include +#include +#include + +#define KVM_MAX_VCPUS 4 +#define KVM_USER_MEM_SLOTS 32 +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#include +#include + +#define KVM_VCPU_MAX_FEATURES 0 + +/* We don't currently support large pages. */ +#define KVM_HPAGE_GFN_SHIFT(x) 0 +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) + +struct kvm_vcpu; +int kvm_target_cpu(void); +int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +int kvm_arch_dev_ioctl_check_extension(long ext); + +struct kvm_arch { + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; + + /* 1-level 2nd stage table and lock */ + spinlock_t pgd_lock; + pgd_t *pgd; + + /* VTTBR value associated with above pgd and vmid */ + u64 vttbr; + + /* Interrupt controller */ + struct vgic_dist vgic; + + /* Timer */ + struct arch_timer_kvm timer; +}; + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +struct kvm_vcpu_fault_info { + u32 esr_el2; /* Hyp Syndrom Register */ + u64 far_el2; /* Hyp Fault Address Register */ + u64 hpfar_el2; /* Hyp IPA Fault Address Register */ +}; + +struct kvm_cpu_context { + struct kvm_regs gp_regs; + u64 sys_regs[NR_SYS_REGS]; +}; + +typedef struct kvm_cpu_context kvm_cpu_context_t; + +struct kvm_vcpu_arch { + struct kvm_cpu_context ctxt; + + /* HYP configuration */ + u64 hcr_el2; + + /* Exception Information */ + struct kvm_vcpu_fault_info fault; + + /* Pointer to host CPU context */ + kvm_cpu_context_t *host_cpu_context; + + /* VGIC state */ + struct vgic_cpu vgic_cpu; + struct arch_timer_cpu timer_cpu; + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + /* dcache set/way operation pending */ + int last_pcpu; + cpumask_t require_dcache_flush; + + /* Don't run the guest */ + bool pause; + + /* IO related fields */ + struct kvm_decode mmio_decode; + + /* Interrupt related fields */ + u64 irq_lines; /* IRQ and FIQ levels */ + + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; + + /* Target CPU and feature flags */ + u32 target; + DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + + /* Detect first run of a vcpu */ + bool has_run_once; +}; + +#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) +#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 halt_wakeup; +}; + +struct kvm_vcpu_init; +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init); +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +struct kvm_one_reg; +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +#define KVM_ARCH_WANT_MMU_NOTIFIER +struct kvm; +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + +/* We do not have shadow page tables, hence the empty hooks */ +static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + return 0; +} + +static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return 0; +} + +struct kvm_vcpu *kvm_arm_get_running_vcpu(void); +struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); + +u64 kvm_call_hyp(void *hypfn, ...); + +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index); + +int kvm_perf_init(void); +int kvm_perf_teardown(void); + +#endif /* __ARM64_KVM_HOST_H__ */ From 56a8fbf4b8d368562273336cf8f43068aaed91a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:29:50 +0000 Subject: [PATCH 1097/1368] arm64: KVM: MMIO access backend Define the necessary structures to perform an MMIO access. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit d7246bf3571a82834984a42db52261525bc11159) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_mmio.h | 59 +++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_mmio.h diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h new file mode 100644 index 000000000000..fc2f689c0694 --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_MMIO_H__ +#define __ARM64_KVM_MMIO_H__ + +#include +#include +#include + +/* + * This is annoying. The mmio code requires this, even if we don't + * need any decoding. To be fixed. + */ +struct kvm_decode { + unsigned long rt; + bool sign_extend; +}; + +/* + * The in-kernel MMIO emulation code wants to use a copy of run->mmio, + * which is an anonymous type. Use our own type instead. + */ +struct kvm_exit_mmio { + phys_addr_t phys_addr; + u8 data[8]; + u32 len; + bool is_write; +}; + +static inline void kvm_prepare_mmio(struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + run->mmio.phys_addr = mmio->phys_addr; + run->mmio.len = mmio->len; + run->mmio.is_write = mmio->is_write; + memcpy(run->mmio.data, mmio->data, mmio->len); + run->exit_reason = KVM_EXIT_MMIO; +} + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + +#endif /* __ARM64_KVM_MMIO_H__ */ From f98733fb49495d20251f55dc1ddff88f5761a797 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:37:02 +0000 Subject: [PATCH 1098/1368] arm64: KVM: guest one-reg interface Let userspace play with the guest registers. Reviewed-by: Christopher Covington Signed-off-by: Marc Zyngier (cherry picked from commit 2f4a07c5f9fe4a5cdb9867e1e2fcab3165846ea7) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/guest.c | 259 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 arch/arm64/kvm/guest.c diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c new file mode 100644 index 000000000000..3d7518a7ebaa --- /dev/null +++ b/arch/arm64/kvm/guest.c @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/guest.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; + return 0; +} + +static u64 core_reg_offset_from_id(u64 id) +{ + return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); +} + +static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* + * Because the kvm_regs structure is a mix of 32, 64 and + * 128bit fields, we index it as if it was a 32bit + * array. Hence below, nr_regs is the number of entries, and + * off the index in the "array". + */ + __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + struct kvm_regs *regs = vcpu_gp_regs(vcpu); + int nr_regs = sizeof(*regs) / sizeof(__u32); + u32 off; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= nr_regs || + (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) + return -ENOENT; + + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + struct kvm_regs *regs = vcpu_gp_regs(vcpu); + int nr_regs = sizeof(*regs) / sizeof(__u32); + __uint128_t tmp; + void *valp = &tmp; + u64 off; + int err = 0; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= nr_regs || + (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) + return -ENOENT; + + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) { + err = -EFAULT; + goto out; + } + + if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { + u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case PSR_MODE_EL0t: + case PSR_MODE_EL1t: + case PSR_MODE_EL1h: + break; + default: + err = -EINVAL; + goto out; + } + } + + memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); +out: + return err; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_regs) / sizeof(__u32); +} + +/** + * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) +{ + return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu); +} + +/** + * kvm_arm_copy_reg_indices - get indices of all registers. + * + * We do core registers right here, then we apppend system regs. + */ +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; + + for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { + if (put_user(core_reg | i, uindices)) + return -EFAULT; + uindices++; + } + + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); +} + +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + + /* Register group 16 means we want a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return get_core_reg(vcpu, reg); + + return kvm_arm_sys_reg_get_reg(vcpu, reg); +} + +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + + /* Register group 16 means we set a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return set_core_reg(vcpu, reg); + + return kvm_arm_sys_reg_set_reg(vcpu, reg); +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_AEM_V8: + return KVM_ARM_TARGET_AEM_V8; + case ARM_CPU_PART_FOUNDATION: + return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A57: + /* Currently handled by the generic backend */ + return KVM_ARM_TARGET_CORTEX_A57; + default: + return -EINVAL; + } +} + +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + vcpu->arch.target = phys_target; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + if (init->features[i / 32] & (1 << (i % 32))) { + if (i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + set_bit(i, vcpu->arch.features); + } + } + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; +} From f756a17fc0ab844b88c1282cf5df2ffb03185825 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 17 Dec 2012 17:07:52 +0000 Subject: [PATCH 1099/1368] arm64: KVM: hypervisor initialization code Provide EL2 with page tables and stack, and set the vectors to point to the full blown world-switch code. Signed-off-by: Marc Zyngier (cherry picked from commit 092bd143cbb481b4ce1d55247a2987eaaf61f967) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 13 ++++ arch/arm64/kvm/hyp-init.S | 107 ++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 arch/arm64/kvm/hyp-init.S diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4a2622f5e81f..2500eb6a4d2a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -183,4 +183,17 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t pgd_ptr, + unsigned long hyp_stack_ptr, + unsigned long vector_ptr) +{ + /* + * Call initialization code, and switch to the full blown + * HYP code. + */ + kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, + hyp_stack_ptr, vector_ptr); +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S new file mode 100644 index 000000000000..ba84e6705e20 --- /dev/null +++ b/arch/arm64/kvm/hyp-init.S @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include + + .text + .pushsection .hyp.idmap.text, "ax" + + .align 11 + +ENTRY(__kvm_hyp_init) + ventry __invalid // Synchronous EL2t + ventry __invalid // IRQ EL2t + ventry __invalid // FIQ EL2t + ventry __invalid // Error EL2t + + ventry __invalid // Synchronous EL2h + ventry __invalid // IRQ EL2h + ventry __invalid // FIQ EL2h + ventry __invalid // Error EL2h + + ventry __do_hyp_init // Synchronous 64-bit EL1 + ventry __invalid // IRQ 64-bit EL1 + ventry __invalid // FIQ 64-bit EL1 + ventry __invalid // Error 64-bit EL1 + + ventry __invalid // Synchronous 32-bit EL1 + ventry __invalid // IRQ 32-bit EL1 + ventry __invalid // FIQ 32-bit EL1 + ventry __invalid // Error 32-bit EL1 + +__invalid: + b . + + /* + * x0: HYP boot pgd + * x1: HYP pgd + * x2: HYP stack + * x3: HYP vectors + */ +__do_hyp_init: + + msr ttbr0_el2, x0 + + mrs x4, tcr_el1 + ldr x5, =TCR_EL2_MASK + and x4, x4, x5 + ldr x5, =TCR_EL2_FLAGS + orr x4, x4, x5 + msr tcr_el2, x4 + + ldr x4, =VTCR_EL2_FLAGS + msr vtcr_el2, x4 + + mrs x4, mair_el1 + msr mair_el2, x4 + isb + + mov x4, #SCTLR_EL2_FLAGS + msr sctlr_el2, x4 + isb + + /* MMU is now enabled. Get ready for the trampoline dance */ + ldr x4, =TRAMPOLINE_VA + adr x5, target + bfi x4, x5, #0, #PAGE_SHIFT + br x4 + +target: /* We're now in the trampoline code, switch page tables */ + msr ttbr0_el2, x1 + isb + + /* Invalidate the old TLBs */ + tlbi alle2 + dsb sy + + /* Set the stack and new vectors */ + kern_hyp_va x2 + mov sp, x2 + kern_hyp_va x3 + msr vbar_el2, x3 + + /* Hello, World! */ + eret +ENDPROC(__kvm_hyp_init) + + .ltorg + + .popsection From 066bddb132227937d1f68dc5ec4554786e393345 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:40:18 +0000 Subject: [PATCH 1100/1368] arm64: KVM: HYP mode world switch implementation The HYP mode world switch in all its glory. Implements save/restore of host/guest registers, EL2 trapping, IPA resolution, and additional services (tlb invalidation). Reviewed-by: Christopher Covington Signed-off-by: Marc Zyngier (cherry picked from commit 55c7401d92e16360e0987afe39355f1eb6300f31) Signed-off-by: Christoffer Dall --- arch/arm64/kernel/asm-offsets.c | 33 ++ arch/arm64/kvm/hyp.S | 617 ++++++++++++++++++++++++++++++++ 2 files changed, 650 insertions(+) create mode 100644 arch/arm64/kvm/hyp.S diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a2a4d810bea3..49c162c03b69 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -104,5 +104,38 @@ int main(void) BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + BLANK(); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); + DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); + DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); + DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); + DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); + DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); + DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); + DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); + DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); + DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); + DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); + DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); + DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); + DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); + DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif return 0; } diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S new file mode 100644 index 000000000000..0b18c2e1e043 --- /dev/null +++ b/arch/arm64/kvm/hyp.S @@ -0,0 +1,617 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) +#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + .align PAGE_SHIFT + +__kvm_hyp_code_start: + .globl __kvm_hyp_code_start + +.macro save_common_regs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_XREG_OFFSET(19) + stp x19, x20, [x3] + stp x21, x22, [x3, #16] + stp x23, x24, [x3, #32] + stp x25, x26, [x3, #48] + stp x27, x28, [x3, #64] + stp x29, lr, [x3, #80] + + mrs x19, sp_el0 + mrs x20, elr_el2 // EL1 PC + mrs x21, spsr_el2 // EL1 pstate + + stp x19, x20, [x3, #96] + str x21, [x3, #112] + + mrs x22, sp_el1 + mrs x23, elr_el1 + mrs x24, spsr_el1 + + str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] + str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] +.endm + +.macro restore_common_regs + // x2: base address for cpu context + // x3: tmp register + + ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] + ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] + + msr sp_el1, x22 + msr elr_el1, x23 + msr spsr_el1, x24 + + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 + ldp x19, x20, [x3] + ldr x21, [x3, #16] + + msr sp_el0, x19 + msr elr_el2, x20 // EL1 PC + msr spsr_el2, x21 // EL1 pstate + + add x3, x2, #CPU_XREG_OFFSET(19) + ldp x19, x20, [x3] + ldp x21, x22, [x3, #16] + ldp x23, x24, [x3, #32] + ldp x25, x26, [x3, #48] + ldp x27, x28, [x3, #64] + ldp x29, lr, [x3, #80] +.endm + +.macro save_host_regs + save_common_regs +.endm + +.macro restore_host_regs + restore_common_regs +.endm + +.macro save_fpsimd + // x2: cpu context address + // x3, x4: tmp regs + add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + fpsimd_save x3, 4 +.endm + +.macro restore_fpsimd + // x2: cpu context address + // x3, x4: tmp regs + add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + fpsimd_restore x3, 4 +.endm + +.macro save_guest_regs + // x0 is the vcpu address + // x1 is the return code, do not corrupt! + // x2 is the cpu context + // x3 is a tmp register + // Guest's x0-x3 are on the stack + + // Compute base to save registers + add x3, x2, #CPU_XREG_OFFSET(4) + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + stp x8, x9, [x3, #32] + stp x10, x11, [x3, #48] + stp x12, x13, [x3, #64] + stp x14, x15, [x3, #80] + stp x16, x17, [x3, #96] + str x18, [x3, #112] + + pop x6, x7 // x2, x3 + pop x4, x5 // x0, x1 + + add x3, x2, #CPU_XREG_OFFSET(0) + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + save_common_regs +.endm + +.macro restore_guest_regs + // x0 is the vcpu address. + // x2 is the cpu context + // x3 is a tmp register + + // Prepare x0-x3 for later restore + add x3, x2, #CPU_XREG_OFFSET(0) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + push x4, x5 // Push x0-x3 on the stack + push x6, x7 + + // x4-x18 + ldp x4, x5, [x3, #32] + ldp x6, x7, [x3, #48] + ldp x8, x9, [x3, #64] + ldp x10, x11, [x3, #80] + ldp x12, x13, [x3, #96] + ldp x14, x15, [x3, #112] + ldp x16, x17, [x3, #128] + ldr x18, [x3, #144] + + // x19-x29, lr, sp*, elr*, spsr* + restore_common_regs + + // Last bits of the 64bit state + pop x2, x3 + pop x0, x1 + + // Do not touch any register after this! +.endm + +/* + * Macros to perform system register save/restore. + * + * Ordering here is absolutely critical, and must be kept consistent + * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, + * and in kvm_asm.h. + * + * In other words, don't touch any of these unless you know what + * you are doing. + */ +.macro save_sysregs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) + + mrs x4, vmpidr_el2 + mrs x5, csselr_el1 + mrs x6, sctlr_el1 + mrs x7, actlr_el1 + mrs x8, cpacr_el1 + mrs x9, ttbr0_el1 + mrs x10, ttbr1_el1 + mrs x11, tcr_el1 + mrs x12, esr_el1 + mrs x13, afsr0_el1 + mrs x14, afsr1_el1 + mrs x15, far_el1 + mrs x16, mair_el1 + mrs x17, vbar_el1 + mrs x18, contextidr_el1 + mrs x19, tpidr_el0 + mrs x20, tpidrro_el0 + mrs x21, tpidr_el1 + mrs x22, amair_el1 + mrs x23, cntkctl_el1 + + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + stp x8, x9, [x3, #32] + stp x10, x11, [x3, #48] + stp x12, x13, [x3, #64] + stp x14, x15, [x3, #80] + stp x16, x17, [x3, #96] + stp x18, x19, [x3, #112] + stp x20, x21, [x3, #128] + stp x22, x23, [x3, #144] +.endm + +.macro restore_sysregs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) + + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + ldp x8, x9, [x3, #32] + ldp x10, x11, [x3, #48] + ldp x12, x13, [x3, #64] + ldp x14, x15, [x3, #80] + ldp x16, x17, [x3, #96] + ldp x18, x19, [x3, #112] + ldp x20, x21, [x3, #128] + ldp x22, x23, [x3, #144] + + msr vmpidr_el2, x4 + msr csselr_el1, x5 + msr sctlr_el1, x6 + msr actlr_el1, x7 + msr cpacr_el1, x8 + msr ttbr0_el1, x9 + msr ttbr1_el1, x10 + msr tcr_el1, x11 + msr esr_el1, x12 + msr afsr0_el1, x13 + msr afsr1_el1, x14 + msr far_el1, x15 + msr mair_el1, x16 + msr vbar_el1, x17 + msr contextidr_el1, x18 + msr tpidr_el0, x19 + msr tpidrro_el0, x20 + msr tpidr_el1, x21 + msr amair_el1, x22 + msr cntkctl_el1, x23 +.endm + +.macro activate_traps + ldr x2, [x0, #VCPU_IRQ_LINES] + ldr x1, [x0, #VCPU_HCR_EL2] + orr x2, x2, x1 + msr hcr_el2, x2 + + ldr x2, =(CPTR_EL2_TTA) + msr cptr_el2, x2 + + ldr x2, =(1 << 15) // Trap CP15 Cr=15 + msr hstr_el2, x2 + + mrs x2, mdcr_el2 + and x2, x2, #MDCR_EL2_HPMN_MASK + orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) + msr mdcr_el2, x2 +.endm + +.macro deactivate_traps + mov x2, #HCR_RW + msr hcr_el2, x2 + msr cptr_el2, xzr + msr hstr_el2, xzr + + mrs x2, mdcr_el2 + and x2, x2, #MDCR_EL2_HPMN_MASK + msr mdcr_el2, x2 +.endm + +.macro activate_vm + ldr x1, [x0, #VCPU_KVM] + kern_hyp_va x1 + ldr x2, [x1, #KVM_VTTBR] + msr vttbr_el2, x2 +.endm + +.macro deactivate_vm + msr vttbr_el2, xzr +.endm + +__save_sysregs: + save_sysregs + ret + +__restore_sysregs: + restore_sysregs + ret + +__save_fpsimd: + save_fpsimd + ret + +__restore_fpsimd: + restore_fpsimd + ret + +/* + * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); + * + * This is the world switch. The first half of the function + * deals with entering the guest, and anything from __kvm_vcpu_return + * to the end of the function deals with reentering the host. + * On the enter path, only x0 (vcpu pointer) must be preserved until + * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception + * code) must both be preserved until the epilogue. + * In both cases, x2 points to the CPU context we're saving/restoring from/to. + */ +ENTRY(__kvm_vcpu_run) + kern_hyp_va x0 + msr tpidr_el2, x0 // Save the vcpu register + + // Host context + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + save_host_regs + bl __save_fpsimd + bl __save_sysregs + + activate_traps + activate_vm + + // Guest context + add x2, x0, #VCPU_CONTEXT + + bl __restore_sysregs + bl __restore_fpsimd + restore_guest_regs + + // That's it, no more messing around. + eret + +__kvm_vcpu_return: + // Assume x0 is the vcpu pointer, x1 the return code + // Guest's x0-x3 are on the stack + + // Guest context + add x2, x0, #VCPU_CONTEXT + + save_guest_regs + bl __save_fpsimd + bl __save_sysregs + + deactivate_traps + deactivate_vm + + // Host context + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + bl __restore_sysregs + bl __restore_fpsimd + restore_host_regs + + mov x0, x1 + ret +END(__kvm_vcpu_run) + +// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +ENTRY(__kvm_tlb_flush_vmid_ipa) + kern_hyp_va x0 + ldr x2, [x0, #KVM_VTTBR] + msr vttbr_el2, x2 + isb + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + tlbi ipas2e1is, x1 + dsb sy + tlbi vmalle1is + dsb sy + isb + + msr vttbr_el2, xzr + ret +ENDPROC(__kvm_tlb_flush_vmid_ipa) + +ENTRY(__kvm_flush_vm_context) + tlbi alle1is + ic ialluis + dsb sy + ret +ENDPROC(__kvm_flush_vm_context) + +__kvm_hyp_panic: + // Guess the context by looking at VTTBR: + // If zero, then we're already a host. + // Otherwise restore a minimal host context before panicing. + mrs x0, vttbr_el2 + cbz x0, 1f + + mrs x0, tpidr_el2 + + deactivate_traps + deactivate_vm + + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + bl __restore_sysregs + +1: adr x0, __hyp_panic_str + adr x1, 2f + ldp x2, x3, [x1] + sub x0, x0, x2 + add x0, x0, x3 + mrs x1, spsr_el2 + mrs x2, elr_el2 + mrs x3, esr_el2 + mrs x4, far_el2 + mrs x5, hpfar_el2 + mrs x6, par_el1 + mrs x7, tpidr_el2 + + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret + + .align 3 +2: .quad HYP_PAGE_OFFSET + .quad PAGE_OFFSET +ENDPROC(__kvm_hyp_panic) + +__hyp_panic_str: + .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0" + + .align 2 + +ENTRY(kvm_call_hyp) + hvc #0 + ret +ENDPROC(kvm_call_hyp) + +.macro invalid_vector label, target + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid, __kvm_hyp_panic + invalid_vector el2t_irq_invalid, __kvm_hyp_panic + invalid_vector el2t_fiq_invalid, __kvm_hyp_panic + invalid_vector el2t_error_invalid, __kvm_hyp_panic + invalid_vector el2h_sync_invalid, __kvm_hyp_panic + invalid_vector el2h_irq_invalid, __kvm_hyp_panic + invalid_vector el2h_fiq_invalid, __kvm_hyp_panic + invalid_vector el2h_error_invalid, __kvm_hyp_panic + invalid_vector el1_sync_invalid, __kvm_hyp_panic + invalid_vector el1_irq_invalid, __kvm_hyp_panic + invalid_vector el1_fiq_invalid, __kvm_hyp_panic + invalid_vector el1_error_invalid, __kvm_hyp_panic + +el1_sync: // Guest trapped into EL2 + push x0, x1 + push x2, x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_EL2_EC_SHIFT + + cmp x2, #ESR_EL2_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + pop x2, x3 + pop x0, x1 + + push lr, xzr + + /* + * Compute the function address in EL2, and shuffle the parameters. + */ + kern_hyp_va x0 + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr + + pop lr, xzr + eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + cmp x2, #ESR_EL2_EC_DABT + mov x0, #ESR_EL2_EC_IABT + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ + and x2, x1, #ESR_EL2_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. + */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str x1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __kvm_vcpu_return + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: pop x2, x3 + pop x0, x1 + + eret + +el1_irq: + push x0, x1 + push x2, x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __kvm_vcpu_return + + .ltorg + + .align 11 + +ENTRY(__kvm_hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__kvm_hyp_vector) + +__kvm_hyp_code_end: + .globl __kvm_hyp_code_end + + .popsection From b0353aa3722552c9058737452c9bec86d73ccdad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:40:41 +0000 Subject: [PATCH 1101/1368] arm64: KVM: Exit handling Handle the exit of a VM, decoding the exit reason from HYP mode and calling the corresponding handler. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit c4b1afd022e93eada6ee4b209be37101cd4b3494) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 119 +++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 arch/arm64/kvm/handle_exit.c diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c new file mode 100644 index 000000000000..c65d1154f969 --- /dev/null +++ b/arch/arm64/kvm/handle_exit.c @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/handle_exit.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); + +static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* + * Guest called HVC instruction: + * Let it know we don't want that by injecting an undefined exception. + */ + kvm_debug("hvc: %x (at %08lx)", kvm_vcpu_get_hsr(vcpu) & ((1 << 16) - 1), + *vcpu_pc(vcpu)); + kvm_debug(" HSR: %8x", kvm_vcpu_get_hsr(vcpu)); + kvm_inject_undefined(vcpu); + return 1; +} + +static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* We don't support SMC; don't do that. */ + kvm_debug("smc: at %08lx", *vcpu_pc(vcpu)); + kvm_inject_undefined(vcpu); + return 1; +} + +/** + * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * @vcpu: the vcpu pointer + * + * Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. + */ +static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_vcpu_block(vcpu); + return 1; +} + +static exit_handle_fn arm_exit_handlers[] = { + [ESR_EL2_EC_WFI] = kvm_handle_wfi, + [ESR_EL2_EC_HVC64] = handle_hvc, + [ESR_EL2_EC_SMC64] = handle_smc, + [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg, + [ESR_EL2_EC_IABT] = kvm_handle_guest_abort, + [ESR_EL2_EC_DABT] = kvm_handle_guest_abort, +}; + +static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + + if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || + !arm_exit_handlers[hsr_ec]) { + kvm_err("Unkown exception class: hsr: %#08x\n", + (unsigned int)kvm_vcpu_get_hsr(vcpu)); + BUG(); + } + + return arm_exit_handlers[hsr_ec]; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) +{ + exit_handle_fn exit_handler; + + switch (exception_index) { + case ARM_EXCEPTION_IRQ: + return 1; + case ARM_EXCEPTION_TRAP: + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + + exit_handler = kvm_get_exit_handler(vcpu); + + return exit_handler(vcpu, run); + default: + kvm_pr_unimpl("Unsupported exception type: %d", + exception_index); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; + } +} From 4f0c6d89a863d5a86d2dea50a9e114559e73d97f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Dec 2012 17:54:54 +0000 Subject: [PATCH 1102/1368] arm64: KVM: Plug the VGIC Add support for the in-kernel GIC emulation. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 1f17f3b6044d8a81a74dc6c962b3b38a7336106b) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 88 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 0b18c2e1e043..8dc27a367d77 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -306,6 +306,90 @@ __kvm_hyp_code_start: msr vttbr_el2, xzr .endm +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +.macro save_vgic_state + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr w4, [x2, #GICH_HCR] + ldr w5, [x2, #GICH_VMCR] + ldr w6, [x2, #GICH_MISR] + ldr w7, [x2, #GICH_EISR0] + ldr w8, [x2, #GICH_EISR1] + ldr w9, [x2, #GICH_ELRSR0] + ldr w10, [x2, #GICH_ELRSR1] + ldr w11, [x2, #GICH_APR] + + str w4, [x3, #VGIC_CPU_HCR] + str w5, [x3, #VGIC_CPU_VMCR] + str w6, [x3, #VGIC_CPU_MISR] + str w7, [x3, #VGIC_CPU_EISR] + str w8, [x3, #(VGIC_CPU_EISR + 4)] + str w9, [x3, #VGIC_CPU_ELRSR] + str w10, [x3, #(VGIC_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_CPU_APR] + + /* Clear GICH_HCR */ + str wzr, [x2, #GICH_HCR] + + /* Save list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_CPU_LR +1: ldr w5, [x2], #4 + str w5, [x3], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: +.endm + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +.macro restore_vgic_state + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr w4, [x3, #VGIC_CPU_HCR] + ldr w5, [x3, #VGIC_CPU_VMCR] + ldr w6, [x3, #VGIC_CPU_APR] + + str w4, [x2, #GICH_HCR] + str w5, [x2, #GICH_VMCR] + str w6, [x2, #GICH_APR] + + /* Restore list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_CPU_LR +1: ldr w5, [x3], #4 + str w5, [x2], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: +.endm + __save_sysregs: save_sysregs ret @@ -348,6 +432,8 @@ ENTRY(__kvm_vcpu_run) activate_traps activate_vm + restore_vgic_state + // Guest context add x2, x0, #VCPU_CONTEXT @@ -369,6 +455,8 @@ __kvm_vcpu_return: bl __save_fpsimd bl __save_sysregs + save_vgic_state + deactivate_traps deactivate_vm From 615fd459aa42eb0450667fb191c1931a1da749b3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 May 2013 18:31:28 +0100 Subject: [PATCH 1103/1368] ARM: KVM: timer: allow DT matching for ARMv8 cores ARMv8 cores have the exact same timer as ARMv7 cores. Make sure the KVM timer code can match it in the device tree. Signed-off-by: Marc Zyngier (cherry picked from commit f61701e0a24a09aa4a44baf24e57dcc5e706afa8) Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 2d00b2925780..6f485eaf643b 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -195,6 +195,7 @@ static struct notifier_block kvm_timer_cpu_nb = { static const struct of_device_id arch_timer_of_match[] = { { .compatible = "arm,armv7-timer", }, + { .compatible = "arm,armv8-timer", }, {}, }; From 5b12bf6aa91a284eb32fdb5f02ebc770149dbf50 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Dec 2012 17:52:03 +0000 Subject: [PATCH 1104/1368] arm64: KVM: Plug the arch timer Add support for the in-kernel timer emulation. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 003300de6c3e51934fb52eb2677f6f4fb4996cbd) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 56 ++++++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/reset.c | 12 +++++++++ 2 files changed, 68 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 8dc27a367d77..8b510835b440 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -390,6 +390,60 @@ __kvm_hyp_code_start: 2: .endm +.macro save_timer_state + // x0: vcpu pointer + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr w3, [x2, #KVM_TIMER_ENABLED] + cbz w3, 1f + + mrs x3, cntv_ctl_el0 + and x3, x3, #3 + str w3, [x0, #VCPU_TIMER_CNTV_CTL] + bic x3, x3, #1 // Clear Enable + msr cntv_ctl_el0, x3 + + isb + + mrs x3, cntv_cval_el0 + str x3, [x0, #VCPU_TIMER_CNTV_CVAL] + +1: + // Allow physical timer/counter access for the host + mrs x2, cnthctl_el2 + orr x2, x2, #3 + msr cnthctl_el2, x2 + + // Clear cntvoff for the host + msr cntvoff_el2, xzr +.endm + +.macro restore_timer_state + // x0: vcpu pointer + // Disallow physical timer access for the guest + // Physical counter access is allowed + mrs x2, cnthctl_el2 + orr x2, x2, #1 + bic x2, x2, #2 + msr cnthctl_el2, x2 + + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr w3, [x2, #KVM_TIMER_ENABLED] + cbz w3, 1f + + ldr x3, [x2, #KVM_TIMER_CNTVOFF] + msr cntvoff_el2, x3 + ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] + msr cntv_cval_el0, x2 + isb + + ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] + and x2, x2, #3 + msr cntv_ctl_el0, x2 +1: +.endm + __save_sysregs: save_sysregs ret @@ -433,6 +487,7 @@ ENTRY(__kvm_vcpu_run) activate_vm restore_vgic_state + restore_timer_state // Guest context add x2, x0, #VCPU_CONTEXT @@ -455,6 +510,7 @@ __kvm_vcpu_return: bl __save_fpsimd bl __save_sysregs + save_timer_state save_vgic_state deactivate_traps diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f6536a06231a..766150ac76ed 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -36,6 +38,11 @@ static const struct kvm_regs default_regs_reset = { PSR_F_BIT | PSR_D_BIT), }; +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + int kvm_arch_dev_ioctl_check_extension(long ext) { int r; @@ -58,11 +65,13 @@ int kvm_arch_dev_ioctl_check_extension(long ext) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { + const struct kvm_irq_level *cpu_vtimer_irq; const struct kvm_regs *cpu_reset; switch (vcpu->arch.target) { default: cpu_reset = &default_regs_reset; + cpu_vtimer_irq = &default_vtimer_irq; break; } @@ -72,5 +81,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset system registers */ kvm_reset_sys_regs(vcpu); + /* Reset timer */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return 0; } From 5732aca89a79f4f9446145c8d921963c81b4157f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 12 Dec 2012 18:52:05 +0000 Subject: [PATCH 1105/1368] arm64: KVM: PSCI implementation Wire the PSCI backend into the exit handling code. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit dcd2e40c1e1cce302498d16d095b0f8a30326f74) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_psci.h | 23 +++++++++++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 16 ++++++++++++++++ arch/arm64/kvm/handle_exit.c | 16 +++++++--------- 4 files changed, 47 insertions(+), 10 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_psci.h diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2500eb6a4d2a..2fdeb326c3ee 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #include #include -#define KVM_VCPU_MAX_FEATURES 0 +#define KVM_VCPU_MAX_FEATURES 1 /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h new file mode 100644 index 000000000000..e301a4816355 --- /dev/null +++ b/arch/arm64/include/asm/kvm_psci.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_PSCI_H__ +#define __ARM64_KVM_PSCI_H__ + +bool kvm_psci_call(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index ebac919dc0ca..fb60f9037057 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -69,6 +69,8 @@ struct kvm_regs { #define KVM_VGIC_V2_DIST_SIZE 0x1000 #define KVM_VGIC_V2_CPU_SIZE 0x2000 +#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ + struct kvm_vcpu_init { __u32 target; __u32 features[7]; @@ -141,6 +143,20 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* PSCI interface */ +#define KVM_PSCI_FN_BASE 0x95c1ba5e +#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) + +#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0) +#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1) +#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) +#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) + +#define KVM_PSCI_RET_SUCCESS 0 +#define KVM_PSCI_RET_NI ((unsigned long)-1) +#define KVM_PSCI_RET_INVAL ((unsigned long)-2) +#define KVM_PSCI_RET_DENIED ((unsigned long)-3) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index c65d1154f969..4766b7f3515e 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -24,26 +24,24 @@ #include #include #include +#include typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* - * Guest called HVC instruction: - * Let it know we don't want that by injecting an undefined exception. - */ - kvm_debug("hvc: %x (at %08lx)", kvm_vcpu_get_hsr(vcpu) & ((1 << 16) - 1), - *vcpu_pc(vcpu)); - kvm_debug(" HSR: %8x", kvm_vcpu_get_hsr(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* We don't support SMC; don't do that. */ - kvm_debug("smc: at %08lx", *vcpu_pc(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } From ea4ebae1594250431731dcdacd6da33b865fd3b1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Dec 2012 16:41:44 +0000 Subject: [PATCH 1106/1368] arm64: KVM: Build system integration Only the Makefile is plugged in. The Kconfig stuff is in a separate patch to allow for an easier merge process. Reviewed-by: Christopher Covington Signed-off-by: Marc Zyngier (cherry picked from commit 6211753fdfd05af9e08f54c8d0ba3ee516034878) Signed-off-by: Christoffer Dall --- arch/arm64/Makefile | 1 + arch/arm64/kvm/Makefile | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 arch/arm64/kvm/Makefile diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b6ccf8a36e2d..7ab6b358cc35 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -37,6 +37,7 @@ TEXT_OFFSET := 0x00080000 export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ +core-$(CONFIG_KVM) += arch/arm64/kvm/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile new file mode 100644 index 000000000000..dca110556683 --- /dev/null +++ b/arch/arm64/kvm/Makefile @@ -0,0 +1,23 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +CFLAGS_arm.o := -I. +CFLAGS_mmu.o := -I. + +KVM=../../../virt/kvm +ARM=../../../arch/arm/kvm + +obj-$(CONFIG_KVM_ARM_HOST) += kvm.o + +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o + +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o +kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o +kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o + +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o From 8ee55043e2fc5e6c90209f478c70b600dce523a9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 19:17:50 +0000 Subject: [PATCH 1107/1368] arm64: KVM: define 32bit specific registers Define the 32bit specific registers (SPSRs, cp15...). Most CPU registers are directly mapped to a 64bit register (r0->x0...). Only the SPSRs have separate registers. cp15 registers are also mapped into their 64bit counterpart in most cases. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 40033a614ea3db196d57c477ca328f44eb1e4df0) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 38 ++++++++++++++++++++++++++++++- arch/arm64/include/asm/kvm_host.h | 5 +++- arch/arm64/include/uapi/asm/kvm.h | 7 +++++- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 591ac219964a..c92de4163eba 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,7 +42,43 @@ #define TPIDR_EL1 18 /* Thread ID, Privileged */ #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ -#define NR_SYS_REGS 21 +/* 32bit specific registers. Keep them at the end of the range */ +#define DACR32_EL2 21 /* Domain Access Control Register */ +#define IFSR32_EL2 22 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 23 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 24 /* Debug Vector Catch Register */ +#define TEECR32_EL1 25 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 26 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 27 + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ +#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ +#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ +#define c10_AMAIR (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ +#define NR_CP15_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2fdeb326c3ee..3f5830b3ca3f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -84,7 +84,10 @@ struct kvm_vcpu_fault_info { struct kvm_cpu_context { struct kvm_regs gp_regs; - u64 sys_regs[NR_SYS_REGS]; + union { + u64 sys_regs[NR_SYS_REGS]; + u32 cp15[NR_CP15_REGS]; + }; }; typedef struct kvm_cpu_context kvm_cpu_context_t; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index fb60f9037057..5b1110c49df5 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -23,7 +23,12 @@ #define __ARM_KVM_H__ #define KVM_SPSR_EL1 0 -#define KVM_NR_SPSR 1 +#define KVM_SPSR_SVC KVM_SPSR_EL1 +#define KVM_SPSR_ABT 1 +#define KVM_SPSR_UND 2 +#define KVM_SPSR_IRQ 3 +#define KVM_SPSR_FIQ 4 +#define KVM_NR_SPSR 5 #ifndef __ASSEMBLY__ #include From 934f190b989d879c8ef59e26c9d6eb2d10632f0a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 19:40:29 +0000 Subject: [PATCH 1108/1368] arm64: KVM: 32bit GP register access Allow access to the 32bit register file through the usual API. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b547631fc64e249a3c507e6ce854642507fa7c1c) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 15 ++- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/regmap.c | 168 +++++++++++++++++++++++++++ 3 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/kvm/regmap.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6c1725e93b0b..20a1a3931d8d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -28,6 +28,9 @@ #include #include +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); + void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { - return false; /* 32bit? Bahhh... */ + return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); } static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) @@ -64,16 +67,23 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) { + *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; } static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_reg32(vcpu, reg_num); + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; } /* Get vcpu SPSR for current mode */ static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) { + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_spsr32(vcpu); + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; } @@ -81,6 +91,9 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + if (vcpu_mode_is_32bit(vcpu)) + return mode > COMPAT_PSR_MODE_USR; + return mode != PSR_MODE_EL0t; } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index dca110556683..a2169ec8d93b 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -15,7 +15,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c new file mode 100644 index 000000000000..bbc6ae32e4af --- /dev/null +++ b/arch/arm64/kvm/regmap.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * Derived from arch/arm/kvm/emulate.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#define VCPU_NR_MODES 6 +#define REG_OFFSET(_reg) \ + (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) + +#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) + +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { + /* USR Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), + REG_OFFSET(pc) + }, + + /* FIQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), + REG_OFFSET(compat_r8_fiq), /* r8 */ + REG_OFFSET(compat_r9_fiq), /* r9 */ + REG_OFFSET(compat_r10_fiq), /* r10 */ + REG_OFFSET(compat_r11_fiq), /* r11 */ + REG_OFFSET(compat_r12_fiq), /* r12 */ + REG_OFFSET(compat_sp_fiq), /* r13 */ + REG_OFFSET(compat_lr_fiq), /* r14 */ + REG_OFFSET(pc) + }, + + /* IRQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_irq), /* r13 */ + REG_OFFSET(compat_lr_irq), /* r14 */ + REG_OFFSET(pc) + }, + + /* SVC Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_svc), /* r13 */ + REG_OFFSET(compat_lr_svc), /* r14 */ + REG_OFFSET(pc) + }, + + /* ABT Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_abt), /* r13 */ + REG_OFFSET(compat_lr_abt), /* r14 */ + REG_OFFSET(pc) + }, + + /* UND Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_und), /* r13 */ + REG_OFFSET(compat_lr_und), /* r14 */ + REG_OFFSET(pc) + }, +}; + +/* + * Return a pointer to the register number valid in the current mode of + * the virtual CPU. + */ +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + + switch (mode) { + case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC: + mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ + break; + + case COMPAT_PSR_MODE_ABT: + mode = 4; + break; + + case COMPAT_PSR_MODE_UND: + mode = 5; + break; + + case COMPAT_PSR_MODE_SYS: + mode = 0; /* SYS maps to USR */ + break; + + default: + BUG(); + } + + return reg_array + vcpu_reg_offsets[mode][reg_num]; +} + +/* + * Return the SPSR for the current mode of the virtual CPU. + */ +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu) +{ + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case COMPAT_PSR_MODE_SVC: + mode = KVM_SPSR_SVC; + break; + case COMPAT_PSR_MODE_ABT: + mode = KVM_SPSR_ABT; + break; + case COMPAT_PSR_MODE_UND: + mode = KVM_SPSR_UND; + break; + case COMPAT_PSR_MODE_IRQ: + mode = KVM_SPSR_IRQ; + break; + case COMPAT_PSR_MODE_FIQ: + mode = KVM_SPSR_FIQ; + break; + default: + BUG(); + } + + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode]; +} From 4129306976df31cf80f13a85d6886e2f1245662b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 19:54:04 +0000 Subject: [PATCH 1109/1368] arm64: KVM: 32bit conditional execution emulation As conditional instructions can trap on AArch32, add the thinest possible emulation layer to keep 32bit guests happy. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 27b190bd9fbfee34536cb858f0b5924d294aac38) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 13 ++- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/emulate.c | 158 +++++++++++++++++++++++++++ 3 files changed, 170 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/kvm/emulate.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 20a1a3931d8d..eec073875218 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -31,6 +31,9 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); + void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); @@ -57,12 +60,18 @@ static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) { - return true; /* No conditionals on arm64 */ + if (vcpu_mode_is_32bit(vcpu)) + return kvm_condition_valid32(vcpu); + + return true; } static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) { - *vcpu_pc(vcpu) += 4; + if (vcpu_mode_is_32bit(vcpu)) + kvm_skip_instr32(vcpu, is_wide_instr); + else + *vcpu_pc(vcpu) += 4; } static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a2169ec8d93b..72a9fd583ad3 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -15,7 +15,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o +kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c new file mode 100644 index 000000000000..124418d17049 --- /dev/null +++ b/arch/arm64/kvm/emulate.c @@ -0,0 +1,158 @@ +/* + * (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + + if (esr & ESR_EL2_CV) + return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT; + + return -1; +} + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & COMPAT_PSR_T_BIT); + + BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK)); + + if (!(cpsr & COMPAT_PSR_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~COMPAT_PSR_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT); + if (is_thumb && !is_wide_instr) + *vcpu_pc(vcpu) += 2; + else + *vcpu_pc(vcpu) += 4; + kvm_adjust_itstate(vcpu); +} From 33056d384f68642d62ba6dc6fe0a17ef92473135 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2013 10:32:33 +0000 Subject: [PATCH 1110/1368] arm64: KVM: 32bit handling of coprocessor traps Provide the necessary infrastructure to trap coprocessor accesses that occur when running 32bit guests. Also wire SMC and HVC trapped in 32bit mode while were at it. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 62a89c44954f09072bf07a714c8f68bda14ab87e) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_coproc.h | 5 + arch/arm64/kvm/handle_exit.c | 7 ++ arch/arm64/kvm/sys_regs.c | 181 ++++++++++++++++++++++++++-- 3 files changed, 186 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 9b4477acb554..9a59301cd014 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -32,11 +32,16 @@ struct kvm_sys_reg_table { struct kvm_sys_reg_target_table { struct kvm_sys_reg_table table64; + struct kvm_sys_reg_table table32; }; void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); #define kvm_coproc_table_init kvm_sys_reg_table_init diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 4766b7f3515e..9beaca033437 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -62,6 +62,13 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) static exit_handle_fn arm_exit_handlers[] = { [ESR_EL2_EC_WFI] = kvm_handle_wfi, + [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, + [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, + [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, + [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_access, + [ESR_EL2_EC_HVC32] = handle_hvc, + [ESR_EL2_EC_SMC32] = handle_smc, [ESR_EL2_EC_HVC64] = handle_hvc, [ESR_EL2_EC_SMC64] = handle_smc, [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 52fff0ae3442..94923609753b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -38,6 +38,10 @@ * types are different. My gut feeling is that it should be pretty * easy to merge, but that would be an ABI breakage -- again. VFP * would also need to be abstracted. + * + * For AArch32, we only take care of what is being trapped. Anything + * that has to do with init and userspace access has to go via the + * 64bit interface. */ /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ @@ -166,6 +170,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), access_dcsw }, + /* TEECR32_EL1 */ + { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, reset_val, TEECR32_EL1, 0 }, + /* TEEHBR32_EL1 */ + { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), + NULL, reset_val, TEEHBR32_EL1, 0 }, + /* DBGVCR32_EL2 */ + { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), + NULL, reset_val, DBGVCR32_EL2, 0 }, + /* MPIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101), NULL, reset_mpidr, MPIDR_EL1 }, @@ -276,6 +290,39 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* TPIDRRO_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), NULL, reset_unknown, TPIDRRO_EL0 }, + + /* DACR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, DACR32_EL2 }, + /* IFSR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001), + NULL, reset_unknown, IFSR32_EL2 }, + /* FPEXC32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000), + NULL, reset_val, FPEXC32_EL2, 0x70 }, +}; + +/* Trapped cp15 registers */ +static const struct sys_reg_desc cp15_regs[] = { + /* + * DC{C,I,CI}SW operations: + */ + { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, }; /* Target specific emulation tables */ @@ -288,13 +335,20 @@ void kvm_register_target_sys_reg_table(unsigned int target, } /* Get specific register table for this target. */ -static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num) +static const struct sys_reg_desc *get_target_table(unsigned target, + bool mode_is_64, + size_t *num) { struct kvm_sys_reg_target_table *table; table = target_tables[target]; - *num = table->table64.num; - return table->table64.table; + if (mode_is_64) { + *num = table->table64.num; + return table->table64.table; + } else { + *num = table->table32.num; + return table->table32.table; + } } static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, @@ -322,13 +376,126 @@ static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, return NULL; } +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +static void emulate_cp15(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + size_t num; + const struct sys_reg_desc *table, *r; + + table = get_target_table(vcpu->arch.target, false, &num); + + /* Search target-specific then generic table. */ + r = find_reg(params, table, num); + if (!r) + r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs)); + + if (likely(r)) { + /* + * Not having an accessor means that we have + * configured a trap that we don't know how to + * handle. This certainly qualifies as a gross bug + * that should be fixed right away. + */ + BUG_ON(!r->access); + + if (likely(r->access(vcpu, params, r))) { + /* Skip instruction, since it was emulated */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return; + } + /* If access function fails, it should complain. */ + } + + kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); +} + +/** + * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + u32 hsr = kvm_vcpu_get_hsr(vcpu); + int Rt2 = (hsr >> 10) & 0xf; + + params.CRm = (hsr >> 1) & 0xf; + params.Rt = (hsr >> 5) & 0xf; + params.is_write = ((hsr & 1) == 0); + + params.Op0 = 0; + params.Op1 = (hsr >> 16) & 0xf; + params.Op2 = 0; + params.CRn = 0; + + /* + * Massive hack here. Store Rt2 in the top 32bits so we only + * have one register to deal with. As we use the same trap + * backends between AArch32 and AArch64, we get away with it. + */ + if (params.is_write) { + u64 val = *vcpu_reg(vcpu, params.Rt); + val &= 0xffffffff; + val |= *vcpu_reg(vcpu, Rt2) << 32; + *vcpu_reg(vcpu, params.Rt) = val; + } + + emulate_cp15(vcpu, ¶ms); + + /* Do the opposite hack for the read side */ + if (!params.is_write) { + u64 val = *vcpu_reg(vcpu, params.Rt); + val >>= 32; + *vcpu_reg(vcpu, Rt2) = val; + } + + return 1; +} + +/** + * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + params.CRm = (hsr >> 1) & 0xf; + params.Rt = (hsr >> 5) & 0xf; + params.is_write = ((hsr & 1) == 0); + params.CRn = (hsr >> 10) & 0xf; + params.Op0 = 0; + params.Op1 = (hsr >> 14) & 0x7; + params.Op2 = (hsr >> 17) & 0x7; + + emulate_cp15(vcpu, ¶ms); + return 1; +} + static int emulate_sys_reg(struct kvm_vcpu *vcpu, const struct sys_reg_params *params) { size_t num; const struct sys_reg_desc *table, *r; - table = get_target_table(vcpu->arch.target, &num); + table = get_target_table(vcpu->arch.target, true, &num); /* Search target-specific then generic table. */ r = find_reg(params, table, num); @@ -438,7 +605,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if (!index_to_params(id, ¶ms)) return NULL; - table = get_target_table(vcpu->arch.target, &num); + table = get_target_table(vcpu->arch.target, true, &num); r = find_reg(¶ms, table, num); if (!r) r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); @@ -762,7 +929,7 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) size_t num; /* We check for duplicates here, to allow arch-specific overrides. */ - i1 = get_target_table(vcpu->arch.target, &num); + i1 = get_target_table(vcpu->arch.target, true, &num); end1 = i1 + num; i2 = sys_reg_descs; end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); @@ -874,7 +1041,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) /* Generic chip reset first (so target could override). */ reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); - table = get_target_table(vcpu->arch.target, &num); + table = get_target_table(vcpu->arch.target, true, &num); reset_sys_reg_descs(vcpu, table, num); for (num = 1; num < NR_SYS_REGS; num++) From a6df8b5dd61c84f691c41663a1d2bb0e05342053 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2013 10:50:18 +0000 Subject: [PATCH 1111/1368] arm64: KVM: CPU specific 32bit coprocessor access Enable handling of CPU specific 32bit coprocessor access. Not much here either. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 06c7654d2fb8bac7b1af4340ad59434a5d89b86a) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs_generic_v8.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index d4e803907312..4268ab9356b1 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -59,11 +59,21 @@ static const struct sys_reg_desc genericv8_sys_regs[] = { access_actlr, reset_actlr, ACTLR_EL1 }, }; +static const struct sys_reg_desc genericv8_cp15_regs[] = { + /* ACTLR */ + { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), + access_actlr }, +}; + static struct kvm_sys_reg_target_table genericv8_target_table = { .table64 = { .table = genericv8_sys_regs, .num = ARRAY_SIZE(genericv8_sys_regs), }, + .table32 = { + .table = genericv8_cp15_regs, + .num = ARRAY_SIZE(genericv8_cp15_regs), + }, }; static int __init sys_reg_genericv8_init(void) From ad0ed2f67ccf23f49c201cb05c542d46e04fb9c1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2013 10:52:10 +0000 Subject: [PATCH 1112/1368] arm64: KVM: 32bit specific register world switch Allow registers specific to 32bit guests to be saved/restored during the world switch. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b4afad06c19e3489767532f86ff453a1d1e28b8c) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 70 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 8b510835b440..ff985e3d8b72 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -266,6 +266,74 @@ __kvm_hyp_code_start: msr cntkctl_el1, x23 .endm +.macro skip_32bit_state tmp, target + // Skip 32bit state if not needed + mrs \tmp, hcr_el2 + tbnz \tmp, #HCR_RW_SHIFT, \target +.endm + +.macro skip_tee_state tmp, target + // Skip ThumbEE state if not needed + mrs \tmp, id_pfr0_el1 + tbz \tmp, #12, \target +.endm + +.macro save_guest_32bit_state + skip_32bit_state x3, 1f + + add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) + mrs x4, spsr_abt + mrs x5, spsr_und + mrs x6, spsr_irq + mrs x7, spsr_fiq + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) + mrs x4, dacr32_el2 + mrs x5, ifsr32_el2 + mrs x6, fpexc32_el2 + mrs x7, dbgvcr32_el2 + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + skip_tee_state x8, 1f + + add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) + mrs x4, teecr32_el1 + mrs x5, teehbr32_el1 + stp x4, x5, [x3] +1: +.endm + +.macro restore_guest_32bit_state + skip_32bit_state x3, 1f + + add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + msr spsr_abt, x4 + msr spsr_und, x5 + msr spsr_irq, x6 + msr spsr_fiq, x7 + + add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + msr dacr32_el2, x4 + msr ifsr32_el2, x5 + msr fpexc32_el2, x6 + msr dbgvcr32_el2, x7 + + skip_tee_state x8, 1f + + add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) + ldp x4, x5, [x3] + msr teecr32_el1, x4 + msr teehbr32_el1, x5 +1: +.endm + .macro activate_traps ldr x2, [x0, #VCPU_IRQ_LINES] ldr x1, [x0, #VCPU_HCR_EL2] @@ -494,6 +562,7 @@ ENTRY(__kvm_vcpu_run) bl __restore_sysregs bl __restore_fpsimd + restore_guest_32bit_state restore_guest_regs // That's it, no more messing around. @@ -509,6 +578,7 @@ __kvm_vcpu_return: save_guest_regs bl __save_fpsimd bl __save_sysregs + save_guest_32bit_state save_timer_state save_vgic_state From 31960c92ba1763c3c5db1d09fb0f4547a39c8d4d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Feb 2013 11:29:35 +0000 Subject: [PATCH 1113/1368] arm64: KVM: 32bit guest fault injection Add fault injection capability for 32bit guests. Reviewed-by: Christopher Covington Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit e82e030556e42e823e174e0c3bd97988d1a09d1f) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/inject_fault.c | 79 ++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 54f656271266..81a02a8762b0 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -1,5 +1,5 @@ /* - * Fault injection for 64bit guests. + * Fault injection for both 32 and 64bit guests. * * Copyright (C) 2012,2013 - ARM Ltd * Author: Marc Zyngier @@ -29,6 +29,74 @@ PSR_I_BIT | PSR_D_BIT) #define EL1_EXCEPT_SYNC_OFFSET 0x200 +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long cpsr; + unsigned long new_spsr_value = *vcpu_cpsr(vcpu); + bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); + u32 return_offset = (is_thumb) ? 4 : 0; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + + cpsr = mode | COMPAT_PSR_I_BIT; + + if (sctlr & (1 << 30)) + cpsr |= COMPAT_PSR_T_BIT; + if (sctlr & (1 << 25)) + cpsr |= COMPAT_PSR_E_BIT; + + *vcpu_cpsr(vcpu) = cpsr; + + /* Note: These now point to the banked copies */ + *vcpu_spsr(vcpu) = new_spsr_value; + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += vcpu_cp15(vcpu, c12_VBAR); + + *vcpu_pc(vcpu) = vect_offset; +} + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vect_offset = 16; + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) + *fsr = 1 << 9 | 0x34; + else + *fsr = 0x14; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -98,6 +166,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu) */ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_abt32(vcpu, false, addr); + inject_abt64(vcpu, false, addr); } @@ -111,6 +182,9 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) */ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_abt32(vcpu, true, addr); + inject_abt64(vcpu, true, addr); } @@ -122,5 +196,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_undef32(vcpu); + inject_undef64(vcpu); } From 80e531580f7bbb453119afd17bc752d9763676b5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Feb 2013 10:46:46 +0000 Subject: [PATCH 1114/1368] arm64: KVM: enable initialization of a 32bit vcpu Wire the init of a 32bit vcpu by allowing 32bit modes in pstate, and providing sensible defaults out of reset state. This feature is of course conditioned by the presence of 32bit capability on the physical CPU, and is checked by the KVM_CAP_ARM_EL1_32BIT capability. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 0d854a60b1d7d39a37b25dd28f63cfa0df637b91) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/guest.c | 6 ++++++ arch/arm64/kvm/reset.c | 26 +++++++++++++++++++++++++- include/uapi/linux/kvm.h | 1 + 5 files changed, 34 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3f5830b3ca3f..644d73956864 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #include #include -#define KVM_VCPU_MAX_FEATURES 1 +#define KVM_VCPU_MAX_FEATURES 2 /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 5b1110c49df5..5031f4263937 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -75,6 +75,7 @@ struct kvm_regs { #define KVM_VGIC_V2_CPU_SIZE 0x2000 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ struct kvm_vcpu_init { __u32 target; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3d7518a7ebaa..2c3ff67a8ecb 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -99,6 +99,12 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; switch (mode) { + case COMPAT_PSR_MODE_USR: + case COMPAT_PSR_MODE_FIQ: + case COMPAT_PSR_MODE_IRQ: + case COMPAT_PSR_MODE_SVC: + case COMPAT_PSR_MODE_ABT: + case COMPAT_PSR_MODE_UND: case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 766150ac76ed..70a7816535cd 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -38,16 +38,32 @@ static const struct kvm_regs default_regs_reset = { PSR_F_BIT | PSR_D_BIT), }; +static const struct kvm_regs default_regs_reset32 = { + .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT | + COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), +}; + static const struct kvm_irq_level default_vtimer_irq = { .irq = 27, .level = 1, }; +static bool cpu_has_32bit_el1(void) +{ + u64 pfr0; + + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + return !!(pfr0 & 0x20); +} + int kvm_arch_dev_ioctl_check_extension(long ext) { int r; switch (ext) { + case KVM_CAP_ARM_EL1_32BIT: + r = cpu_has_32bit_el1(); + break; default: r = 0; } @@ -70,7 +86,15 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) switch (vcpu->arch.target) { default: - cpu_reset = &default_regs_reset; + if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { + if (!cpu_has_32bit_el1()) + return -EINVAL; + cpu_reset = &default_regs_reset32; + vcpu->arch.hcr_el2 &= ~HCR_RW; + } else { + cpu_reset = &default_regs_reset; + } + cpu_vtimer_irq = &default_vtimer_irq; break; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 97277d333e82..acccd08be6c7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_MPIC 90 #define KVM_CAP_PPC_RTAS 91 #define KVM_CAP_IRQ_XICS 92 +#define KVM_CAP_ARM_EL1_32BIT 93 #ifdef KVM_CAP_IRQ_ROUTING From d3845bf4d5f6b104ec346dabf58817aa9d66d740 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Apr 2013 17:46:31 +0100 Subject: [PATCH 1115/1368] arm64: KVM: userspace API documentation Unsurprisingly, the arm64 userspace API is extremely similar to the 32bit one, the only significant difference being the ONE_REG register mapping. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 379e04c79e8a9ded8a202f1e266f0c5830185bea) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 58 +++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5f91eda91647..9bfadeb8be31 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -280,7 +280,7 @@ kvm_run' (see below). 4.11 KVM_GET_REGS Capability: basic -Architectures: all except ARM +Architectures: all except ARM, arm64 Type: vcpu ioctl Parameters: struct kvm_regs (out) Returns: 0 on success, -1 on error @@ -301,7 +301,7 @@ struct kvm_regs { 4.12 KVM_SET_REGS Capability: basic -Architectures: all except ARM +Architectures: all except ARM, arm64 Type: vcpu ioctl Parameters: struct kvm_regs (in) Returns: 0 on success, -1 on error @@ -587,7 +587,7 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64, ARM +Architectures: x86, ia64, ARM, arm64 Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -595,14 +595,14 @@ Returns: 0 on success, -1 on error Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM, a GIC is +only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is created. 4.25 KVM_IRQ_LINE Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64, arm +Architectures: x86, ia64, arm, arm64 Type: vm ioctl Parameters: struct kvm_irq_level Returns: 0 on success, -1 on error @@ -612,9 +612,10 @@ On some architectures it is required that an interrupt controller model has been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level to be set to 1 and then back to 0. -ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip -(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for -specific cpus. The irq field is interpreted like this: +ARM/arm64 can signal an interrupt either at the CPU level, or at the +in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to +use PPIs designated for specific cpus. The irq field is interpreted +like this:  bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 | field: | irq_type | vcpu_index | irq_id | @@ -1831,6 +1832,22 @@ ARM 32-bit VFP control registers have the following id bit patterns: ARM 64-bit FP registers have the following id bit patterns: 0x4030 0000 0012 0 + +arm64 registers are mapped using the lower 32 bits. The upper 16 of +that is the register group type, or coprocessor number: + +arm64 core/FP-SIMD registers have the following id bit patterns. Note +that the size of the access is variable, as the kvm_regs structure +contains elements ranging from 32 to 128 bits. The index is a 32bit +value in the kvm_regs structure seen as a 32bit array. + 0x60x0 0000 0010 + +arm64 CCSIDR registers are demultiplexed by CSSELR value: + 0x6020 0000 0011 00 + +arm64 system registers have the following id bit patterns: + 0x6030 0000 0013 + 4.69 KVM_GET_ONE_REG Capability: KVM_CAP_ONE_REG @@ -2264,7 +2281,7 @@ current state. "addr" is ignored. 4.77 KVM_ARM_VCPU_INIT Capability: basic -Architectures: arm +Architectures: arm, arm64 Type: vcpu ioctl Parameters: struct struct kvm_vcpu_init (in) Returns: 0 on success; -1 on error @@ -2283,12 +2300,14 @@ should be created before this ioctl is invoked. Possible features: - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. Depends on KVM_CAP_ARM_PSCI. + - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. + Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). 4.78 KVM_GET_REG_LIST Capability: basic -Architectures: arm +Architectures: arm, arm64 Type: vcpu ioctl Parameters: struct kvm_reg_list (in/out) Returns: 0 on success; -1 on error @@ -2308,7 +2327,7 @@ KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 4.80 KVM_ARM_SET_DEVICE_ADDR Capability: KVM_CAP_ARM_SET_DEVICE_ADDR -Architectures: arm +Architectures: arm, arm64 Type: vm ioctl Parameters: struct kvm_arm_device_address (in) Returns: 0 on success, -1 on error @@ -2329,18 +2348,19 @@ can access emulated or directly exposed devices, which the host kernel needs to know about. The id field is an architecture specific identifier for a specific device. -ARM divides the id field into two parts, a device id and an address type id -specific to the individual device. +ARM/arm64 divides the id field into two parts, a device id and an +address type id specific to the individual device.  bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | field: | 0x00000000 | device id | addr type id | -ARM currently only require this when using the in-kernel GIC support for the -hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id. When -setting the base address for the guest's mapping of the VGIC virtual CPU -and distributor interface, the ioctl must be called after calling -KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling -this ioctl twice for any of the base addresses will return -EEXIST. +ARM/arm64 currently only require this when using the in-kernel GIC +support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 +as the device id. When setting the base address for the guest's +mapping of the VGIC virtual CPU and distributor interface, the ioctl +must be called after calling KVM_CREATE_IRQCHIP, but before calling +KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the +base addresses will return -EEXIST. 4.82 KVM_PPC_RTAS_DEFINE_TOKEN From 1a12f6e7bcaaf0ec1a6a01fe5fb9341eb2f74ffe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Apr 2013 17:49:40 +0100 Subject: [PATCH 1116/1368] arm64: KVM: MAINTAINERS update Elect myself as the KVM/arm64 maintainer. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit 6394a3ec02ab39147aab9ea56d0dabafd3dcae60) Signed-off-by: Christoffer Dall --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 48c748080c96..823010fce98a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4719,6 +4719,15 @@ F: arch/arm/include/uapi/asm/kvm* F: arch/arm/include/asm/kvm* F: arch/arm/kvm/ +KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) +M: Marc Zyngier +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: kvmarm@lists.cs.columbia.edu +S: Maintained +F: arch/arm64/include/uapi/asm/kvm* +F: arch/arm64/include/asm/kvm* +F: arch/arm64/kvm/ + KEXEC M: Eric Biederman W: http://kernel.org/pub/linux/utils/kernel/kexec/ From 092f9fbf3845c1c6e0bb14f3eae1be583c071a7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 2 May 2013 14:31:03 +0100 Subject: [PATCH 1117/1368] arm64: KVM: document kernel object mappings in HYP HYP mode has access to some of the kernel pages. Document the memory mapping and the offset between kernel VA and HYP VA. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit aa4a73a0a23a65a2f531d01f1865d1e61c6acb55) Signed-off-by: Christoffer Dall --- Documentation/arm64/memory.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index c6941f815f15..d50fa618371b 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -102,3 +102,10 @@ Translation table lookup with 64KB pages: | | +--------------------------> [41:29] L2 index (only 38:29 used) | +-------------------------------> [47:42] L1 index (not used) +-------------------------------------------------> [63] TTBR0/1 + +When using KVM, the hypervisor maps kernel pages in EL2, at a fixed +offset from the kernel VA (top 24bits of the kernel VA set to zero): + +Start End Size Use +----------------------------------------------------------------------- +0000004000000000 0000007fffffffff 256GB kernel objects mapped in HYP From 146844e0e0dc7bcb2a080a2fdfd792674bff105b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 30 Apr 2013 12:02:15 +0530 Subject: [PATCH 1118/1368] ARM: KVM: Allow host virt timer irq to be different from guest timer virt irq The arch_timer irq numbers (or PPI numbers) are implementation dependent, so the host virtual timer irq number can be different from guest virtual timer irq number. This patch ensures that host virtual timer irq number is read from DTB and guest virtual timer irq is determined based on vcpu target type. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit 5ae7f87a56fab10b8f9b135a8377c144397293ca) Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 12 ++++++++++++ include/kvm/arm_arch_timer.h | 4 ++++ virt/kvm/arm/arch_timer.c | 29 ++++++++++++++++++++--------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b80256b554cd..b7840e7aa452 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -27,6 +27,8 @@ #include #include +#include + /****************************************************************************** * Cortex-A15 Reset Values */ @@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; +static const struct kvm_irq_level a15_vtimer_irq = { + .irq = 27, + .level = 1, +}; + /******************************************************************************* * Exported reset function @@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = { int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *cpu_reset; + const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A15: @@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return -EINVAL; cpu_reset = &a15_regs_reset; vcpu->arch.midr = read_cpuid_id(); + cpu_vtimer_irq = &a15_vtimer_irq; break; default: return -ENODEV; @@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); + /* Reset arch_timer context */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return 0; } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 68cb9e1dfb81..6d9aeddc09bf 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -61,6 +61,8 @@ struct arch_timer_cpu { #ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); int kvm_timer_init(struct kvm *kvm); +void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); @@ -76,6 +78,8 @@ static inline int kvm_timer_init(struct kvm *kvm) return 0; } +static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq) {} static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6f485eaf643b..c2e1ef4604e8 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -30,9 +30,7 @@ static struct timecounter *timecounter; static struct workqueue_struct *wqueue; -static struct kvm_irq_level timer_irq = { - .level = 1, -}; +static unsigned int host_vtimer_irq; static cycle_t kvm_phys_timer_read(void) { @@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - vcpu->arch.timer_cpu.irq->irq, - vcpu->arch.timer_cpu.irq->level); + timer->irq->irq, + timer->irq->level); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) @@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) timer_arm(timer, ns); } +void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + /* + * The vcpu timer irq number cannot be determined in + * kvm_timer_vcpu_init() because it is called much before + * kvm_vcpu_set_target(). To handle this, we determine + * vcpu timer irq number when the vcpu is reset. + */ + timer->irq = irq; +} + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -163,12 +175,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; - timer->irq = &timer_irq; } static void kvm_timer_init_interrupt(void *info) { - enable_percpu_irq(timer_irq.irq, 0); + enable_percpu_irq(host_vtimer_irq, 0); } @@ -182,7 +193,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(timer_irq.irq); + disable_percpu_irq(host_vtimer_irq); break; } @@ -230,7 +241,7 @@ int kvm_timer_hyp_init(void) goto out; } - timer_irq.irq = ppi; + host_vtimer_irq = ppi; err = register_cpu_notifier(&kvm_timer_cpu_nb); if (err) { From 2da084abe92d8aa40c8972c461114528c336ab54 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Wed, 1 May 2013 17:49:28 +0100 Subject: [PATCH 1119/1368] ARM: KVM: Don't handle PSCI calls via SMC Currently, kvmtool unconditionally declares that HVC should be used to call PSCI, so the function numbers in the DT tell the guest nothing about the function ID namespace or calling convention for SMC. We already assume that the guest will examine and honour the DT, since there is no way it could possibly guess the KVM-specific PSCI function IDs otherwise. So let's not encourage guests to violate what's specified in the DT by using SMC to make the call. [ Modified to apply to top of kvm/arm tree - Christoffer ] Signed-off-by: Dave P Martin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 24a7f675752e06729589d40a5256970998a21502) Signed-off-by: Christoffer Dall --- arch/arm/kvm/handle_exit.c | 3 --- arch/arm/kvm/psci.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 3d74a0be47db..df4c82d47ad7 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -52,9 +52,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; - kvm_inject_undefined(vcpu); return 1; } diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 7ee5bb7a3667..86a693a02ba3 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * kvm_psci_call - handle PSCI call if r0 value is in range * @vcpu: Pointer to the VCPU struct * - * Handle PSCI calls from guests through traps from HVC or SMC instructions. + * Handle PSCI calls from guests through traps from HVC instructions. * The calling convention is similar to SMC calls to the secure world where * the function number is placed in r0 and this function returns true if the * function number specified in r0 is withing the PSCI range, and false From fd741ad3dd186a0df45994059de3a08517a856bc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 12:11:35 +0100 Subject: [PATCH 1120/1368] ARM: KVM: remove dead prototype for __kvm_tlb_flush_vmid __kvm_tlb_flush_vmid has been renamed to __kvm_tlb_flush_vmid_ipa, and the old prototype should have been removed when the code was modified. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 368074d908b785588778f00b4384376cd636f4a1) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_asm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 4bb08e3e52bc..a2f43ddcc300 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -74,8 +74,6 @@ extern char __kvm_hyp_vector[]; extern char __kvm_hyp_code_start[]; extern char __kvm_hyp_code_end[]; -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); From 3be39e7f5aaca996a006717658c517650f345476 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 12:11:37 +0100 Subject: [PATCH 1121/1368] ARM: KVM: use phys_addr_t instead of unsigned long long for HYP PGDs HYP PGDs are passed around as phys_addr_t, except just before calling into the hypervisor init code, where they are cast to a rather weird unsigned long long. Just keep them around as phys_addr_t, which is what makes the most sense. Reported-by: Catalin Marinas Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit dac288f7b38a7439502b77dabcdf8a9a5c4ae721) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 4 ++-- arch/arm/kvm/arm.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index ff5aaf10e6ec..1f3cee2e210e 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -190,8 +190,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, - unsigned long long pgd_ptr, +static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ef1703b9587b..741f66a2edbd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -800,8 +800,8 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *dummy) { - unsigned long long boot_pgd_ptr; - unsigned long long pgd_ptr; + phys_addr_t boot_pgd_ptr; + phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; @@ -809,8 +809,8 @@ static void cpu_init_hyp_mode(void *dummy) /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); - boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); - pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); + boot_pgd_ptr = kvm_mmu_get_boot_httbr(); + pgd_ptr = kvm_mmu_get_httbr(); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; From a7232858be9e29371ef8ed2a39658443344b7765 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 12:11:38 +0100 Subject: [PATCH 1122/1368] ARM: KVM: don't special case PC when doing an MMIO Admitedly, reading a MMIO register to load PC is very weird. Writing PC to a MMIO register is probably even worse. But the architecture doesn't forbid any of these, and injecting a Prefetch Abort is the wrong thing to do anyway. Remove this check altogether, and let the adventurous guest wander into LaLaLand if they feel compelled to do so. Reported-by: Catalin Marinas Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 8734f16fb2aa4ff0bb57ad6532661a38bc8ff957) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 ----- arch/arm/kvm/mmio.c | 6 ------ 2 files changed, 11 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 82b4babead2c..a464e8d7b6c5 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) return cpsr_mode > USR_MODE;; } -static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg) -{ - return reg == 15; -} - static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) { return vcpu->arch.fault.hsr; diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 72a12f2171b2..b8e06b7a2833 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -86,12 +86,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - if (kvm_vcpu_reg_is_pc(vcpu, rt)) { - /* IO memory trying to read/write pc */ - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - mmio->is_write = is_write; mmio->phys_addr = fault_ipa; mmio->len = len; From 99f2cf12576cffa1a0168b0cc05216de54ca12f4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 May 2013 12:11:39 +0100 Subject: [PATCH 1123/1368] ARM: KVM: get rid of S2_PGD_SIZE S2_PGD_SIZE defines the number of pages used by a stage-2 PGD and is unused, except for a VM_BUG_ON check that missuses the define. As the check is very unlikely to ever triggered except in circumstances where KVM is the least of our worries, just kill both the define and the VM_BUG_ON check. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 4db845c3d8e2f8a219e8ac48834dd4fe085e5d63) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 1 - arch/arm/kvm/mmu.c | 3 --- 2 files changed, 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 124623e5ef14..64e96960de29 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -135,7 +135,6 @@ #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) -#define S2_PGD_SIZE (1 << S2_PGD_ORDER) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 84ba67b982c0..ca6bea4859b4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -382,9 +382,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!pgd) return -ENOMEM; - /* stage-2 pgd must be aligned to its size */ - VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); - memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; From d144c1d2cd86381c80187f5ccee5eb27dad6b21b Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 6 Jun 2013 18:02:54 -0700 Subject: [PATCH 1124/1368] arm/kvm: Cleanup KVM_ARM_MAX_VCPUS logic Commit d21a1c83c7595e387545632e44cd7797b76e19cc (ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally) changed the Kconfig logic for KVM_ARM_MAX_VCPUS to work around a build error arising from the use of KVM_ARM_MAX_VCPUS when CONFIG_KVM=n. The resulting Kconfig logic is a bit awkward and leaves a KVM_ARM_MAX_VCPUS always defined in the kernel config file. This change reverts the Kconfig logic back and adds a simple preprocessor conditional in kvm_host.h to handle when CONFIG_KVM_ARM_MAX_VCPUS is undefined. Signed-off-by: Geoff Levand Signed-off-by: Christoffer Dall (cherry picked from commit f2dda9d829818b055510187059cdfa4ece10c82d) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 5 +++++ arch/arm/kvm/Kconfig | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 1f3cee2e210e..7d22517d8071 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -25,7 +25,12 @@ #include #include +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 370e1a8af6ac..49dd64e579c2 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -41,9 +41,9 @@ config KVM_ARM_HOST Provides host support for ARM processors. config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST - default 4 if KVM_ARM_HOST - default 0 + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 help Static number of max supported virtual CPUs per VM. From 8847470177f0d453fe41bc8de72b89a41a264830 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Jun 2013 22:33:22 +0200 Subject: [PATCH 1125/1368] ARM: kvm: don't include drivers/virtio/Kconfig The virtio configuration has recently moved and is now visible everywhere. Including the file again from KVM as we used to need earlier now causes dependency problems: warning: (CAIF_VIRTIO && VIRTIO_PCI && VIRTIO_MMIO && REMOTEPROC && RPMSG) selects VIRTIO which has unmet direct dependencies (VIRTUALIZATION) Cc: Christoffer Dall Signed-off-by: Arnd Bergmann Signed-off-by: Christoffer Dall (cherry picked from commit 8bd4ffd6b3a98f00267051dc095076ea2ff06ea8) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 49dd64e579c2..ebf5015508b5 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -67,6 +67,4 @@ config KVM_ARM_TIMER ---help--- Adds support for the Architected Timers in virtual machines -source drivers/virtio/Kconfig - endif # VIRTUALIZATION From 54993349e584212be7d554a2188f6fef3efeb701 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 4 Jul 2013 13:34:32 +0100 Subject: [PATCH 1126/1368] arm64: KVM: Kconfig integration Finally plug KVM/arm64 into the config system, making it possible to enable KVM support on AArch64 CPUs. Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini (cherry picked from commit c3eb5b14449a0949e9764d39374a2ea63faae14f) Signed-off-by: Christoffer Dall --- arch/arm64/Kconfig | 2 ++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/Kconfig | 51 +++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 arch/arm64/kvm/Kconfig diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 43068cf44c2d..de9eeb43f622 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -275,6 +275,8 @@ source "drivers/Kconfig" source "fs/Kconfig" +source "arch/arm64/kvm/Kconfig" + source "arch/arm64/Kconfig.debug" source "security/Kconfig" diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 49c162c03b69..666e231d410b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig new file mode 100644 index 000000000000..21e90820bd23 --- /dev/null +++ b/arch/arm64/kvm/Kconfig @@ -0,0 +1,51 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool "Kernel-based Virtual Machine (KVM) support" + select MMU_NOTIFIER + select PREEMPT_NOTIFIERS + select ANON_INODES + select KVM_MMIO + select KVM_ARM_HOST + select KVM_ARM_VGIC + select KVM_ARM_TIMER + ---help--- + Support hosting virtualized guest machines. + + If unsure, say N. + +config KVM_ARM_HOST + bool + ---help--- + Provides host support for ARM processors. + +config KVM_ARM_VGIC + bool + depends on KVM_ARM_HOST && OF + select HAVE_KVM_IRQCHIP + ---help--- + Adds support for a hardware assisted, in-kernel GIC emulation. + +config KVM_ARM_TIMER + bool + depends on KVM_ARM_VGIC + ---help--- + Adds support for the Architected Timers in virtual machines. + +endif # VIRTUALIZATION From a043deb0317044bdeda876d2170de0b24c2bfba8 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 4 Jul 2013 13:40:29 +0900 Subject: [PATCH 1127/1368] KVM: Introduce kvm_arch_memslots_updated() This is called right after the memslots is updated, i.e. when the result of update_memslots() gets installed in install_new_memslots(). Since the memslots needs to be updated twice when we delete or move a memslot, kvm_arch_commit_memory_region() does not correspond to this exactly. In the following patch, x86 will use this new API to check if the mmio generation has reached its maximum value, in which case mmio sptes need to be flushed out. Signed-off-by: Takuya Yoshikawa Acked-by: Alexander Graf Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini (cherry picked from commit e59dbe09f8e6fb8f6ee19dc79d1a2f14299e4cd2) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 4 ++++ arch/ia64/kvm/kvm-ia64.c | 4 ++++ arch/mips/kvm/kvm_mips.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 5 ++++- 8 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 741f66a2edbd..9c697db2787e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -219,6 +219,10 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5b2dc0d10c8f..bdfd8789b376 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1560,6 +1560,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index dd203e59e6fd..a7b044536de4 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -208,6 +208,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6316ee336e88..ae63ae4a1a5f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -420,6 +420,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return kvmppc_core_create_memslot(slot, npages); } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 698fb826e149..e515b2d4a947 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -981,6 +981,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e8ba99c34180..894003d79d22 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6976,6 +6976,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return -ENOMEM; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8db53cfaccdb..15018c572ac5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -497,6 +497,7 @@ int __kvm_set_memory_region(struct kvm *kvm, void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); +void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b547a1ceecbc..7ca9939ee9a7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -731,7 +731,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, update_memslots(slots, new, kvm->memslots->generation); rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); - return old_memslots; + + kvm_arch_memslots_updated(kvm); + + return old_memslots; } /* From 3b180da00d212c6968ab821dcf85f3776929a44d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 6 Aug 2013 13:50:54 -0700 Subject: [PATCH 1128/1368] ARM: KVM: Fix unaligned unmap_range leak The unmap_range function did not properly cover the case when the start address was not aligned to PMD_SIZE or PUD_SIZE and an entire pte table or pmd table was cleared, causing us to leak memory when incrementing the addr. The fix is to always move onto the next page table entry boundary instead of adding the full size of the VA range covered by the corresponding table level entry. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit d3840b26614d8ce3db53c98061d9fcb1b9ccb0dd) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index ca6bea4859b4..80a83ec4a9ae 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -132,37 +132,37 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, pmd_t *pmd; pte_t *pte; unsigned long long addr = start, end = start + size; - u64 range; + u64 next; while (addr < end) { pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { - addr += PUD_SIZE; + addr = pud_addr_end(addr, end); continue; } pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { - addr += PMD_SIZE; + addr = pmd_addr_end(addr, end); continue; } pte = pte_offset_kernel(pmd, addr); clear_pte_entry(kvm, pte, addr); - range = PAGE_SIZE; + next = addr + PAGE_SIZE; /* If we emptied the pte, walk back up the ladder */ if (pte_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); - range = PMD_SIZE; + next = pmd_addr_end(addr, end); if (pmd_empty(pmd)) { clear_pud_entry(kvm, pud, addr); - range = PUD_SIZE; + next = pud_addr_end(addr, end); } } - addr += range; + addr = next; } } From ce7fc7403b536adb6b0871f3e4d07a4da08ad63b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Aug 2013 13:05:48 +0100 Subject: [PATCH 1129/1368] arm64: KVM: fix 2-level page tables unmapping When using 64kB pages, we only have two levels of page tables, meaning that PGD, PUD and PMD are fused. In this case, trying to refcount PUDs and PMDs independently is a a complete disaster, as they are the same. We manage to get it right for the allocation (stage2_set_pte uses {pmd,pud}_none), but the unmapping path clears both pud and pmd refcounts, which fails spectacularly with 2-level page tables. The fix is to avoid calling clear_pud_entry when both the pmd and pud pages are empty. For this, and instead of introducing another pud_empty function, consolidate both pte_empty and pmd_empty into page_empty (the code is actually identical) and use that to also test the validity of the pud. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 979acd5e18c3e5cb7e3308c699d79553af5af8c6) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 80a83ec4a9ae..0988d9e04dd4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -85,6 +85,12 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } +static bool page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { pmd_t *pmd_table = pmd_offset(pud, 0); @@ -103,12 +109,6 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) put_page(virt_to_page(pmd)); } -static bool pmd_empty(pmd_t *pmd) -{ - struct page *pmd_page = virt_to_page(pmd); - return page_count(pmd_page) == 1; -} - static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) { if (pte_present(*pte)) { @@ -118,12 +118,6 @@ static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) } } -static bool pte_empty(pte_t *pte) -{ - struct page *pte_page = virt_to_page(pte); - return page_count(pte_page) == 1; -} - static void unmap_range(struct kvm *kvm, pgd_t *pgdp, unsigned long long start, u64 size) { @@ -153,10 +147,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, next = addr + PAGE_SIZE; /* If we emptied the pte, walk back up the ladder */ - if (pte_empty(pte)) { + if (page_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); next = pmd_addr_end(addr, end); - if (pmd_empty(pmd)) { + if (page_empty(pmd) && !page_empty(pud)) { clear_pud_entry(kvm, pud, addr); next = pud_addr_end(addr, end); } From ebd362b7533b7866928c9bca7727c48558aedbd2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 7 Jun 2013 11:02:34 +0100 Subject: [PATCH 1130/1368] arm64: KVM: perform save/restore of PAR_EL1 Not saving PAR_EL1 is an unfortunate oversight. If the guest performs an AT* operation and gets scheduled out before reading the result of the translation from PAREL1, it could become corrupted by another guest or the host. Saving this register is made slightly more complicated as KVM also uses it on the permission fault handling path, leading to an ugly "stash and restore" sequence. Fortunately, this is already a slow path so we don't really care. Also, Linux doesn't do any AT* operation, so Linux guests are not impacted by this bug. Signed-off-by: Marc Zyngier (cherry picked from commit 1bbd80549810637b7381ab0649ba7c7d62f1342a) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 17 ++++++++++------- arch/arm64/kvm/hyp.S | 10 ++++++++++ arch/arm64/kvm/sys_regs.c | 3 +++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index c92de4163eba..b25763bc0ec4 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,14 +42,15 @@ #define TPIDR_EL1 18 /* Thread ID, Privileged */ #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ +#define PAR_EL1 21 /* Physical Address Register */ /* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 21 /* Domain Access Control Register */ -#define IFSR32_EL2 22 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 23 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 24 /* Debug Vector Catch Register */ -#define TEECR32_EL1 25 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 26 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 27 +#define DACR32_EL2 22 /* Domain Access Control Register */ +#define IFSR32_EL2 23 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */ +#define TEECR32_EL1 26 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 28 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ @@ -69,6 +70,8 @@ #define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ #define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ #define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ +#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ #define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ #define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ #define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index ff985e3d8b72..218802f68b20 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -214,6 +214,7 @@ __kvm_hyp_code_start: mrs x21, tpidr_el1 mrs x22, amair_el1 mrs x23, cntkctl_el1 + mrs x24, par_el1 stp x4, x5, [x3] stp x6, x7, [x3, #16] @@ -225,6 +226,7 @@ __kvm_hyp_code_start: stp x18, x19, [x3, #112] stp x20, x21, [x3, #128] stp x22, x23, [x3, #144] + str x24, [x3, #160] .endm .macro restore_sysregs @@ -243,6 +245,7 @@ __kvm_hyp_code_start: ldp x18, x19, [x3, #112] ldp x20, x21, [x3, #128] ldp x22, x23, [x3, #144] + ldr x24, [x3, #160] msr vmpidr_el2, x4 msr csselr_el1, x5 @@ -264,6 +267,7 @@ __kvm_hyp_code_start: msr tpidr_el1, x21 msr amair_el1, x22 msr cntkctl_el1, x23 + msr par_el1, x24 .endm .macro skip_32bit_state tmp, target @@ -753,6 +757,10 @@ el1_trap: */ tbnz x1, #7, 1f // S1PTW is set + /* Preserve PAR_EL1 */ + mrs x3, par_el1 + push x3, xzr + /* * Permission fault, HPFAR_EL2 is invalid. * Resolve the IPA the hard way using the guest VA. @@ -766,6 +774,8 @@ el1_trap: /* Read result */ mrs x3, par_el1 + pop x0, xzr // Restore PAR_EL1 from the stack + msr par_el1, x0 tbnz x3, #0, 3f // Bail out if we failed the translation ubfx x3, x3, #12, #36 // Extract IPA lsl x3, x3, #4 // and present it like HPFAR diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 94923609753b..02e9d09e1d80 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -211,6 +211,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* FAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), NULL, reset_unknown, FAR_EL1 }, + /* PAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000), + NULL, reset_unknown, PAR_EL1 }, /* PMINTENSET_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), From e336bcc2613d7332083819ac8f148313e9471f59 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 11 Jun 2013 18:05:25 +0100 Subject: [PATCH 1131/1368] arm64: KVM: add missing dsb before invalidating Stage-2 TLBs When performing a Stage-2 TLB invalidation, it is necessary to make sure the write to the page tables is observable by all CPUs. For this purpose, add dsb instructions to __kvm_tlb_flush_vmid_ipa and __kvm_flush_vm_context before doing the TLB invalidation itself. Signed-off-by: Marc Zyngier (cherry picked from commit f142e5eeb724cfbedd203b32b3b542d78dbe2545) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 218802f68b20..1ac0bbbdddb2 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -604,6 +604,8 @@ END(__kvm_vcpu_run) // void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); ENTRY(__kvm_tlb_flush_vmid_ipa) + dsb ishst + kern_hyp_va x0 ldr x2, [x0, #KVM_VTTBR] msr vttbr_el2, x2 @@ -625,6 +627,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) ENDPROC(__kvm_tlb_flush_vmid_ipa) ENTRY(__kvm_flush_vm_context) + dsb ishst tlbi alle1is ic ialluis dsb sy From fbcac5446f1c5be6e54e282e1dbed74c58671a72 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 22 Jul 2013 04:40:38 +0100 Subject: [PATCH 1132/1368] arm64: KVM: use 'int' instead of 'u32' for variable 'target' in kvm_host.h. 'target' will be set to '-1' in kvm_arch_vcpu_init(), and it need check 'target' whether less than zero or not in kvm_vcpu_initialized(). So need define target as 'int' instead of 'u32', just like ARM has done. The related warning: arch/arm64/kvm/../../../arch/arm/kvm/arm.c:497:2: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits] Signed-off-by: Chen Gang [Marc: reformated the Subject line to fit the series] Signed-off-by: Marc Zyngier (cherry picked from commit 6c8c0c4dc0e98ee2191211d66e9f876e95787073) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 644d73956864..0859a4ddd1e7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -129,7 +129,7 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_cache; /* Target CPU and feature flags */ - u32 target; + int target; DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); /* Detect first run of a vcpu */ From 54118be422ac3ff0af613676d47f8d46cc9f8801 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 29 Jul 2013 20:46:04 -0700 Subject: [PATCH 1133/1368] KVM: ARM: Squash len warning The 'len' variable was declared an unsigned and then checked for less than 0, which results in warnings on some compilers. Since len is assigned an int, make it an int. Signed-off-by: Christoffer Dall (cherry picked from commit 2184a60de26b94bc5a88de3e5a960ef9ff54ba5a) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index b8e06b7a2833..0c25d9487d53 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -63,7 +63,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_exit_mmio *mmio) { - unsigned long rt, len; + unsigned long rt; + int len; bool is_write, sign_extend; if (kvm_vcpu_dabt_isextabt(vcpu)) { From d829a739332e33ffc9753c607325f461e9c994d8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 13 May 2013 12:08:06 +0100 Subject: [PATCH 1134/1368] ARM: kvm: use inner-shareable barriers after TLB flushing When flushing the TLB at PL2 in response to remapping at stage-2 or VMID rollover, we have a dsb instruction to ensure completion of the command before continuing. Since we only care about other processors for TLB invalidation, use the inner-shareable variant of the dsb instruction instead. Acked-by: Marc Zyngier Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon (cherry picked from commit e3ab547f57bd626201d4b715b696c80ad1ef4ba2) Signed-off-by: Christoffer Dall --- arch/arm/kvm/init.S | 2 +- arch/arm/kvm/interrupts.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index f048338135f7..1b9844d369cc 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -142,7 +142,7 @@ target: @ We're now in the trampoline code, switch page tables @ Invalidate the old TLBs mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH - dsb + dsb ish eret diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 16cd4ba5d7fd..f85052facffc 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -55,7 +55,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) mcrr p15, 6, r2, r3, c2 @ Write VTTBR isb mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) - dsb + dsb ish isb mov r2, #0 mov r3, #0 @@ -79,7 +79,7 @@ ENTRY(__kvm_flush_vm_context) mcr p15, 4, r0, c8, c3, 4 /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */ mcr p15, 0, r0, c7, c1, 0 - dsb + dsb ish isb @ Not necessary if followed by eret bx lr From 678b5999d8ded1bff60ef8e3bfb95c93d63c5ebc Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 6 Aug 2013 05:34:16 +0100 Subject: [PATCH 1135/1368] ARM: 7808/1: KVM: mm: Get rid of L_PTE_USER ref from PAGE_S2_DEVICE THe L_PTE_USER actually has nothing to do with stage 2 mappings and the L_PTE_S2_RDWR value sets the readable bit, which was what L_PTE_USER was used for before proper handling of stage 2 memory defines. Changelog: [v3]: Drop call to kvm_set_s2pte_writable in mmu.c [v2]: Change default mappings to be r/w instead of r/o, as per Marc Zyngier's suggestion. Cc: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Russell King (cherry picked from commit 8947c09d05da9f0436f423518f449beaa5ea1bdc) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/pgtable.h | 2 +- arch/arm/kvm/mmu.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 9bcd262a9008..8afc60c55e82 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -97,7 +97,7 @@ extern pgprot_t pgprot_s2_device; #define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP) #define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) #define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) -#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY) +#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR) #define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 0988d9e04dd4..b0de86b56c13 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -489,7 +489,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); - kvm_set_s2pte_writable(&pte); ret = mmu_topup_memory_cache(&cache, 2, 2); if (ret) From effab3928a14e5574f32801eb665ce7c6a98ef14 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Sat, 24 Aug 2013 22:14:07 +0200 Subject: [PATCH 1136/1368] kvm: use anon_inode_getfd() with O_CLOEXEC flag KVM uses anon_inode_get() to allocate file descriptors as part of some of its ioctls. But those ioctls are lacking a flag argument allowing userspace to choose options for the newly opened file descriptor. In such case it's advised to use O_CLOEXEC by default so that userspace is allowed to choose, without race, if the file descriptor is going to be inherited across exec(). This patch set O_CLOEXEC flag on all file descriptors created with anon_inode_getfd() to not leak file descriptors across exec(). Signed-off-by: Yann Droneaud Link: http://lkml.kernel.org/r/cover.1377372576.git.ydroneaud@opteya.com Reviewed-by: Paolo Bonzini Signed-off-by: Gleb Natapov (cherry picked from commit 24009b0549de563006705b9af8694fc8fc9a5aa1) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7ca9939ee9a7..b5afee319073 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1896,7 +1896,7 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); + return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } /* @@ -2305,7 +2305,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } - ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { ops->destroy(dev); return ret; @@ -2589,7 +2589,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } #endif - r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); + r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC); if (r < 0) kvm_put_kvm(kvm); From 4ace5f4542de34fb99068fa2285947d6bbfee26a Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 25 Jul 2013 03:04:38 +0200 Subject: [PATCH 1137/1368] kvm: optimize away THP checks in kvm_is_mmio_pfn() The checks on PG_reserved in the page structure on head and tail pages aren't necessary because split_huge_page wouldn't transfer the PG_reserved bit from head to tail anyway. This was a forward-thinking check done in the case PageReserved was set by a driver-owned page mapped in userland with something like remap_pfn_range in a VM_PFNMAP region, but using hugepmds (not possible right now). It was meant to be very safe, but it's overkill as it's unlikely split_huge_page could ever run without the driver noticing and tearing down the hugepage itself. And if a driver in the future will really want to map a reserved hugepage in userland using an huge pmd it should simply take care of marking all subpages reserved too to keep KVM safe. This of course would require such a hypothetical driver to tear down the huge pmd itself and splitting the hugepage itself, instead of relaying on split_huge_page, but that sounds very reasonable, especially considering split_huge_page wouldn't currently transfer the reserved bit anyway. Signed-off-by: Andrea Arcangeli Signed-off-by: Gleb Natapov (cherry picked from commit 11feeb498086a3a5907b8148bdf1786a9b18fc55) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b5afee319073..6c9130bd16c8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,28 +102,8 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { - if (pfn_valid(pfn)) { - int reserved; - struct page *tail = pfn_to_page(pfn); - struct page *head = compound_trans_head(tail); - reserved = PageReserved(head); - if (head != tail) { - /* - * "head" is not a dangling pointer - * (compound_trans_head takes care of that) - * but the hugepage may have been splitted - * from under us (and we may not hold a - * reference count on the head page so it can - * be reused before we run PageReferenced), so - * we've to check PageTail before returning - * what we just read. - */ - smp_rmb(); - if (PageTail(tail)) - return reserved; - } - return PageReserved(tail); - } + if (pfn_valid(pfn)) + return PageReserved(pfn_to_page(pfn)); return true; } From 1752d0c7e51f1a5e8d1f021d6b4eef1845c6da06 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Aug 2013 11:08:22 +0100 Subject: [PATCH 1138/1368] ARM: KVM: vgic: simplify vgic_get_target_reg vgic_get_target_reg is quite complicated, for no good reason. Actually, it is fairly easy to write it in a much more efficient way by using the target CPU array instead of the bitmap. Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 986af8e0789a41ac4844e6eefed4a33e86524918) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 17c5ac7d10ed..a2d478aec046 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -432,19 +432,13 @@ static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, static u32 vgic_get_target_reg(struct kvm *kvm, int irq) { struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; + int i; u32 val = 0; irq -= VGIC_NR_PRIVATE_IRQS; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - if (test_bit(irq + i, bmap)) - val |= 1 << (c + i * 8); - } + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) + val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); return val; } From 635c887f2931a200a52694170122e2b62d1b6218 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Aug 2013 11:08:23 +0100 Subject: [PATCH 1139/1368] ARM: KVM: vgic: fix GICD_ICFGRn access All the code in handle_mmio_cfg_reg() assumes the offset has been shifted right to accomodate for the 2:1 bit compression, but this is only done when getting the register address. Shift the offset early so the code works mostly unchanged. Reported-by: Zhaobo (Bob, ERC) Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 6545eae3d7a1b6dc2edb8ede9107998aee1207ef) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a2d478aec046..902789ff4abb 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -541,8 +541,12 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { u32 val; - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); + u32 *reg; + + offset >>= 1; + reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + vcpu->vcpu_id, offset); + if (offset & 2) val = *reg >> 16; else From a144ec826c81facd70fd157aef6f7b7030687f68 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 29 Aug 2013 11:08:24 +0100 Subject: [PATCH 1140/1368] ARM: KVM: Bugfix: vgic_bytemap_get_reg per cpu regs For bytemaps each IRQ field is 1 byte wide, so we pack 4 irq fields in one word and since there are 32 private (per cpu) irqs, we have 8 private u32 fields on the vgic_bytemap struct. We shift the offset from the base of the register group right by 2, giving us the word index instead of the field index. But then there are 8 private words, not 4, which is also why we subtract 8 words from the offset of the shared words. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 8d98915b6bda499e47d19166101d0bbcfd409c80) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 902789ff4abb..685fc72fc751 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -149,7 +149,7 @@ static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) { offset >>= 2; BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 4) + if (offset < 8) return x->percpu[cpuid] + offset; else return x->shared + offset - 8; From de5324b84480246ad6064cd3469d362995c2e929 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 29 Aug 2013 11:08:25 +0100 Subject: [PATCH 1141/1368] ARM: KVM: vgic: Bump VGIC_NR_IRQS to 256 The Versatile Express TC2 board, which we use as our main emulated platform in QEMU, defines 160+32 == 192 interrupts, so limiting the number of interrupts to 128 is not quite going to cut it for real board emulation. Note that this didn't use to be a problem because QEMU was buggy and only defined 128 interrupts until recently. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Gleb Natapov (cherry picked from commit 9b2d2e0df8a49414b1e5bc89148c9984dd87782a) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 343744e4809c..7e2d15837b02 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -26,7 +26,7 @@ #include #include -#define VGIC_NR_IRQS 128 +#define VGIC_NR_IRQS 256 #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) From 07557caabbdc70c2644e6f195fd07ca8be7d16b2 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 8 Aug 2013 20:35:07 -0700 Subject: [PATCH 1142/1368] ARM: KVM: Fix kvm_set_pte assignment THe kvm_set_pte function was actually assigning the entire struct to the structure member, which should work because the structure only has that one member, but it is still not very nice. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 0963e5d0f22f9d197dbf206d8b5b2a150722cf5e) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 472ac7091003..9b28c41f4ba9 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -64,7 +64,7 @@ void kvm_clear_hyp_idmap(void); static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) { - pte_val(*pte) = new_pte; + *pte = new_pte; /* * flush_pmd_entry just takes a void pointer and cleans the necessary * cache entries, so we can reuse the function for ptes. From 14cffe44b8aad9a839fcbcf35f9d2bbd90f572c0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 8 Aug 2013 20:34:22 -0700 Subject: [PATCH 1143/1368] ARM: KVM: Simplify tracepoint text The tracepoint for kvm_guest_fault was extremely long, make it a slightly bit shorter. Cc: Sergei Shtylyov Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 6e72cc5700fe6b8776d537b736dab64b21ae0f1f) Signed-off-by: Christoffer Dall --- arch/arm/kvm/trace.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index a8e73ed5ad5b..b1d640f78623 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault, __entry->ipa = ipa; ), - TP_printk("guest fault at PC %#08lx (hxfar %#08lx, " - "ipa %#16llx, hsr %#08lx", - __entry->vcpu_pc, __entry->hxfar, - __entry->ipa, __entry->hsr) + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + __entry->ipa, __entry->hsr, + __entry->hxfar, __entry->vcpu_pc) ); TRACE_EVENT(kvm_irq_line, From 411b0c990125d54d48314d8ecd6e800bc9660509 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 19 Aug 2013 14:16:57 -0700 Subject: [PATCH 1144/1368] ARM: KVM: Work around older compiler bug Compilers before 4.6 do not behave well with unnamed fields in structure initializers and therefore produces build errors: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10676 By refering to the unnamed union using braces, both older and newer compilers produce the same result. Acked-by: Marc Zyngier Reported-by: Russell King Tested-by: Russell King Signed-off-by: Christoffer Dall (cherry picked from commit 6833d83891140aedab7841589b7c7dbd7b600235) Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b7840e7aa452..71e08baee209 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -40,7 +40,7 @@ static struct kvm_regs a15_regs_reset = { }; static const struct kvm_irq_level a15_vtimer_irq = { - .irq = 27, + { .irq = 27 }, .level = 1, }; From 215ed7558daf77d296ee388f64df34b0d479ea07 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 14 Aug 2013 12:33:48 -0700 Subject: [PATCH 1145/1368] ARM: KVM: Add newlines to panic strings The panic strings are hard to read and on narrow terminals some characters are simply truncated off the panic message. Make is slightly prettier with a newline in the Hyp panic strings. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 1fe40f6d39d23f39e643607a3e1883bfc74f1244) Signed-off-by: Christoffer Dall --- arch/arm/kvm/interrupts.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index f85052facffc..ddc15539bad2 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -492,10 +492,10 @@ __kvm_hyp_code_end: .section ".rodata" und_die_str: - .ascii "unexpected undefined exception in Hyp mode at: %#08x" + .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" pabt_die_str: - .ascii "unexpected prefetch abort in Hyp mode at: %#08x" + .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n" dabt_die_str: - .ascii "unexpected data abort in Hyp mode at: %#08x" + .ascii "unexpected data abort in Hyp mode at: %#08x\n" svc_die_str: - .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x" + .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" From 02ef4f0c0d0930d9d44e9929caaee12043447010 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Sep 2013 13:52:33 +0200 Subject: [PATCH 1146/1368] KVM: mmu: allow page tables to be in read-only slots Page tables in a read-only memory slot will currently cause a triple fault because the page walker uses gfn_to_hva and it fails on such a slot. OVMF uses such a page table; however, real hardware seems to be fine with that as long as the accessed/dirty bits are set. Save whether the slot is readonly, and later check it when updating the accessed and dirty bits. Reviewed-by: Xiao Guangrong Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit ba6a3541545542721ce821d1e7e5ce35752e6fdf) Signed-off-by: Christoffer Dall --- arch/x86/kvm/paging_tmpl.h | 20 +++++++++++++++++++- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 14 +++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index da20860b457a..e1af2394a23f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -69,6 +69,7 @@ struct guest_walker { pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -130,6 +131,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (pte == orig_pte) continue; + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. + */ + if (unlikely(!walker->pte_writable[level - 1])) + continue; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); if (ret) return ret; @@ -204,7 +221,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, goto error; real_gfn = gpa_to_gfn(real_gfn); - host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, + &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15018c572ac5..e2befc2b1647 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -519,6 +519,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6c9130bd16c8..fcbcf5312f93 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) EXPORT_SYMBOL_GPL(gfn_to_hva); /* - * The hva returned by this function is only allowed to be read. - * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). + * If writable is set to false, the hva returned by this function is only + * allowed to be read. */ -static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + if (writable) + *writable = !memslot_is_readonly(slot); + return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); } @@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int r; unsigned long addr; - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; r = kvm_read_hva(data, (void __user *)addr + offset, len); @@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, gfn_t gfn = gpa >> PAGE_SHIFT; int offset = offset_in_page(gpa); - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); From ed363014a225b257a32ad69b1ef0da615aecb50a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 4 Sep 2013 22:32:23 +0200 Subject: [PATCH 1147/1368] kvm: free resources after canceling async_pf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we cancel 'async_pf_execute()', we should behave as if the work was never scheduled in 'kvm_setup_async_pf()'. Fixes a bug when we can't unload module because the vm wasn't destroyed. Signed-off-by: Radim Krčmář Reviewed-by: Paolo Bonzini Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 28b441e24088081c1e213139d1303b451a34a4f4) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index ea475cd03511..8a39dda7a325 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -101,8 +101,11 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) typeof(*work), queue); cancel_work_sync(&work->work); list_del(&work->queue); - if (!work->done) /* work was canceled */ + if (!work->done) { /* work was canceled */ + mmdrop(work->mm); + kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); + } } spin_lock(&vcpu->async_pf.lock); From 78169fda11a0b68310885d8991399bc3a51e5b2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 4 Sep 2013 22:32:24 +0200 Subject: [PATCH 1148/1368] kvm: remove .done from struct kvm_async_pf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '.done' is used to mark the completion of 'async_pf_execute()', but 'cancel_work_sync()' returns true when the work was canceled, so we use it instead. Signed-off-by: Radim Krčmář Reviewed-by: Paolo Bonzini Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 98fda169290b3b28c0f2db2b8f02290c13da50ef) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 - virt/kvm/async_pf.c | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e2befc2b1647..dbbd78215204 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -176,7 +176,6 @@ struct kvm_async_pf { unsigned long addr; struct kvm_arch_async_pf arch; struct page *page; - bool done; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8a39dda7a325..b197950ac4d5 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -75,7 +75,6 @@ static void async_pf_execute(struct work_struct *work) spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); apf->page = page; - apf->done = true; spin_unlock(&vcpu->async_pf.lock); /* @@ -99,9 +98,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) struct kvm_async_pf *work = list_entry(vcpu->async_pf.queue.next, typeof(*work), queue); - cancel_work_sync(&work->work); list_del(&work->queue); - if (!work->done) { /* work was canceled */ + if (cancel_work_sync(&work->work)) { mmdrop(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); @@ -166,7 +164,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, return 0; work->page = NULL; - work->done = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); From c3832c083abf356e0df1b581df22ee8a21034841 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 11 Sep 2013 15:27:41 -0700 Subject: [PATCH 1149/1368] ARM: kvm: rename cpu_reset to avoid name clash cpu_reset is already #defined in as processor.reset, so it expands here and causes problems. Cc: Signed-off-by: Olof Johansson Signed-off-by: Christoffer Dall (cherry picked from commit ac570e0493815e0b41681c89cb50d66421429d27) Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 71e08baee209..c02ba4af599f 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -58,14 +58,14 @@ static const struct kvm_irq_level a15_vtimer_irq = { */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - struct kvm_regs *cpu_reset; + struct kvm_regs *reset_regs; const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A15: if (vcpu->vcpu_id > a15_max_cpu_idx) return -EINVAL; - cpu_reset = &a15_regs_reset; + reset_regs = &a15_regs_reset; vcpu->arch.midr = read_cpuid_id(); cpu_vtimer_irq = &a15_vtimer_irq; break; @@ -74,7 +74,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } /* Reset core registers */ - memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); From ae0e4b34f8d96028faefddb83994cfd2c8cb8681 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Sep 2013 12:57:17 +0200 Subject: [PATCH 1150/1368] KVM: cleanup (physical) CPU hotplug Remove the useless argument, and do not do anything if there are no VMs running at the time of the hotplug. Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 4fa92fb25ae5a2d79d872ab54df511c831b1f363) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fcbcf5312f93..7103b989ba54 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2681,10 +2681,11 @@ static void hardware_enable_nolock(void *junk) } } -static void hardware_enable(void *junk) +static void hardware_enable(void) { raw_spin_lock(&kvm_lock); - hardware_enable_nolock(junk); + if (kvm_usage_count) + hardware_enable_nolock(NULL); raw_spin_unlock(&kvm_lock); } @@ -2698,10 +2699,11 @@ static void hardware_disable_nolock(void *junk) kvm_arch_hardware_disable(NULL); } -static void hardware_disable(void *junk) +static void hardware_disable(void) { raw_spin_lock(&kvm_lock); - hardware_disable_nolock(junk); + if (kvm_usage_count) + hardware_disable_nolock(NULL); raw_spin_unlock(&kvm_lock); } @@ -2748,20 +2750,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, { int cpu = (long)v; - if (!kvm_usage_count) - return NOTIFY_OK; - val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); - hardware_disable(NULL); + hardware_disable(); break; case CPU_STARTING: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - hardware_enable(NULL); + hardware_enable(); break; } return NOTIFY_OK; From 753f251708b5322430cf4902264d432a5daf06e4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Sep 2013 12:58:35 +0200 Subject: [PATCH 1151/1368] KVM: protect kvm_usage_count with its own spinlock The VM list need not be protected by a raw spinlock. Separate the two so that kvm_lock can be made non-raw. Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 4a937f96f3a29c58b7edd349d2e4dfac371efdf2) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/locking.txt | 6 +++++- virt/kvm/kvm_main.c | 19 ++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 41b7ac9884b5..b1f5de22d090 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -135,7 +135,11 @@ Name: kvm_lock Type: raw_spinlock Arch: any Protects: - vm_list - - hardware virtualization enable/disable + +Name: kvm_count_lock +Type: raw_spinlock_t +Arch: any +Protects: - hardware virtualization enable/disable Comment: 'raw' because hardware enabling/disabling must be atomic /wrt migration. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7103b989ba54..6bc0481bf8e5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -71,6 +71,7 @@ MODULE_LICENSE("GPL"); */ DEFINE_RAW_SPINLOCK(kvm_lock); +static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; @@ -2683,10 +2684,10 @@ static void hardware_enable_nolock(void *junk) static void hardware_enable(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); if (kvm_usage_count) hardware_enable_nolock(NULL); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static void hardware_disable_nolock(void *junk) @@ -2701,10 +2702,10 @@ static void hardware_disable_nolock(void *junk) static void hardware_disable(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); if (kvm_usage_count) hardware_disable_nolock(NULL); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static void hardware_disable_all_nolock(void) @@ -2718,16 +2719,16 @@ static void hardware_disable_all_nolock(void) static void hardware_disable_all(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); hardware_disable_all_nolock(); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static int hardware_enable_all(void) { int r = 0; - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); kvm_usage_count++; if (kvm_usage_count == 1) { @@ -2740,7 +2741,7 @@ static int hardware_enable_all(void) } } - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); return r; } @@ -3050,7 +3051,7 @@ static int kvm_suspend(void) static void kvm_resume(void) { if (kvm_usage_count) { - WARN_ON(raw_spin_is_locked(&kvm_lock)); + WARN_ON(raw_spin_is_locked(&kvm_count_lock)); hardware_enable_nolock(NULL); } } From 9c904866d4424d18852872878a57a899e48f3838 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 25 Sep 2013 13:53:07 +0200 Subject: [PATCH 1152/1368] KVM: Convert kvm_lock back to non-raw spinlock In commit e935b8372cf8 ("KVM: Convert kvm_lock to raw_spinlock"), the kvm_lock was made a raw lock. However, the kvm mmu_shrink() function tries to grab the (non-raw) mmu_lock within the scope of the raw locked kvm_lock being held. This leads to the following: BUG: sleeping function called from invalid context at kernel/rtmutex.c:659 in_atomic(): 1, irqs_disabled(): 0, pid: 55, name: kswapd0 Preemption disabled at:[] mmu_shrink+0x5c/0x1b0 [kvm] Pid: 55, comm: kswapd0 Not tainted 3.4.34_preempt-rt Call Trace: [] __might_sleep+0xfd/0x160 [] rt_spin_lock+0x24/0x50 [] mmu_shrink+0xec/0x1b0 [kvm] [] shrink_slab+0x17d/0x3a0 [] ? mem_cgroup_iter+0x130/0x260 [] balance_pgdat+0x54a/0x730 [] ? set_pgdat_percpu_threshold+0xa7/0xd0 [] kswapd+0x18f/0x490 [] ? get_parent_ip+0x11/0x50 [] ? __init_waitqueue_head+0x50/0x50 [] ? balance_pgdat+0x730/0x730 [] kthread+0xdb/0xe0 [] ? finish_task_switch+0x52/0x100 [] kernel_thread_helper+0x4/0x10 [] ? __init_kthread_worker+0x After the previous patch, kvm_lock need not be a raw spinlock anymore, so change it back. Reported-by: Paul Gortmaker Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini (cherry picked from commit 2f303b74a62fb74983c0a66e2df353be963c527c) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/locking.txt | 2 +- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/x86.c | 8 ++++---- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 18 +++++++++--------- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index b1f5de22d090..ba035c33d01c 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -132,7 +132,7 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update(). ------------ Name: kvm_lock -Type: raw_spinlock +Type: spinlock_t Arch: any Protects: - vm_list diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 004cc87b781c..3c1877bbfe6a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4220,7 +4220,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) if (nr_to_scan == 0) goto out; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -4256,7 +4256,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) break; } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); out: return percpu_counter_read_positive(&kvm_total_used_mmu_pages); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 894003d79d22..96765c116c26 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5104,7 +5104,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) @@ -5114,7 +5114,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va send_ipi = 1; } } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -5261,12 +5261,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); atomic_set(&kvm_guest_has_master_clock, 0); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dbbd78215204..97e39fc02020 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -133,7 +133,7 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -extern raw_spinlock_t kvm_lock; +extern spinlock_t kvm_lock; extern struct list_head vm_list; struct kvm_io_range { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6bc0481bf8e5..8b47fd241a61 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -70,7 +70,7 @@ MODULE_LICENSE("GPL"); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_RAW_SPINLOCK(kvm_lock); +DEFINE_SPINLOCK(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -491,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return kvm; @@ -582,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm) struct mm_struct *mm = kvm->mm; kvm_arch_sync_events(kvm); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_del(&kvm->vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) kvm_io_bus_destroy(kvm->buses[i]); @@ -2974,10 +2974,10 @@ static int vm_stat_get(void *_offset, u64 *val) struct kvm *kvm; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) *val += *(u32 *)((void *)kvm + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } @@ -2991,12 +2991,12 @@ static int vcpu_stat_get(void *_offset, u64 *val) int i; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) *val += *(u32 *)((void *)vcpu + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } From 5c1d6aafed853c88a645ac52134e217389c2cc8a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Wed, 11 Sep 2013 18:34:22 +0530 Subject: [PATCH 1153/1368] KVM: ARM: Fix typo in comments of inject_abt() Very minor typo in comments of inject_abt() when we update fault status register for injecting prefetch abort. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit b373e492f3a3469c615c2ae218d2f723900bf981) Signed-off-by: Christoffer Dall --- arch/arm/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index bdede9e7da51..d6c005283678 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; if (is_pabt) { - /* Set DFAR and DFSR */ + /* Set IFAR and IFSR */ vcpu->arch.cp15[c6_IFAR] = addr; is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); /* Always give debug fault for now - should give guest a clue */ From f166457e087b577af2ee72ec843d43ef6653e0f8 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:05 +0530 Subject: [PATCH 1154/1368] ARM: KVM: Implement kvm_vcpu_preferred_target() function This patch implements kvm_vcpu_preferred_target() function for KVM ARM which will help us implement KVM_ARM_PREFERRED_TARGET ioctl for user space. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit 4a6fee805d5e278e4733bf933cb5b184b7a8be1f) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 1 + arch/arm/kvm/guest.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 7d22517d8071..76f3c1978442 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -154,6 +154,7 @@ struct kvm_vcpu_stat { struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); struct kvm_one_reg; diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 152d03612181..ec98209fda71 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -222,6 +222,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; From 9778336fe11f088c3ddbd12714ab88f5760a1de4 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:06 +0530 Subject: [PATCH 1155/1368] ARM64: KVM: Implement kvm_vcpu_preferred_target() function This patch implements kvm_vcpu_preferred_target() function for KVM ARM64 which will help us implement KVM_ARM_PREFERRED_TARGET ioctl for user space. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit 473bdc0e6565ebb22455657a40daa21b6b4ee16b) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/guest.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0859a4ddd1e7..4cc8c7078f39 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -151,6 +151,7 @@ struct kvm_vcpu_stat { struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); struct kvm_one_reg; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2c3ff67a8ecb..3f0731e53274 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; From df50e5da2c1c65f1db1e9186112b57a8dd0b41ae Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:07 +0530 Subject: [PATCH 1156/1368] ARM/ARM64: KVM: Implement KVM_ARM_PREFERRED_TARGET ioctl For implementing CPU=host, we need a mechanism for querying preferred VCPU target type on underlying Host. This patch implements KVM_ARM_PREFERRED_TARGET vm ioctl which returns struct kvm_vcpu_init instance containing information about preferred VCPU target type and target specific features available for it. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall (cherry picked from commit 42c4e0c77ac91505ab94284b14025e3a0865c0a5) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 13 +++++++++++++ include/uapi/linux/kvm.h | 1 + 2 files changed, 14 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9c697db2787e..cc5adb9349ef 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -797,6 +797,19 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } default: return -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index acccd08be6c7..cba62019348d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1011,6 +1011,7 @@ struct kvm_s390_ucas_mapping { /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) From 89c774beb24924963aa146784a149e47efb29d82 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 1 Oct 2013 19:58:36 +0300 Subject: [PATCH 1157/1368] Fix NULL dereference in gfn_to_hva_prot() gfn_to_memslot() can return NULL or invalid slot. We need to check slot validity before accessing it. Reviewed-by: Paolo Bonzini Signed-off-by: Gleb Natapov (cherry picked from commit a2ac07fe292ea41296049dfdbfeed203e2467ee7) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8b47fd241a61..c5bc5aef11f5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1065,10 +1065,12 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); - if (writable) + unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); + + if (!kvm_is_error_hva(hva) && writable) *writable = !memslot_is_readonly(slot); - return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); + return hva; } static int kvm_read_hva(void *data, void __user *hva, int len) From 708854902d50f40783a81a629c4131383249236d Mon Sep 17 00:00:00 2001 From: Andre Richter Date: Wed, 2 Oct 2013 12:23:26 +0200 Subject: [PATCH 1158/1368] virt/kvm/iommu.c: Add leading zeros to device's BDF notation in debug messages When KVM (de)assigns PCI(e) devices to VMs, a debug message is printed including the BDF notation of the respective device. Currently, the BDF notation does not have the commonly used leading zeros. This produces messages like "assign device 0:1:8.0", which look strange at first sight. The patch fixes this by exchanging the printk(KERN_DEBUG ...) with dev_info() and also inserts "kvm" into the debug message, so that it is obvious where the message comes from. Also reduces LoC. Acked-by: Alex Williamson Signed-off-by: Andre Richter Signed-off-by: Gleb Natapov (cherry picked from commit 29242cb5c63b1f8e12e8055ba1a6c3e0004fa86d) Signed-off-by: Christoffer Dall --- virt/kvm/iommu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 72a130bc448a..a3b14109049b 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -190,11 +190,7 @@ int kvm_assign_device(struct kvm *kvm, pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; - printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", - assigned_dev->host_segnr, - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); + dev_info(&pdev->dev, "kvm assign device\n"); return 0; out_unmap: @@ -220,11 +216,7 @@ int kvm_deassign_device(struct kvm *kvm, pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; - printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", - assigned_dev->host_segnr, - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); + dev_info(&pdev->dev, "kvm deassign device\n"); return 0; } From c1b378c34e3f2856c0ce060a8dad0877c6d6bfb9 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 26 Sep 2013 16:49:27 +0100 Subject: [PATCH 1159/1368] KVM: ARM: Fix calculation of virtual CPU ID KVM does not have a notion of multiple clusters for CPUs, just a linear array of CPUs. When using a system with cores in more than one cluster, the current method for calculating the virtual MPIDR will leak the (physical) cluster information into the virtual MPIDR. One effect of this is that Linux under KVM fails to boot multiple CPUs that aren't in the 0th cluster. This patch does away with exposing the real MPIDR fields in favour of simply using the virtual CPU number (but preserving the U bit, as before). Signed-off-by: Jonathan Austin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 1158fca401e09665c440a9fe4fd4f131ee85c13b) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc_a15.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index cf93472b9dd6..bbd4b888dbf3 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -27,14 +27,11 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { /* - * Compute guest MPIDR: - * (Even if we present only one VCPU to the guest on an SMP - * host we don't set the U bit in the MPIDR, or vice versa, as - * revealing the underlying hardware properties is likely to - * be the best choice). + * Compute guest MPIDR. No need to mess around with different clusters + * but we read the 'U' bit from the underlying hardware directly. */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) - | (vcpu->vcpu_id & MPIDR_LEVEL_MASK); + vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK) + | vcpu->vcpu_id; } #include "coproc.h" From e82866a69bd6e3ed1a1405ba19f1d5e66e5129a2 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 26 Sep 2013 16:49:26 +0100 Subject: [PATCH 1160/1368] KVM: ARM: fix the size of TTBCR_{T0SZ,T1SZ} masks The T{0,1}SZ fields of TTBCR are 3 bits wide when using the long descriptor format. Likewise, the T0SZ field of the HTCR is 3-bits. KVM currently defines TTBCR_T{0,1}SZ as 3, not 7. The T0SZ mask is used to calculate the value for the HTCR, both to pick out TTBCR.T0SZ and mask off the equivalent field in the HTCR during read-modify-write. The incorrect mask size causes the (UNKNOWN) reset value of HTCR.T0SZ to leak in to the calculated HTCR value. Linux will hang when initializing KVM if HTCR's reset value has bit 2 set (sometimes the case on A7/TC2) Fixing T0SZ allows A7 cores to boot and T1SZ is also fixed for completeness. Signed-off-by: Jonathan Austin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 5e497046f005528464f9600a4ee04f49df713596) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 64e96960de29..d556f03bca17 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -95,12 +95,12 @@ #define TTBCR_IRGN1 (3 << 24) #define TTBCR_EPD1 (1 << 23) #define TTBCR_A1 (1 << 22) -#define TTBCR_T1SZ (3 << 16) +#define TTBCR_T1SZ (7 << 16) #define TTBCR_SH0 (3 << 12) #define TTBCR_ORGN0 (3 << 10) #define TTBCR_IRGN0 (3 << 8) #define TTBCR_EPD0 (1 << 7) -#define TTBCR_T0SZ 3 +#define TTBCR_T0SZ (7 << 0) #define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) /* Hyp System Trap Register */ From 5e1ddf60b6830d405ebb026e3d9570e9236a6600 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 26 Sep 2013 16:49:28 +0100 Subject: [PATCH 1161/1368] KVM: ARM: Add support for Cortex-A7 This patch adds support for running Cortex-A7 guests on Cortex-A7 hosts. As Cortex-A7 is architecturally compatible with A15, this patch is largely just generalising existing code. Areas where 'implementation defined' behaviour is identical for A7 and A15 is moved to allow it to be used by both cores. The check to ensure that coprocessor register tables are sorted correctly is also moved in to 'common' code to avoid each new cpu doing its own check (and possibly forgetting to do so!) Signed-off-by: Jonathan Austin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e8c2d99f8277d68d28a9f99d16289712bc2aee7f) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_asm.h | 2 +- arch/arm/include/uapi/asm/kvm.h | 3 +- arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/coproc.c | 114 ++++++++++++++++++++++++++++++++ arch/arm/kvm/coproc_a15.c | 114 +------------------------------- arch/arm/kvm/coproc_a7.c | 54 +++++++++++++++ arch/arm/kvm/guest.c | 4 +- arch/arm/kvm/reset.c | 15 +++-- 8 files changed, 184 insertions(+), 124 deletions(-) create mode 100644 arch/arm/kvm/coproc_a7.c diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index a2f43ddcc300..661da11f76f4 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -39,7 +39,7 @@ #define c6_IFAR 17 /* Instruction Fault Address Register */ #define c7_PAR 18 /* Physical Address Register */ #define c7_PAR_high 19 /* PAR top 32 bits */ -#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ +#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */ #define c10_PRRR 21 /* Primary Region Remap Register */ #define c10_NMRR 22 /* Normal Memory Remap Register */ #define c12_VBAR 23 /* Vector Base Address Register */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index c1ee007523d7..c498b60c0505 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -63,7 +63,8 @@ struct kvm_regs { /* Supported Processor Types */ #define KVM_ARM_TARGET_CORTEX_A15 0 -#define KVM_ARM_NUM_TARGETS 1 +#define KVM_ARM_TARGET_CORTEX_A7 1 +#define KVM_ARM_NUM_TARGETS 2 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index d99bee4950e5..789bca9e64a7 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index db9cf692d4dd..a629f2c1d0f9 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -71,6 +71,92 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + /* + * Compute guest MPIDR. No need to mess around with different clusters + * but we read the 'U' bit from the underlying hardware directly. + */ + vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK) + | vcpu->vcpu_id; +} + +/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; + return true; +} + +/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ +static bool access_cbar(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p); + return read_zero(vcpu, p); +} + +/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ +static bool access_l2ctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; + return true; +} + +static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 l2ctlr, ncores; + + asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); + l2ctlr &= ~(3 << 24); + ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; + l2ctlr |= (ncores & 3) << 24; + + vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 actlr; + + /* ACTLR contains SMP bit: make sure you create all cpus first! */ + asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + /* Make the SMP bit consistent with the guest configuration */ + if (atomic_read(&vcpu->kvm->online_vcpus) > 1) + actlr |= 1U << 6; + else + actlr &= ~(1U << 6); + + vcpu->arch.cp15[c1_ACTLR] = actlr; +} + +/* + * TRM entries: A7:4.3.50, A15:4.3.49 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). + */ +static bool access_l2ectlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = 0; + return true; +} + /* See note at ARM ARM B1.14.4 */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, @@ -153,10 +239,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg cp15_regs[] = { + /* MPIDR: we use VMPIDR for guest access. */ + { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, + NULL, reset_mpidr, c0_MPIDR }, + /* CSSELR: swapped by interrupt.S. */ { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, NULL, reset_unknown, c0_CSSELR }, + /* ACTLR: trapped by HCR.TAC bit. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, + access_actlr, reset_actlr, c1_ACTLR }, + + /* CPACR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, + NULL, reset_val, c1_CPACR, 0x00000000 }, + /* TTBR0/TTBR1: swapped by interrupt.S. */ { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, @@ -194,6 +292,13 @@ static const struct coproc_reg cp15_regs[] = { { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw}, { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, + /* + * L2CTLR access (guest wants to know #CPUs). + */ + { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, + access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, + { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, + /* * Dummy performance monitor implementation. */ @@ -234,6 +339,9 @@ static const struct coproc_reg cp15_regs[] = { /* CNTKCTL: swapped by interrupt.S. */ { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, NULL, reset_val, c14_CNTKCTL, 0x00000000 }, + + /* The Configuration Base Address Register. */ + { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; /* Target specific emulation tables */ @@ -241,6 +349,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) { + unsigned int i; + + for (i = 1; i < table->num; i++) + BUG_ON(cmp_reg(&table->table[i-1], + &table->table[i]) >= 0); + target_tables[table->target] = table; } diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index bbd4b888dbf3..bb0cac1410cc 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -17,98 +17,12 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include -#include -#include -#include -#include #include +#include #include -static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - /* - * Compute guest MPIDR. No need to mess around with different clusters - * but we read the 'U' bit from the underlying hardware directly. - */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK) - | vcpu->vcpu_id; -} - #include "coproc.h" -/* A15 TRM 4.3.28: RO WI */ -static bool access_actlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; - return true; -} - -/* A15 TRM 4.3.60: R/O. */ -static bool access_cbar(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return write_to_read_only(vcpu, p); - return read_zero(vcpu, p); -} - -/* A15 TRM 4.3.48: R/O WI. */ -static bool access_l2ctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; - return true; -} - -static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 l2ctlr, ncores; - - asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); - l2ctlr &= ~(3 << 24); - ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; - l2ctlr |= (ncores & 3) << 24; - - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 actlr; - - /* ACTLR contains SMP bit: make sure you create all cpus first! */ - asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); - /* Make the SMP bit consistent with the guest configuration */ - if (atomic_read(&vcpu->kvm->online_vcpus) > 1) - actlr |= 1U << 6; - else - actlr &= ~(1U << 6); - - vcpu->arch.cp15[c1_ACTLR] = actlr; -} - -/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ -static bool access_l2ectlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = 0; - return true; -} - /* * A15-specific CP15 registers. * CRn denotes the primary register number, but is copied to the CRm in the @@ -118,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg a15_regs[] = { - /* MPIDR: we use VMPIDR for guest access. */ - { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, - NULL, reset_mpidr, c0_MPIDR }, - /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c1_SCTLR, 0x00C50078 }, - /* ACTLR: trapped by HCR.TAC bit. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, - access_actlr, reset_actlr, c1_ACTLR }, - /* CPACR: swapped by interrupt.S. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c1_CPACR, 0x00000000 }, - - /* - * L2CTLR access (guest wants to know #CPUs). - */ - { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, - access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, - { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, - - /* The Configuration Base Address Register. */ - { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; static struct kvm_coproc_target_table a15_target_table = { @@ -151,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = { static int __init coproc_a15_init(void) { - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(a15_regs); i++) - BUG_ON(cmp_reg(&a15_regs[i-1], - &a15_regs[i]) >= 0); - kvm_register_target_coproc_table(&a15_target_table); return 0; } diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c new file mode 100644 index 000000000000..1df767331588 --- /dev/null +++ b/arch/arm/kvm/coproc_a7.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Copyright (C) 2013 - ARM Ltd + * + * Authors: Rusty Russell + * Christoffer Dall + * Jonathan Austin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include +#include +#include + +#include "coproc.h" + +/* + * Cortex-A7 specific CP15 registers. + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. + */ +static const struct coproc_reg a7_regs[] = { + /* SCTLR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, + NULL, reset_val, c1_SCTLR, 0x00C50878 }, +}; + +static struct kvm_coproc_target_table a7_target_table = { + .target = KVM_ARM_TARGET_CORTEX_A7, + .table = a7_regs, + .num = ARRAY_SIZE(a7_regs), +}; + +static int __init coproc_a7_init(void) +{ + kvm_register_target_coproc_table(&a7_target_table); + return 0; +} +late_initcall(coproc_a7_init); diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index ec98209fda71..20f8d97904af 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; switch (part_number) { + case ARM_CPU_PART_CORTEX_A7: + return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: return KVM_ARM_TARGET_CORTEX_A15; default: @@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, { unsigned int i; - /* We can only do a cortex A15 for now. */ + /* We can only cope with guest==host and only on A15/A7 (for now). */ if (init->target != kvm_target_cpu()) return -EINVAL; diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index c02ba4af599f..d153e64d1255 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -30,16 +30,16 @@ #include /****************************************************************************** - * Cortex-A15 Reset Values + * Cortex-A15 and Cortex-A7 Reset Values */ -static const int a15_max_cpu_idx = 3; +static const int cortexa_max_cpu_idx = 3; -static struct kvm_regs a15_regs_reset = { +static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; -static const struct kvm_irq_level a15_vtimer_irq = { +static const struct kvm_irq_level cortexa_vtimer_irq = { { .irq = 27 }, .level = 1, }; @@ -62,12 +62,13 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: - if (vcpu->vcpu_id > a15_max_cpu_idx) + if (vcpu->vcpu_id > cortexa_max_cpu_idx) return -EINVAL; - reset_regs = &a15_regs_reset; + reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); - cpu_vtimer_irq = &a15_vtimer_irq; + cpu_vtimer_irq = &cortexa_vtimer_irq; break; default: return -ENODEV; From c7a9a5f3f02ff35d7ee820e98c1b9d46ab425808 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 14:22:28 -0700 Subject: [PATCH 1162/1368] KVM: Move gfn_to_index to x86 specific code The gfn_to_index function relies on huge page defines which either may not make sense on systems that don't support huge pages or are defined in an unconvenient way for other architectures. Since this is x86-specific, move the function to arch/x86/include/asm/kvm_host.h. Signed-off-by: Christoffer Dall Signed-off-by: Gleb Natapov (cherry picked from commit 6d9d41e57440e32a3400f37aa05ef7a1a09ced64) Signed-off-by: Christoffer Dall --- arch/x86/include/asm/kvm_host.h | 7 +++++++ include/linux/kvm_host.h | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3741c653767c..53db582487c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,6 +79,13 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) +static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) +{ + /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} + #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 97e39fc02020..b5267c4f5392 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -827,13 +827,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn) return gfn_to_memslot(kvm, gfn)->id; } -static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) -{ - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); -} - static inline gfn_t hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) { From 4b0745341e63f3878fcc1240d44d1ddcab068ce1 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 14:22:29 -0700 Subject: [PATCH 1163/1368] KVM: ARM: Get rid of KVM_HPAGE defines The KVM_HPAGE_DEFINES are a little artificial on ARM, since the huge page size is statically defined at compile time and there is only a single huge page size. Now when the main kvm code relying on these defines has been moved to the x86 specific part of the world, we can get rid of these. Signed-off-by: Christoffer Dall Signed-off-by: Gleb Natapov (cherry picked from commit dc6f6763dfeaf2dfec906bb78875dcea162accd9) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 76f3c1978442..8a6f6db14ee4 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -38,11 +38,6 @@ #define KVM_VCPU_MAX_FEATURES 1 -/* We don't currently support large pages. */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) - #include struct kvm_vcpu; From 1feff9299a9b7b1f137166f062772cd2044c7cbb Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 14:22:30 -0700 Subject: [PATCH 1164/1368] KVM: arm64: Get rid of KVM_HPAGE defines Now when the main kvm code relying on these defines has been moved to the x86 specific part of the world, we can get rid of these. Signed-off-by: Christoffer Dall Signed-off-by: Gleb Natapov (cherry picked from commit ef0cfe71c2b1710cd4ae747537e36c56f9a26ccf) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4cc8c7078f39..5d85a02d1231 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -36,11 +36,6 @@ #define KVM_VCPU_MAX_FEATURES 2 -/* We don't currently support large pages. */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) - struct kvm_vcpu; int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); From 0a2e6af7befb4c0336f2c789d172bbb5438aec72 Mon Sep 17 00:00:00 2001 From: chai wen Date: Mon, 14 Oct 2013 22:22:33 +0800 Subject: [PATCH 1165/1368] KVM: Drop FOLL_GET in GUP when doing async page fault Page pinning is not mandatory in kvm async page fault processing since after async page fault event is delivered to a guest it accesses page once again and does its own GUP. Drop the FOLL_GET flag in GUP in async_pf code, and do some simplifying in check/clear processing. Suggested-by: Gleb Natapov Signed-off-by: Gu zheng Signed-off-by: chai wen Signed-off-by: Gleb Natapov (cherry picked from commit f2e106692d5189303997ad7b96de8d8123aa5613) Signed-off-by: Christoffer Dall --- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- include/trace/events/kvm.h | 10 ++++------ virt/kvm/async_pf.c | 17 +++++------------ 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 96765c116c26..e4985fc604fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7118,7 +7118,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) int r; if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || - is_error_page(work->page)) + work->wakeup_all) return; r = kvm_mmu_reload(vcpu); @@ -7228,7 +7228,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct x86_exception fault; trace_kvm_async_pf_ready(work->arch.token, work->gva); - if (is_error_page(work->page)) + if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b5267c4f5392..384fb0a74924 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -175,7 +175,7 @@ struct kvm_async_pf { gva_t gva; unsigned long addr; struct kvm_arch_async_pf arch; - struct page *page; + bool wakeup_all; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 7005d1109ec9..131a0bda7aec 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready, TRACE_EVENT( kvm_async_pf_completed, - TP_PROTO(unsigned long address, struct page *page, u64 gva), - TP_ARGS(address, page, gva), + TP_PROTO(unsigned long address, u64 gva), + TP_ARGS(address, gva), TP_STRUCT__entry( __field(unsigned long, address) - __field(pfn_t, pfn) __field(u64, gva) ), TP_fast_assign( __entry->address = address; - __entry->pfn = page ? page_to_pfn(page) : 0; __entry->gva = gva; ), - TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, - __entry->address, __entry->pfn) + TP_printk("gva %#llx address %#lx", __entry->gva, + __entry->address) ); #endif diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index b197950ac4d5..8631d9c14320 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) static void async_pf_execute(struct work_struct *work) { - struct page *page = NULL; struct kvm_async_pf *apf = container_of(work, struct kvm_async_pf, work); struct mm_struct *mm = apf->mm; @@ -68,13 +67,12 @@ static void async_pf_execute(struct work_struct *work) use_mm(mm); down_read(&mm->mmap_sem); - get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); + get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); - apf->page = page; spin_unlock(&vcpu->async_pf.lock); /* @@ -82,7 +80,7 @@ static void async_pf_execute(struct work_struct *work) * this point */ - trace_kvm_async_pf_completed(addr, page, gva); + trace_kvm_async_pf_completed(addr, gva); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); @@ -112,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.done.next, typeof(*work), link); list_del(&work->link); - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } spin_unlock(&vcpu->async_pf.lock); @@ -133,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->link); spin_unlock(&vcpu->async_pf.lock); - if (work->page) - kvm_arch_async_page_ready(vcpu, work); + kvm_arch_async_page_ready(vcpu, work); kvm_arch_async_page_present(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } } @@ -163,7 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, if (!work) return 0; - work->page = NULL; + work->wakeup_all = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); @@ -203,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) if (!work) return -ENOMEM; - work->page = KVM_ERR_PTR_BAD_PAGE; + work->wakeup_all = true; INIT_LIST_HEAD(&work->queue); /* for list_del to work */ spin_lock(&vcpu->async_pf.lock); From 841372680792857d33c4c49d21be8a3cbde0490b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:00 +0530 Subject: [PATCH 1166/1368] kvm: Add struct kvm arg to memslot APIs We will use that in the later patch to find the kvm ops handler Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf (cherry picked from commit 5587027ce9d59a57aecaa190be1c8e560aaff45d) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 5 +++-- arch/ia64/kvm/kvm-ia64.c | 5 +++-- arch/mips/kvm/kvm_mips.c | 5 +++-- arch/powerpc/include/asm/kvm_ppc.h | 6 ++++-- arch/powerpc/kvm/booke.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 9 +++++---- arch/s390/kvm/kvm-s390.c | 5 +++-- arch/x86/kvm/x86.c | 5 +++-- include/linux/kvm_host.h | 5 +++-- virt/kvm/kvm_main.c | 12 ++++++------ 10 files changed, 35 insertions(+), 26 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index cc5adb9349ef..e312e4a53f8d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bdfd8789b376..985bf80c622e 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index a7b044536de4..73b34827826c 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return -ENOIOCTLCMD; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a5287fe03d77..e2dd05c81bc6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -143,9 +143,11 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void); extern void kvm_release_hpt(struct kvmppc_linear_info *li); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); -extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +extern void kvmppc_core_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +extern int kvmppc_core_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1a1b51189773..0a91f47e264b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1592,12 +1592,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ae63ae4a1a5f..750835a4ef70 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -409,15 +409,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - kvmppc_core_free_memslot(free, dont); + kvmppc_core_free_memslot(kvm, free, dont); } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { - return kvmppc_core_create_memslot(slot, npages); + return kvmppc_core_create_memslot(kvm, slot, npages); } void kvm_arch_memslots_updated(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e515b2d4a947..54612d0e79dd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -971,12 +971,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e4985fc604fe..68b139fe0dbd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6897,7 +6897,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { int i; @@ -6918,7 +6918,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, } } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { int i; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 384fb0a74924..4de0a8fedf3f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -493,9 +493,10 @@ int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages); void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c5bc5aef11f5..c777a6e582f0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -541,13 +541,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) /* * Free any memory in @free but not in @dont. */ -static void kvm_free_physmem_slot(struct kvm_memory_slot *free, +static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); - kvm_arch_free_memslot(free, dont); + kvm_arch_free_memslot(kvm, free, dont); free->npages = 0; } @@ -558,7 +558,7 @@ void kvm_free_physmem(struct kvm *kvm) struct kvm_memory_slot *memslot; kvm_for_each_memslot(memslot, slots) - kvm_free_physmem_slot(memslot, NULL); + kvm_free_physmem_slot(kvm, memslot, NULL); kfree(kvm->memslots); } @@ -822,7 +822,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (change == KVM_MR_CREATE) { new.userspace_addr = mem->userspace_addr; - if (kvm_arch_create_memslot(&new, npages)) + if (kvm_arch_create_memslot(kvm, &new, npages)) goto out_free; } @@ -898,7 +898,7 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, &old, change); - kvm_free_physmem_slot(&old, &new); + kvm_free_physmem_slot(kvm, &old, &new); kfree(old_memslots); return 0; @@ -906,7 +906,7 @@ int __kvm_set_memory_region(struct kvm *kvm, out_slots: kfree(slots); out_free: - kvm_free_physmem_slot(&new, &old); + kvm_free_physmem_slot(kvm, &new, &old); out: return r; } From 46a037fd36656b8283c282319876dd8e9c4f1087 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Oct 2013 18:38:13 +0100 Subject: [PATCH 1167/1368] ARM: KVM: Yield CPU when vcpu executes a WFE On an (even slightly) oversubscribed system, spinlocks are quickly becoming a bottleneck, as some vcpus are spinning, waiting for a lock to be released, while the vcpu holding the lock may not be running at all. This creates contention, and the observed slowdown is 40x for hackbench. No, this isn't a typo. The solution is to trap blocking WFEs and tell KVM that we're now spinning. This ensures that other vpus will get a scheduling boost, allowing the lock to be released more quickly. Also, using CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT slightly improves the performance when the VM is severely overcommited. Quick test to estimate the performance: hackbench 1 process 1000 2xA15 host (baseline): 1.843s 2xA15 guest w/o patch: 2.083s 4xA15 guest w/o patch: 80.212s 8xA15 guest w/o patch: Could not be bothered to find out 2xA15 guest w/ patch: 2.102s 4xA15 guest w/ patch: 3.205s 8xA15 guest w/ patch: 6.887s So we go from a 40x degradation to 1.5x in the 2x overcommit case, which is vaguely more acceptable. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 58d5ec8f8ee318b26b29207874fbaee626973952) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 4 +++- arch/arm/kvm/Kconfig | 1 + arch/arm/kvm/handle_exit.c | 6 +++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index d556f03bca17..fe395b7b1ce2 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -67,7 +67,7 @@ */ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_SWIO | HCR_TIDCP) + HCR_TWE | HCR_SWIO | HCR_TIDCP) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* System Control Register (SCTLR) bits */ @@ -208,6 +208,8 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_WFI_IS_WFE (1U << 0) + #define HSR_HVC_IMM_MASK ((1UL << 16) - 1) #define HSR_DABT_S1PTW (1U << 7) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index ebf5015508b5..466bd299b1a8 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -20,6 +20,7 @@ config KVM bool "Kernel-based Virtual Machine (KVM) support" select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST depends on ARM_VIRT_EXT && ARM_LPAE diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index df4c82d47ad7..c4c496f7619c 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -84,7 +84,11 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) { trace_kvm_wfi(*vcpu_pc(vcpu)); - kvm_vcpu_block(vcpu); + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + return 1; } From 1391c263d202d6ce8b74f361cd3607c69ae26e77 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 15 Oct 2013 18:10:42 -0700 Subject: [PATCH 1168/1368] KVM: ARM: Update comments for kvm_handle_wfi Update comments to reflect what is really going on and add the TWE bit to the comments in kvm_arm.h. Also renames the function to kvm_handle_wfx like is done on arm64 for consistency and uber-correctness. Signed-off-by: Christoffer Dall (cherry picked from commit 86ed81aa2e1ce05a4e7f0819f0dfc34e8d8fb910) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/kvm/handle_exit.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index fe395b7b1ce2..1d3153c7eb41 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -57,6 +57,7 @@ * TSC: Trap SMC * TSW: Trap cache operations by set/way * TWI: Trap WFI + * TWE: Trap WFE * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain * FB: Force broadcast of all maintainance operations diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index c4c496f7619c..a92079011a83 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -73,15 +73,17 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) } /** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests * @vcpu: the vcpu pointer * @run: the kvm_run structure pointer * - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will - * halt execution of world-switches and schedule other host processes until - * there is an incoming IRQ or FIQ to the VM. + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. */ -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { trace_kvm_wfi(*vcpu_pc(vcpu)); if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) @@ -93,7 +95,7 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) } static exit_handle_fn arm_exit_handlers[] = { - [HSR_EC_WFI] = kvm_handle_wfi, + [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, [HSR_EC_CP14_MR] = kvm_handle_cp14_access, From cd709bb9fb35e687107761d5feed331d4c2c48e5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 1 Nov 2012 17:14:45 +0100 Subject: [PATCH 1169/1368] KVM: ARM: Support hugetlbfs backed huge pages Support huge pages in KVM/ARM and KVM/ARM64. The pud_huge checking on the unmap path may feel a bit silly as the pud_huge check is always defined to false, but the compiler should be smart about this. Note: This deals only with VMAs marked as huge which are allocated by users through hugetlbfs only. Transparent huge pages can only be detected by looking at the underlying pages (or the page tables themselves) and this patch so far simply maps these on a page-by-page level in the Stage-2 page tables. Cc: Catalin Marinas Cc: Russell King Acked-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit ad361f093c1e31d0b43946210a32ab4ff5c49850) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 17 ++- arch/arm/include/asm/pgtable-3level.h | 2 + arch/arm/kvm/mmu.c | 169 +++++++++++++++++++------ arch/arm64/include/asm/kvm_mmu.h | 12 +- arch/arm64/include/asm/pgtable-hwdef.h | 2 + 5 files changed, 158 insertions(+), 44 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 9b28c41f4ba9..77de4a41cc50 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -62,6 +62,12 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); +static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) +{ + *pmd = new_pmd; + flush_pmd_entry(pmd); +} + static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) { *pte = new_pte; @@ -103,9 +109,15 @@ static inline void kvm_set_s2pte_writable(pte_t *pte) pte_val(*pte) |= L_PTE_S2_RDWR; } +static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +{ + pmd_val(*pmd) |= L_PMD_S2_RDWR; +} + struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, + unsigned long size) { /* * If we are going to insert an instruction page and the icache is @@ -120,8 +132,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) * need any kind of flushing (DDI 0406C.b - Page B3-1392). */ if (icache_is_pipt()) { - unsigned long hva = gfn_to_hva(kvm, gfn); - __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); + __cpuc_coherent_user_range(hva, hva + size); } else if (!icache_is_vivt_asid_tagged()) { /* any kind of VIPT cache */ __flush_icache_all(); diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 86b8fe398b95..ad52938bb264 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -113,6 +113,8 @@ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ + /* * Hyp-mode PL2 PTE definitions for LPAE. */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b0de86b56c13..745d8b1630cc 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define kvm_pmd_huge(_x) (pmd_huge(_x)) + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { /* @@ -93,19 +96,29 @@ static bool page_empty(void *ptr) static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pmd_free(NULL, pmd_table); + } put_page(virt_to_page(pud)); } static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); + } put_page(virt_to_page(pmd)); } @@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, continue; } + if (pud_huge(*pud)) { + /* + * If we are dealing with a huge pud, just clear it and + * move on. + */ + clear_pud_entry(kvm, pud, addr); + addr = pud_addr_end(addr, end); + continue; + } + pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { addr = pmd_addr_end(addr, end); continue; } - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - next = addr + PAGE_SIZE; + if (!kvm_pmd_huge(*pmd)) { + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(kvm, pte, addr); + next = addr + PAGE_SIZE; + } - /* If we emptied the pte, walk back up the ladder */ - if (page_empty(pte)) { + /* + * If the pmd entry is to be cleared, walk back up the ladder + */ + if (kvm_pmd_huge(*pmd) || page_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); next = pmd_addr_end(addr, end); if (page_empty(pmd) && !page_empty(pud)) { @@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm) kvm->arch.pgd = NULL; } - -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, - phys_addr_t addr, const pte_t *new_pte, bool iomap) +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, old_pte; - /* Create 2nd stage page table mapping - Level 1 */ pgd = kvm->arch.pgd + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { if (!cache) - return 0; /* ignore calls from kvm_set_spte_hva */ + return NULL; pmd = mmu_memory_cache_alloc(cache); pud_populate(NULL, pud, pmd); get_page(virt_to_page(pud)); } - pmd = pmd_offset(pud, addr); + return pmd_offset(pud, addr); +} - /* Create 2nd stage page table mapping - Level 2 */ +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + /* + * Mapping in huge pages should only happen through a fault. If a + * page is merged into a transparent huge page, the individual + * subpages of that huge page should be unmapped through MMU + * notifiers before we get here. + * + * Merging of CompoundPages is not supported; they should become + * splitting first, unmapped, merged, and mapped back in on-demand. + */ + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); + + old_pmd = *pmd; + kvm_set_pmd(pmd, *new_pmd); + if (pmd_present(old_pmd)) + kvm_tlb_flush_vmid_ipa(kvm, addr); + else + get_page(virt_to_page(pmd)); + return 0; +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, bool iomap) +{ + pmd_t *pmd; + pte_t *pte, old_pte; + + /* Create stage-2 page table mapping - Level 1 */ + pmd = stage2_get_pmd(kvm, cache, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ @@ -508,15 +577,18 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - gfn_t gfn, struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long fault_status) { - pte_t new_pte; - pfn_t pfn; int ret; - bool write_fault, writable; + bool write_fault, writable, hugetlb = false; unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); + struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + pfn_t pfn; write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { @@ -524,6 +596,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (is_vm_hugetlb_page(vma)) { + hugetlb = true; + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } + up_read(¤t->mm->mmap_sem); + /* We need minimum second+third level pages */ ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); if (ret) @@ -541,26 +622,38 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ smp_rmb(); - pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (is_error_pfn(pfn)) return -EFAULT; - new_pte = pfn_pte(pfn, PAGE_S2); - coherent_icache_guest_page(vcpu->kvm, gfn); - - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; - if (writable) { - kvm_set_s2pte_writable(&new_pte); - kvm_set_pfn_dirty(pfn); + + if (hugetlb) { + pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); + new_pmd = pmd_mkhuge(new_pmd); + if (writable) { + kvm_set_s2pmd_writable(&new_pmd); + kvm_set_pfn_dirty(pfn); + } + coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE); + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = pfn_pte(pfn, PAGE_S2); + if (writable) { + kvm_set_s2pte_writable(&new_pte); + kvm_set_pfn_dirty(pfn); + } + coherent_icache_guest_page(kvm, hva, PAGE_SIZE); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); } - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); + out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); + spin_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return 0; + return ret; } /** @@ -629,7 +722,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) memslot = gfn_to_memslot(vcpu->kvm, gfn); - ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); if (ret == 0) ret = 1; out_unlock: diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index efe609c6a3c9..680f74e67497 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -91,6 +91,7 @@ int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) +#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) static inline bool kvm_is_write_fault(unsigned long esr) { @@ -116,13 +117,18 @@ static inline void kvm_set_s2pte_writable(pte_t *pte) pte_val(*pte) |= PTE_S2_RDWR; } +static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +{ + pmd_val(*pmd) |= PMD_S2_RDWR; +} + struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, + unsigned long size) { if (!icache_is_aliasing()) { /* PIPT */ - unsigned long hva = gfn_to_hva(kvm, gfn); - flush_icache_range(hva, hva + PAGE_SIZE); + flush_icache_range(hva, hva + size); } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ /* any kind of VIPT cache */ __flush_icache_all(); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 2e9d83673ef6..b1d2e26c3c88 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -85,6 +85,8 @@ #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ + /* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ From ad8b3ca795b8534c292e8916ffdfc36b4bd09940 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 15:32:01 -0700 Subject: [PATCH 1170/1368] KVM: ARM: Transparent huge page (THP) support Support transparent huge pages in KVM/ARM and KVM/ARM64. The transparent_hugepage_adjust is not very pretty, but this is also how it's solved on x86 and seems to be simply an artifact on how THPs behave. This should eventually be shared across architectures if possible, but that can always be changed down the road. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 9b5fdb9781f74fb15827e465bfb5aa63211953c8) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 58 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 745d8b1630cc..371958370de4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; -#define kvm_pmd_huge(_x) (pmd_huge(_x)) +#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { @@ -576,12 +576,53 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, return ret; } +static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) +{ + pfn_t pfn = *pfnp; + gfn_t gfn = *ipap >> PAGE_SHIFT; + + if (PageTransCompound(pfn_to_page(pfn))) { + unsigned long mask; + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + mask = PTRS_PER_PMD - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + + return true; + } + + return false; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long fault_status) { int ret; - bool write_fault, writable, hugetlb = false; + bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; gfn_t gfn = fault_ipa >> PAGE_SHIFT; unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); @@ -602,6 +643,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_vm_hugetlb_page(vma)) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } else { + /* + * Pages belonging to VMAs not aligned to the PMD mapping + * granularity cannot be mapped using block descriptors even + * if the pages belong to a THP for the process, because the + * stage-2 block descriptor will cover more than a single THP + * and we loose atomicity for unmapping, updates, and splits + * of the THP or other pages in the stage-2 block range. + */ + if (vma->vm_start & ~PMD_MASK) + force_pte = true; } up_read(¤t->mm->mmap_sem); @@ -629,6 +681,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, spin_lock(&kvm->mmu_lock); if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; + if (!hugetlb && !force_pte) + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); if (hugetlb) { pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); From c0acda6fd7c83ca3cc8c5892247ece8c9651c9f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Oct 2013 18:19:04 +0100 Subject: [PATCH 1171/1368] ARM: KVM: Fix MPIDR computing to support virtual clusters In order to be able to support more than 4 A7 or A15 CPUs, we need to fix the MPIDR computing to reflect the fact that both A15 and A7 can only exist in clusters of at most 4 CPUs. Fix the MPIDR computing to allow virtual clusters to be exposed to the guest. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 2d1d841bd44e24b58a3d3cc4fa793670aaa38fbf) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index a629f2c1d0f9..631e6bd0e05f 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -74,11 +74,13 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { /* - * Compute guest MPIDR. No need to mess around with different clusters - * but we read the 'U' bit from the underlying hardware directly. + * Compute guest MPIDR. We build a virtual cluster out of the + * vcpu_id, but we read the 'U' bit from the underlying + * hardware directly. */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK) - | vcpu->vcpu_id; + vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | + ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | + (vcpu->vcpu_id & 3)); } /* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ From 8c02aa50010946b98e5077e5e05b5d99e6286ab1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Oct 2013 18:19:05 +0100 Subject: [PATCH 1172/1368] ARM: KVM: fix L2CTLR to be per-cluster The L2CTLR register contains the number of CPUs in this cluster. Make sure the register content is actually relevant to the vcpu that is being configured by computing the number of cores that are part of its cluster. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 9cbb6d969cb6561de45d917b8bb9281cb374bb35) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 631e6bd0e05f..78c0885d6501 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -124,6 +124,10 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); l2ctlr &= ~(3 << 24); ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; + /* How many cores in the current cluster and the next ones */ + ncores -= (vcpu->vcpu_id & ~3); + /* Cap it to the maximum number of cores in a single cluster */ + ncores = min(ncores, 3U); l2ctlr |= (ncores & 3) << 24; vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; From c93a79267ff11590dd7835c6622bd9f29ef43e73 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Oct 2013 18:19:06 +0100 Subject: [PATCH 1173/1368] ARM: KVM: drop limitation to 4 CPU VMs Now that the KVM/arm code knows about affinity, remove the hard limit of 4 vcpus per VM. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 7999b4d18211bcfb40e3574cf75e94518e9fa2c6) Signed-off-by: Christoffer Dall --- arch/arm/kvm/reset.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index d153e64d1255..f558c073c023 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -33,8 +33,6 @@ * Cortex-A15 and Cortex-A7 Reset Values */ -static const int cortexa_max_cpu_idx = 3; - static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; @@ -64,8 +62,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: - if (vcpu->vcpu_id > cortexa_max_cpu_idx) - return -EINVAL; reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); cpu_vtimer_irq = &cortexa_vtimer_irq; From 3d6b7ab3028ceacadd0a29b4757a788e24987d10 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Oct 2013 18:19:03 +0100 Subject: [PATCH 1174/1368] arm/arm64: KVM: PSCI: use MPIDR to identify a target CPU The KVM PSCI code blindly assumes that vcpu_id and MPIDR are the same thing. This is true when vcpus are organized as a flat topology, but is wrong when trying to emulate any other topology (such as A15 clusters). Change the KVM PSCI CPU_ON code to look at the MPIDR instead of the vcpu_id to pick a target CPU. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 79c648806f9034abf54332b78043bb242189d953) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/kvm/psci.c | 17 +++++++++++++---- arch/arm64/include/asm/kvm_emulate.h | 5 +++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index a464e8d7b6c5..708e4d8a647f 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -157,4 +157,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; } +static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.cp15[c0_MPIDR]; +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 86a693a02ba3..311263124acf 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long mpidr; phys_addr_t target_pc; + int i; cpu_id = *vcpu_reg(source_vcpu, 1); if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); - if (cpu_id >= atomic_read(&kvm->online_vcpus)) + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { + vcpu = tmp; + break; + } + } + + if (!vcpu) return KVM_PSCI_RET_INVAL; target_pc = *vcpu_reg(source_vcpu, 2); - vcpu = kvm_get_vcpu(kvm, cpu_id); - wq = kvm_arch_vcpu_wq(vcpu); if (!waitqueue_active(wq)) return KVM_PSCI_RET_INVAL; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index eec073875218..6df93cdc652b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -177,4 +177,9 @@ static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; } +static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +{ + return vcpu_sys_reg(vcpu, MPIDR_EL1); +} + #endif /* __ARM64_KVM_EMULATE_H__ */ From b695de83412eb8ff7c77b215fa24bca5b1871618 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 24 Oct 2013 09:56:39 +0800 Subject: [PATCH 1175/1368] KVM: Mapping IOMMU pages after updating memslot In kvm_iommu_map_pages(), we need to know the page size via call kvm_host_page_size(). And it will check whether the target slot is valid before return the right page size. Currently, we will map the iommu pages when creating a new slot. But we call kvm_iommu_map_pages() during preparing the new slot. At that time, the new slot is not visible by domain(still in preparing). So we cannot get the right page size from kvm_host_page_size() and this will break the IOMMU super page logic. The solution is to map the iommu pages after we insert the new slot into domain. Signed-off-by: Yang Zhang Tested-by: Patrick Lu Signed-off-by: Paolo Bonzini (cherry picked from commit e0230e1327fb862c9b6cde24ae62d55f9db62c9b) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c777a6e582f0..340c97cf55b6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -873,21 +873,6 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - /* - * IOMMU mapping: New slots need to be mapped. Old slots need to be - * un-mapped and re-mapped if their base changes. Since base change - * unmapping is handled above with slot deletion, mapping alone is - * needed here. Anything else the iommu might care about for existing - * slots (size changes, userspace addr changes and read-only flag - * changes) is disallowed above, so any other attribute changes getting - * here can be skipped. - */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { - r = kvm_iommu_map_pages(kvm, &new); - if (r) - goto out_slots; - } - /* actual memory is freed via old in kvm_free_physmem_slot below */ if (change == KVM_MR_DELETE) { new.dirty_bitmap = NULL; @@ -901,6 +886,20 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_free_physmem_slot(kvm, &old, &new); kfree(old_memslots); + /* + * IOMMU mapping: New slots need to be mapped. Old slots need to be + * un-mapped and re-mapped if their base changes. Since base change + * unmapping is handled above with slot deletion, mapping alone is + * needed here. Anything else the iommu might care about for existing + * slots (size changes, userspace addr changes and read-only flag + * changes) is disallowed above, so any other attribute changes getting + * here can be skipped. + */ + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + r = kvm_iommu_map_pages(kvm, &new); + return r; + } + return 0; out_slots: From c0cdef185a6b5c8f3c5e6b923e95774c36764011 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Aug 2013 11:41:13 +0100 Subject: [PATCH 1176/1368] arm64: KVM: Yield CPU when vcpu executes a WFE On an (even slightly) oversubscribed system, spinlocks are quickly becoming a bottleneck, as some vcpus are spinning, waiting for a lock to be released, while the vcpu holding the lock may not be running at all. The solution is to trap blocking WFEs and tell KVM that we're now spinning. This ensures that other vpus will get a scheduling boost, allowing the lock to be released more quickly. Also, using CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT slightly improves the performance when the VM is severely overcommited. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit d241aac798eb042e605f78c31a4122e583b2cd13) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 8 ++++++-- arch/arm64/kvm/Kconfig | 1 + arch/arm64/kvm/handle_exit.c | 18 +++++++++++++----- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a5f28e2720c7..c98ef4771c73 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -63,6 +63,7 @@ * TAC: Trap ACTLR * TSC: Trap SMC * TSW: Trap cache operations by set/way + * TWE: Trap WFE * TWI: Trap WFI * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain @@ -72,8 +73,9 @@ * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate */ -#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ - HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ +#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ + HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) @@ -242,4 +244,6 @@ #define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 +#define ESR_EL2_EC_WFI_ISS_WFE (1 << 0) + #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 21e90820bd23..4480ab339a00 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST select KVM_ARM_VGIC diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 9beaca033437..8da56067c304 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -47,21 +47,29 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) } /** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event + * instruction executed by a guest + * * @vcpu: the vcpu pointer * - * Simply call kvm_vcpu_block(), which will halt execution of + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of * world-switches and schedule other host processes until there is an * incoming IRQ or FIQ to the VM. */ -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_vcpu_block(vcpu); + if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + return 1; } static exit_handle_fn arm_exit_handlers[] = { - [ESR_EL2_EC_WFI] = kvm_handle_wfi, + [ESR_EL2_EC_WFI] = kvm_handle_wfx, [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, From e845f9d367f2ac8197a142fd29d1baa8fcc4dd75 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 30 Oct 2013 12:12:13 +0100 Subject: [PATCH 1177/1368] KVM: use a more sensible error number when debugfs directory creation fails I don't know if this was due to cut and paste, or somebody was really using a D20 to pick the error code for kvm_init_debugfs as suggested by Linus (EFAULT is 14, so the possibility cannot be entirely ruled out). In any case, this patch fixes it. Reported-by: Tim Gardner Signed-off-by: Paolo Bonzini (cherry picked from commit 0c8eb04a6241da28deb108181213b791c378123b) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 340c97cf55b6..d6b7d797cb16 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3010,7 +3010,7 @@ static const struct file_operations *stat_fops[] = { static int kvm_init_debug(void) { - int r = -EFAULT; + int r = -EEXIST; struct kvm_stats_debugfs_item *p; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); From b4fe3057a0c543ed42aa2a69a6ab9a5312571c34 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 22 Sep 2013 16:44:50 +0200 Subject: [PATCH 1178/1368] kvm: Add KVM_GET_EMULATED_CPUID Add a kvm ioctl which states which system functionality kvm emulates. The format used is that of CPUID and we return the corresponding CPUID bits set for which we do emulate functionality. Make sure ->padding is being passed on clean from userspace so that we can use it for something in the future, after the ioctl gets cast in stone. s/kvm_dev_ioctl_get_supported_cpuid/kvm_dev_ioctl_get_cpuid/ while at it. Signed-off-by: Borislav Petkov Signed-off-by: Paolo Bonzini (cherry picked from commit 9c15bb1d0a8411f9bb3395d21d5309bde7da0c1c) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 77 +++++++++++++++++++++++++++++-- arch/x86/include/uapi/asm/kvm.h | 6 +-- arch/x86/kvm/cpuid.c | 57 ++++++++++++++++++++--- arch/x86/kvm/cpuid.h | 5 +- arch/x86/kvm/x86.c | 9 ++-- include/uapi/linux/kvm.h | 2 + 6 files changed, 139 insertions(+), 17 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 9bfadeb8be31..d196ebe8956e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1122,9 +1122,9 @@ struct kvm_cpuid2 { struct kvm_cpuid_entry2 entries[0]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) struct kvm_cpuid_entry2 { __u32 function; @@ -2661,6 +2661,77 @@ and usually define the validity of a groups of registers. (e.g. one bit }; +4.81 KVM_GET_EMULATED_CPUID + +Capability: KVM_CAP_EXT_EMUL_CPUID +Architectures: x86 +Type: system ioctl +Parameters: struct kvm_cpuid2 (in/out) +Returns: 0 on success, -1 on error + +struct kvm_cpuid2 { + __u32 nent; + __u32 flags; + struct kvm_cpuid_entry2 entries[0]; +}; + +The member 'flags' is used for passing flags from userspace. + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +This ioctl returns x86 cpuid features which are emulated by +kvm.Userspace can use the information returned by this ioctl to query +which features are emulated by kvm instead of being present natively. + +Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 +structure with the 'nent' field indicating the number of entries in +the variable-size array 'entries'. If the number of entries is too low +to describe the cpu capabilities, an error (E2BIG) is returned. If the +number is too high, the 'nent' field is adjusted and an error (ENOMEM) +is returned. If the number is just right, the 'nent' field is adjusted +to the number of valid entries in the 'entries' array, which is then +filled. + +The entries returned are the set CPUID bits of the respective features +which kvm emulates, as returned by the CPUID instruction, with unknown +or unsupported feature bits cleared. + +Features like x2apic, for example, may not be present in the host cpu +but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be +emulated efficiently and thus not included here. + +The fields in each entry are defined as follows: + + function: the eax value used to obtain the entry + index: the ecx value used to obtain the entry (for entries that are + affected by ecx) + flags: an OR of zero or more of the following: + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: + if the index field is valid + KVM_CPUID_FLAG_STATEFUL_FUNC: + if cpuid for this function returns different values for successive + invocations; there will be several entries with the same function, + all with this flag set + KVM_CPUID_FLAG_STATE_READ_NEXT: + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is + the first entry to be read by a cpu + eax, ebx, ecx, edx: the values returned by the cpuid instruction for + this function/index combination + + 6. Capabilities that can be enabled ----------------------------------- diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5d9a3033b3d7..d3a87780c70b 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 { __u32 padding[3]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) /* for KVM_SET_CPUID2 */ struct kvm_cpuid2 { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cbf..89d288237b9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -187,8 +187,14 @@ static bool supported_xcr0_bit(unsigned bit) #define F(x) bit(X86_FEATURE_##x) -static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index, int *nent, int maxnent) +static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, + u32 func, u32 index, int *nent, int maxnent) +{ + return 0; +} + +static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index, int *nent, int maxnent) { int r; unsigned f_nx = is_efer_nx() ? F(NX) : 0; @@ -480,6 +486,15 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, return r; } +static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, + u32 idx, int *nent, int maxnent, unsigned int type) +{ + if (type == KVM_GET_EMULATED_CPUID) + return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); + + return __do_cpuid_ent(entry, func, idx, nent, maxnent); +} + #undef F struct kvm_cpuid_param { @@ -494,8 +509,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param) return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; } -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) +static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, + __u32 num_entries, unsigned int ioctl_type) +{ + int i; + + if (ioctl_type != KVM_GET_EMULATED_CPUID) + return false; + + /* + * We want to make sure that ->padding is being passed clean from + * userspace in case we want to use it for something in the future. + * + * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we + * have to give ourselves satisfied only with the emulated side. /me + * sheds a tear. + */ + for (i = 0; i < num_entries; i++) { + if (entries[i].padding[0] || + entries[i].padding[1] || + entries[i].padding[2]) + return true; + } + return false; +} + +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG, i; @@ -512,6 +553,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, goto out; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) cpuid->nent = KVM_MAX_CPUID_ENTRIES; + + if (sanity_check_entries(entries, cpuid->nent, type)) + return -EINVAL; + r = -ENOMEM; cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) @@ -525,7 +570,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, continue; r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; @@ -536,7 +581,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b7fd07984888..f1e4895174b2 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -6,8 +6,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries); +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type); int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 68b139fe0dbd..5276618579d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2510,6 +2510,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_EXT_EMUL_CPUID: case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: @@ -2619,15 +2620,17 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { + case KVM_GET_SUPPORTED_CPUID: + case KVM_GET_EMULATED_CPUID: { struct kvm_cpuid2 __user *cpuid_arg = argp; struct kvm_cpuid2 cpuid; r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; - r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + + r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, + ioctl); if (r) goto out; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cba62019348d..3d365d191145 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -541,6 +541,7 @@ struct kvm_ppc_smmu_info { #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) /* * Extension capability list. @@ -667,6 +668,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_RTAS 91 #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 +#define KVM_CAP_EXT_EMUL_CPUID 95 #ifdef KVM_CAP_IRQ_ROUTING From a7dc5f55357702cc04773cd9e92a853767209d7d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:17 -0600 Subject: [PATCH 1179/1368] kvm: Add VFIO device So far we've succeeded at making KVM and VFIO mostly unaware of each other, but areas are cropping up where a connection beyond eventfds and irqfds needs to be made. This patch introduces a KVM-VFIO device that is meant to be a gateway for such interaction. The user creates the device and can add and remove VFIO groups to it via file descriptors. When a group is added, KVM verifies the group is valid and gets a reference to it via the VFIO external user interface. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini (cherry picked from commit ec53500fae421e07c5d035918ca454a429732ef4) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/devices/vfio.txt | 22 +++ arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/Makefile | 2 +- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 4 + virt/kvm/Kconfig | 3 + virt/kvm/kvm_main.c | 5 + virt/kvm/vfio.c | 220 +++++++++++++++++++++ 8 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 Documentation/virtual/kvm/devices/vfio.txt create mode 100644 virt/kvm/vfio.c diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt new file mode 100644 index 000000000000..ef51740c67ca --- /dev/null +++ b/Documentation/virtual/kvm/devices/vfio.txt @@ -0,0 +1,22 @@ +VFIO virtual device +=================== + +Device types supported: + KVM_DEV_TYPE_VFIO + +Only one VFIO instance may be created per VM. The created device +tracks VFIO groups in use by the VM and features of those groups +important to the correctness and acceleration of the VM. As groups +are enabled and disabled for use by the VM, KVM should be updated +about their presence. When registered with KVM, a reference to the +VFIO-group is held by KVM. + +Groups: + KVM_DEV_VFIO_GROUP + +KVM_DEV_VFIO_GROUP attributes: + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking + +For each, kvm_device_attr.addr points to an int32_t file descriptor +for the VFIO group. diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a47a3e54b964..b89c5db2b832 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_VFIO ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index bf4fb04d0112..25d22b2d6509 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -9,7 +9,7 @@ KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ - $(KVM)/eventfd.o $(KVM)/irqchip.o + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4de0a8fedf3f..214fc2d84366 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1045,6 +1045,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3d365d191145..d5b8501cebfd 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -844,6 +844,10 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_VFIO 4 +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 /* * ioctls for VM fds diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 779262f59e25..fbe1a48bd629 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -27,3 +27,6 @@ config HAVE_KVM_MSI config HAVE_KVM_CPU_RELAX_INTERCEPT bool + +config KVM_VFIO + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d6b7d797cb16..652d682b1d55 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2270,6 +2270,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_XICS: ops = &kvm_xics_ops; break; +#endif +#ifdef CONFIG_KVM_VFIO + case KVM_DEV_TYPE_VFIO: + ops = &kvm_vfio_ops; + break; #endif default: return -ENODEV; diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c new file mode 100644 index 000000000000..597c258245ea --- /dev/null +++ b/virt/kvm/vfio.c @@ -0,0 +1,220 @@ +/* + * VFIO-KVM bridge pseudo device + * + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct kvm_vfio_group { + struct list_head node; + struct vfio_group *vfio_group; +}; + +struct kvm_vfio { + struct list_head group_list; + struct mutex lock; +}; + +static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) +{ + struct vfio_group *vfio_group; + struct vfio_group *(*fn)(struct file *); + + fn = symbol_get(vfio_group_get_external_user); + if (!fn) + return ERR_PTR(-EINVAL); + + vfio_group = fn(filep); + + symbol_put(vfio_group_get_external_user); + + return vfio_group; +} + +static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) +{ + void (*fn)(struct vfio_group *); + + fn = symbol_get(vfio_group_put_external_user); + if (!fn) + return; + + fn(vfio_group); + + symbol_put(vfio_group_put_external_user); +} + +static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) +{ + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + void __user *argp = (void __user *)arg; + struct fd f; + int32_t fd; + int ret; + + switch (attr) { + case KVM_DEV_VFIO_GROUP_ADD: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group == vfio_group) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -EEXIST; + } + } + + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); + if (!kvg) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -ENOMEM; + } + + list_add_tail(&kvg->node, &kv->group_list); + kvg->vfio_group = vfio_group; + + mutex_unlock(&kv->lock); + + return 0; + + case KVM_DEV_VFIO_GROUP_DEL: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + list_del(&kvg->node); + kvm_vfio_group_put_external_user(kvg->vfio_group); + kfree(kvg); + ret = 0; + break; + } + + mutex_unlock(&kv->lock); + + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } + + return -ENXIO; +} + +static int kvm_vfio_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + return kvm_vfio_set_group(dev, attr->attr, attr->addr); + } + + return -ENXIO; +} + +static int kvm_vfio_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + switch (attr->attr) { + case KVM_DEV_VFIO_GROUP_ADD: + case KVM_DEV_VFIO_GROUP_DEL: + return 0; + } + + break; + } + + return -ENXIO; +} + +static void kvm_vfio_destroy(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + struct kvm_vfio_group *kvg, *tmp; + + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { + kvm_vfio_group_put_external_user(kvg->vfio_group); + list_del(&kvg->node); + kfree(kvg); + } + + kfree(kv); + kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int kvm_vfio_create(struct kvm_device *dev, u32 type) +{ + struct kvm_device *tmp; + struct kvm_vfio *kv; + + /* Only one VFIO "device" per VM */ + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) + if (tmp->ops == &kvm_vfio_ops) + return -EBUSY; + + kv = kzalloc(sizeof(*kv), GFP_KERNEL); + if (!kv) + return -ENOMEM; + + INIT_LIST_HEAD(&kv->group_list); + mutex_init(&kv->lock); + + dev->private = kv; + + return 0; +} + +struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; From 7f17a13bdd7902c3e4d55c273820e3099c1e1d33 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 30 Oct 2013 21:43:01 +0200 Subject: [PATCH 1180/1368] kvm_host: typo fix fix up typo in comment. Signed-off-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini (cherry picked from commit 81e87e26796782e014fd1f2bb9cd8fb6ce4021a8) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 214fc2d84366..6d24a2671d29 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -775,7 +775,7 @@ static inline void kvm_guest_enter(void) /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode - * is very similar to exiting to userspase from rcu point of view. In + * is very similar to exiting to userspace from rcu point of view. In * addition CPU may stay in a guest mode for quite a long time (up to * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. From d49c7a4473eebfda645f469c7a2e77846c8008e2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 5 Nov 2013 16:04:18 +0200 Subject: [PATCH 1181/1368] KVM: remove vm mmap method It was used in conjunction with KVM_SET_MEMORY_REGION ioctl which was removed by b74a07beed0 in 2010, QEMU stopped using it in 2008, so it is time to remove the code finally. Signed-off-by: Gleb Natapov (cherry picked from commit 80f5b5e700fa9c58480eafce0d47367bafb70006) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 652d682b1d55..dd9ce144cf99 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2524,44 +2524,12 @@ static long kvm_vm_compat_ioctl(struct file *filp, } #endif -static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page[1]; - unsigned long addr; - int npages; - gfn_t gfn = vmf->pgoff; - struct kvm *kvm = vma->vm_file->private_data; - - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return VM_FAULT_SIGBUS; - - npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, - NULL); - if (unlikely(npages != 1)) - return VM_FAULT_SIGBUS; - - vmf->page = page[0]; - return 0; -} - -static const struct vm_operations_struct kvm_vm_vm_ops = { - .fault = kvm_vm_fault, -}; - -static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_ops = &kvm_vm_vm_ops; - return 0; -} - static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = kvm_vm_compat_ioctl, #endif - .mmap = kvm_vm_mmap, .llseek = noop_llseek, }; From 60979ebbbbe9d4c56e87cffcaa92933d96637d7b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Nov 2013 18:29:45 +0000 Subject: [PATCH 1182/1368] arm64: KVM: initialize HYP mode following the kernel endianness Force SCTLR_EL2.EE to 1 if the kernel is compiled as BE. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 18ea3dbc9e5c8a53a361b17c4a5676ea6f4bcb72) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp-init.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index ba84e6705e20..2b0244d65c16 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -74,7 +74,10 @@ __do_hyp_init: msr mair_el2, x4 isb - mov x4, #SCTLR_EL2_FLAGS + mrs x4, sctlr_el2 + and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2 + ldr x5, =SCTLR_EL2_FLAGS + orr x4, x4, x5 msr sctlr_el2, x4 isb From 128a021aa83b73d07153f8fee5dd22e6933f62e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Nov 2013 18:29:46 +0000 Subject: [PATCH 1183/1368] arm64: KVM: vgic: byteswap GICv2 access on world switch if BE Ensure that accesses to the GICH_* registers are byteswapped when the kernel is compiled as big-endian. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit c5b2c0f5203b3bc678a8967daedf7114029975ae) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 1ac0bbbdddb2..3b47c36e10ff 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -403,6 +403,14 @@ __kvm_hyp_code_start: ldr w9, [x2, #GICH_ELRSR0] ldr w10, [x2, #GICH_ELRSR1] ldr w11, [x2, #GICH_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) +CPU_BE( rev w9, w9 ) +CPU_BE( rev w10, w10 ) +CPU_BE( rev w11, w11 ) str w4, [x3, #VGIC_CPU_HCR] str w5, [x3, #VGIC_CPU_VMCR] @@ -421,6 +429,7 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_NR_LR] add x3, x3, #VGIC_CPU_LR 1: ldr w5, [x2], #4 +CPU_BE( rev w5, w5 ) str w5, [x3], #4 sub w4, w4, #1 cbnz w4, 1b @@ -446,6 +455,9 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_HCR] ldr w5, [x3, #VGIC_CPU_VMCR] ldr w6, [x3, #VGIC_CPU_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) str w4, [x2, #GICH_HCR] str w5, [x2, #GICH_VMCR] @@ -456,6 +468,7 @@ __kvm_hyp_code_start: ldr w4, [x3, #VGIC_CPU_NR_LR] add x3, x3, #VGIC_CPU_LR 1: ldr w5, [x3], #4 +CPU_BE( rev w5, w5 ) str w5, [x2], #4 sub w4, w4, #1 cbnz w4, 1b From dd8858820ec48b90c5ab5c6631f50a613deb55bc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 12 Feb 2013 12:40:22 +0000 Subject: [PATCH 1184/1368] arm/arm64: KVM: MMIO support for BE guest Do the necessary byteswap when host and guest have different views of the universe. Actually, the only case we need to take care of is when the guest is BE. All the other cases are naturally handled. Also be careful about endianness when the data is being memcopy-ed from/to the run buffer. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 6d89d2d9b5bac9dbe40ee106ceda9307b6265234) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 41 +++++++++++++ arch/arm/kvm/mmio.c | 86 ++++++++++++++++++++++++---- arch/arm64/include/asm/kvm_emulate.h | 48 ++++++++++++++++ 3 files changed, 164 insertions(+), 11 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 708e4d8a647f..b79cd8d3d6ee 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -162,4 +162,45 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) return vcpu->arch.cp15[c0_MPIDR]; } +static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) +{ + return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT); +} + +static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return be16_to_cpu(data & 0xffff); + default: + return be32_to_cpu(data); + } + } + + return data; /* Leave LE untouched */ +} + +static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_be16(data & 0xffff); + default: + return cpu_to_be32(data); + } + } + + return data; /* Leave LE untouched */ +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 0c25d9487d53..4cb5a93182e9 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -23,6 +23,68 @@ #include "trace.h" +static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) +{ + void *datap = NULL; + union { + u8 byte; + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + tmp.byte = data; + datap = &tmp.byte; + break; + case 2: + tmp.hword = data; + datap = &tmp.hword; + break; + case 4: + tmp.word = data; + datap = &tmp.word; + break; + case 8: + tmp.dword = data; + datap = &tmp.dword; + break; + } + + memcpy(buf, datap, len); +} + +static unsigned long mmio_read_buf(char *buf, unsigned int len) +{ + unsigned long data = 0; + union { + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + data = buf[0]; + break; + case 2: + memcpy(&tmp.hword, buf, len); + data = tmp.hword; + break; + case 4: + memcpy(&tmp.word, buf, len); + data = tmp.word; + break; + case 8: + memcpy(&tmp.dword, buf, len); + data = tmp.dword; + break; + } + + return data; +} + /** * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation * @vcpu: The VCPU pointer @@ -33,28 +95,27 @@ */ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { - unsigned long *dest; + unsigned long data; unsigned int len; int mask; if (!run->mmio.is_write) { - dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); - *dest = 0; - len = run->mmio.len; if (len > sizeof(unsigned long)) return -EINVAL; - memcpy(dest, run->mmio.data, len); - - trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - *((u64 *)run->mmio.data)); + data = mmio_read_buf(run->mmio.data, len); if (vcpu->arch.mmio_decode.sign_extend && len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); - *dest = (*dest ^ mask) - mask; + data = (data ^ mask) - mask; } + + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + data); + data = vcpu_data_host_to_guest(vcpu, data, len); + *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data; } return 0; @@ -105,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { struct kvm_exit_mmio mmio; + unsigned long data; unsigned long rt; int ret; @@ -125,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); + trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, fault_ipa, - (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0); + (mmio.is_write) ? data : 0); if (mmio.is_write) - memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); + mmio_write_buf(mmio.data, mmio.len, data); if (vgic_handle_mmio(vcpu, run, &mmio)) return 1; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6df93cdc652b..291f87cf457f 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -182,4 +182,52 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) return vcpu_sys_reg(vcpu, MPIDR_EL1); } +static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); + + return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); +} + +static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return be16_to_cpu(data & 0xffff); + case 4: + return be32_to_cpu(data & 0xffffffff); + default: + return be64_to_cpu(data); + } + } + + return data; /* Leave LE untouched */ +} + +static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, + unsigned long data, + unsigned int len) +{ + if (kvm_vcpu_is_be(vcpu)) { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_be16(data & 0xffff); + case 4: + return cpu_to_be32(data & 0xffffffff); + default: + return cpu_to_be64(data); + } + } + + return data; /* Leave LE untouched */ +} + #endif /* __ARM64_KVM_EMULATE_H__ */ From 53e38964402dd81c8528f3d2c6fd119c4091a390 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Nov 2013 14:12:15 +0000 Subject: [PATCH 1185/1368] arm/arm64: KVM: PSCI: propagate caller endianness to the incoming vcpu When booting a vcpu using PSCI, make sure we start it with the endianness of the caller. Otherwise, secondaries can be pretty unhappy to execute a BE kernel in LE mode... This conforms to PSCI spec Rev B, 5.13.3. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit ce94fe93d566bf381c6ecbd45010d36c5f04d692) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/kvm/psci.c | 4 ++++ arch/arm64/include/asm/kvm_emulate.h | 8 ++++++++ 3 files changed, 17 insertions(+) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index b79cd8d3d6ee..0fa90c962ac8 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -162,6 +162,11 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) return vcpu->arch.cp15[c0_MPIDR]; } +static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) +{ + *vcpu_cpsr(vcpu) |= PSR_E_BIT; +} + static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) { return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT); diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 311263124acf..0881bf169fbc 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -71,6 +71,10 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu_set_thumb(vcpu); } + /* Propagate caller endianness */ + if (kvm_vcpu_is_be(source_vcpu)) + kvm_vcpu_set_be(vcpu); + *vcpu_pc(vcpu) = target_pc; vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 291f87cf457f..dd8ecfc3f995 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -182,6 +182,14 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) return vcpu_sys_reg(vcpu, MPIDR_EL1); } +static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; + else + vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); +} + static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) From 52031ff6011942b7e29788b9ce5066cb45fe3e4e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 15 Nov 2013 13:14:12 -0800 Subject: [PATCH 1186/1368] arm/arm64: KVM: Fix hyp mappings of vmalloc regions Using virt_to_phys on percpu mappings is horribly wrong as it may be backed by vmalloc. Introduce kvm_kaddr_to_phys which translates both types of valid kernel addresses to the corresponding physical address. At the same time resolves a typing issue where we were storing the physical address as a 32 bit unsigned long (on arm), truncating the physical address for addresses above the 4GB limit. This caused breakage on Keystone. Cc: [3.10+] Reported-by: Santosh Shilimkar Tested-by: Santosh Shilimkar Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 40c2729bab48e2832b17c1fa8af9db60e776131b) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 371958370de4..580906989db1 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -334,6 +334,17 @@ static int __create_hyp_mappings(pgd_t *pgdp, return err; } +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -345,16 +356,27 @@ static int __create_hyp_mappings(pgd_t *pgdp, */ int create_hyp_mappings(void *from, void *to) { - unsigned long phys_addr = virt_to_phys(from); + phys_addr_t phys_addr; + unsigned long virt_addr; unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); - /* Check for a valid kernel memory mapping */ - if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) - return -EINVAL; + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); - return __create_hyp_mappings(hyp_pgd, start, end, - __phys_to_pfn(phys_addr), PAGE_HYP); + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, virt_addr, + virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + PAGE_HYP); + if (err) + return err; + } + + return 0; } /** From 4ae68e1ee672530df9236b07ff267f06e107bc9a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 18 Nov 2013 10:35:55 +0100 Subject: [PATCH 1187/1368] KVM: kvm_clear_guest_page(): fix empty_zero_page usage Using the address of 'empty_zero_page' as source address in order to clear a page is wrong. On some architectures empty_zero_page is only the pointer to the struct page of the empty_zero_page. Therefore the clear page operation would copy the contents of a couple of struct pages instead of clearing a page. For kvm only arm/arm64 are affected by this bug. To fix this use the ZERO_PAGE macro instead which will return the struct page address of the empty_zero_page on all architectures. Signed-off-by: Heiko Carstens Signed-off-by: Gleb Natapov (cherry picked from commit 8a3caa6d74597c2a083f7c87f866891a0b12540b) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dd9ce144cf99..e2b9a0670639 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1613,8 +1613,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { - return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page, - offset, len); + const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); + + return kvm_write_guest_page(kvm, gfn, zero_page, offset, len); } EXPORT_SYMBOL_GPL(kvm_clear_guest_page); From 4479f3b0060731a9d61b30570db282b5587c885f Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 19 Nov 2013 14:59:12 -0500 Subject: [PATCH 1188/1368] arm/arm64: kvm: Use virt_to_idmap instead of virt_to_phys for idmap mappings KVM initialisation fails on architectures implementing virt_to_idmap() because virt_to_phys() on such architectures won't fetch you the correct idmap page. So update the KVM ARM code to use the virt_to_idmap() to fix the issue. Since the KVM code is shared between arm and arm64, we create kvm_virt_to_phys() and handle the redirection in respective headers. Cc: Christoffer Dall Cc: Marc Zyngier Cc: Catalin Marinas Signed-off-by: Santosh Shilimkar Signed-off-by: Christoffer Dall (cherry picked from commit 4fda342cc7f577599c53fd27b99c953c7b1da18a) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 1 + arch/arm/kvm/mmu.c | 8 ++++---- arch/arm64/include/asm/kvm_mmu.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 77de4a41cc50..2d122adcdb22 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 580906989db1..659db0ed1370 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -916,9 +916,9 @@ int kvm_mmu_init(void) { int err; - hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); - hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); - hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { /* @@ -945,7 +945,7 @@ int kvm_mmu_init(void) */ kvm_flush_dcache_to_poc(init_bounce_page, len); - phys_base = virt_to_phys(init_bounce_page); + phys_base = kvm_virt_to_phys(init_bounce_page); hyp_idmap_vector += phys_base - hyp_idmap_start; hyp_idmap_start = phys_base; hyp_idmap_end = phys_base + len; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 680f74e67497..7f1f9408ff66 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ From bc071b4cc769735a12fc9fa17cb54dd42f3fb0e6 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Mon, 18 Nov 2013 16:09:22 -0800 Subject: [PATCH 1189/1368] KVM: Improve create VCPU parameter (CVE-2013-4587) In multiple functions the vcpu_id is used as an offset into a bitfield. Ag malicious user could specify a vcpu_id greater than 255 in order to set or clear bits in kernel memory. This could be used to elevate priveges in the kernel. This patch verifies that the vcpu_id provided is less than 255. The api documentation already specifies that the vcpu_id must be less than max_vcpus, but this is currently not checked. Reported-by: Andrew Honig Cc: stable@vger.kernel.org Signed-off-by: Andrew Honig Signed-off-by: Paolo Bonzini (cherry picked from commit 338c7dbadd2671189cec7faf64c84d01071b3f96) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e2b9a0670639..2dd59b957164 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1894,6 +1894,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) int r; struct kvm_vcpu *vcpu, *v; + if (id >= KVM_MAX_VCPUS) + return -EINVAL; + vcpu = kvm_arch_vcpu_create(kvm, id); if (IS_ERR(vcpu)) return PTR_ERR(vcpu); From 580ab4613364270a998a44850d49c4d8c5df5144 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 13 Dec 2013 15:07:21 +0900 Subject: [PATCH 1190/1368] KVM: Use cond_resched() directly and remove useless kvm_resched() Since the commit 15ad7146 ("KVM: Use the scheduler preemption notifiers to make kvm preemptible"), the remaining stuff in this function is a simple cond_resched() call with an extra need_resched() check which was there to avoid dropping VCPUs unnecessarily. Now it is meaningless. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini (cherry picked from commit c08ac06ab3f3cdb8d34376c3a8a5e46a31a62c8f) Signed-off-by: Christoffer Dall --- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 8 -------- 5 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 985bf80c622e..53f44bee9ebb 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -702,7 +702,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: srcu_read_unlock(&vcpu->kvm->srcu, idx); if (r > 0) { - kvm_resched(vcpu); + cond_resched(); idx = srcu_read_lock(&vcpu->kvm->srcu); goto again; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 550f5928b394..717e5b525f3b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1253,7 +1253,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvm_guest_exit(); preempt_enable(); - kvm_resched(vcpu); + cond_resched(); spin_lock(&vc->lock); now = get_tb(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5276618579d3..805c8e92cf66 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5947,7 +5947,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } if (need_resched()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - kvm_resched(vcpu); + cond_resched(); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6d24a2671d29..9c0f8545df73 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -569,7 +569,6 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); bool kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); -void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2dd59b957164..cb9a865c8e01 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1706,14 +1706,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ -void kvm_resched(struct kvm_vcpu *vcpu) -{ - if (!need_resched()) - return; - cond_resched(); -} -EXPORT_SYMBOL_GPL(kvm_resched); - bool kvm_vcpu_yield_to(struct kvm_vcpu *target) { struct pid *pid; From 8215ed10b4803d159b3f10784140a79eafd94382 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 5 Aug 2013 15:04:46 +0100 Subject: [PATCH 1191/1368] arm: kvm: implement CPU PM notifier Upon CPU shutdown and consequent warm-reboot, the hypervisor CPU state must be re-initialized. This patch implements a CPU PM notifier that upon warm-boot calls a KVM hook to reinitialize properly the hypervisor state so that the CPU can be safely resumed. Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Lorenzo Pieralisi (cherry picked from commit 1fcf7ce0c60213994269fb59569ec161eb6e08d6) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e312e4a53f8d..b04013608e7e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include @@ -853,6 +854,33 @@ static struct notifier_block hyp_init_cpu_nb = { .notifier_call = hyp_init_cpu_notify, }; +#ifdef CONFIG_CPU_PM +static int hyp_init_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, + void *v) +{ + if (cmd == CPU_PM_EXIT) { + cpu_init_hyp_mode(NULL); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static struct notifier_block hyp_init_cpu_pm_nb = { + .notifier_call = hyp_init_cpu_pm_notifier, +}; + +static void __init hyp_cpu_pm_init(void) +{ + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); +} +#else +static inline void hyp_cpu_pm_init(void) +{ +} +#endif + /** * Inits Hyp-mode on all online CPUs */ @@ -1013,6 +1041,8 @@ int kvm_arch_init(void *opaque) goto out_err; } + hyp_cpu_pm_init(); + kvm_coproc_table_init(); return 0; out_err: From 90715c6a824fcc239e7da97c4078b85c7944df7c Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 9 Dec 2013 00:22:53 +0900 Subject: [PATCH 1192/1368] treewide: Fix typos in printk Correct spelling typo in various part of kernel [ cdall: Pickes KVM/arm64 specific part not already merged into LSK ] Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina (cherry picked from commit 77d84ff87e9d38072abcca665ca22cb1da41cb86) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 8da56067c304..42a0f1bddfe7 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -90,7 +90,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { - kvm_err("Unkown exception class: hsr: %#08x\n", + kvm_err("Unknown exception class: hsr: %#08x\n", (unsigned int)kvm_vcpu_get_hsr(vcpu)); BUG(); } From 7e2c9ce0199412b559bbf527956c004a05a64619 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Nov 2013 17:43:19 -0800 Subject: [PATCH 1193/1368] arm: KVM: Don't return PSCI_INVAL if waitqueue is inactive The current KVM implementation of PSCI returns INVALID_PARAMETERS if the waitqueue for the corresponding CPU is not active. This does not seem correct, since KVM should not care what the specific thread is doing, for example, user space may not have called KVM_RUN on this VCPU yet or the thread may be busy looping to user space because it received a signal; this is really up to the user space implementation. Instead we should check specifically that the CPU is marked as being turned off, regardless of the VCPU thread state, and if it is, we shall simply clear the pause flag on the CPU and wake up the thread if it happens to be blocked for us. Further, the implementation seems to be racy when executing multiple VCPU threads. There really isn't a reasonable user space programming scheme to ensure all secondary CPUs have reached kvm_vcpu_first_run_init before turning on the boot CPU. Therefore, set the pause flag on the vcpu at VCPU init time (which can reasonably be expected to be completed for all CPUs by user space before running any VCPUs) and clear both this flag and the feature (in case the feature can somehow get set again in the future) and ping the waitqueue on turning on a VCPU using PSCI. Reported-by: Peter Maydell Signed-off-by: Christoffer Dall (cherry picked from commit 478a8237f656d86d25b3e4e4bf3c48f590156294) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 30 +++++++++++++++++++----------- arch/arm/kvm/psci.c | 11 ++++++----- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index b04013608e7e..bb43b8cc1231 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -479,15 +479,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return ret; } - /* - * Handle the "start in power-off" case by calling into the - * PSCI code. - */ - if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { - *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; - kvm_psci_call(vcpu); - } - return 0; } @@ -701,6 +692,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, + struct kvm_vcpu_init *init) +{ + int ret; + + ret = kvm_vcpu_set_target(vcpu, init); + if (ret) + return ret; + + /* + * Handle the "start in power-off" case by marking the VCPU as paused. + */ + if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + vcpu->arch.pause = true; + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -714,8 +723,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&init, argp, sizeof(init))) return -EFAULT; - return kvm_vcpu_set_target(vcpu, &init); - + return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); } case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 0881bf169fbc..448f60e8d23c 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) } } - if (!vcpu) + /* + * Make sure the caller requested a valid CPU and that the CPU is + * turned off. + */ + if (!vcpu || !vcpu->arch.pause) return KVM_PSCI_RET_INVAL; target_pc = *vcpu_reg(source_vcpu, 2); - wq = kvm_arch_vcpu_wq(vcpu); - if (!waitqueue_active(wq)) - return KVM_PSCI_RET_INVAL; - kvm_reset_vcpu(vcpu); /* Gracefully handle Thumb2 entry point */ @@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ + wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); return KVM_PSCI_RET_SUCCESS; From b5a94dd48dc470624dcb7d581432a439d7a289c0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 16 Nov 2013 10:51:25 -0800 Subject: [PATCH 1194/1368] arm/arm64: KVM: arch_timer: Initialize cntvoff at kvm_init Initialize the cntvoff at kvm_init_vm time, not before running the VCPUs at the first time because that will overwrite any potentially restored values from user space. Cc: Andre Przywara Acked-by: Marc Zynger Signed-off-by: Christoffer Dall (cherry picked from commit a1a64387adeeba7a34ce06f2774e81f496ee803b) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 ++ virt/kvm/arm/vgic.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index bb43b8cc1231..71ad3a8706d9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -138,6 +138,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + kvm_timer_init(kvm); + /* Mark the initial VMID generation invalid */ kvm->arch.vmid_gen = 0; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 685fc72fc751..81e9481184a7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1409,7 +1409,6 @@ int kvm_vgic_init(struct kvm *kvm) for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) vgic_set_target_reg(kvm, 0, i); - kvm_timer_init(kvm); kvm->arch.vgic.ready = true; out: mutex_unlock(&kvm->lock); From 764ee339777196e8cf9fa090f5041b02efc93630 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 13 Dec 2013 14:23:26 +0100 Subject: [PATCH 1195/1368] ARM/KVM: save and restore generic timer registers For migration to work we need to save (and later restore) the state of each core's virtual generic timer. Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export the three needed registers (control, counter, compare value). Though they live in cp15 space, we don't use the existing list, since they need special accessor functions and the arch timer is optional. Acked-by: Marc Zynger Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall (cherry picked from commit 39735a3a390431bcf60f9174b7d64f787fd6afa9) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 + arch/arm/include/uapi/asm/kvm.h | 20 +++++++ arch/arm/kvm/guest.c | 92 ++++++++++++++++++++++++++++++- arch/arm64/include/uapi/asm/kvm.h | 18 ++++++ virt/kvm/arm/arch_timer.c | 34 ++++++++++++ 5 files changed, 166 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a6f6db14ee4..098f7dd6d564 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); +int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index c498b60c0505..835b8678de03 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -119,6 +119,26 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 #define KVM_REG_ARM_32_CRN_SHIFT 11 +#define ARM_CP15_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) + +#define __ARM_CP15_REG(op1,crn,crm,op2) \ + (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \ + ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \ + ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \ + ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \ + ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2)) + +#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32) + +#define __ARM_CP15_REG64(op1,crm) \ + (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64) +#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) + +#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) + /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 20f8d97904af..2786eae10c0d 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } +#ifndef CONFIG_KVM_ARM_TIMER + +#define NUM_TIMER_REGS 0 + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + return 0; +} + +static bool is_timer_reg(u64 index) +{ + return false; +} + +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + return 0; +} + +#else + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +#endif + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + static unsigned long num_core_regs(void) { return sizeof(struct kvm_regs) / sizeof(u32); @@ -121,7 +198,8 @@ static unsigned long num_core_regs(void) */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_coproc_regs(vcpu); + return num_core_regs() + kvm_arm_num_coproc_regs(vcpu) + + NUM_TIMER_REGS; } /** @@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { if (put_user(core_reg | i, uindices)) @@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_coproc_indices(vcpu, uindices); } @@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_coproc_get_reg(vcpu, reg); } @@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_coproc_set_reg(vcpu, reg); } diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 5031f4263937..7c25ca8b02b3 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -129,6 +129,24 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 +#define ARM64_SYS_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \ + KVM_REG_ARM64_SYSREG_ ## n ## _MASK) + +#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \ + ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64) + +#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) +#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index c2e1ef4604e8..5081e809821f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -182,6 +182,40 @@ static void kvm_timer_init_interrupt(void *info) enable_percpu_irq(host_vtimer_irq, 0); } +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + timer->cntv_ctl = value; + break; + case KVM_REG_ARM_TIMER_CNT: + vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; + break; + case KVM_REG_ARM_TIMER_CVAL: + timer->cntv_cval = value; + break; + default: + return -1; + } + return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + return timer->cntv_ctl; + case KVM_REG_ARM_TIMER_CNT: + return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + case KVM_REG_ARM_TIMER_CVAL: + return timer->cntv_cval; + } + return (u64)-1; +} static int kvm_timer_cpu_notify(struct notifier_block *self, unsigned long action, void *cpu) From d90651fa17fcc2112dcfe05832a6c1169d26dee3 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:55 -0700 Subject: [PATCH 1196/1368] ARM: KVM: Allow creating the VGIC after VCPUs Rework the VGIC initialization slightly to allow initialization of the vgic cpu-specific state even if the irqchip (the VGIC) hasn't been created by user space yet. This is safe, because the vgic data structures are already allocated when the CPU is allocated if VGIC support is compiled into the kernel. Further, the init process does not depend on any other information and the sacrifice is a slight performance degradation for creating VMs in the no-VGIC case. The reason is that the new device control API doesn't mandate creating the VGIC before creating the VCPU and it is unreasonable to require user space to create the VGIC before creating the VCPUs. At the same time move the irqchip_in_kernel check out of kvm_vcpu_first_run_init and into the init function to make the per-vcpu and global init functions symmetric and add comments on the exported functions making it a bit easier to understand the init flow by only looking at vgic.c. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e1ba0207a1b3714bb3f000e506285ae5123cdfa7) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 ++++--- virt/kvm/arm/vgic.c | 22 +++++++++++++++++++--- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 71ad3a8706d9..ea8da1f4fe49 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -465,6 +465,8 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + int ret; + if (likely(vcpu->arch.has_run_once)) return 0; @@ -474,9 +476,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Initialize the VGIC before running a vcpu the first time on * this VM. */ - if (irqchip_in_kernel(vcpu->kvm) && - unlikely(!vgic_initialized(vcpu->kvm))) { - int ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_initialized(vcpu->kvm))) { + ret = kvm_vgic_init(vcpu->kvm); if (ret) return ret; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 81e9481184a7..5e9df47778fb 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1243,15 +1243,19 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +/** + * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state + * @vcpu: pointer to the vcpu struct + * + * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to + * this vcpu and enable the VGIC for this VCPU + */ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) return -EBUSY; @@ -1383,10 +1387,22 @@ int kvm_vgic_hyp_init(void) return ret; } +/** + * kvm_vgic_init - Initialize global VGIC state before running any VCPUs + * @kvm: pointer to the kvm struct + * + * Map the virtual CPU interface into the VM before running any VCPUs. We + * can't do this at creation time, because user space must first set the + * virtual CPU interface address in the guest physical address space. Also + * initialize the ITARGETSRn regs to 0 on the emulated distributor. + */ int kvm_vgic_init(struct kvm *kvm) { int ret = 0, i; + if (!irqchip_in_kernel(kvm)) + return 0; + mutex_lock(&kvm->lock); if (vgic_initialized(kvm)) From 1032acb686aaabafb124e12ae15a6adc9b3da557 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 17:29:18 +0100 Subject: [PATCH 1197/1368] KVM: arm-vgic: Support KVM_CREATE_DEVICE for VGIC Support creating the ARM VGIC device through the KVM_CREATE_DEVICE ioctl, which can then later be leveraged to use the KVM_{GET/SET}_DEVICE_ATTR, which is useful both for setting addresses in a more generic API than the ARM-specific one and is useful for save/restore of VGIC state. Adds KVM_CAP_DEVICE_CTRL to ARM capabilities. Note that we change the check for creating a VGIC from bailing out if any VCPUs were created, to bailing out if any VCPUs were ever run. This is an important distinction that shouldn't break anything, but allows creating the VGIC after the VCPUs have been created. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 7330672befe6269e575f79b924a7068b26c144b4) Signed-off-by: Christoffer Dall --- .../virtual/kvm/devices/arm-vgic.txt | 10 +++ arch/arm/kvm/arm.c | 1 + include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + virt/kvm/arm/vgic.c | 63 ++++++++++++++++++- virt/kvm/kvm_main.c | 5 ++ 6 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/arm-vgic.txt diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt new file mode 100644 index 000000000000..38f27f709a99 --- /dev/null +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -0,0 +1,10 @@ +ARM Virtual Generic Interrupt Controller (VGIC) +=============================================== + +Device types supported: + KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0 + +Only one VGIC instance may be instantiated through either this API or the +legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt +controller, requiring emulated user-space devices to inject interrupts to the +VGIC instead of directly to CPUs. diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ea8da1f4fe49..fcb68bb96176 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -191,6 +191,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQCHIP: r = vgic_present; break; + case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9c0f8545df73..1dfc17255ee0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1045,6 +1045,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; +extern struct kvm_device_ops kvm_arm_vgic_v2_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d5b8501cebfd..56347ab54cf7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -848,6 +848,7 @@ struct kvm_device_attr { #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_TYPE_ARM_VGIC_V2 5 /* * ioctls for VM fds diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5e9df47778fb..b15d6c17a090 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1433,20 +1433,45 @@ int kvm_vgic_init(struct kvm *kvm) int kvm_vgic_create(struct kvm *kvm) { - int ret = 0; + int i, vcpu_lock_idx = -1, ret = 0; + struct kvm_vcpu *vcpu; mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { + if (kvm->arch.vgic.vctrl_base) { ret = -EEXIST; goto out; } + /* + * Any time a vcpu is run, vcpu_load is called which tries to grab the + * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure + * that no other VCPUs are run while we create the vgic. + */ + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!mutex_trylock(&vcpu->mutex)) + goto out_unlock; + vcpu_lock_idx = i; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.has_run_once) { + ret = -EBUSY; + goto out_unlock; + } + } + spin_lock_init(&kvm->arch.vgic.lock); kvm->arch.vgic.vctrl_base = vgic_vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; +out_unlock: + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&vcpu->mutex); + } + out: mutex_unlock(&kvm->lock); return ret; @@ -1510,3 +1535,37 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) mutex_unlock(&kvm->lock); return r; } + +static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static void vgic_destroy(struct kvm_device *dev) +{ + kfree(dev); +} + +static int vgic_create(struct kvm_device *dev, u32 type) +{ + return kvm_vgic_create(dev->kvm); +} + +struct kvm_device_ops kvm_arm_vgic_v2_ops = { + .name = "kvm-arm-vgic", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_set_attr, + .get_attr = vgic_get_attr, + .has_attr = vgic_has_attr, +}; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cb9a865c8e01..e9a43b6455be 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2271,6 +2271,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_VFIO: ops = &kvm_vfio_ops; break; +#endif +#ifdef CONFIG_KVM_ARM_VGIC + case KVM_DEV_TYPE_ARM_VGIC_V2: + ops = &kvm_arm_vgic_v2_ops; + break; #endif default: return -ENODEV; From c4ad31ff7d94f6504c757ddcf742f0597c494080 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: [PATCH 1198/1368] KVM: arm-vgic: Set base addr through device API Support setting the distributor and cpu interface base addresses in the VM physical address space through the KVM_{SET,GET}_DEVICE_ATTR API in addition to the ARM specific API. This has the added benefit of being able to share more code in user space and do things in a uniform manner. Also deprecate the older API at the same time, but backwards compatibility will be maintained. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit ce01e4e8874d410738f4b4733b26642d6611a331) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 8 +- .../virtual/kvm/devices/arm-vgic.txt | 11 +++ arch/arm/include/uapi/asm/kvm.h | 2 + arch/arm/kvm/arm.c | 2 +- include/kvm/arm_vgic.h | 2 +- virt/kvm/arm/vgic.c | 87 ++++++++++++++++--- 6 files changed, 96 insertions(+), 16 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d196ebe8956e..3c75a17555a8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2324,7 +2324,7 @@ This ioctl returns the guest registers that are supported for the KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. -4.80 KVM_ARM_SET_DEVICE_ADDR +4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated) Capability: KVM_CAP_ARM_SET_DEVICE_ADDR Architectures: arm, arm64 @@ -2362,7 +2362,11 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the base addresses will return -EEXIST. -4.82 KVM_PPC_RTAS_DEFINE_TOKEN +Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API +should be used instead. + + +4.86 KVM_PPC_RTAS_DEFINE_TOKEN Capability: KVM_CAP_PPC_RTAS Architectures: ppc diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 38f27f709a99..c9febb2a0c3e 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -8,3 +8,14 @@ Only one VGIC instance may be instantiated through either this API or the legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt controller, requiring emulated user-space devices to inject interrupts to the VGIC instead of directly to CPUs. + +Groups: + KVM_DEV_ARM_VGIC_GRP_ADDR + Attributes: + KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) + Base address in the guest physical address space of the GIC distributor + register mappings. + + KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) + Base address in the guest physical address space of the GIC virtual cpu + interface register mappings. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 835b8678de03..76a742769e2b 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -163,6 +163,8 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* Device Control API: ARM VGIC */ +#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index fcb68bb96176..e71f6e15d3cd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -785,7 +785,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, case KVM_ARM_DEVICE_VGIC_V2: if (!vgic_present) return -ENXIO; - return kvm_vgic_set_addr(kvm, type, dev_addr->addr); + return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7e2d15837b02..be85127bfed3 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -144,7 +144,7 @@ struct kvm_run; struct kvm_exit_mmio; #ifdef CONFIG_KVM_ARM_VGIC -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b15d6c17a090..45db48de4282 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1495,6 +1495,12 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, { int ret; + if (addr & ~KVM_PHYS_MASK) + return -E2BIG; + + if (addr & (SZ_4K - 1)) + return -EINVAL; + if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) return -EEXIST; if (addr + size < addr) @@ -1507,26 +1513,41 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, return ret; } -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) +/** + * kvm_vgic_addr - set or get vgic VM base addresses + * @kvm: pointer to the vm struct + * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX + * @addr: pointer to address value + * @write: if true set the address in the VM address space, if false read the + * address + * + * Set or get the vgic base addresses for the distributor and the virtual CPU + * interface in the VM physical address space. These addresses are properties + * of the emulated core/SoC and therefore user space initially knows this + * information. + */ +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) { int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (addr & (SZ_4K - 1)) - return -EINVAL; - mutex_lock(&kvm->lock); switch (type) { case KVM_VGIC_V2_ADDR_TYPE_DIST: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, - addr, KVM_VGIC_V2_DIST_SIZE); + if (write) { + r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, + *addr, KVM_VGIC_V2_DIST_SIZE); + } else { + *addr = vgic->vgic_dist_base; + } break; case KVM_VGIC_V2_ADDR_TYPE_CPU: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, - addr, KVM_VGIC_V2_CPU_SIZE); + if (write) { + r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, + *addr, KVM_VGIC_V2_CPU_SIZE); + } else { + *addr = vgic->vgic_cpu_base; + } break; default: r = -ENODEV; @@ -1538,16 +1559,58 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + int r; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + r = kvm_vgic_addr(dev->kvm, type, &addr, true); + return (r == -ENODEV) ? -ENXIO : r; + } + } + return -ENXIO; } static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return -ENXIO; + int r = -ENXIO; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + r = kvm_vgic_addr(dev->kvm, type, &addr, false); + if (r) + return (r == -ENODEV) ? -ENXIO : r; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + } + } + + return r; } static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + case KVM_VGIC_V2_ADDR_TYPE_CPU: + return 0; + } + break; + } return -ENXIO; } From 10b316b72589c7ba6f74c7263a289ffbaa1bf80b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: [PATCH 1199/1368] irqchip: arm-gic: Define additional MMIO offsets and masks Define CPU interface offsets for the GICC_ABPR, GICC_APR, and GICC_IIDR registers. Define distributor registers for the GICD_SPENDSGIR and the GICD_CPENDSGIR. KVM/ARM needs to know about these definitions to fully support save/restore of the VGIC. Also define some masks and shifts for the various GICH_VMCR fields. Cc: Thomas Gleixner Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 0307e1770fdeff2732cf7a35d0f7f49db67c6621) Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 3e203eb23cc7..4483adb61c88 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -17,6 +17,9 @@ #define GIC_CPU_EOI 0x10 #define GIC_CPU_RUNNINGPRI 0x14 #define GIC_CPU_HIGHPRI 0x18 +#define GIC_CPU_ALIAS_BINPOINT 0x1c +#define GIC_CPU_ACTIVEPRIO 0xd0 +#define GIC_CPU_IDENT 0xfc #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 @@ -54,6 +57,15 @@ #define GICH_LR_ACTIVE_BIT (1 << 29) #define GICH_LR_EOI (1 << 19) +#define GICH_VMCR_CTRL_SHIFT 0 +#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT) +#define GICH_VMCR_PRIMASK_SHIFT 27 +#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT) +#define GICH_VMCR_BINPOINT_SHIFT 21 +#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT) +#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18 +#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT) + #define GICH_MISR_EOI (1 << 0) #define GICH_MISR_U (1 << 1) From 405003b808d4ff3de73eebc18dfb6f1e708626c4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: [PATCH 1200/1368] KVM: arm-vgic: Make vgic mmio functions more generic Rename the vgic_ranges array to vgic_dist_ranges to be more specific and to prepare for handling CPU interface register access as well (for save/restore of VGIC state). Pass offset from distributor or interface MMIO base to find_matching_range function instead of the physical address of the access in the VM memory map. This allows other callers unaware of the VM specifics, but with generic VGIC knowledge to reuse the function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 1006e8cb22e861260688917ca4cfe6cde8ad69eb) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 45db48de4282..e2596f618281 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -602,7 +602,7 @@ struct mmio_range { phys_addr_t offset); }; -static const struct mmio_range vgic_ranges[] = { +static const struct mmio_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -669,14 +669,13 @@ static const struct mmio_range vgic_ranges[] = { static const struct mmio_range *find_matching_range(const struct mmio_range *ranges, struct kvm_exit_mmio *mmio, - phys_addr_t base) + phys_addr_t offset) { const struct mmio_range *r = ranges; - phys_addr_t addr = mmio->phys_addr - base; while (r->len) { - if (addr >= r->base && - (addr + mmio->len) <= (r->base + r->len)) + if (offset >= r->base && + (offset + mmio->len) <= (r->base + r->len)) return r; r++; } @@ -713,7 +712,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } - range = find_matching_range(vgic_ranges, mmio, base); + offset = mmio->phys_addr - base; + range = find_matching_range(vgic_dist_ranges, mmio, offset); if (unlikely(!range || !range->handle_mmio)) { pr_warn("Unhandled access %d %08llx %d\n", mmio->is_write, mmio->phys_addr, mmio->len); From f05c65c6daf2d2c33c31ddc3b48df2693f8e9594 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 11 Dec 2013 20:29:11 -0800 Subject: [PATCH 1201/1368] arm/arm64: kvm: Set vcpu->cpu to -1 on vcpu_put The arch-generic KVM code expects the cpu field of a vcpu to be -1 if the vcpu is no longer assigned to a cpu. This is used for the optimized make_all_cpus_request path and will be used by the vgic code to check that no vcpus are running. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e9b152cb957cb194437f37e79f0f3c9d34fe53d6) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e71f6e15d3cd..169c718ddd90 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -343,6 +343,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + /* + * The arch-generic KVM code expects the cpu field of a vcpu to be -1 + * if the vcpu is no longer assigned to a cpu. This is used for the + * optimized make_all_cpus_request path. + */ + vcpu->cpu = -1; + kvm_arm_set_running_vcpu(NULL); } From 0b3a540dcc4087698c95c6d1bda4071424283345 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 21:17:31 +0100 Subject: [PATCH 1202/1368] KVM: arm-vgic: Add vgic reg access from dev attr Add infrastructure to handle distributor and cpu interface register accesses through the KVM_{GET/SET}_DEVICE_ATTR interface by adding the KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS groups and defining the semantics of the attr field to be the MMIO offset as specified in the GICv2 specs. Missing register accesses or other changes in individual register access functions to support save/restore of the VGIC state is added in subsequent patches. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f) Signed-off-by: Christoffer Dall --- .../virtual/kvm/devices/arm-vgic.txt | 52 +++++ arch/arm/include/uapi/asm/kvm.h | 6 + virt/kvm/arm/vgic.c | 178 ++++++++++++++++++ 3 files changed, 236 insertions(+) diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index c9febb2a0c3e..7f4e91b1316b 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -19,3 +19,55 @@ Groups: KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) Base address in the guest physical address space of the GIC virtual cpu interface register mappings. + + KVM_DEV_ARM_VGIC_GRP_DIST_REGS + Attributes: + The attr field of kvm_device_attr encodes two values: + bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | + values: | reserved | cpu id | offset | + + All distributor regs are (rw, 32-bit) + + The offset is relative to the "Distributor base address" as defined in the + GICv2 specs. Getting or setting such a register has the same effect as + reading or writing the register on the actual hardware from the cpu + specified with cpu id field. Note that most distributor fields are not + banked, but return the same value regardless of the cpu id used to access + the register. + Limitations: + - Priorities are not implemented, and registers are RAZ/WI + Errors: + -ENODEV: Getting or setting this register is not yet supported + -EBUSY: One or more VCPUs are running + + KVM_DEV_ARM_VGIC_GRP_CPU_REGS + Attributes: + The attr field of kvm_device_attr encodes two values: + bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | + values: | reserved | cpu id | offset | + + All CPU interface regs are (rw, 32-bit) + + The offset specifies the offset from the "CPU interface base address" as + defined in the GICv2 specs. Getting or setting such a register has the + same effect as reading or writing the register on the actual hardware. + + The Active Priorities Registers APRn are implementation defined, so we set a + fixed format for our implementation that fits with the model of a "GICv2 + implementation without the security extensions" which we present to the + guest. This interface always exposes four register APR[0-3] describing the + maximum possible 128 preemption levels. The semantics of the register + indicate if any interrupts in a given preemption level are in the active + state by setting the corresponding bit. + + Thus, preemption level X has one or more active interrupts if and only if: + + APRn[X mod 32] == 0b1, where n = X / 32 + + Bits for undefined preemption levels are RAZ/WI. + + Limitations: + - Priorities are not implemented, and registers are RAZ/WI + Errors: + -ENODEV: Getting or setting this register is not yet supported + -EBUSY: One or more VCPUs are running diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 76a742769e2b..ef0c8785ba16 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -165,6 +165,12 @@ struct kvm_arch_memory_slot { /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 +#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 +#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 +#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 +#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 +#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e2596f618281..88599b585362 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -589,6 +589,20 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } +static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return false; +} + +static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return false; +} + /* * I would have liked to use the kvm_bus_io_*() API instead, but it * cannot cope with banked registers (only the VM pointer is passed @@ -663,6 +677,16 @@ static const struct mmio_range vgic_dist_ranges[] = { .len = 4, .handle_mmio = handle_mmio_sgi_reg, }, + { + .base = GIC_DIST_SGI_PENDING_CLEAR, + .len = VGIC_NR_SGIS, + .handle_mmio = handle_mmio_sgi_clear, + }, + { + .base = GIC_DIST_SGI_PENDING_SET, + .len = VGIC_NR_SGIS, + .handle_mmio = handle_mmio_sgi_set, + }, {} }; @@ -1557,6 +1581,114 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) return r; } +static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + return true; +} + +static const struct mmio_range vgic_cpu_ranges[] = { + { + .base = GIC_CPU_CTRL, + .len = 12, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_ALIAS_BINPOINT, + .len = 4, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_ACTIVEPRIO, + .len = 16, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_IDENT, + .len = 4, + .handle_mmio = handle_cpu_mmio_misc, + }, +}; + +static int vgic_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u32 *reg, bool is_write) +{ + const struct mmio_range *r = NULL, *ranges; + phys_addr_t offset; + int ret, cpuid, c; + struct kvm_vcpu *vcpu, *tmp_vcpu; + struct vgic_dist *vgic; + struct kvm_exit_mmio mmio; + + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> + KVM_DEV_ARM_VGIC_CPUID_SHIFT; + + mutex_lock(&dev->kvm->lock); + + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { + ret = -EINVAL; + goto out; + } + + vcpu = kvm_get_vcpu(dev->kvm, cpuid); + vgic = &dev->kvm->arch.vgic; + + mmio.len = 4; + mmio.is_write = is_write; + if (is_write) + mmio_data_write(&mmio, ~0, *reg); + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + mmio.phys_addr = vgic->vgic_dist_base + offset; + ranges = vgic_dist_ranges; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + mmio.phys_addr = vgic->vgic_cpu_base + offset; + ranges = vgic_cpu_ranges; + break; + default: + BUG(); + } + r = find_matching_range(ranges, &mmio, offset); + + if (unlikely(!r || !r->handle_mmio)) { + ret = -ENXIO; + goto out; + } + + + spin_lock(&vgic->lock); + + /* + * Ensure that no other VCPU is running by checking the vcpu->cpu + * field. If no other VPCUs are running we can safely access the VGIC + * state, because even if another VPU is run after this point, that + * VCPU will not touch the vgic state, because it will block on + * getting the vgic->lock in kvm_vgic_sync_hwstate(). + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { + if (unlikely(tmp_vcpu->cpu != -1)) { + ret = -EBUSY; + goto out_vgic_unlock; + } + } + + offset -= r->base; + r->handle_mmio(vcpu, &mmio, offset); + + if (!is_write) + *reg = mmio_data_read(&mmio, ~0); + + ret = 0; +out_vgic_unlock: + spin_unlock(&vgic->lock); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r; @@ -1573,6 +1705,18 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = kvm_vgic_addr(dev->kvm, type, &addr, true); return (r == -ENODEV) ? -ENXIO : r; } + + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_attr_regs_access(dev, attr, ®, true); + } + } return -ENXIO; @@ -1594,14 +1738,42 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) if (copy_to_user(uaddr, &addr, sizeof(addr))) return -EFAULT; + break; } + + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg = 0; + + r = vgic_attr_regs_access(dev, attr, ®, false); + if (r) + return r; + r = put_user(reg, uaddr); + break; + } + } return r; } +static int vgic_has_attr_regs(const struct mmio_range *ranges, + phys_addr_t offset) +{ + struct kvm_exit_mmio dev_attr_mmio; + + dev_attr_mmio.len = 4; + if (find_matching_range(ranges, &dev_attr_mmio, offset)) + return 0; + else + return -ENXIO; +} + static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + phys_addr_t offset; + switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_ADDR: switch (attr->attr) { @@ -1610,6 +1782,12 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return 0; } break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + return vgic_has_attr_regs(vgic_dist_ranges, offset); + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + return vgic_has_attr_regs(vgic_cpu_ranges, offset); } return -ENXIO; } From 944b8a6f9f400ded36fcdc61acd801fbd7413985 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 15 Nov 2013 20:51:31 -0800 Subject: [PATCH 1203/1368] KVM: arm-vgic: Support unqueueing of LRs to the dist To properly access the VGIC state from user space it is very unpractical to have to loop through all the LRs in all register access functions. Instead, support moving all pending state from LRs to the distributor, but leave active state LRs alone. Note that to accurately present the active and pending state to VCPUs reading these distributor registers from a live VM, we would have to stop all other VPUs than the calling VCPU and ask each CPU to unqueue their LR state onto the distributor and add fields to track active state on the distributor side as well. We don't have any users of such functionality yet and there are other inaccuracies of the GIC emulation, so don't provide accurate synchronized access to this state just yet. However, when the time comes, having this function should help. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit cbd333a4bfd0d93bba36d46a0e4e7979228873a6) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 88 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 88599b585362..d08ba28e729a 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -589,6 +589,80 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } +#define LR_CPUID(lr) \ + (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) +#define LR_IRQID(lr) \ + ((lr) & GICH_LR_VIRTUALID) + +static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) +{ + clear_bit(lr_nr, vgic_cpu->lr_used); + vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE; + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; +} + +/** + * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor + * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs + * + * Move any pending IRQs that have already been assigned to LRs back to the + * emulated distributor state so that the complete emulated state can be read + * from the main emulation structures without investigating the LRs. + * + * Note that IRQs in the active state in the LRs get their pending state moved + * to the distributor but the active state stays in the LRs, because we don't + * track the active state on the distributor side. + */ +static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int vcpu_id = vcpu->vcpu_id; + int i, irq, source_cpu; + u32 *lr; + + for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + lr = &vgic_cpu->vgic_lr[i]; + irq = LR_IRQID(*lr); + source_cpu = LR_CPUID(*lr); + + /* + * There are three options for the state bits: + * + * 01: pending + * 10: active + * 11: pending and active + * + * If the LR holds only an active interrupt (not pending) then + * just leave it alone. + */ + if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT) + continue; + + /* + * Reestablish the pending state on the distributor and the + * CPU interface. It may have already been pending, but that + * is fine, then we are only setting a few bits that were + * already set. + */ + vgic_dist_irq_set(vcpu, irq); + if (irq < VGIC_NR_SGIS) + dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu; + *lr &= ~GICH_LR_PENDING_BIT; + + /* + * If there's no state left on the LR (it could still be + * active), then the LR does not hold any useful info and can + * be marked as free for other use. + */ + if (!(*lr & GICH_LR_STATE)) + vgic_retire_lr(i, irq, vgic_cpu); + + /* Finally update the VGIC state. */ + vgic_update_state(vcpu->kvm); + } +} + static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -848,8 +922,6 @@ static void vgic_update_state(struct kvm *kvm) } } -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) #define MK_LR_PEND(src, irq) \ (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) @@ -871,9 +943,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - clear_bit(lr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE; + vgic_retire_lr(lr, irq, vgic_cpu); if (vgic_irq_is_active(vcpu, irq)) vgic_irq_clear_active(vcpu, irq); } @@ -1675,6 +1745,14 @@ static int vgic_attr_regs_access(struct kvm_device *dev, } } + /* + * Move all pending IRQs from the LRs on all VCPUs so the pending + * state can be properly represented in the register state accessible + * through this API. + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) + vgic_unqueue_irqs(tmp_vcpu); + offset -= r->base; r->handle_mmio(vcpu, &mmio, offset); From 8a280d12e7988c920e8805de50c0a7e129032d1b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 21:22:31 +0100 Subject: [PATCH 1204/1368] KVM: arm-vgic: Add GICD_SPENDSGIR and GICD_CPENDSGIR handlers Handle MMIO accesses to the two registers which should support both the case where the VMs want to read/write either of these registers and the case where user space reads/writes these registers to do save/restore of the VGIC state. Note that the added complexity compared to simple set/clear enable registers stems from the bookkeping of source cpu ids. It may be possible to change the underlying data structure to simplify the complexity, but since this is not in the critical path at all, this will do. Also note that reading this register from a live guest will not be accurate compared to on hardware, because some state may be living on the CPU LRs and the only way to give a consistent read would be to force stop all the VCPUs and request them to unqueu the LR state onto the distributor. Until we have an actual user of live reading this register, we can live with the difference. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 90a5355ee7639e92c0492ec592cba5c31bd80687) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 70 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d08ba28e729a..e59aaa4c64e5 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -663,18 +663,80 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } } -static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) +/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ +static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int sgi; + int min_sgi = (offset & ~0x3) * 4; + int max_sgi = min_sgi + 3; + int vcpu_id = vcpu->vcpu_id; + u32 reg = 0; + + /* Copy source SGIs from distributor side */ + for (sgi = min_sgi; sgi <= max_sgi; sgi++) { + int shift = 8 * (sgi - min_sgi); + reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; + } + + mmio_data_write(mmio, ~0, reg); return false; } +static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, bool set) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int sgi; + int min_sgi = (offset & ~0x3) * 4; + int max_sgi = min_sgi + 3; + int vcpu_id = vcpu->vcpu_id; + u32 reg; + bool updated = false; + + reg = mmio_data_read(mmio, ~0); + + /* Clear pending SGIs on the distributor */ + for (sgi = min_sgi; sgi <= max_sgi; sgi++) { + u8 mask = reg >> (8 * (sgi - min_sgi)); + if (set) { + if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) + updated = true; + dist->irq_sgi_sources[vcpu_id][sgi] |= mask; + } else { + if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) + updated = true; + dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; + } + } + + if (updated) + vgic_update_state(vcpu->kvm); + + return updated; +} + static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - return false; + if (!mmio->is_write) + return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); + else + return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); +} + +static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (!mmio->is_write) + return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); + else + return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } /* From a0a11ba68172ffeb62b0a7be8dc4162a0fc2ad6d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:57 -0700 Subject: [PATCH 1205/1368] KVM: arm-vgic: Support CPU interface reg access Implement support for the CPU interface register access driven by MMIO address offsets from the CPU interface base address. Useful for user space to support save/restore of the VGIC state. This commit adds support only for the same logic as the current VGIC support, and no more. For example, the active priority registers are handled as RAZ/WI, just like setting priorities on the emulated distributor. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit fa20f5aea56f271f83e91b9cde00f043a5a14990) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 81 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e59aaa4c64e5..be456ce264d0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -71,6 +71,10 @@ #define VGIC_ADDR_UNDEF (-1) #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) +#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ +#define IMPLEMENTER_ARM 0x43b +#define GICC_ARCH_VERSION_V2 0x2 + /* Physical address of vgic virtual cpu interface */ static phys_addr_t vgic_vcpu_base; @@ -312,7 +316,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, u32 word_offset = offset & 3; switch (offset & ~3) { - case 0: /* CTLR */ + case 0: /* GICD_CTLR */ reg = vcpu->kvm->arch.vgic.enabled; vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); @@ -323,15 +327,15 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, } break; - case 4: /* TYPER */ + case 4: /* GICD_TYPER */ reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; reg |= (VGIC_NR_IRQS >> 5) - 1; vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; - case 8: /* IIDR */ - reg = 0x4B00043B; + case 8: /* GICD_IIDR */ + reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; @@ -1716,9 +1720,70 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - return true; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u32 reg, mask = 0, shift = 0; + bool updated = false; + + switch (offset & ~0x3) { + case GIC_CPU_CTRL: + mask = GICH_VMCR_CTRL_MASK; + shift = GICH_VMCR_CTRL_SHIFT; + break; + case GIC_CPU_PRIMASK: + mask = GICH_VMCR_PRIMASK_MASK; + shift = GICH_VMCR_PRIMASK_SHIFT; + break; + case GIC_CPU_BINPOINT: + mask = GICH_VMCR_BINPOINT_MASK; + shift = GICH_VMCR_BINPOINT_SHIFT; + break; + case GIC_CPU_ALIAS_BINPOINT: + mask = GICH_VMCR_ALIAS_BINPOINT_MASK; + shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT; + break; + } + + if (!mmio->is_write) { + reg = (vgic_cpu->vgic_vmcr & mask) >> shift; + mmio_data_write(mmio, ~0, reg); + } else { + reg = mmio_data_read(mmio, ~0); + reg = (reg << shift) & mask; + if (reg != (vgic_cpu->vgic_vmcr & mask)) + updated = true; + vgic_cpu->vgic_vmcr &= ~mask; + vgic_cpu->vgic_vmcr |= reg; + } + return updated; } +static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); +} + +static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + + if (mmio->is_write) + return false; + + /* GICC_IIDR */ + reg = (PRODUCT_ID_KVM << 20) | + (GICC_ARCH_VERSION_V2 << 16) | + (IMPLEMENTER_ARM << 0); + mmio_data_write(mmio, ~0, reg); + return false; +} + +/* + * CPU Interface Register accesses - these are not accessed by the VM, but by + * user space for saving and restoring VGIC state. + */ static const struct mmio_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, @@ -1728,17 +1793,17 @@ static const struct mmio_range vgic_cpu_ranges[] = { { .base = GIC_CPU_ALIAS_BINPOINT, .len = 4, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_mmio_abpr, }, { .base = GIC_CPU_ACTIVEPRIO, .len = 16, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_CPU_IDENT, .len = 4, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_cpu_mmio_ident, }, }; From c23fe6933f79809bae75b5e8c16c36d1ffc79e41 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 16:12:22 +0000 Subject: [PATCH 1206/1368] arm64: KVM: Add Kconfig option for max VCPUs per-Guest Current max VCPUs per-Guest is set to 4 which is preventing us from creating a Guest (or VM) with 8 VCPUs on Host (e.g. X-Gene Storm SOC) with 8 Host CPUs. The correct value of max VCPUs per-Guest should be same as the max CPUs supported by GICv2 which is 8 but, increasing value of max VCPUs per-Guest can make things slower hence we add Kconfig option to let KVM users select appropriate max VCPUs per-Guest. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Marc Zyngier (cherry picked from commit da7814700a0c408bead58ce4714b7625ffbaade1) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 7 ++++++- arch/arm64/kvm/Kconfig | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5d85a02d1231..0a1d69751562 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -26,7 +26,12 @@ #include #include -#define KVM_MAX_VCPUS 4 +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) +#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 4480ab339a00..8ba85e9ea388 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -36,6 +36,17 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. +config KVM_ARM_MAX_VCPUS + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 + help + Static number of max supported virtual CPUs per VM. + + If you choose a high number, the vcpu structures will be quite + large, so only choose a reasonable number that you expect to + actually use. + config KVM_ARM_VGIC bool depends on KVM_ARM_HOST && OF From af4604a0f9c909b60d043cebb20110804639460e Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 14 Nov 2013 15:20:08 +0000 Subject: [PATCH 1207/1368] arm64: KVM: Support X-Gene guest VCPU on APM X-Gene host This patch allows us to have X-Gene guest VCPU when using KVM arm64 on APM X-Gene host. We add KVM_ARM_TARGET_XGENE_POTENZA for X-Gene Potenza compatible guest VCPU and we return KVM_ARM_TARGET_XGENE_POTENZA in kvm_target_cpu() when running on X-Gene host with Potenza core. [maz: sanitized the commit log] Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Marc Zyngier (cherry picked from commit e28100bd8ed9e37b7cd4578140a1e7f95bd40835) Signed-off-by: Christoffer Dall --- arch/arm64/include/uapi/asm/kvm.h | 3 ++- arch/arm64/kvm/guest.c | 32 +++++++++++++++++----------- arch/arm64/kvm/sys_regs_generic_v8.c | 3 +++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 7c25ca8b02b3..495ab6f84a61 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -55,8 +55,9 @@ struct kvm_regs { #define KVM_ARM_TARGET_AEM_V8 0 #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 +#define KVM_ARM_TARGET_XGENE_POTENZA 3 -#define KVM_ARM_NUM_TARGETS 3 +#define KVM_ARM_NUM_TARGETS 4 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3f0731e53274..08745578d54d 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void) unsigned long implementor = read_cpuid_implementor(); unsigned long part_number = read_cpuid_part_number(); - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; + switch (implementor) { + case ARM_CPU_IMP_ARM: + switch (part_number) { + case ARM_CPU_PART_AEM_V8: + return KVM_ARM_TARGET_AEM_V8; + case ARM_CPU_PART_FOUNDATION: + return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A57: + return KVM_ARM_TARGET_CORTEX_A57; + }; + break; + case ARM_CPU_IMP_APM: + switch (part_number) { + case APM_CPU_PART_POTENZA: + return KVM_ARM_TARGET_XGENE_POTENZA; + }; + break; + }; - switch (part_number) { - case ARM_CPU_PART_AEM_V8: - return KVM_ARM_TARGET_AEM_V8; - case ARM_CPU_PART_FOUNDATION: - return KVM_ARM_TARGET_FOUNDATION_V8; - case ARM_CPU_PART_CORTEX_A57: - /* Currently handled by the generic backend */ - return KVM_ARM_TARGET_CORTEX_A57; - default: - return -EINVAL; - } + return -EINVAL; } int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 4268ab9356b1..8fe6f76b0edc 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void) &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, + &genericv8_target_table); + return 0; } late_initcall(sys_reg_genericv8_init); From dfbd506266a97ad252336ea96e41791fdd0c0828 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 16:12:23 +0000 Subject: [PATCH 1208/1368] arm64: KVM: Force undefined exception for Guest SMC intructions The SMC-based PSCI emulation for Guest is going to be very different from the in-kernel HVC-based PSCI emulation hence for now just inject undefined exception when Guest executes SMC instruction. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: marc Zyngier (cherry picked from commit e5cf9dcdbfd26cd4e1991db08755da900454efeb) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 42a0f1bddfe7..7bc41eab4c64 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; - kvm_inject_undefined(vcpu); return 1; } From 2aad15258f0795432073b14b616a634e452432f0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 29 Dec 2013 12:12:29 -0800 Subject: [PATCH 1209/1368] kvm: make local functions static Running 'make namespacecheck' found lots of functions that should be declared static, since only used in one file. Signed-off-by: Stephen Hemminger Signed-off-by: Marcelo Tosatti (cherry picked from commit 7940876e1330671708186ac3386aa521ffb5c182) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 16 ---------------- virt/kvm/ioapic.c | 2 +- virt/kvm/ioapic.h | 1 - virt/kvm/kvm_main.c | 35 ++++++++++++++++++----------------- 4 files changed, 19 insertions(+), 35 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1dfc17255ee0..95625d5cb3f0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -449,8 +449,6 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, - u64 last_generation); static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) { @@ -523,7 +521,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); -void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); @@ -535,7 +532,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); -void kvm_release_pfn_dirty(pfn_t pfn); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); void kvm_set_pfn_accessed(pfn_t pfn); @@ -562,8 +558,6 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, - gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); @@ -590,8 +584,6 @@ int kvm_get_dirty_log(struct kvm *kvm, int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); -int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status); long kvm_arch_vm_ioctl(struct file *filp, @@ -639,8 +631,6 @@ void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -void kvm_free_physmem(struct kvm *kvm); - void *kvm_kvzalloc(unsigned long size); void kvm_kvfree(const void *addr); @@ -1067,12 +1057,6 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } - -static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) -{ - return true; -} - #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 2d682977ce82..ce9ed99ad7dc 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -520,7 +520,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, return 0; } -void kvm_ioapic_reset(struct kvm_ioapic *ioapic) +static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) { int i; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 615d8c995c3c..90d43e95dcf8 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -91,7 +91,6 @@ void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); -void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e9a43b6455be..2162bd5d17d7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -95,6 +95,12 @@ static int hardware_enable_all(void); static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *new, u64 last_generation); + +static void kvm_release_pfn_dirty(pfn_t pfn); +static void mark_page_dirty_in_slot(struct kvm *kvm, + struct kvm_memory_slot *memslot, gfn_t gfn); bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); @@ -552,7 +558,7 @@ static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, free->npages = 0; } -void kvm_free_physmem(struct kvm *kvm) +static void kvm_free_physmem(struct kvm *kvm) { struct kvm_memslots *slots = kvm->memslots; struct kvm_memory_slot *memslot; @@ -674,8 +680,9 @@ static void sort_memslots(struct kvm_memslots *slots) slots->id_to_index[slots->memslots[i].id] = i; } -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, - u64 last_generation) +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *new, + u64 last_generation) { if (new) { int id = new->id; @@ -923,8 +930,8 @@ int kvm_set_memory_region(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_set_memory_region); -int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) +static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; @@ -1045,7 +1052,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, } unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, - gfn_t gfn) + gfn_t gfn) { return gfn_to_hva_many(slot, gfn, NULL); } @@ -1385,18 +1392,11 @@ void kvm_release_page_dirty(struct page *page) } EXPORT_SYMBOL_GPL(kvm_release_page_dirty); -void kvm_release_pfn_dirty(pfn_t pfn) +static void kvm_release_pfn_dirty(pfn_t pfn) { kvm_set_pfn_dirty(pfn); kvm_release_pfn_clean(pfn); } -EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); - -void kvm_set_page_dirty(struct page *page) -{ - kvm_set_pfn_dirty(page_to_pfn(page)); -} -EXPORT_SYMBOL_GPL(kvm_set_page_dirty); void kvm_set_pfn_dirty(pfn_t pfn) { @@ -1638,8 +1638,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) } EXPORT_SYMBOL_GPL(kvm_clear_guest); -void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, - gfn_t gfn) +static void mark_page_dirty_in_slot(struct kvm *kvm, + struct kvm_memory_slot *memslot, + gfn_t gfn) { if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; @@ -1753,7 +1754,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); * locking does not harm. It may result in trying to yield to same VCPU, fail * and continue with next VCPU and so on. */ -bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) +static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) { bool eligible; From c9dad332f97620c9221a724e20d2bc4db51bb7a0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Dec 2013 16:56:06 +0000 Subject: [PATCH 1210/1368] arm/arm64: KVM: relax the requirements of VMA alignment for THP The THP code in KVM/ARM is a bit restrictive in not allowing a THP to be used if the VMA is not 2MB aligned. Actually, it is not so much the VMA that matters, but the associated memslot: A process can perfectly mmap a region with no particular alignment restriction, and then pass a 2MB aligned address to KVM. In this case, KVM will only use this 2MB aligned region, and will ignore the range between vma->vm_start and memslot->userspace_addr. It can also choose to place this memslot at whatever alignment it wants in the IPA space. In the end, what matters is the relative alignment of the user space and IPA mappings with respect to a 2M page. They absolutely must be the same if you want to use THP. Cc: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 136d737fd20102f1be9b02356590fd55e3a40d0e) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 659db0ed1370..7789857d1470 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { /* - * Pages belonging to VMAs not aligned to the PMD mapping - * granularity cannot be mapped using block descriptors even - * if the pages belong to a THP for the process, because the - * stage-2 block descriptor will cover more than a single THP - * and we loose atomicity for unmapping, updates, and splits - * of the THP or other pages in the stage-2 block range. + * Pages belonging to memslots that don't have the same + * alignment for userspace and IPA cannot be mapped using + * block descriptors even if the pages belong to a THP for + * the process, because the stage-2 block descriptor will + * cover more than a single THP and we loose atomicity for + * unmapping, updates, and splits of the THP or other pages + * in the stage-2 block range. */ - if (vma->vm_start & ~PMD_MASK) + if ((memslot->userspace_addr & ~PMD_MASK) != + ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK)) force_pte = true; } up_read(¤t->mm->mmap_sem); From 3b254a9515ba76de3746f4c28041845ba27b3745 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 7 Jan 2014 13:45:15 +0530 Subject: [PATCH 1211/1368] KVM: ARM: Remove duplicate include trace.h was included twice. Remove duplicate inclusion. Signed-off-by: Sachin Kamat Signed-off-by: Christoffer Dall (cherry picked from commit 61466710de078c697106fa5b70ec7afc9feab520) Signed-off-by: Christoffer Dall --- arch/arm/kvm/handle_exit.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index a92079011a83..0de91fc6de0f 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -26,8 +26,6 @@ #include "trace.h" -#include "trace.h" - typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) From 328c366d43fda9d85c758c455b5f522747fa3d7e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 9 Jan 2014 18:43:16 -0600 Subject: [PATCH 1212/1368] kvm: Provide kvm_vcpu_eligible_for_directed_yield() stub Commit 7940876e1330671708186ac3386aa521ffb5c182 ("kvm: make local functions static") broke KVM PPC builds due to removing (rather than moving) the stub version of kvm_vcpu_eligible_for_directed_yield(). This patch reintroduces it. Signed-off-by: Scott Wood Cc: Stephen Hemminger Cc: Alexander Graf [Move the #ifdef inside the function. - Paolo] Signed-off-by: Paolo Bonzini (cherry picked from commit 4a55dd7273c95b4a19fbcf0ae1bbd1cfd09dfc36) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2162bd5d17d7..2d57b23683a4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1731,7 +1731,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) } EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); -#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT /* * Helper that checks whether a VCPU is eligible for directed yield. * Most eligible candidate to yield is decided by following heuristics: @@ -1756,6 +1755,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); */ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) { +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT bool eligible; eligible = !vcpu->spin_loop.in_spin_loop || @@ -1766,8 +1766,10 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); return eligible; -} +#else + return true; #endif +} void kvm_vcpu_on_spin(struct kvm_vcpu *me) { From 031cbab9d6d2a6beca9e19472a40f5e22ecfbe07 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Jan 2014 16:16:39 +0300 Subject: [PATCH 1213/1368] KVM: return an error code in kvm_vm_ioctl_register_coalesced_mmio() If kvm_io_bus_register_dev() fails then it returns success but it should return an error code. I also did a little cleanup like removing an impossible NULL test. Cc: stable@vger.kernel.org Fixes: 2b3c246a682c ('KVM: Make coalesced mmio use a device per zone') Signed-off-by: Dan Carpenter Signed-off-by: Paolo Bonzini (cherry picked from commit aac5c4226e7136c331ed384c25d5560204da10a0) Signed-off-by: Christoffer Dall --- virt/kvm/coalesced_mmio.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 88b2fe3ddf42..00d86427af0f 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -154,17 +154,13 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, list_add_tail(&dev->list, &kvm->coalesced_zones); mutex_unlock(&kvm->slots_lock); - return ret; + return 0; out_free_dev: mutex_unlock(&kvm->slots_lock); - kfree(dev); - if (dev == NULL) - return -ENXIO; - - return 0; + return ret; } int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, From e9aa8f627c7c504c3880acdfa57849bd3d75dc83 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 6 Jun 2013 15:32:37 +0200 Subject: [PATCH 1214/1368] KVM: async_pf: Provide additional direct page notification By setting a Kconfig option, the architecture can control when guest notifications will be presented by the apf backend. There is the default batch mechanism, working as before, where the vcpu thread should pull in this information. Opposite to this, there is now the direct mechanism, that will push the information to the guest. This way s390 can use an already existing architecture interface. Still the vcpu thread should call check_completion to cleanup leftovers. Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger (cherry picked from commit e0ead41a6dac09f86675ce07a66e4b253a9b7bd5) Signed-off-by: Christoffer Dall --- arch/x86/kvm/mmu.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/Kconfig | 4 ++++ virt/kvm/async_pf.c | 20 ++++++++++++++++++-- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3c1877bbfe6a..f47d2e11108e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3232,7 +3232,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu.direct_map; arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, gfn, &arch); + return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch); } static bool can_do_async_pf(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 95625d5cb3f0..f6801d10e04c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -180,7 +180,7 @@ struct kvm_async_pf { void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, struct kvm_arch_async_pf *arch); int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index fbe1a48bd629..13f2d19793e3 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -22,6 +22,10 @@ config KVM_MMIO config KVM_ASYNC_PF bool +# Toggle to switch between direct notification and batch job +config KVM_ASYNC_PF_SYNC + bool + config HAVE_KVM_MSI bool diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8631d9c14320..00980ab02c45 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -28,6 +28,21 @@ #include "async_pf.h" #include +static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ +#ifdef CONFIG_KVM_ASYNC_PF_SYNC + kvm_arch_async_page_present(vcpu, work); +#endif +} +static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ +#ifndef CONFIG_KVM_ASYNC_PF_SYNC + kvm_arch_async_page_present(vcpu, work); +#endif +} + static struct kmem_cache *async_pf_cache; int kvm_async_pf_init(void) @@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work) down_read(&mm->mmap_sem); get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); + kvm_async_page_present_sync(vcpu, apf); unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); @@ -138,7 +154,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) } } -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, struct kvm_arch_async_pf *arch) { struct kvm_async_pf *work; @@ -159,7 +175,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, work->wakeup_all = false; work->vcpu = vcpu; work->gva = gva; - work->addr = gfn_to_hva(vcpu->kvm, gfn); + work->addr = hva; work->arch = *arch; work->mm = current->mm; atomic_inc(&work->mm->mm_count); From 65d762a03f29316956725f348546794de0160fcc Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Tue, 3 Sep 2013 12:31:16 +0200 Subject: [PATCH 1215/1368] KVM: async_pf: Allow to wait for outstanding work On s390 we are not able to cancel work. Instead we will flush the work and wait for completion. Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger (cherry picked from commit 9f2ceda49c6b8827c795731c204f6c2587886e2c) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 00980ab02c45..889aad022014 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -113,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.queue.next, typeof(*work), queue); list_del(&work->queue); + +#ifdef CONFIG_KVM_ASYNC_PF_SYNC + flush_work(&work->work); +#else if (cancel_work_sync(&work->work)) { mmdrop(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } +#endif } spin_lock(&vcpu->async_pf.lock); From c76f5b54ec88ab03043cdd2ab2c148daa0bccd7c Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 31 Jan 2014 14:32:46 +0100 Subject: [PATCH 1216/1368] KVM: async_pf: Add missing call for async page present Commit KVM: async_pf: Provide additional direct page notification missed the call from kvm_check_async_pf_completion to the new introduced function. Reported-by: Paolo Bonzini Signed-off-by: Dominik Dingel Signed-off-by: Paolo Bonzini (cherry picked from commit 1179ba539541347d5427cde8bcfdaa5ead14f3aa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 889aad022014..10df100c4514 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -151,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->async_pf.lock); kvm_arch_async_page_ready(vcpu, work); - kvm_arch_async_page_present(vcpu, work); + kvm_async_page_present_async(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; From 8cf654c3d7b883a4ffc64e6301b4af5a9f3b6b93 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:12 +0000 Subject: [PATCH 1217/1368] arm64: fix typo: s/SERRROR/SERROR/ Somehow SERROR has acquired an additional 'R' in a couple of headers. This patch removes them before they spread further. As neither instance is in use yet, no other sites need to be fixed up. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit bfb67a5606376bb32cb6f93dc05cda2e8c2038a5) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index c98ef4771c73..0eb398655378 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -231,7 +231,7 @@ #define ESR_EL2_EC_SP_ALIGN (0x26) #define ESR_EL2_EC_FP_EXC32 (0x28) #define ESR_EL2_EC_FP_EXC64 (0x2C) -#define ESR_EL2_EC_SERRROR (0x2F) +#define ESR_EL2_EC_SERROR (0x2F) #define ESR_EL2_EC_BREAKPT (0x30) #define ESR_EL2_EC_BREAKPT_HYP (0x31) #define ESR_EL2_EC_SOFTSTP (0x32) From 4635cac7cb81e457a59eb1cbea32d828f6c25a05 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:51:57 +0100 Subject: [PATCH 1218/1368] asmlinkage, kvm: Make kvm_rebooting visible kvm_rebooting is referenced from assembler code, thus needs to be visible. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-1-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin (cherry picked from commit 52480137d82062bb8d0fb778cb9934667958e367) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2d57b23683a4..88660cbe70a4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); -bool kvm_rebooting; +__visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; From 312c49473d6543ad3888172fc63992e5a326c622 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 2 Feb 2014 13:41:02 -0800 Subject: [PATCH 1219/1368] arm64: KVM: Add VGIC device control for arm64 This fixes the build breakage introduced by c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f and adds support for the device control API and save/restore of the VGIC state for ARMv8. The defines were simply missing from the arm64 header files and uaccess.h must be implicitly imported from somewhere else on arm. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini (cherry picked from commit 2a2f3e269c75edf916de5967079069aeb6a601cb) Signed-off-by: Christoffer Dall --- arch/arm64/include/uapi/asm/kvm.h | 9 +++++++++ virt/kvm/arm/vgic.c | 1 + 2 files changed, 10 insertions(+) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 495ab6f84a61..eaf54a30bedc 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -148,6 +148,15 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +/* Device Control API: ARM VGIC */ +#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 +#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 +#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 +#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 +#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 +#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index be456ce264d0..8ca405cd7c1a 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -24,6 +24,7 @@ #include #include #include +#include #include From 2aaea8b6ef1ba1dd5de6646abf86a4bb2a3a0642 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Wed, 26 Feb 2014 16:14:18 +0100 Subject: [PATCH 1220/1368] KVM: add kvm_arch_vcpu_runnable() test to kvm_vcpu_on_spin() loop Use the arch specific function kvm_arch_vcpu_runnable() to add a further criterium to identify a suitable vcpu to yield to during undirected yield processing. Signed-off-by: Michael Mueller Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 98f4a14676127397c54cab7d6119537ed4d113a2) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 88660cbe70a4..9f5fab0a4fda 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1800,7 +1800,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (vcpu == me) continue; - if (waitqueue_active(&vcpu->wq)) + if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; From 24e14ecb92fafddb1f3c9cce1b4aed3596f86859 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 26 Feb 2014 18:47:36 +0000 Subject: [PATCH 1221/1368] arm/arm64: KVM: detect CPU reset on CPU_PM_EXIT Commit 1fcf7ce0c602 (arm: kvm: implement CPU PM notifier) added support for CPU power-management, using a cpu_notifier to re-init KVM on a CPU that entered CPU idle. The code assumed that a CPU entering idle would actually be powered off, loosing its state entierely, and would then need to be reinitialized. It turns out that this is not always the case, and some HW performs CPU PM without actually killing the core. In this case, we try to reinitialize KVM while it is still live. It ends up badly, as reported by Andre Przywara (using a Calxeda Midway): [ 3.663897] Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x685760 [ 3.663897] unexpected data abort in Hyp mode at: 0xc067d150 [ 3.663897] unexpected HVC/SVC trap in Hyp mode at: 0xc0901dd0 The trick here is to detect if we've been through a full re-init or not by looking at HVBAR (VBAR_EL2 on arm64). This involves implementing the backend for __hyp_get_vectors in the main KVM HYP code (rather small), and checking the return value against the default one when the CPU notifier is called on CPU_PM_EXIT. Reported-by: Andre Przywara Tested-by: Andre Przywara Cc: Lorenzo Pieralisi Cc: Rob Herring Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini (cherry picked from commit b20c9f29c5c25921c6ad18b50d4b61e6d181c3cc) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 3 ++- arch/arm/kvm/interrupts.S | 11 ++++++++++- arch/arm64/kvm/hyp.S | 27 +++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 169c718ddd90..9804406ff37e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -878,7 +878,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { - if (cmd == CPU_PM_EXIT) { + if (cmd == CPU_PM_EXIT && + __hyp_get_vectors() == hyp_default_vectors) { cpu_init_hyp_mode(NULL); return NOTIFY_OK; } diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index ddc15539bad2..0d68d4073068 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -220,6 +220,10 @@ after_vfp_restore: * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are * passed in r0 and r1. * + * A function pointer with a value of 0xffffffff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in + * arch/arm/kernel/hyp_stub.S. + * * The calling convention follows the standard AAPCS: * r0 - r3: caller save * r12: caller save @@ -363,6 +367,11 @@ hyp_hvc: host_switch_to_hyp: pop {r0, r1, r2} + /* Check for __hyp_get_vectors */ + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + beq 1f + push {lr} mrs lr, SPSR push {lr} @@ -378,7 +387,7 @@ THUMB( orr lr, #1) pop {lr} msr SPSR_csxf, lr pop {lr} - eret +1: eret guest_trap: load_vcpu @ Load VCPU pointer to r0 diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 3b47c36e10ff..2c56012cb2d2 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -694,6 +694,24 @@ __hyp_panic_str: .align 2 +/* + * u64 kvm_call_hyp(void *hypfn, ...); + * + * This is not really a variadic function in the classic C-way and care must + * be taken when calling this to ensure parameters are passed in registers + * only, since the stack will change between the caller and the callee. + * + * Call the function with the first argument containing a pointer to the + * function you wish to call in Hyp mode, and subsequent arguments will be + * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the + * function pointer can be passed). The function being called must be mapped + * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are + * passed in r0 and r1. + * + * A function pointer with a value of 0 has a special meaning, and is + * used to implement __hyp_get_vectors in the same way as in + * arch/arm64/kernel/hyp_stub.S. + */ ENTRY(kvm_call_hyp) hvc #0 ret @@ -737,7 +755,12 @@ el1_sync: // Guest trapped into EL2 pop x2, x3 pop x0, x1 - push lr, xzr + /* Check for __hyp_get_vectors */ + cbnz x0, 1f + mrs x0, vbar_el2 + b 2f + +1: push lr, xzr /* * Compute the function address in EL2, and shuffle the parameters. @@ -750,7 +773,7 @@ el1_sync: // Guest trapped into EL2 blr lr pop lr, xzr - eret +2: eret el1_trap: /* From fca920166b7c07b3932b2a8a24916e5002a57900 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Jan 2014 19:13:10 +0000 Subject: [PATCH 1222/1368] arm64: KVM: force cache clean on page fault when caches are off In order for the guest with caches off to observe data written contained in a given page, we need to make sure that page is committed to memory, and not just hanging in the cache (as guest accesses are completely bypassing the cache until it decides to enable it). For this purpose, hook into the coherent_icache_guest_page function and flush the region if the guest SCTLR_EL1 register doesn't show the MMU and caches as being enabled. The function also get renamed to coherent_cache_guest_page. Signed-off-by: Marc Zyngier Reviewed-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit 2d58b733c87689d3d5144e4ac94ea861cc729145) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 4 ++-- arch/arm/kvm/mmu.c | 4 ++-- arch/arm64/include/asm/kvm_mmu.h | 16 ++++++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 2d122adcdb22..6d0f3d3023b7 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -116,8 +116,8 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, - unsigned long size) +static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, + unsigned long size) { /* * If we are going to insert an instruction page and the icache is diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 7789857d1470..fc71a8df0e13 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -715,7 +715,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE); + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, PAGE_S2); @@ -723,7 +723,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_icache_guest_page(kvm, hva, PAGE_SIZE); + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7f1f9408ff66..6eaf69b5e42c 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -106,7 +106,6 @@ static inline bool kvm_is_write_fault(unsigned long esr) return true; } -static inline void kvm_clean_dcache_area(void *addr, size_t size) {} static inline void kvm_clean_pgd(pgd_t *pgd) {} static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} @@ -124,9 +123,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, - unsigned long size) +#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) + +static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { + return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; +} + +static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, + unsigned long size) +{ + if (!vcpu_has_cache_enabled(vcpu)) + kvm_flush_dcache_to_poc((void *)hva, size); + if (!icache_is_aliasing()) { /* PIPT */ flush_icache_range(hva, hva + size); } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ @@ -135,7 +144,6 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } } -#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) #endif /* __ASSEMBLY__ */ From 96d03922c8cb37a48451b02f95e80cd749266bbe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Jan 2014 10:55:17 +0000 Subject: [PATCH 1223/1368] arm64: KVM: allows discrimination of AArch32 sysreg access The current handling of AArch32 trapping is slightly less than perfect, as it is not possible (from a handler point of view) to distinguish it from an AArch64 access, nor to tell a 32bit from a 64bit access either. Fix this by introducing two additional flags: - is_aarch32: true if the access was made in AArch32 mode - is_32bit: true if is_aarch32 == true and a MCR/MRC instruction was used to perform the access (as opposed to MCRR/MRRC). This allows a handler to cover all the possible conditions in which a system register gets trapped. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall (cherry picked from commit 2072d29c46b73e39b3c6c56c6027af77086f45fd) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 6 ++++++ arch/arm64/kvm/sys_regs.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 02e9d09e1d80..bf03e0fadf1f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -437,6 +437,8 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) u32 hsr = kvm_vcpu_get_hsr(vcpu); int Rt2 = (hsr >> 10) & 0xf; + params.is_aarch32 = true; + params.is_32bit = false; params.CRm = (hsr >> 1) & 0xf; params.Rt = (hsr >> 5) & 0xf; params.is_write = ((hsr & 1) == 0); @@ -480,6 +482,8 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); + params.is_aarch32 = true; + params.is_32bit = true; params.CRm = (hsr >> 1) & 0xf; params.Rt = (hsr >> 5) & 0xf; params.is_write = ((hsr & 1) == 0); @@ -549,6 +553,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_hsr(vcpu); + params.is_aarch32 = false; + params.is_32bit = false; params.Op0 = (esr >> 20) & 3; params.Op1 = (esr >> 14) & 0x7; params.CRn = (esr >> 10) & 0xf; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index d50d3722998e..d411e251412c 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -30,6 +30,8 @@ struct sys_reg_params { u8 Op2; u8 Rt; bool is_write; + bool is_aarch32; + bool is_32bit; /* Only valid if is_aarch32 is true */ }; struct sys_reg_desc { From 8d8d8cd59448d8f2f9cba73d6cd5558ebe7d5a43 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Jan 2014 18:00:55 +0000 Subject: [PATCH 1224/1368] arm64: KVM: trap VM system registers until MMU and caches are ON In order to be able to detect the point where the guest enables its MMU and caches, trap all the VM related system registers. Once we see the guest enabling both the MMU and the caches, we can go back to a saner mode of operation, which is to leave these registers in complete control of the guest. Signed-off-by: Marc Zyngier Reviewed-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit 4d44923b17bff283c002ed961373848284aaff1b) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 3 +- arch/arm64/include/asm/kvm_asm.h | 3 +- arch/arm64/kvm/sys_regs.c | 90 +++++++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 0eb398655378..00fbaa75dc7b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -62,6 +62,7 @@ * RW: 64bit by default, can be overriden for 32bit VMs * TAC: Trap ACTLR * TSC: Trap SMC + * TVM: Trap VM ops (until M+C set in SCTLR_EL1) * TSW: Trap cache operations by set/way * TWE: Trap WFE * TWI: Trap WFI @@ -74,7 +75,7 @@ * SWIO: Turn set/way invalidates into set/way clean+invalidate */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ - HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index b25763bc0ec4..9fcd54b1e16d 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -79,7 +79,8 @@ #define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ #define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ #define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ #define NR_CP15_REGS (NR_SYS_REGS * 2) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index bf03e0fadf1f..2097e5ecba42 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -120,6 +120,46 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +/* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned long val; + + BUG_ON(!p->is_write); + + val = *vcpu_reg(vcpu, p->Rt); + if (!p->is_aarch32) { + vcpu_sys_reg(vcpu, r->reg) = val; + } else { + vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL; + if (!p->is_32bit) + vcpu_cp15(vcpu, r->reg + 1) = val >> 32; + } + return true; +} + +/* + * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + */ +static bool access_sctlr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + access_vm_reg(vcpu, p, r); + + if (vcpu_has_cache_enabled(vcpu)) /* MMU+Caches enabled? */ + vcpu->arch.hcr_el2 &= ~HCR_TVM; + + return true; +} + /* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was @@ -185,32 +225,32 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_mpidr, MPIDR_EL1 }, /* SCTLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - NULL, reset_val, SCTLR_EL1, 0x00C50078 }, + access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, /* CPACR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), NULL, reset_val, CPACR_EL1, 0 }, /* TTBR0_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, TTBR0_EL1 }, + access_vm_reg, reset_unknown, TTBR0_EL1 }, /* TTBR1_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), - NULL, reset_unknown, TTBR1_EL1 }, + access_vm_reg, reset_unknown, TTBR1_EL1 }, /* TCR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), - NULL, reset_val, TCR_EL1, 0 }, + access_vm_reg, reset_val, TCR_EL1, 0 }, /* AFSR0_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), - NULL, reset_unknown, AFSR0_EL1 }, + access_vm_reg, reset_unknown, AFSR0_EL1 }, /* AFSR1_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), - NULL, reset_unknown, AFSR1_EL1 }, + access_vm_reg, reset_unknown, AFSR1_EL1 }, /* ESR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), - NULL, reset_unknown, ESR_EL1 }, + access_vm_reg, reset_unknown, ESR_EL1 }, /* FAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, FAR_EL1 }, + access_vm_reg, reset_unknown, FAR_EL1 }, /* PAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000), NULL, reset_unknown, PAR_EL1 }, @@ -224,17 +264,17 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* MAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), - NULL, reset_unknown, MAIR_EL1 }, + access_vm_reg, reset_unknown, MAIR_EL1 }, /* AMAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), - NULL, reset_amair_el1, AMAIR_EL1 }, + access_vm_reg, reset_amair_el1, AMAIR_EL1 }, /* VBAR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), NULL, reset_val, VBAR_EL1, 0 }, /* CONTEXTIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), - NULL, reset_val, CONTEXTIDR_EL1, 0 }, + access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, /* TPIDR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), NULL, reset_unknown, TPIDR_EL1 }, @@ -305,14 +345,32 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_val, FPEXC32_EL2, 0x70 }, }; -/* Trapped cp15 registers */ +/* + * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, + * depending on the way they are accessed (as a 32bit or a 64bit + * register). + */ static const struct sys_reg_desc cp15_regs[] = { + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, + { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, + { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR }, + { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR }, + { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR }, + { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR }, + /* * DC{C,I,CI}SW operations: */ { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw }, { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, @@ -326,6 +384,14 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, + + { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, + { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, + { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, + { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, + { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, + + { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, }; /* Target specific emulation tables */ From 086ffbacac938e5d6208d286ffdf01867753a071 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Feb 2014 14:29:03 +0000 Subject: [PATCH 1225/1368] ARM: KVM: introduce kvm_p*d_addr_end The use of p*d_addr_end with stage-2 translation is slightly dodgy, as the IPA is 40bits, while all the p*d_addr_end helpers are taking an unsigned long (arm64 is fine with that as unligned long is 64bit). The fix is to introduce 64bit clean versions of the same helpers, and use them in the stage-2 page table code. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit a3c8bd31af260a17d626514f636849ee1cd1f63e) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 13 +++++++++++++ arch/arm/kvm/mmu.c | 10 +++++----- arch/arm64/include/asm/kvm_mmu.h | 4 ++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 6d0f3d3023b7..891afe78311a 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -114,6 +114,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= L_PMD_S2_RDWR; } +/* Open coded p*d_addr_end that can deal with 64bit addresses */ +#define kvm_pgd_addr_end(addr, end) \ +({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) + +#define kvm_pud_addr_end(addr,end) (end) + +#define kvm_pmd_addr_end(addr, end) \ +({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) + struct kvm; static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index fc71a8df0e13..c1c08b240f35 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -145,7 +145,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { - addr = pud_addr_end(addr, end); + addr = kvm_pud_addr_end(addr, end); continue; } @@ -155,13 +155,13 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, * move on. */ clear_pud_entry(kvm, pud, addr); - addr = pud_addr_end(addr, end); + addr = kvm_pud_addr_end(addr, end); continue; } pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { - addr = pmd_addr_end(addr, end); + addr = kvm_pmd_addr_end(addr, end); continue; } @@ -176,10 +176,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, */ if (kvm_pmd_huge(*pmd) || page_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); - next = pmd_addr_end(addr, end); + next = kvm_pmd_addr_end(addr, end); if (page_empty(pmd) && !page_empty(pud)) { clear_pud_entry(kvm, pud, addr); - next = pud_addr_end(addr, end); + next = kvm_pud_addr_end(addr, end); } } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6eaf69b5e42c..00c0cc8b8045 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -121,6 +121,10 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= PMD_S2_RDWR; } +#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) +#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) +#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) From 5ac10a803ba46a03fccfbd078b60fa0cd83422c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 15 Jan 2014 12:50:23 +0000 Subject: [PATCH 1226/1368] arm64: KVM: flush VM pages before letting the guest enable caches When the guest runs with caches disabled (like in an early boot sequence, for example), all the writes are diectly going to RAM, bypassing the caches altogether. Once the MMU and caches are enabled, whatever sits in the cache becomes suddenly visible, which isn't what the guest expects. A way to avoid this potential disaster is to invalidate the cache when the MMU is being turned on. For this, we hook into the SCTLR_EL1 trapping code, and scan the stage-2 page tables, invalidating the pages/sections that have already been mapped in. Signed-off-by: Marc Zyngier Reviewed-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit 9d218a1fcf4c6b759d442ef702842fae92e1ea61) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 2 + arch/arm/kvm/mmu.c | 93 ++++++++++++++++++++++++++++++++ arch/arm64/include/asm/kvm_mmu.h | 2 + arch/arm64/kvm/sys_regs.c | 5 +- 4 files changed, 101 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 891afe78311a..eb85b81eea6f 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -155,6 +155,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) +void stage2_flush_vm(struct kvm *kvm); + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index c1c08b240f35..d7e998c6a08f 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -187,6 +187,99 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, } } +static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); + } + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); + } else { + stage2_flush_ptes(kvm, pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); +} + +static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pud)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); + } else { + stage2_flush_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); +} + +static void stage2_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = addr + PAGE_SIZE * memslot->npages; + phys_addr_t next; + pgd_t *pgd; + + pgd = kvm->arch.pgd + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + stage2_flush_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * stage2_flush_vm - Invalidate cache for pages mapped in stage 2 + * @kvm: The struct kvm pointer + * + * Go through the stage 2 page tables and invalidate any cache lines + * backing memory already mapped to the VM. + */ +void stage2_flush_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_flush_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + /** * free_boot_hyp_pgd - free HYP boot page tables * diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 00c0cc8b8045..7d29847a893b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -150,5 +150,7 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) +void stage2_flush_vm(struct kvm *kvm); + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2097e5ecba42..03244582bc55 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -154,8 +155,10 @@ static bool access_sctlr(struct kvm_vcpu *vcpu, { access_vm_reg(vcpu, p, r); - if (vcpu_has_cache_enabled(vcpu)) /* MMU+Caches enabled? */ + if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ vcpu->arch.hcr_el2 &= ~HCR_TVM; + stage2_flush_vm(vcpu->kvm); + } return true; } From ebdea56c8b2acd4fd7a421d1d670e6bf10f37e60 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Jan 2014 19:13:10 +0000 Subject: [PATCH 1227/1368] ARM: KVM: force cache clean on page fault when caches are off In order for a guest with caches disabled to observe data written contained in a given page, we need to make sure that page is committed to memory, and not just hanging in the cache (as guest accesses are completely bypassing the cache until it decides to enable it). For this purpose, hook into the coherent_cache_guest_page function and flush the region if the guest SCTLR register doesn't show the MMU and caches as being enabled. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Reviewed-by: Catalin Marinas (cherry picked from commit 159793001d7d85af17855630c94f0a176848e16b) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index eb85b81eea6f..5c7aa3c1519f 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -129,9 +129,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) struct kvm; +#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) + +static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; +} + static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, unsigned long size) { + if (!vcpu_has_cache_enabled(vcpu)) + kvm_flush_dcache_to_poc((void *)hva, size); + /* * If we are going to insert an instruction page and the icache is * either VIPT or PIPT, there is a potential problem where the host @@ -152,7 +162,6 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, } } -#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) void stage2_flush_vm(struct kvm *kvm); From d773d11dd453f43793082c5206799c818a4a4b55 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Jan 2014 18:56:26 +0000 Subject: [PATCH 1228/1368] ARM: KVM: fix handling of trapped 64bit coprocessor accesses Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling) changed the way we match the 64bit coprocessor access from user space, but didn't update the trap handler for the same set of registers. The effect is that a trapped 64bit access is never matched, leading to a fault being injected into the guest. This went unnoticed as we didn't really trap any 64bit register so far. Placing the CRm field of the access into the CRn field of the matching structure fixes the problem. Also update the debug feature to emit the expected string in case of failing match. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas (cherry picked from commit 46c214dd595381c880794413facadfa07fba5c95) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 4 ++-- arch/arm/kvm/coproc.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 78c0885d6501..126c90d18387 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -443,7 +443,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct coproc_params params; - params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; + params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf; params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0); params.is_64bit = true; @@ -451,7 +451,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf; params.Op2 = 0; params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; - params.CRn = 0; + params.CRm = 0; return emulate_cp15(vcpu, ¶ms); } diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 0461d5c8d3de..c5ad7ff40c96 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p) { /* Look, we even formatted it for you to paste into the table! */ if (p->is_64bit) { - kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n", - p->CRm, p->Op1, p->is_write ? "write" : "read"); + kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n", + p->CRn, p->Op1, p->is_write ? "write" : "read"); } else { kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32," " func_%s },\n", From ba3c65fc739a98f9eae4e8997d8a6e66d130b0cf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Jan 2014 18:56:26 +0000 Subject: [PATCH 1229/1368] ARM: KVM: fix ordering of 64bit coprocessor accesses Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling) added an ordering dependency for the 64bit registers. The order described is: CRn, CRm, Op1, Op2, 64bit-first. Unfortunately, the implementation is: CRn, 64bit-first, CRm... Move the 64bit test to be last in order to match the documentation. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas (cherry picked from commit 547f781378a22b65c2ab468f235c23001b5924da) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index c5ad7ff40c96..8dda870e84f9 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1, return -1; if (i1->CRn != i2->CRn) return i1->CRn - i2->CRn; - if (i1->is_64 != i2->is_64) - return i2->is_64 - i1->is_64; if (i1->CRm != i2->CRm) return i1->CRm - i2->CRm; if (i1->Op1 != i2->Op1) return i1->Op1 - i2->Op1; - return i1->Op2 - i2->Op2; + if (i1->Op2 != i2->Op2) + return i1->Op2 - i2->Op2; + return i2->is_64 - i1->is_64; } From ff860e2b7761196f6379888acd6668ecddb703df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 22 Jan 2014 09:43:38 +0000 Subject: [PATCH 1230/1368] ARM: KVM: introduce per-vcpu HYP Configuration Register So far, KVM/ARM used a fixed HCR configuration per guest, except for the VI/VF/VA bits to control the interrupt in absence of VGIC. With the upcoming need to dynamically reconfigure trapping, it becomes necessary to allow the HCR to be changed on a per-vcpu basis. The fix here is to mimic what KVM/arm64 already does: a per vcpu HCR field, initialized at setup time. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas (cherry picked from commit ac30a11e8e92a03dbe236b285c5cbae0bf563141) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 1 - arch/arm/include/asm/kvm_host.h | 9 ++++++--- arch/arm/kernel/asm-offsets.c | 1 + arch/arm/kvm/guest.c | 1 + arch/arm/kvm/interrupts_head.S | 9 +++------ 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 1d3153c7eb41..a843e74a384c 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -69,7 +69,6 @@ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_TWE | HCR_SWIO | HCR_TIDCP) -#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* System Control Register (SCTLR) bits */ #define SCTLR_TE (1 << 30) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 098f7dd6d564..09af14999c9b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -101,6 +101,12 @@ struct kvm_vcpu_arch { /* The CPU type we expose to the VM */ u32 midr; + /* HYP trapping configuration */ + u32 hcr; + + /* Interrupt related fields */ + u32 irq_lines; /* IRQ and FIQ levels */ + /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -128,9 +134,6 @@ struct kvm_vcpu_arch { /* IO related fields */ struct kvm_decode mmio_decode; - /* Interrupt related fields */ - u32 irq_lines; /* IRQ and FIQ levels */ - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee68cce6b48e..aa2acc1dd986 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -168,6 +168,7 @@ int main(void) DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc)); DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr)); + DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr)); DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 2786eae10c0d..b23a59c1c522 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 6f18695a09cb..a37270d7d4d6 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -597,17 +597,14 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */ .macro configure_hyp_role operation - mrc p15, 4, r2, c1, c1, 0 @ HCR - bic r2, r2, #HCR_VIRT_EXCP_MASK - ldr r3, =HCR_GUEST_MASK .if \operation == vmentry - orr r2, r2, r3 + ldr r2, [vcpu, #VCPU_HCR] ldr r3, [vcpu, #VCPU_IRQ_LINES] orr r2, r2, r3 .else - bic r2, r2, r3 + mov r2, #0 .endif - mcr p15, 4, r2, c1, c1, 0 + mcr p15, 4, r2, c1, c1, 0 @ HCR .endm .macro load_vcpu From 6076945ed741ff33724410d0bb9dfa535b475e79 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 22 Jan 2014 10:20:09 +0000 Subject: [PATCH 1231/1368] ARM: KVM: add world-switch for AMAIR{0,1} HCR.TVM traps (among other things) accesses to AMAIR0 and AMAIR1. In order to minimise the amount of surprise a guest could generate by trying to access these registers with caches off, add them to the list of registers we switch/handle. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas (cherry picked from commit af20814ee927ed888288d98917a766b4179c4fe0) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_asm.h | 4 +++- arch/arm/kvm/coproc.c | 6 ++++++ arch/arm/kvm/interrupts_head.S | 12 ++++++++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 661da11f76f4..53b3c4a50d5c 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -48,7 +48,9 @@ #define c13_TID_URO 26 /* Thread ID, User R/O */ #define c13_TID_PRIV 27 /* Thread ID, Privileged */ #define c14_CNTKCTL 28 /* Timer Control Register (PL1) */ -#define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */ +#define c10_AMAIR0 29 /* Auxilary Memory Attribute Indirection Reg0 */ +#define c10_AMAIR1 30 /* Auxilary Memory Attribute Indirection Reg1 */ +#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */ #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 126c90d18387..a5a54a48d51b 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -328,6 +328,12 @@ static const struct coproc_reg cp15_regs[] = { { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32, NULL, reset_unknown, c10_NMRR}, + /* AMAIR0/AMAIR1: swapped by interrupt.S. */ + { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32, + access_vm_reg, reset_unknown, c10_AMAIR0}, + { CRn(10), CRm( 3), Op1( 0), Op2( 1), is32, + access_vm_reg, reset_unknown, c10_AMAIR1}, + /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index a37270d7d4d6..76af93025574 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -303,13 +303,17 @@ vcpu .req r0 @ vcpu pointer always in r0 mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL mrrc p15, 0, r4, r5, c7 @ PAR + mrc p15, 0, r6, c10, c3, 0 @ AMAIR0 + mrc p15, 0, r7, c10, c3, 1 @ AMAIR1 .if \store_to_vcpu == 0 - push {r2,r4-r5} + push {r2,r4-r7} .else str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] add r12, vcpu, #CP15_OFFSET(c7_PAR) strd r4, r5, [r12] + str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] + str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] .endif .endm @@ -322,15 +326,19 @@ vcpu .req r0 @ vcpu pointer always in r0 */ .macro write_cp15_state read_from_vcpu .if \read_from_vcpu == 0 - pop {r2,r4-r5} + pop {r2,r4-r7} .else ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] add r12, vcpu, #CP15_OFFSET(c7_PAR) ldrd r4, r5, [r12] + ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] + ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] .endif mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL mcrr p15, 0, r4, r5, c7 @ PAR + mcr p15, 0, r6, c10, c3, 0 @ AMAIR0 + mcr p15, 0, r7, c10, c3, 1 @ AMAIR1 .if \read_from_vcpu == 0 pop {r2-r12} From 11178dce0398cd8419c639c8ab81357cb4f2bbf0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Jan 2014 18:00:55 +0000 Subject: [PATCH 1232/1368] ARM: KVM: trap VM system registers until MMU and caches are ON In order to be able to detect the point where the guest enables its MMU and caches, trap all the VM related system registers. Once we see the guest enabling both the MMU and the caches, we can go back to a saner mode of operation, which is to leave these registers in complete control of the guest. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall (cherry picked from commit 8034699a42d68043b495c7e0cfafccd920707ec8) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 3 +- arch/arm/kvm/coproc.c | 74 ++++++++++++++++++++++++++-------- arch/arm/kvm/coproc.h | 4 ++ arch/arm/kvm/coproc_a15.c | 2 +- arch/arm/kvm/coproc_a7.c | 2 +- 5 files changed, 66 insertions(+), 19 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index a843e74a384c..816db0bf2dd8 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -55,6 +55,7 @@ * The bits we set in HCR: * TAC: Trap ACTLR * TSC: Trap SMC + * TVM: Trap VM ops (until MMU and caches are on) * TSW: Trap cache operations by set/way * TWI: Trap WFI * TWE: Trap WFE @@ -68,7 +69,7 @@ */ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_TWE | HCR_SWIO | HCR_TIDCP) + HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP) /* System Control Register (SCTLR) bits */ #define SCTLR_TE (1 << 30) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index a5a54a48d51b..c58a35116f63 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -204,6 +205,44 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +/* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + BUG_ON(!p->is_write); + + vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); + if (p->is_64bit) + vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); + + return true; +} + +/* + * SCTLR accessor. Only called as long as HCR_TVM is set. If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + * + * Used by the cpu-specific code. + */ +bool access_sctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + access_vm_reg(vcpu, p, r); + + if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ + vcpu->arch.hcr &= ~HCR_TVM; + stage2_flush_vm(vcpu->kvm); + } + + return true; +} + /* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was @@ -261,33 +300,36 @@ static const struct coproc_reg cp15_regs[] = { { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, NULL, reset_val, c1_CPACR, 0x00000000 }, - /* TTBR0/TTBR1: swapped by interrupt.S. */ - { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, - { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, - - /* TTBCR: swapped by interrupt.S. */ + /* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */ + { CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 }, + { CRn(2), CRm( 0), Op1( 0), Op2( 0), is32, + access_vm_reg, reset_unknown, c2_TTBR0 }, + { CRn(2), CRm( 0), Op1( 0), Op2( 1), is32, + access_vm_reg, reset_unknown, c2_TTBR1 }, { CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c2_TTBCR, 0x00000000 }, + access_vm_reg, reset_val, c2_TTBCR, 0x00000000 }, + { CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 }, + /* DACR: swapped by interrupt.S. */ { CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c3_DACR }, + access_vm_reg, reset_unknown, c3_DACR }, /* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */ { CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c5_DFSR }, + access_vm_reg, reset_unknown, c5_DFSR }, { CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c5_IFSR }, + access_vm_reg, reset_unknown, c5_IFSR }, { CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c5_ADFSR }, + access_vm_reg, reset_unknown, c5_ADFSR }, { CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c5_AIFSR }, + access_vm_reg, reset_unknown, c5_AIFSR }, /* DFAR/IFAR: swapped by interrupt.S. */ { CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c6_DFAR }, + access_vm_reg, reset_unknown, c6_DFAR }, { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_unknown, c6_IFAR }, + access_vm_reg, reset_unknown, c6_IFAR }, /* PAR swapped by interrupt.S */ { CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, @@ -324,9 +366,9 @@ static const struct coproc_reg cp15_regs[] = { /* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */ { CRn(10), CRm( 2), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c10_PRRR}, + access_vm_reg, reset_unknown, c10_PRRR}, { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c10_NMRR}, + access_vm_reg, reset_unknown, c10_NMRR}, /* AMAIR0/AMAIR1: swapped by interrupt.S. */ { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32, @@ -340,7 +382,7 @@ static const struct coproc_reg cp15_regs[] = { /* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */ { CRn(13), CRm( 0), Op1( 0), Op2( 1), is32, - NULL, reset_val, c13_CID, 0x00000000 }, + access_vm_reg, reset_val, c13_CID, 0x00000000 }, { CRn(13), CRm( 0), Op1( 0), Op2( 2), is32, NULL, reset_unknown, c13_TID_URW }, { CRn(13), CRm( 0), Op1( 0), Op2( 3), is32, diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 8dda870e84f9..1a44bbe39643 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1, #define is64 .is_64 = true #define is32 .is_64 = false +bool access_sctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r); + #endif /* __ARM_KVM_COPROC_LOCAL_H__ */ diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index bb0cac1410cc..e6f4ae48bda9 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -34,7 +34,7 @@ static const struct coproc_reg a15_regs[] = { /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_val, c1_SCTLR, 0x00C50078 }, + access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, }; static struct kvm_coproc_target_table a15_target_table = { diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c index 1df767331588..17fc7cd479d3 100644 --- a/arch/arm/kvm/coproc_a7.c +++ b/arch/arm/kvm/coproc_a7.c @@ -37,7 +37,7 @@ static const struct coproc_reg a7_regs[] = { /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_val, c1_SCTLR, 0x00C50878 }, + access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, }; static struct kvm_coproc_target_table a7_target_table = { From 0ddf276ba9c11c5bd993c7c87226dde6fc5ffa51 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Jan 2014 17:38:33 +0000 Subject: [PATCH 1233/1368] ARM: KVM: fix warning in mmu.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling with THP enabled leads to the following warning: arch/arm/kvm/mmu.c: In function ‘unmap_range’: arch/arm/kvm/mmu.c:177:39: warning: ‘pte’ may be used uninitialized in this function [-Wmaybe-uninitialized] if (kvm_pmd_huge(*pmd) || page_empty(pte)) { ^ Code inspection reveals that these two cases are mutually exclusive, so GCC is a bit overzealous here. Silence it anyway by initializing pte to NULL and testing it later on. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall (cherry picked from commit 56041bf920d2937b7cadcb30cb206f0372eee814) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index d7e998c6a08f..80bb1e6c2c29 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -144,6 +144,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, while (addr < end) { pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); + pte = NULL; if (pud_none(*pud)) { addr = kvm_pud_addr_end(addr, end); continue; @@ -174,7 +175,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, /* * If the pmd entry is to be cleared, walk back up the ladder */ - if (kvm_pmd_huge(*pmd) || page_empty(pte)) { + if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) { clear_pmd_entry(kvm, pmd, addr); next = kvm_pmd_addr_end(addr, end); if (page_empty(pmd) && !page_empty(pud)) { From 500d9e60bb1f98d24a2780542a68d6a1ffc32315 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 6 Mar 2014 03:30:46 +0000 Subject: [PATCH 1234/1368] ARM: KVM: fix non-VGIC compilation Add a stub for kvm_vgic_addr when compiling without CONFIG_KVM_ARM_VGIC. The usefulness of this configurarion is extremely doubtful, but let's fix it anyway (until we decide that we'll always support a VGIC). Reported-by: Michele Paolino Cc: Paolo Bonzini Cc: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini (cherry picked from commit 6cbde8253a8143ada18ec0d1711230747a7c1934) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index be85127bfed3..f27000f55a83 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -171,6 +171,11 @@ static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 add return 0; } +static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) +{ + return -ENXIO; +} + static inline int kvm_vgic_init(struct kvm *kvm) { return 0; From 1fd8c219ef68ef56a4a0b0effa66c38e409697eb Mon Sep 17 00:00:00 2001 From: Radha Mohan Chintakuntla Date: Fri, 7 Mar 2014 08:49:25 +0000 Subject: [PATCH 1235/1368] arm64: Add boot time configuration of Intermediate Physical Address size ARMv8 supports a range of physical address bit sizes. The PARange bits from ID_AA64MMFR0_EL1 register are read during boot-time and the intermediate physical address size bits are written in the translation control registers (TCR_EL1 and VTCR_EL2). There is no change in the VA bits and levels of translation. Signed-off-by: Radha Mohan Chintakuntla Reviewed-by: Will Deacon Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 87366d8cf7b3f6dc34633938aa8766e5a390ce33) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 15 ++++++--------- arch/arm64/include/asm/pgtable-hwdef.h | 5 ++--- arch/arm64/kvm/hyp-init.S | 6 ++++++ arch/arm64/mm/proc.S | 8 +++++++- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 00fbaa75dc7b..3d6903006a8a 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -107,7 +107,6 @@ /* VTCR_EL2 Registers bits */ #define VTCR_EL2_PS_MASK (7 << 16) -#define VTCR_EL2_PS_40B (2 << 16) #define VTCR_EL2_TG0_MASK (1 << 14) #define VTCR_EL2_TG0_4K (0 << 14) #define VTCR_EL2_TG0_64K (1 << 14) @@ -130,10 +129,9 @@ * 64kB pages (TG0 = 1) * 2 level page tables (SL = 1) */ -#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \ - VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ - VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ - VTCR_EL2_T0SZ_40B) +#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ + VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -143,10 +141,9 @@ * 4kB pages (TG0 = 0) * 3 level page tables (SL = 1) */ -#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \ - VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ - VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ - VTCR_EL2_T0SZ_40B) +#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ + VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index b1d2e26c3c88..f7af66b54cb2 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -100,9 +100,9 @@ #define PTE_HYP PTE_USER /* - * 40-bit physical address supported. + * Highest possible physical address supported. */ -#define PHYS_MASK_SHIFT (40) +#define PHYS_MASK_SHIFT (48) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) /* @@ -122,7 +122,6 @@ #define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) #define TCR_TG0_64K (UL(1) << 14) #define TCR_TG1_64K (UL(1) << 30) -#define TCR_IPS_40BIT (UL(2) << 32) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 2b0244d65c16..d968796f4b2d 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -68,6 +68,12 @@ __do_hyp_init: msr tcr_el2, x4 ldr x4, =VTCR_EL2_FLAGS + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in + * VTCR_EL2. + */ + mrs x5, ID_AA64MMFR0_EL1 + bfi x4, x5, #16, #3 msr vtcr_el2, x4 mrs x4, mair_el1 diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 8e0158f198d7..55a40f6dbf78 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -140,8 +140,14 @@ ENTRY(__cpu_setup) * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for * both user and kernel. */ - ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \ + ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \ TCR_ASID16 | TCR_TBI0 | (1 << 31) + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in + * TCR_EL1. + */ + mrs x9, ID_AA64MMFR0_EL1 + bfi x10, x9, #32, #3 #ifdef CONFIG_ARM64_64K_PAGES orr x10, x10, TCR_TG0_64K orr x10, x10, TCR_TG1_64K From 7fe3c71dbc4c635cc5e4b527b748749fc771eb57 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 7 Apr 2014 01:36:08 +0800 Subject: [PATCH 1236/1368] arm, kvm: fix double lock on cpu_add_remove_lock Commit 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration) holds the lock before calling the two functions: kvm_vgic_hyp_init() kvm_timer_hyp_init() and both the two functions are calling register_cpu_notifier() to register cpu notifier, so cause double lock on cpu_add_remove_lock. Considered that both two functions are only called inside kvm_arch_init() with holding cpu_add_remove_lock, so simply use __register_cpu_notifier() to fix the problem. Fixes: 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration) Signed-off-by: Ming Lei Reviewed-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki (cherry picked from commit 553f809e23f00976caea7a1ebdabaa58a6383e7d) Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 2 +- virt/kvm/arm/vgic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 5081e809821f..22fa819a9b6a 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void) host_vtimer_irq = ppi; - err = register_cpu_notifier(&kvm_timer_cpu_nb); + err = __register_cpu_notifier(&kvm_timer_cpu_nb); if (err) { kvm_err("Cannot register timer CPU notifier\n"); goto out_free; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8ca405cd7c1a..47b29834a6b6 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1496,7 +1496,7 @@ int kvm_vgic_hyp_init(void) goto out; } - ret = register_cpu_notifier(&vgic_cpu_nb); + ret = __register_cpu_notifier(&vgic_cpu_nb); if (ret) { kvm_err("Cannot register vgic CPU notifier\n"); goto out_free_irq; From 9b937709747526fc1fcac118d605705f78424d3c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jan 2014 16:09:21 +0100 Subject: [PATCH 1237/1368] KVM: add kvm_is_error_gpa() helper It's quite common (in the s390 guest access code) to test if a guest physical address points to a valid guest memory area or not. So add a simple helper function in common code, since this might be of interest for other architectures as well. Signed-off-by: Heiko Carstens Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger (cherry picked from commit dfeec843fb237d73947e818f961e8d6f0df22b01) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f6801d10e04c..eef946f92c0c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -840,6 +840,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } +static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) +{ + unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + + return kvm_is_error_hva(hva); +} + static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) { set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); From d80e22fea4940bd2ad63648a941620b6fccd59a3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Apr 2014 11:46:04 +0100 Subject: [PATCH 1238/1368] ARM: KVM: disable KVM in Kconfig on big-endian systems KVM currently crashes and burns on big-endian hosts, so don't allow it to be selected until we've got that fixed. Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 4e4468fac4381b92eb333d94256e7fb8350f3de3) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd299b1a8..4be5bb150bdd 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,7 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST - depends on ARM_VIRT_EXT && ARM_LPAE + depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN ---help--- Support hosting virtualized guest machines. You will also need to select one or more of the processor modules below. From 16b80f7131f5d763118811815439c82e4ac45e50 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 28 Mar 2014 14:25:19 +0000 Subject: [PATCH 1239/1368] arm: KVM: fix possible misalignment of PGDs and bounce page The kvm/mmu code shared by arm and arm64 uses kalloc() to allocate a bounce page (if hypervisor init code crosses page boundary) and hypervisor PGDs. The problem is that kalloc() does not guarantee the proper alignment. In the case of the bounce page, the page sized buffer allocated may also cross a page boundary negating the purpose and leading to a hang during kvm initialization. Likewise the PGDs allocated may not meet the minimum alignment requirements of the underlying MMU. This patch uses __get_free_page() to guarantee the worst case alignment needs of the bounce page and PGDs on both arm and arm64. Cc: # 3.10+ Signed-off-by: Mark Salter Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 5d4e08c45a6cf8f1ab3c7fa375007635ac569165) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 80bb1e6c2c29..16f804938b8f 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -42,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) @@ -293,14 +295,14 @@ void free_boot_hyp_pgd(void) if (boot_hyp_pgd) { unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(boot_hyp_pgd); + free_pages((unsigned long)boot_hyp_pgd, pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(init_bounce_page); + free_page((unsigned long)init_bounce_page); init_bounce_page = NULL; mutex_unlock(&kvm_hyp_pgd_mutex); @@ -330,7 +332,7 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); - kfree(hyp_pgd); + free_pages((unsigned long)hyp_pgd, pgd_order); hyp_pgd = NULL; } @@ -1024,7 +1026,7 @@ int kvm_mmu_init(void) size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; phys_addr_t phys_base; - init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + init_bounce_page = (void *)__get_free_page(GFP_KERNEL); if (!init_bounce_page) { kvm_err("Couldn't allocate HYP init bounce page\n"); err = -ENOMEM; @@ -1050,8 +1052,9 @@ int kvm_mmu_init(void) (unsigned long)phys_base); } - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); err = -ENOMEM; From 738a77ae39541495f4d5396ed093bccdebc589f1 Mon Sep 17 00:00:00 2001 From: Haibin Wang Date: Thu, 10 Apr 2014 13:14:32 +0100 Subject: [PATCH 1240/1368] KVM: ARM: vgic: Fix sgi dispatch problem When dispatch SGI(mode == 0), that is the vcpu of VM should send sgi to the cpu which the target_cpus list. So, there must add the "break" to branch of case 0. Cc: # 3.10+ Signed-off-by: Haibin Wang Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 91021a6c8ffdc55804dab5acdfc7de4f278b9ac3) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 47b29834a6b6..7e8b44efb739 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -916,6 +916,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) case 0: if (!target_cpus) return; + break; case 1: target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; From 6e349e33cec7724f9ae0520a1539ab7cf1d24893 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 15:26:01 +0200 Subject: [PATCH 1241/1368] KVM: async_pf: mm->mm_users can not pin apf->mm get_user_pages(mm) is simply wrong if mm->mm_users == 0 and exit_mmap/etc was already called (or is in progress), mm->mm_count can only pin mm->pgd and mm_struct itself. Change kvm_setup_async_pf/async_pf_execute to inc/dec mm->mm_users. kvm_create_vm/kvm_destroy_vm play with ->mm_count too but this case looks fine at first glance, it seems that this ->mm is only used to verify that current->mm == kvm->mm. Signed-off-by: Oleg Nesterov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini (cherry picked from commit 41c22f626254b9dc0376928cae009e73d1b6a49a) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 10df100c4514..06e6401d6ef4 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -101,7 +101,7 @@ static void async_pf_execute(struct work_struct *work) if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - mmdrop(mm); + mmput(mm); kvm_put_kvm(vcpu->kvm); } @@ -118,7 +118,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) flush_work(&work->work); #else if (cancel_work_sync(&work->work)) { - mmdrop(work->mm); + mmput(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } @@ -183,7 +183,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->addr = hva; work->arch = *arch; work->mm = current->mm; - atomic_inc(&work->mm->mm_count); + atomic_inc(&work->mm->mm_users); kvm_get_kvm(work->vcpu->kvm); /* this can't really happen otherwise gfn_to_pfn_async @@ -201,7 +201,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, return 1; retry_sync: kvm_put_kvm(work->vcpu->kvm); - mmdrop(work->mm); + mmput(work->mm); kmem_cache_free(async_pf_cache, work); return 0; } From bc38de2312a2a078f2d5ad596888a131d811b143 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 Apr 2014 00:07:18 +0200 Subject: [PATCH 1242/1368] KVM: arm/arm64: vgic: fix GICD_ICFGR register accesses Since KVM internally represents the ICFGR registers by stuffing two of them into one word, the offset for accessing the internal representation and the one for the MMIO based access are different. So keep the original offset around, but adjust the internal array offset by one bit. Reported-by: Haibin Wang Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit f2ae85b2ab3776b9e4e42e5b6fa090f40d396794) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7e8b44efb739..f9af48c9eb37 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, u32 val; u32 *reg; - offset >>= 1; reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset); + vcpu->vcpu_id, offset >> 1); - if (offset & 2) + if (offset & 4) val = *reg >> 16; else val = *reg & 0xffff; @@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, vgic_reg_access(mmio, &val, offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); if (mmio->is_write) { - if (offset < 4) { + if (offset < 8) { *reg = ~0U; /* Force PPIs/SGIs to 1 */ return false; } val = vgic_cfg_compress(val); - if (offset & 2) { + if (offset & 4) { *reg &= 0xffff; *reg |= val << 16; } else { From 2a1992b43b4e9f453e77e62740181ef665426c93 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 15:25:58 +0200 Subject: [PATCH 1243/1368] KVM: async_pf: kill the unnecessary use_mm/unuse_mm async_pf_execute() async_pf_execute() has no reasons to adopt apf->mm, gup(current, mm) should work just fine even if current has another or NULL ->mm. Recently kvm_async_page_present_sync() was added insedie the "use_mm" section, but it seems that it doesn't need current->mm too. Signed-off-by: Oleg Nesterov Reviewed-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini (cherry picked from commit d72d946d0b649b79709b99b9d5cb7269fff8afaa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 06e6401d6ef4..cda703e512d3 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -80,12 +80,10 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - use_mm(mm); down_read(&mm->mmap_sem); get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); kvm_async_page_present_sync(vcpu, apf); - unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); From 4c158ade8e722cd59ba1a6d8ca46be8427c8ac90 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 28 Apr 2014 17:03:00 +0200 Subject: [PATCH 1244/1368] KVM: async_pf: change async_pf_execute() to use get_user_pages(tsk => NULL) async_pf_execute() passes tsk == current to gup(), this is doesn't hurt but unnecessary and misleading. "tsk" is only used to account the number of faults and current is the random workqueue thread. Signed-off-by: Oleg Nesterov Suggested-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini (cherry picked from commit e9545b9f8aeb63e05818e4b3250057260bc072aa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index cda703e512d3..d6a3d0993d88 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -81,7 +81,7 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); down_read(&mm->mmap_sem); - get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); + get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); kvm_async_page_present_sync(vcpu, apf); From de055bd5eb6f2daf69c81f6f62241f0b977a2ae4 Mon Sep 17 00:00:00 2001 From: Haibin Wang Date: Tue, 29 Apr 2014 14:49:17 +0800 Subject: [PATCH 1245/1368] KVM: ARM: vgic: Fix the overlap check action about setting the GICD & GICC base address. Currently below check in vgic_ioaddr_overlap will always succeed, because the vgic dist base and vgic cpu base are still kept UNDEF after initialization. The code as follows will be return forever. if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) return 0; So, before invoking the vgic_ioaddr_overlap, it needs to set the corresponding base address firstly. Signed-off-by: Haibin Wang Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 30c2117085bc4e05d091cee6eba79f069b41a9cd) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f9af48c9eb37..56ff9bebb577 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, if (addr + size < addr) return -EINVAL; + *ioaddr = addr; ret = vgic_ioaddr_overlap(kvm); if (ret) - return ret; - *ioaddr = addr; + *ioaddr = VGIC_ADDR_UNDEF; + return ret; } From 7d46aca1cf97c1ff323d799fd82ffd49bc294bd1 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:14 +0530 Subject: [PATCH 1246/1368] KVM: Add capability to advertise PSCI v0.2 support User space (i.e. QEMU or KVMTOOL) should be able to check whether KVM ARM/ARM64 supports in-kernel PSCI v0.2 emulation. For this purpose, we define KVM_CAP_ARM_PSCI_0_2 in KVM user space interface header. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 717abd208dff75b343243aa5ed688f62190dda5e) Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 56347ab54cf7..cb7ebcc7a9db 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -669,6 +669,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_EXT_EMUL_CPUID 95 +#define KVM_CAP_ARM_PSCI_0_2 102 #ifdef KVM_CAP_IRQ_ROUTING From 041d3f918f8daf3715fc87f1516c40466f94e6dd Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:15 +0530 Subject: [PATCH 1247/1368] ARM/ARM64: KVM: Add common header for PSCI related defines We need a common place to share PSCI related defines among ARM kernel, ARM64 kernel, KVM ARM/ARM64 PSCI emulation, and user space. We introduce uapi/linux/psci.h for this purpose. This newly added header will be first used by KVM ARM/ARM64 in-kernel PSCI emulation and user space (i.e. QEMU or KVMTOOL). Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Ashwin Chaugule Signed-off-by: Christoffer Dall (cherry picked from commit e546eea74ec66698e29c583639cf6e2a11e46490) Signed-off-by: Christoffer Dall --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/psci.h | 90 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 include/uapi/linux/psci.h diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index bdc6e87ff3eb..405887bec8b3 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -311,6 +311,7 @@ header-y += ppp-ioctl.h header-y += ppp_defs.h header-y += pps.h header-y += prctl.h +header-y += psci.h header-y += ptp_clock.h header-y += ptrace.h header-y += qnx4_fs.h diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h new file mode 100644 index 000000000000..310d83e0a91b --- /dev/null +++ b/include/uapi/linux/psci.h @@ -0,0 +1,90 @@ +/* + * ARM Power State and Coordination Interface (PSCI) header + * + * This header holds common PSCI defines and macros shared + * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space. + * + * Copyright (C) 2014 Linaro Ltd. + * Author: Anup Patel + */ + +#ifndef _UAPI_LINUX_PSCI_H +#define _UAPI_LINUX_PSCI_H + +/* + * PSCI v0.1 interface + * + * The PSCI v0.1 function numbers are implementation defined. + * + * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED, + * INVALID_PARAMS, and DENIED defined below are applicable + * to PSCI v0.1. + */ + +/* PSCI v0.2 interface */ +#define PSCI_0_2_FN_BASE 0x84000000 +#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n)) +#define PSCI_0_2_64BIT 0x40000000 +#define PSCI_0_2_FN64_BASE \ + (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT) +#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n)) + +#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0) +#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1) +#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2) +#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3) +#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4) +#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5) +#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6) +#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7) +#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8) +#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9) + +#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1) +#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3) +#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4) +#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5) +#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) + +/* PSCI v0.2 power state encoding for CPU_SUSPEND function */ +#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +#define PSCI_0_2_POWER_STATE_ID_SHIFT 0 +#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16 +#define PSCI_0_2_POWER_STATE_TYPE_MASK \ + (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT) +#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24 +#define PSCI_0_2_POWER_STATE_AFFL_MASK \ + (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + +/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */ +#define PSCI_0_2_AFFINITY_LEVEL_ON 0 +#define PSCI_0_2_AFFINITY_LEVEL_OFF 1 +#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2 + +/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */ +#define PSCI_0_2_TOS_UP_MIGRATE 0 +#define PSCI_0_2_TOS_UP_NO_MIGRATE 1 +#define PSCI_0_2_TOS_MP 2 + +/* PSCI version decoding (independent of PSCI version) */ +#define PSCI_VERSION_MAJOR_SHIFT 16 +#define PSCI_VERSION_MINOR_MASK \ + ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1) +#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK +#define PSCI_VERSION_MAJOR(ver) \ + (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) +#define PSCI_VERSION_MINOR(ver) \ + ((ver) & PSCI_VERSION_MINOR_MASK) + +/* PSCI return values (inclusive of all PSCI versions) */ +#define PSCI_RET_SUCCESS 0 +#define PSCI_RET_NOT_SUPPORTED -1 +#define PSCI_RET_INVALID_PARAMS -2 +#define PSCI_RET_DENIED -3 +#define PSCI_RET_ALREADY_ON -4 +#define PSCI_RET_ON_PENDING -5 +#define PSCI_RET_INTERNAL_FAILURE -6 +#define PSCI_RET_NOT_PRESENT -7 +#define PSCI_RET_DISABLED -8 + +#endif /* _UAPI_LINUX_PSCI_H */ From 4ccf6abe18fc14863159b2c51a4958214b4c13dc Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:16 +0530 Subject: [PATCH 1248/1368] ARM/ARM64: KVM: Add base for PSCI v0.2 emulation Currently, the in-kernel PSCI emulation provides PSCI v0.1 interface to VCPUs. This patch extends current in-kernel PSCI emulation to provide PSCI v0.2 interface to VCPUs. By default, ARM/ARM64 KVM will always provide PSCI v0.1 interface for keeping the ABI backward-compatible. To select PSCI v0.2 interface for VCPUs, the user space (i.e. QEMU or KVMTOOL) will have to set KVM_ARM_VCPU_PSCI_0_2 feature when doing VCPU init using KVM_ARM_VCPU_INIT ioctl. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 7d0f84aae9e231930985eaff63ac91b61aaa15d6) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/include/asm/kvm_psci.h | 4 ++ arch/arm/include/uapi/asm/kvm.h | 10 +-- arch/arm/kvm/psci.c | 105 +++++++++++++++++++++++------- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_psci.h | 4 ++ arch/arm64/include/uapi/asm/kvm.h | 10 +-- 7 files changed, 105 insertions(+), 32 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 09af14999c9b..193ceaf01bfd 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -36,7 +36,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG -#define KVM_VCPU_MAX_FEATURES 1 +#define KVM_VCPU_MAX_FEATURES 2 #include diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h index 9a83d98bf170..4c0e3e1d1597 100644 --- a/arch/arm/include/asm/kvm_psci.h +++ b/arch/arm/include/asm/kvm_psci.h @@ -18,6 +18,10 @@ #ifndef __ARM_KVM_PSCI_H__ #define __ARM_KVM_PSCI_H__ +#define KVM_ARM_PSCI_0_1 1 +#define KVM_ARM_PSCI_0_2 2 + +int kvm_psci_version(struct kvm_vcpu *vcpu); bool kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index ef0c8785ba16..e6ebdd3471e5 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -20,6 +20,7 @@ #define __ARM_KVM_H__ #include +#include #include #define __KVM_HAVE_GUEST_DEBUG @@ -83,6 +84,7 @@ struct kvm_regs { #define KVM_VGIC_V2_CPU_SIZE 0x2000 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ struct kvm_vcpu_init { __u32 target; @@ -201,9 +203,9 @@ struct kvm_arch_memory_slot { #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) #define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) -#define KVM_PSCI_RET_SUCCESS 0 -#define KVM_PSCI_RET_NI ((unsigned long)-1) -#define KVM_PSCI_RET_INVAL ((unsigned long)-2) -#define KVM_PSCI_RET_DENIED ((unsigned long)-3) +#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS +#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED +#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS +#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 448f60e8d23c..8c42596cdbdf 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -59,7 +59,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * turned off. */ if (!vcpu || !vcpu->arch.pause) - return KVM_PSCI_RET_INVAL; + return PSCI_RET_INVALID_PARAMS; target_pc = *vcpu_reg(source_vcpu, 2); @@ -82,7 +82,82 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); - return KVM_PSCI_RET_SUCCESS; + return PSCI_RET_SUCCESS; +} + +int kvm_psci_version(struct kvm_vcpu *vcpu) +{ + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) + return KVM_ARM_PSCI_0_2; + + return KVM_ARM_PSCI_0_1; +} + +static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +{ + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + /* + * Bits[31:16] = Major Version = 0 + * Bits[15:0] = Minor Version = 2 + */ + val = 2; + break; + case PSCI_0_2_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN_MIGRATE: + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN64_MIGRATE: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + val = PSCI_RET_NOT_SUPPORTED; + break; + default: + return false; + } + + *vcpu_reg(vcpu, 0) = val; + return true; +} + +static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu) +{ + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case KVM_PSCI_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case KVM_PSCI_FN_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case KVM_PSCI_FN_CPU_SUSPEND: + case KVM_PSCI_FN_MIGRATE: + val = PSCI_RET_NOT_SUPPORTED; + break; + default: + return false; + } + + *vcpu_reg(vcpu, 0) = val; + return true; } /** @@ -97,26 +172,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ bool kvm_psci_call(struct kvm_vcpu *vcpu) { - unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); - unsigned long val; - - switch (psci_fn) { - case KVM_PSCI_FN_CPU_OFF: - kvm_psci_vcpu_off(vcpu); - val = KVM_PSCI_RET_SUCCESS; - break; - case KVM_PSCI_FN_CPU_ON: - val = kvm_psci_vcpu_on(vcpu); - break; - case KVM_PSCI_FN_CPU_SUSPEND: - case KVM_PSCI_FN_MIGRATE: - val = KVM_PSCI_RET_NI; - break; - + switch (kvm_psci_version(vcpu)) { + case KVM_ARM_PSCI_0_2: + return kvm_psci_0_2_call(vcpu); + case KVM_ARM_PSCI_0_1: + return kvm_psci_0_1_call(vcpu); default: return false; - } - - *vcpu_reg(vcpu, 0) = val; - return true; + }; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0a1d69751562..92242ce06309 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -39,7 +39,7 @@ #include #include -#define KVM_VCPU_MAX_FEATURES 2 +#define KVM_VCPU_MAX_FEATURES 3 struct kvm_vcpu; int kvm_target_cpu(void); diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h index e301a4816355..e25c658a757b 100644 --- a/arch/arm64/include/asm/kvm_psci.h +++ b/arch/arm64/include/asm/kvm_psci.h @@ -18,6 +18,10 @@ #ifndef __ARM64_KVM_PSCI_H__ #define __ARM64_KVM_PSCI_H__ +#define KVM_ARM_PSCI_0_1 1 +#define KVM_ARM_PSCI_0_2 2 + +int kvm_psci_version(struct kvm_vcpu *vcpu); bool kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index eaf54a30bedc..e6471daf3fb5 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -31,6 +31,7 @@ #define KVM_NR_SPSR 5 #ifndef __ASSEMBLY__ +#include #include #include @@ -77,6 +78,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ +#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ struct kvm_vcpu_init { __u32 target; @@ -186,10 +188,10 @@ struct kvm_arch_memory_slot { #define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) #define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) -#define KVM_PSCI_RET_SUCCESS 0 -#define KVM_PSCI_RET_NI ((unsigned long)-1) -#define KVM_PSCI_RET_INVAL ((unsigned long)-2) -#define KVM_PSCI_RET_DENIED ((unsigned long)-3) +#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS +#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED +#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS +#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED #endif From c6debeb5f9cf9a1c1f8867d991563302797e20c3 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:18 +0530 Subject: [PATCH 1249/1368] ARM/ARM64: KVM: Make kvm_psci_call() return convention more flexible Currently, the kvm_psci_call() returns 'true' or 'false' based on whether the PSCI function call was handled successfully or not. This does not help us emulate system-level PSCI functions where the actual emulation work will be done by user space (QEMU or KVMTOOL). Examples of such system-level PSCI functions are: PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET. This patch updates kvm_psci_call() to return three types of values: 1) > 0 (success) 2) = 0 (success but exit to user space) 3) < 0 (errors) Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e8e7fcc5e2710b31ef842ee799db99c07986c364) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_psci.h | 2 +- arch/arm/kvm/handle_exit.c | 10 +++++++--- arch/arm/kvm/psci.c | 28 ++++++++++++++++------------ arch/arm64/include/asm/kvm_psci.h | 2 +- arch/arm64/kvm/handle_exit.c | 12 ++++++++---- 5 files changed, 33 insertions(+), 21 deletions(-) diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h index 4c0e3e1d1597..6bda945d31fa 100644 --- a/arch/arm/include/asm/kvm_psci.h +++ b/arch/arm/include/asm/kvm_psci.h @@ -22,6 +22,6 @@ #define KVM_ARM_PSCI_0_2 2 int kvm_psci_version(struct kvm_vcpu *vcpu); -bool kvm_psci_call(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 0de91fc6de0f..4c979d466cc1 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -38,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { + int ret; + trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); - if (kvm_psci_call(vcpu)) + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); return 1; + } - kvm_inject_undefined(vcpu); - return 1; + return ret; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 8c42596cdbdf..14e6fa6c8e35 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -93,7 +93,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) return KVM_ARM_PSCI_0_1; } -static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -128,14 +128,14 @@ static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu) val = PSCI_RET_NOT_SUPPORTED; break; default: - return false; + return -EINVAL; } *vcpu_reg(vcpu, 0) = val; - return true; + return 1; } -static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu) +static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -153,11 +153,11 @@ static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_NOT_SUPPORTED; break; default: - return false; + return -EINVAL; } *vcpu_reg(vcpu, 0) = val; - return true; + return 1; } /** @@ -165,12 +165,16 @@ static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu) * @vcpu: Pointer to the VCPU struct * * Handle PSCI calls from guests through traps from HVC instructions. - * The calling convention is similar to SMC calls to the secure world where - * the function number is placed in r0 and this function returns true if the - * function number specified in r0 is withing the PSCI range, and false - * otherwise. + * The calling convention is similar to SMC calls to the secure world + * where the function number is placed in r0. + * + * This function returns: > 0 (success), 0 (success but exit to user + * space), and < 0 (errors) + * + * Errors: + * -EINVAL: Unrecognized PSCI function */ -bool kvm_psci_call(struct kvm_vcpu *vcpu) +int kvm_psci_call(struct kvm_vcpu *vcpu) { switch (kvm_psci_version(vcpu)) { case KVM_ARM_PSCI_0_2: @@ -178,6 +182,6 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu) case KVM_ARM_PSCI_0_1: return kvm_psci_0_1_call(vcpu); default: - return false; + return -EINVAL; }; } diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h index e25c658a757b..bc39e557c56c 100644 --- a/arch/arm64/include/asm/kvm_psci.h +++ b/arch/arm64/include/asm/kvm_psci.h @@ -22,6 +22,6 @@ #define KVM_ARM_PSCI_0_2 2 int kvm_psci_version(struct kvm_vcpu *vcpu); -bool kvm_psci_call(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 7bc41eab4c64..182415e1a952 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; + int ret; - kvm_inject_undefined(vcpu); - return 1; + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); + return 1; + } + + return ret; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) From 7b0aa85882b902333c01f07bef88da07e5c53ebf Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:19 +0530 Subject: [PATCH 1250/1368] KVM: Add KVM_EXIT_SYSTEM_EVENT to user space API header Currently, we don't have an exit reason to notify user space about a system-level event (for e.g. system reset or shutdown) triggered by the VCPU. This patch adds exit reason KVM_EXIT_SYSTEM_EVENT for this purpose. We can also inform user space about the 'type' and architecture specific 'flags' of a system-level event using the kvm_run structure. This newly added KVM_EXIT_SYSTEM_EVENT will be used by KVM ARM/ARM64 in-kernel PSCI v0.2 support to reset/shutdown VMs. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 8ad6b634928a25971dc42dce101808b1491f87ec) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 15 +++++++++++++++ include/uapi/linux/kvm.h | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 3c75a17555a8..48e7888291f5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2636,6 +2636,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an external interrupt has just been delivered into the guest. User space should put the acknowledged interrupt vector into the 'epr' field. + /* KVM_EXIT_SYSTEM_EVENT */ + struct { +#define KVM_SYSTEM_EVENT_SHUTDOWN 1 +#define KVM_SYSTEM_EVENT_RESET 2 + __u32 type; + __u64 flags; + } system_event; + +If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered +a system-level event using some architecture specific mechanism (hypercall +or some special instruction). In case of ARM/ARM64, this is triggered using +HVC instruction based PSCI call from the vcpu. The 'type' field describes +the system-level event type. The 'flags' field describes architecture +specific flags for the system-level event. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cb7ebcc7a9db..0a70874e4a63 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -171,6 +171,7 @@ struct kvm_pit_config { #define KVM_EXIT_WATCHDOG 21 #define KVM_EXIT_S390_TSCH 22 #define KVM_EXIT_EPR 23 +#define KVM_EXIT_SYSTEM_EVENT 24 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -301,6 +302,13 @@ struct kvm_run { struct { __u32 epr; } epr; + /* KVM_EXIT_SYSTEM_EVENT */ + struct { +#define KVM_SYSTEM_EVENT_SHUTDOWN 1 +#define KVM_SYSTEM_EVENT_RESET 2 + __u32 type; + __u64 flags; + } system_event; /* Fix the size of the union. */ char padding[256]; }; From 2834b56a4051c66feb99c5546723fd6afa434d0e Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:20 +0530 Subject: [PATCH 1251/1368] ARM/ARM64: KVM: Emulate PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET The PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET functions are system-level functions hence cannot be fully emulated by in-kernel PSCI emulation code. To tackle this, we forward PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET function calls from vcpu to user space (i.e. QEMU or KVMTOOL) via kvm_run structure using KVM_EXIT_SYSTEM_EVENT exit reasons. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 4b1238269ed340d59ef829fd9c30a39cfb2923a8) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 46 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 14e6fa6c8e35..59362131b79f 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -85,6 +85,23 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_SUCCESS; } +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +{ + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = type; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + +static void kvm_psci_system_off(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); +} + +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); +} + int kvm_psci_version(struct kvm_vcpu *vcpu) { if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) @@ -95,6 +112,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { + int ret = 1; unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -114,13 +132,35 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; + case PSCI_0_2_FN_SYSTEM_OFF: + kvm_psci_system_off(vcpu); + /* + * We should'nt be going back to guest VCPU after + * receiving SYSTEM_OFF request. + * + * If user space accidently/deliberately resumes + * guest VCPU after SYSTEM_OFF request then guest + * VCPU should see internal failure from PSCI return + * value. To achieve this, we preload r0 (or x0) with + * PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + case PSCI_0_2_FN_SYSTEM_RESET: + kvm_psci_system_reset(vcpu); + /* + * Same reason as SYSTEM_OFF for preloading r0 (or x0) + * with PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; case PSCI_0_2_FN_CPU_SUSPEND: case PSCI_0_2_FN_AFFINITY_INFO: case PSCI_0_2_FN_MIGRATE: case PSCI_0_2_FN_MIGRATE_INFO_TYPE: case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: - case PSCI_0_2_FN_SYSTEM_OFF: - case PSCI_0_2_FN_SYSTEM_RESET: case PSCI_0_2_FN64_CPU_SUSPEND: case PSCI_0_2_FN64_AFFINITY_INFO: case PSCI_0_2_FN64_MIGRATE: @@ -132,7 +172,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) } *vcpu_reg(vcpu, 0) = val; - return 1; + return ret; } static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) From 4c7726b875f0ae0aa10b553d0ddf1662ca1c8536 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:21 +0530 Subject: [PATCH 1252/1368] ARM/ARM64: KVM: Emulate PSCI v0.2 AFFINITY_INFO This patch adds emulation of PSCI v0.2 AFFINITY_INFO function call for KVM ARM/ARM64. This is a VCPU-level function call which will be used to determine current state of given affinity level. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit e6bc13c8a70eabc6a39098ccedf6129c734e3db3) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 52 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 59362131b79f..3b6a0cf25c7d 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -27,6 +27,16 @@ * as described in ARM document number ARM DEN 0022A. */ +#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static unsigned long psci_affinity_mask(unsigned long affinity_level) +{ + if (affinity_level <= 3) + return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + + return 0; +} + static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.pause = true; @@ -85,6 +95,42 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_SUCCESS; } +static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) +{ + int i; + unsigned long mpidr; + unsigned long target_affinity; + unsigned long target_affinity_mask; + unsigned long lowest_affinity_level; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + target_affinity = *vcpu_reg(vcpu, 1); + lowest_affinity_level = *vcpu_reg(vcpu, 2); + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); + if (!target_affinity_mask) + return PSCI_RET_INVALID_PARAMS; + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; + + /* + * If one or more VCPU matching target affinity are running + * then ON else OFF + */ + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if (((mpidr & target_affinity_mask) == target_affinity) && + !tmp->arch.pause) { + return PSCI_0_2_AFFINITY_LEVEL_ON; + } + } + + return PSCI_0_2_AFFINITY_LEVEL_OFF; +} + static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) { memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); @@ -132,6 +178,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + val = kvm_psci_vcpu_affinity_info(vcpu); + break; case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* @@ -157,12 +207,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) ret = 0; break; case PSCI_0_2_FN_CPU_SUSPEND: - case PSCI_0_2_FN_AFFINITY_INFO: case PSCI_0_2_FN_MIGRATE: case PSCI_0_2_FN_MIGRATE_INFO_TYPE: case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: case PSCI_0_2_FN64_CPU_SUSPEND: - case PSCI_0_2_FN64_AFFINITY_INFO: case PSCI_0_2_FN64_MIGRATE: case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: val = PSCI_RET_NOT_SUPPORTED; From f11d09d9c9ce2dcca77dd41fcb16c3811cc513d5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:22 +0530 Subject: [PATCH 1253/1368] ARM/ARM64: KVM: Emulate PSCI v0.2 MIGRATE_INFO_TYPE and related functions This patch adds emulation of PSCI v0.2 MIGRATE, MIGRATE_INFO_TYPE, and MIGRATE_INFO_UP_CPU function calls for KVM ARM/ARM64. KVM ARM/ARM64 being a hypervisor (and not a Trusted OS), we cannot provide this functions hence we emulate these functions in following way: 1. MIGRATE - Returns "Not Supported" 2. MIGRATE_INFO_TYPE - Return 2 i.e. Trusted OS is not present 3. MIGRATE_INFO_UP_CPU - Returns "Not Supported" Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit bab0b43012a8ad64877fa46134370a7f5c6ce861) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 3b6a0cf25c7d..cce901a510fa 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -182,6 +182,22 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_AFFINITY_INFO: val = kvm_psci_vcpu_affinity_info(vcpu); break; + case PSCI_0_2_FN_MIGRATE: + case PSCI_0_2_FN64_MIGRATE: + val = PSCI_RET_NOT_SUPPORTED; + break; + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + /* + * Trusted OS is MP hence does not require migration + * or + * Trusted OS is not present + */ + val = PSCI_0_2_TOS_MP; + break; + case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + val = PSCI_RET_NOT_SUPPORTED; + break; case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* @@ -207,12 +223,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) ret = 0; break; case PSCI_0_2_FN_CPU_SUSPEND: - case PSCI_0_2_FN_MIGRATE: - case PSCI_0_2_FN_MIGRATE_INFO_TYPE: - case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: case PSCI_0_2_FN64_CPU_SUSPEND: - case PSCI_0_2_FN64_MIGRATE: - case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: val = PSCI_RET_NOT_SUPPORTED; break; default: From 69590f71e0ef474e0d44514484308c2eb826306d Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:23 +0530 Subject: [PATCH 1254/1368] ARM/ARM64: KVM: Fix CPU_ON emulation for PSCI v0.2 As-per PSCI v0.2, the source CPU provides physical address of "entry point" and "context id" for starting a target CPU. Also, if target CPU is already running then we should return ALREADY_ON. Current emulation of CPU_ON function does not consider physical address of "context id" and returns INVALID_PARAMETERS if target CPU is already running. This patch updates kvm_psci_vcpu_on() such that it works for both PSCI v0.1 and PSCI v0.2. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit aa8aeefe5e567637bbec7d7a3031cc057e3af303) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index cce901a510fa..1067579c7336 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -48,6 +48,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long context_id; unsigned long mpidr; phys_addr_t target_pc; int i; @@ -68,10 +69,17 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * Make sure the caller requested a valid CPU and that the CPU is * turned off. */ - if (!vcpu || !vcpu->arch.pause) + if (!vcpu) return PSCI_RET_INVALID_PARAMS; + if (!vcpu->arch.pause) { + if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) + return PSCI_RET_ALREADY_ON; + else + return PSCI_RET_INVALID_PARAMS; + } target_pc = *vcpu_reg(source_vcpu, 2); + context_id = *vcpu_reg(source_vcpu, 3); kvm_reset_vcpu(vcpu); @@ -86,6 +94,11 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; + /* + * NOTE: We always update r0 (or x0) because for PSCI v0.1 + * the general puspose registers are undefined upon CPU_ON. + */ + *vcpu_reg(vcpu, 0) = context_id; vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ From 6824eda31449e4585085a49739d60e640b1c7583 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:24 +0530 Subject: [PATCH 1255/1368] ARM/ARM64: KVM: Emulate PSCI v0.2 CPU_SUSPEND This patch adds emulation of PSCI v0.2 CPU_SUSPEND function call for KVM ARM/ARM64. This is a CPU-level function call which can suspend current CPU or current CPU cluster. We don't have VCPU clusters in KVM so we only suspend the current VCPU. The CPU_SUSPEND emulation is not tested much because currently there is no CPUIDLE driver in Linux kernel that uses PSCI CPU_SUSPEND. The PSCI CPU_SUSPEND implementation in ARM64 kernel was tested using a Simple CPUIDLE driver which is not published due to unstable DT-bindings for PSCI. (For more info, http://lwn.net/Articles/574950/) For simplicity, we implement CPU_SUSPEND emulation similar to WFI (Wait-for-interrupt) emulation and we also treat power-down request to be same as stand-by request. This is consistent with section 5.4.1 and section 5.4.2 of PSCI v0.2 specification. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit b376d02b53b87f8684f91f13ba4ee43331850fcd) Signed-off-by: Christoffer Dall --- arch/arm/kvm/psci.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 1067579c7336..09cf37737ee2 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -37,6 +37,26 @@ static unsigned long psci_affinity_mask(unsigned long affinity_level) return 0; } +static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + /* + * NOTE: For simplicity, we make VCPU suspend emulation to be + * same-as WFI (Wait-for-interrupt) emulation. + * + * This means for KVM the wakeup events are interrupts and + * this is consistent with intended use of StateID as described + * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). + * + * Further, we also treat power-down request to be same as + * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 + * specification (ARM DEN 0022A). This means all suspend states + * for KVM will preserve the register state. + */ + kvm_vcpu_block(vcpu); + + return PSCI_RET_SUCCESS; +} + static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.pause = true; @@ -183,6 +203,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) */ val = 2; break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + val = kvm_psci_vcpu_suspend(vcpu); + break; case PSCI_0_2_FN_CPU_OFF: kvm_psci_vcpu_off(vcpu); val = PSCI_RET_SUCCESS; @@ -235,10 +259,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) val = PSCI_RET_INTERNAL_FAILURE; ret = 0; break; - case PSCI_0_2_FN_CPU_SUSPEND: - case PSCI_0_2_FN64_CPU_SUSPEND: - val = PSCI_RET_NOT_SUPPORTED; - break; default: return -EINVAL; } From f4aa5773a7ac1dad4bc6fc726b178547289e6045 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 29 Apr 2014 11:24:25 +0530 Subject: [PATCH 1256/1368] ARM/ARM64: KVM: Advertise KVM_CAP_ARM_PSCI_0_2 to user space We have PSCI v0.2 emulation available in KVM ARM/ARM64 hence advertise this to user space (i.e. QEMU or KVMTOOL) via KVM_CHECK_EXTENSION ioctl. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 4447a208f7fc2e2dff8c6a8df2a1fd6dd72fb3e2) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9804406ff37e..354dc42b6395 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -197,6 +197,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: r = 1; break; case KVM_CAP_COALESCED_MMIO: From 5ab6e0ee8a2375651052456289f6d397f78f737e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 16 Jan 2014 13:44:20 +0100 Subject: [PATCH 1257/1368] kvm/irqchip: Speed up KVM_SET_GSI_ROUTING When starting lots of dataplane devices the bootup takes very long on Christian's s390 with irqfd patches. With larger setups he is even able to trigger some timeouts in some components. Turns out that the KVM_SET_GSI_ROUTING ioctl takes very long (strace claims up to 0.1 sec) when having multiple CPUs. This is caused by the synchronize_rcu and the HZ=100 of s390. By changing the code to use a private srcu we can speed things up. This patch reduces the boot time till mounting root from 8 to 2 seconds on my s390 guest with 100 disks. Uses of hlist_for_each_entry_rcu, hlist_add_head_rcu, hlist_del_init_rcu are fine because they do not have lockdep checks (hlist_for_each_entry_rcu uses rcu_dereference_raw rather than rcu_dereference, and write-sides do not do rcu lockdep at all). Note that we're hardly relying on the "sleepable" part of srcu. We just want SRCU's faster detection of grace periods. Testing was done by Andrew Theurer using netperf tests STREAM, MAERTS and RR. The difference between results "before" and "after" the patch has mean -0.2% and standard deviation 0.6%. Using a paired t-test on the data points says that there is a 2.5% probability that the patch is the cause of the performance difference (rather than a random fluctuation). (Restricting the t-test to RR, which is the most likely to be affected, changes the numbers to respectively -0.3% mean, 0.7% stdev, and 8% probability that the numbers actually say something about the patch. The probability increases mostly because there are fewer data points). Cc: Marcelo Tosatti Cc: Michael S. Tsirkin Tested-by: Christian Borntraeger # s390 Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 719d93cd5f5c5c8775b7a38192069e8e1d1ac46e) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 + virt/kvm/eventfd.c | 25 +++++++++++++++---------- virt/kvm/irq_comm.c | 17 +++++++++-------- virt/kvm/irqchip.c | 31 ++++++++++++++++--------------- virt/kvm/kvm_main.c | 16 ++++++++++------ 5 files changed, 51 insertions(+), 39 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eef946f92c0c..a3c83491a791 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -346,6 +346,7 @@ struct kvm { struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots *memslots; struct srcu_struct srcu; + struct srcu_struct irq_srcu; #ifdef CONFIG_KVM_APIC_ARCHITECTURE u32 bsp_vcpu_id; #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 64ee720b75c7..b51c19ffd8fd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "iodev.h" @@ -118,19 +119,22 @@ static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { struct _irqfd_resampler *resampler; + struct kvm *kvm; struct _irqfd *irqfd; + int idx; resampler = container_of(kian, struct _irqfd_resampler, notifier); + kvm = resampler->kvm; - kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); - rcu_read_lock(); + idx = srcu_read_lock(&kvm->irq_srcu); list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) eventfd_signal(irqfd->resamplefd, 1); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } static void @@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) mutex_lock(&kvm->irqfds.resampler_lock); list_del_rcu(&irqfd->resampler_link); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { list_del(&resampler->link); @@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) unsigned long flags = (unsigned long)key; struct kvm_kernel_irq_routing_entry *irq; struct kvm *kvm = irqfd->kvm; + int idx; if (flags & POLLIN) { - rcu_read_lock(); - irq = rcu_dereference(irqfd->irq_entry); + idx = srcu_read_lock(&kvm->irq_srcu); + irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); /* An event has been signaled, inject an interrupt */ if (irq) kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); else schedule_work(&irqfd->inject); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } if (flags & POLLHUP) { @@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); mutex_unlock(&kvm->irqfds.resampler_lock); } @@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_rcu done by caller + * It is paired with synchronize_srcu done by caller * of that function. */ rcu_assign_pointer(irqfd->irq_entry, NULL); @@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm) /* * Change irq_routing and irqfd. - * Caller must invoke synchronize_rcu afterwards. + * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ void kvm_irq_routing_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index e2e6b4473a96..ced4a542a031 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; + int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * Since there's no easy way to do this, we only support injecting MSI * which is limited to 1:1 GSI mapping. */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); if (irq < irq_rt->nr_rt_entries) hlist_for_each_entry(e, &irq_rt->map[irq], link) { if (likely(e->type == KVM_IRQ_ROUTING_MSI)) @@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) ret = -EWOULDBLOCK; break; } - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, mutex_lock(&kvm->irq_lock); hlist_del_rcu(&kimn->link); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); } void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask) { struct kvm_irq_mask_notifier *kimn; - int gsi; + int idx, gsi; - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 20dc9e4a8f6c..b43c275775cd 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include "irq.h" @@ -33,19 +34,19 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - int gsi; + int gsi, idx; - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) { - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return true; } - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return false; } @@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - int gsi; + int gsi, idx; trace_kvm_ack_irq(irqchip, pin); - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); #ifdef __KVM_HAVE_IOAPIC kvm_vcpu_request_scan_ioapic(kvm); #endif @@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0; + int ret = -1, i = 0, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); if (irq < irq_rt->nr_rt_entries) hlist_for_each_entry(e, &irq_rt->map[irq], link) irq_set[i++] = *e; - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { int r; @@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm, kvm_irq_routing_update(kvm, new); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu_expedited(&kvm->irq_srcu); new = old; r = 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9f5fab0a4fda..17b107fa7a2b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -456,11 +456,11 @@ static struct kvm *kvm_create_vm(unsigned long type) r = kvm_arch_init_vm(kvm, type); if (r) - goto out_err_nodisable; + goto out_err_no_disable; r = hardware_enable_all(); if (r) - goto out_err_nodisable; + goto out_err_no_disable; #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); @@ -472,10 +472,12 @@ static struct kvm *kvm_create_vm(unsigned long type) r = -ENOMEM; kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) - goto out_err_nosrcu; + goto out_err_no_srcu; kvm_init_memslots_id(kvm); if (init_srcu_struct(&kvm->srcu)) - goto out_err_nosrcu; + goto out_err_no_srcu; + if (init_srcu_struct(&kvm->irq_srcu)) + goto out_err_no_irq_srcu; for (i = 0; i < KVM_NR_BUSES; i++) { kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); @@ -504,10 +506,12 @@ static struct kvm *kvm_create_vm(unsigned long type) return kvm; out_err: + cleanup_srcu_struct(&kvm->irq_srcu); +out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); -out_err_nosrcu: +out_err_no_srcu: hardware_disable_all(); -out_err_nodisable: +out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); kfree(kvm->memslots); From ccc916d9aac9d95baab922ab859f0ef37bb22558 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 May 2014 16:24:10 +0100 Subject: [PATCH 1258/1368] arm64: barriers: make use of barrier options with explicit barriers When calling our low-level barrier macros directly, we can often suffice with more relaxed behaviour than the default "all accesses, full system" option. This patch updates the users of dsb() to specify the option which they actually require. Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 98f7685ee69f871ba991089cb9685f0da07517ea) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 03244582bc55..c59a1bdab5eb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -71,13 +71,13 @@ static u32 get_ccsidr(u32 csselr) static void do_dc_cisw(u32 val) { asm volatile("dc cisw, %x0" : : "r" (val)); - dsb(); + dsb(ish); } static void do_dc_csw(u32 val) { asm volatile("dc csw, %x0" : : "r" (val)); - dsb(); + dsb(ish); } /* See note at ARM ARM B1.14.4 */ From 17acc52b78027a728c419a0d7b4d69d30b94294a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 May 2014 16:24:14 +0100 Subject: [PATCH 1259/1368] arm64: kvm: use inner-shareable barriers for inner-shareable maintenance In order to ensure completion of inner-shareable maintenance instructions (cache and TLB) on AArch64, we can use the -ish suffix to the dsb instruction. This patch relaxes our dsb sy instructions to dsb ish where possible. Acked-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit ee9e101c11478680d579bd20bb38a4d3e2514fe3) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 2c56012cb2d2..b0d1512acf08 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -630,9 +630,15 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) * whole of Stage-1. Weep... */ tlbi ipas2e1is, x1 - dsb sy + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb ish tlbi vmalle1is - dsb sy + dsb ish isb msr vttbr_el2, xzr @@ -643,7 +649,7 @@ ENTRY(__kvm_flush_vm_context) dsb ishst tlbi alle1is ic ialluis - dsb sy + dsb ish ret ENDPROC(__kvm_flush_vm_context) From 502f5822c7ac7b11040d4abb7ca1d09d56de251c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 May 2014 18:06:03 +0100 Subject: [PATCH 1260/1368] arm64: KVM: Enable minimalistic support for Cortex-A53 In order to allow KVM to run on Cortex-A53 implementations, wire the minimal support required. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 1252b3313642c3d0dff5b951b625468bf0dcd059) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/cputype.h | 1 + arch/arm64/include/uapi/asm/kvm.h | 3 ++- arch/arm64/kvm/guest.c | 2 ++ arch/arm64/kvm/sys_regs_generic_v8.c | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5fe138e0b828..343f7f737970 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,6 +41,7 @@ #define ARM_CPU_PART_AEM_V8 0xD0F0 #define ARM_CPU_PART_FOUNDATION 0xD000 +#define ARM_CPU_PART_CORTEX_A53 0xD030 #define ARM_CPU_PART_CORTEX_A57 0xD070 #define APM_CPU_PART_POTENZA 0x0000 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e6471daf3fb5..e633ff8cdec8 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -57,8 +57,9 @@ struct kvm_regs { #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 #define KVM_ARM_TARGET_XGENE_POTENZA 3 +#define KVM_ARM_TARGET_CORTEX_A53 4 -#define KVM_ARM_NUM_TARGETS 4 +#define KVM_ARM_NUM_TARGETS 5 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 08745578d54d..60b5c31f3c10 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -214,6 +214,8 @@ int __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_AEM_V8; case ARM_CPU_PART_FOUNDATION: return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A53: + return KVM_ARM_TARGET_CORTEX_A53; case ARM_CPU_PART_CORTEX_A57: return KVM_ARM_TARGET_CORTEX_A57; }; diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 8fe6f76b0edc..475fd2929310 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void) &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53, + &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, From c3bd5cf97635e0d04a2b0a7025d4a7512a6e6345 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 3 Jun 2014 13:44:17 +0200 Subject: [PATCH 1261/1368] KVM: add missing cleanup_srcu_struct Reported-by: hrg Signed-off-by: Paolo Bonzini (cherry picked from commit 820b3fcdeb80d30410f4427d2cbf9161c35fdeef) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 17b107fa7a2b..3db56912caed 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -607,6 +607,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); kvm_free_physmem(kvm); + cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); hardware_disable_all(); From 892f5c6841880062cbe947d08131c2c55613ae94 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2014 16:05:13 +0200 Subject: [PATCH 1262/1368] KVM: prepare for KVM_(S|G)ET_MP_STATE on other architectures Highlight the aspects of the ioctls that are actually specific to x86 and ia64. As defined restrictions (irqchip) and mp states may not apply to other architectures, these parts are flagged to belong to x86 and ia64. In preparation for the use of KVM_(S|G)ET_MP_STATE by s390. Fix a spelling error (KVM_SET_MP_STATE vs. KVM_SET_MPSTATE) on the way. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger (cherry picked from commit 0b4820d6d8b6448bc9f7fac1bb1a801a53b425e1) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 21 ++++++++++++--------- include/uapi/linux/kvm.h | 3 ++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 48e7888291f5..8d135672b69a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -969,18 +969,20 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) - which has not yet received an INIT signal + which has not yet received an INIT signal [x86, + ia64] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is - now ready for a SIPI + now ready for a SIPI [x86, ia64] - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and - is waiting for an interrupt + is waiting for an interrupt [x86, ia64] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector - accessible via KVM_GET_VCPU_EVENTS) + accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. +On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +in-kernel irqchip, the multiprocessing state must be maintained by userspace on +these architectures. 4.39 KVM_SET_MP_STATE @@ -994,8 +996,9 @@ Returns: 0 on success; -1 on error Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for arguments. -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. +On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +in-kernel irqchip, the multiprocessing state must be maintained by userspace on +these architectures. 4.40 KVM_SET_IDENTITY_MAP_ADDR diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0a70874e4a63..521e4c0a08ac 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -399,8 +399,9 @@ struct kvm_vapic_addr { __u64 vapic_addr; }; -/* for KVM_SET_MPSTATE */ +/* for KVM_SET_MP_STATE */ +/* not all states are valid on all architectures */ #define KVM_MP_STATE_RUNNABLE 0 #define KVM_MP_STATE_UNINITIALIZED 1 #define KVM_MP_STATE_INIT_RECEIVED 2 From bdcfc46a6f2760588d3e59e2ff7abf235a2a513a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 9 May 2014 23:31:31 +0200 Subject: [PATCH 1263/1368] arm/arm64: KVM: Fix and refactor unmap_range unmap_range() was utterly broken, to quote Marc, and broke in all sorts of situations. It was also quite complicated to follow and didn't follow the usual scheme of having a separate iterating function for each level of page tables. Address this by refactoring the code and introduce a pgd_clear() function. Reviewed-by: Jungseok Lee Reviewed-by: Mario Smarduch Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 4f853a714bf16338ff5261128e6c7ae2569e9505) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 12 +++ arch/arm/kvm/mmu.c | 165 ++++++++++++++++--------------- arch/arm64/include/asm/kvm_mmu.h | 15 +++ 3 files changed, 115 insertions(+), 77 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5c7aa3c1519f..5cc0b0f5f72f 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -127,6 +127,18 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16f804938b8f..23360610aeac 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -90,104 +90,115 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static bool page_empty(void *ptr) +static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - struct page *ptr_page = virt_to_page(ptr); - return page_count(ptr_page) == 1; + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0); + pgd_clear(pgd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); } static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - if (pud_huge(*pud)) { - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } else { - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); - } + pmd_t *pmd_table = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pmd_free(NULL, pmd_table); put_page(virt_to_page(pud)); } static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { - if (kvm_pmd_huge(*pmd)) { - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } else { - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); - } + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(kvm_pmd_huge(*pmd)); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); put_page(virt_to_page(pmd)); } -static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) +static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) { - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } + phys_addr_t start_addr = addr; + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (kvm_pte_table_empty(start_pte)) + clear_pmd_entry(kvm, pmd, start_addr); } -static void unmap_range(struct kvm *kvm, pgd_t *pgdp, - unsigned long long start, u64 size) +static void unmap_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long long addr = start, end = start + size; - u64 next; + phys_addr_t next, start_addr = addr; + pmd_t *pmd, *start_pmd; - while (addr < end) { - pgd = pgdp + pgd_index(addr); - pud = pud_offset(pgd, addr); - pte = NULL; - if (pud_none(*pud)) { - addr = kvm_pud_addr_end(addr, end); - continue; - } - - if (pud_huge(*pud)) { - /* - * If we are dealing with a huge pud, just clear it and - * move on. - */ - clear_pud_entry(kvm, pud, addr); - addr = kvm_pud_addr_end(addr, end); - continue; - } - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr = kvm_pmd_addr_end(addr, end); - continue; - } - - if (!kvm_pmd_huge(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - next = addr + PAGE_SIZE; - } - - /* - * If the pmd entry is to be cleared, walk back up the ladder - */ - if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) { - clear_pmd_entry(kvm, pmd, addr); - next = kvm_pmd_addr_end(addr, end); - if (page_empty(pmd) && !page_empty(pud)) { - clear_pud_entry(kvm, pud, addr); - next = kvm_pud_addr_end(addr, end); + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pmd)); + } else { + unmap_ptes(kvm, pmd, addr, next); } } + } while (pmd++, addr = next, addr != end); - addr = next; - } + if (kvm_pmd_table_empty(start_pmd)) + clear_pud_entry(kvm, pud, start_addr); +} + +static void unmap_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pud_t *pud, *start_pud; + + start_pud = pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pud)); + } else { + unmap_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); + + if (kvm_pud_table_empty(start_pud)) + clear_pgd_entry(kvm, pgd, start_addr); +} + + +static void unmap_range(struct kvm *kvm, pgd_t *pgdp, + phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + pgd = pgdp + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + unmap_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); } static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7d29847a893b..8e138c7c53ac 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -125,6 +125,21 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#ifndef CONFIG_ARM64_64K_PAGES +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#else +#define kvm_pmd_table_empty(pmdp) (0) +#endif +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) From 332075bc83a401bc1a4deb1caeea1d45bd340866 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 6 Jun 2014 11:10:23 +0200 Subject: [PATCH 1264/1368] ARM: KVM: Unmap IPA on memslot delete/move Currently when a KVM region is deleted or moved after KVM_SET_USER_MEMORY_REGION ioctl, the corresponding intermediate physical memory is not unmapped. This patch corrects this and unmaps the region's IPA range in kvm_arch_commit_memory_region using unmap_stage2_range. Signed-off-by: Eric Auger Signed-off-by: Christoffer Dall (cherry picked from commit df6ce24f2ee485c4f9a5cb610063a5eb60da8267) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 37 ------------------------------------- arch/arm/kvm/mmu.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 354dc42b6395..e2c2bfd4da95 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -155,16 +155,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} /** * kvm_arch_destroy_vm - destroy the VM data structure @@ -225,33 +215,6 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - -int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, - enum kvm_mr_change change) -{ - return 0; -} - -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ -} - -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 23360610aeac..b2a708be1407 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1111,3 +1111,49 @@ int kvm_mmu_init(void) free_hyp_pgds(); return err; } + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) +{ + gpa_t gpa = old->base_gfn << PAGE_SHIFT; + phys_addr_t size = old->npages << PAGE_SHIFT; + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, gpa, size); + spin_unlock(&kvm->mmu_lock); + } +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + return 0; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} From c2ca19e2dc3f71d003a4ab691462d1df6974d00f Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 26 Jun 2014 01:45:51 +0100 Subject: [PATCH 1265/1368] ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping A userspace process can map device MMIO memory via VFIO or /dev/mem, e.g., for platform device passthrough support in QEMU. During early development, we found the PAGE_S2 memory type being used for MMIO mappings. This patch corrects that by using the more strongly ordered memory type for device MMIO mappings: PAGE_S2_DEVICE. Signed-off-by: Kim Phillips Acked-by: Christoffer Dall Acked-by: Will Deacon Signed-off-by: Marc Zyngier (cherry picked from commit b88657674d39fc2127d62d0de9ca142e166443c8) Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b2a708be1407..16e7994bf347 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -759,6 +759,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; pfn_t pfn; + pgprot_t mem_type = PAGE_S2; write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { @@ -809,6 +810,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_pfn(pfn)) return -EFAULT; + if (kvm_is_mmio_pfn(pfn)) + mem_type = PAGE_S2_DEVICE; + spin_lock(&kvm->mmu_lock); if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; @@ -816,7 +820,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); if (hugetlb) { - pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); + pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); if (writable) { kvm_set_s2pmd_writable(&new_pmd); @@ -825,13 +829,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { - pte_t new_pte = pfn_pte(pfn, PAGE_S2); + pte_t new_pte = pfn_pte(pfn, mem_type); if (writable) { kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, + mem_type == PAGE_S2_DEVICE); } From 896f51b90443e91d7358914df4a24d9c152c2b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 1 Jul 2014 16:53:13 +0100 Subject: [PATCH 1266/1368] arm64: KVM: export demux regids as KVM_REG_ARM64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I suspect this is a -ECUTPASTE fault from the initial implementation. If we don't declare the register ID to be KVM_REG_ARM64 the KVM_GET_ONE_REG implementation kvm_arm_get_reg() returns -EINVAL and hilarity ensues. The kvm/api.txt document describes all arm64 registers as starting with 0x60xx... (i.e KVM_REG_ARM64). Signed-off-by: Alex Bennée Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit efd48ceacea78e4d4656aa0a6bf4c5b92ed22130) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c59a1bdab5eb..34f25a590bd7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -962,7 +962,7 @@ static unsigned int num_demux_regs(void) static int write_demux_regids(u64 __user *uindices) { - u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; + u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; unsigned int i; val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; From c675c7cef37bad776609e6923fe64019c10e2945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 4 Jul 2014 15:54:14 +0100 Subject: [PATCH 1267/1368] arm64: KVM: allow export and import of generic timer regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For correct guest suspend/resume behaviour we need to ensure we include the generic timer registers for 64 bit guests. As CONFIG_KVM_ARM_TIMER is always set for arm64 we don't need to worry about null implementations. However I have re-jigged the kvm_arm_timer_set/get_reg declarations to be in the common include/kvm/arm_arch_timer.h headers. Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall (cherry picked from commit 1df08ba0aa95f1a8832b7162eec51069bd9be7ae) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 -- arch/arm/kvm/guest.c | 10 ----- arch/arm64/kvm/guest.c | 68 ++++++++++++++++++++++++++++++++- include/kvm/arm_arch_timer.h | 14 +++++++ 4 files changed, 81 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 193ceaf01bfd..dc4e3edf39cc 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -228,7 +228,4 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); -int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); - #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index b23a59c1c522..986e625b5dbd 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -124,16 +124,6 @@ static bool is_timer_reg(u64 index) return false; } -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - return 0; -} - -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - return 0; -} - #else #define NUM_TIMER_REGS 3 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 60b5c31f3c10..8d1ec2887a26 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -135,6 +135,59 @@ static unsigned long num_core_regs(void) return sizeof(struct kvm_regs) / sizeof(__u32); } +/** + * ARM64 versions of the TIMER registers, always available on arm64 + */ + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + /** * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG * @@ -142,7 +195,8 @@ static unsigned long num_core_regs(void) */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu); + return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) + + NUM_TIMER_REGS; } /** @@ -154,6 +208,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { if (put_user(core_reg | i, uindices)) @@ -161,6 +216,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -174,6 +234,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -187,6 +250,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_sys_reg_set_reg(vcpu, reg); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 6d9aeddc09bf..ad9db6045b2f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -67,6 +67,10 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); +int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); + #else static inline int kvm_timer_hyp_init(void) { @@ -84,6 +88,16 @@ static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} + +static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + return 0; +} + +static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + return 0; +} #endif #endif From 7f124577d6247ae088a0acb0762297a5bdaeb9a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 May 2013 10:20:36 +0100 Subject: [PATCH 1268/1368] KVM: arm/arm64: vgic: move GICv2 registers to their own structure In order to make way for the GICv3 registers, move the v2-specific registers to their own structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit eede821dbfd58df89edb072da64e006321eaef58) Signed-off-by: Christoffer Dall --- arch/arm/kernel/asm-offsets.c | 14 ++++----- arch/arm/kvm/interrupts_head.S | 26 +++++++-------- arch/arm64/kernel/asm-offsets.c | 14 ++++----- arch/arm64/kvm/hyp.S | 26 +++++++-------- include/kvm/arm_vgic.h | 20 +++++++----- virt/kvm/arm/vgic.c | 56 ++++++++++++++++----------------- 6 files changed, 81 insertions(+), 75 deletions(-) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index aa2acc1dd986..776d9186e9c1 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -176,13 +176,13 @@ int main(void) DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); #ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); - DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); - DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); - DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); - DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); - DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); - DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); #ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 76af93025574..e4eaf30205c5 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -421,14 +421,14 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r9, [r2, #GICH_ELRSR1] ldr r10, [r2, #GICH_APR] - str r3, [r11, #VGIC_CPU_HCR] - str r4, [r11, #VGIC_CPU_VMCR] - str r5, [r11, #VGIC_CPU_MISR] - str r6, [r11, #VGIC_CPU_EISR] - str r7, [r11, #(VGIC_CPU_EISR + 4)] - str r8, [r11, #VGIC_CPU_ELRSR] - str r9, [r11, #(VGIC_CPU_ELRSR + 4)] - str r10, [r11, #VGIC_CPU_APR] + str r3, [r11, #VGIC_V2_CPU_HCR] + str r4, [r11, #VGIC_V2_CPU_VMCR] + str r5, [r11, #VGIC_V2_CPU_MISR] + str r6, [r11, #VGIC_V2_CPU_EISR] + str r7, [r11, #(VGIC_V2_CPU_EISR + 4)] + str r8, [r11, #VGIC_V2_CPU_ELRSR] + str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)] + str r10, [r11, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ mov r5, #0 @@ -436,7 +436,7 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Save list registers */ add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_CPU_LR + add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r2], #4 str r6, [r3], #4 @@ -463,9 +463,9 @@ vcpu .req r0 @ vcpu pointer always in r0 add r11, vcpu, #VCPU_VGIC_CPU /* We only restore a minimal set of registers */ - ldr r3, [r11, #VGIC_CPU_HCR] - ldr r4, [r11, #VGIC_CPU_VMCR] - ldr r8, [r11, #VGIC_CPU_APR] + ldr r3, [r11, #VGIC_V2_CPU_HCR] + ldr r4, [r11, #VGIC_V2_CPU_VMCR] + ldr r8, [r11, #VGIC_V2_CPU_APR] str r3, [r2, #GICH_HCR] str r4, [r2, #GICH_VMCR] @@ -473,7 +473,7 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Restore list registers */ add r2, r2, #GICH_LR0 - add r3, r11, #VGIC_CPU_LR + add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r3], #4 str r6, [r2], #4 diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 666e231d410b..dcfd8a616a94 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -127,13 +127,13 @@ int main(void) DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); - DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); - DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); - DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); - DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); - DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); - DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); + DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); + DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); + DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); + DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); + DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); + DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index b0d1512acf08..877d82a134bc 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -412,14 +412,14 @@ CPU_BE( rev w9, w9 ) CPU_BE( rev w10, w10 ) CPU_BE( rev w11, w11 ) - str w4, [x3, #VGIC_CPU_HCR] - str w5, [x3, #VGIC_CPU_VMCR] - str w6, [x3, #VGIC_CPU_MISR] - str w7, [x3, #VGIC_CPU_EISR] - str w8, [x3, #(VGIC_CPU_EISR + 4)] - str w9, [x3, #VGIC_CPU_ELRSR] - str w10, [x3, #(VGIC_CPU_ELRSR + 4)] - str w11, [x3, #VGIC_CPU_APR] + str w4, [x3, #VGIC_V2_CPU_HCR] + str w5, [x3, #VGIC_V2_CPU_VMCR] + str w6, [x3, #VGIC_V2_CPU_MISR] + str w7, [x3, #VGIC_V2_CPU_EISR] + str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] + str w9, [x3, #VGIC_V2_CPU_ELRSR] + str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ str wzr, [x2, #GICH_HCR] @@ -427,7 +427,7 @@ CPU_BE( rev w11, w11 ) /* Save list registers */ add x2, x2, #GICH_LR0 ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_CPU_LR + add x3, x3, #VGIC_V2_CPU_LR 1: ldr w5, [x2], #4 CPU_BE( rev w5, w5 ) str w5, [x3], #4 @@ -452,9 +452,9 @@ CPU_BE( rev w5, w5 ) add x3, x0, #VCPU_VGIC_CPU /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_CPU_HCR] - ldr w5, [x3, #VGIC_CPU_VMCR] - ldr w6, [x3, #VGIC_CPU_APR] + ldr w4, [x3, #VGIC_V2_CPU_HCR] + ldr w5, [x3, #VGIC_V2_CPU_VMCR] + ldr w6, [x3, #VGIC_V2_CPU_APR] CPU_BE( rev w4, w4 ) CPU_BE( rev w5, w5 ) CPU_BE( rev w6, w6 ) @@ -466,7 +466,7 @@ CPU_BE( rev w6, w6 ) /* Restore list registers */ add x2, x2, #GICH_LR0 ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_CPU_LR + add x3, x3, #VGIC_V2_CPU_LR 1: ldr w5, [x3], #4 CPU_BE( rev w5, w5 ) str w5, [x2], #4 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f27000f55a83..f738e5a69ee9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -110,6 +110,16 @@ struct vgic_dist { #endif }; +struct vgic_v2_cpu_if { + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr[2]; /* Saved only */ + u32 vgic_elrsr[2]; /* Saved only */ + u32 vgic_apr; + u32 vgic_lr[VGIC_MAX_LRS]; +}; + struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ @@ -126,13 +136,9 @@ struct vgic_cpu { int nr_lr; /* CPU vif control registers for world switch */ - u32 vgic_hcr; - u32 vgic_vmcr; - u32 vgic_misr; /* Saved only */ - u32 vgic_eisr[2]; /* Saved only */ - u32 vgic_elrsr[2]; /* Saved only */ - u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; + union { + struct vgic_v2_cpu_if vgic_v2; + }; #endif }; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 56ff9bebb577..0ba1ab0721fd 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -601,7 +601,7 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) { clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE; + vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE; vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; } @@ -626,7 +626,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) u32 *lr; for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - lr = &vgic_cpu->vgic_lr[i]; + lr = &vgic_cpu->vgic_v2.vgic_lr[i]; irq = LR_IRQID(*lr); source_cpu = LR_CPUID(*lr); @@ -1007,7 +1007,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int lr; for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; if (!vgic_irq_is_enabled(vcpu, irq)) { vgic_retire_lr(lr, irq, vgic_cpu); @@ -1037,11 +1037,11 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Do we have an active interrupt for the same CPUID? */ if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { + (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) { kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_lr[lr]); + lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; + vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT; return true; } @@ -1052,12 +1052,12 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); + vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; + vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI; return true; } @@ -1155,9 +1155,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) epilog: if (overflow) { - vgic_cpu->vgic_hcr |= GICH_HCR_UIE; + vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE; } else { - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; /* * We're about to run this VCPU, and we've consumed * everything the distributor had in store for @@ -1173,21 +1173,21 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; bool level_pending = false; - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); + kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr); - if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { + if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) { /* * Some level interrupts have been EOIed. Clear their * active bit. */ int lr, irq; - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; + vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI; /* Any additional pending interrupt? */ if (vgic_dist_irq_is_pending(vcpu, irq)) { @@ -1201,13 +1201,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Despite being EOIed, the LR may not have * been marked as empty. */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; + set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); + vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } - if (vgic_cpu->vgic_misr & GICH_MISR_U) - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U) + vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; return level_pending; } @@ -1226,21 +1226,21 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) level_pending = vgic_process_maintenance(vcpu); /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr) { int irq; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) continue; - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; BUG_ON(irq >= VGIC_NR_IRQS); vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, + pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr); if (level_pending || pending < vgic_cpu->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); @@ -1436,10 +1436,10 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * points to their reset values. Anything else resets to zero * anyway. */ - vgic_cpu->vgic_vmcr = 0; + vgic_cpu->vgic_v2.vgic_vmcr = 0; vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ return 0; } @@ -1746,15 +1746,15 @@ static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, } if (!mmio->is_write) { - reg = (vgic_cpu->vgic_vmcr & mask) >> shift; + reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift; mmio_data_write(mmio, ~0, reg); } else { reg = mmio_data_read(mmio, ~0); reg = (reg << shift) & mask; - if (reg != (vgic_cpu->vgic_vmcr & mask)) + if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask)) updated = true; - vgic_cpu->vgic_vmcr &= ~mask; - vgic_cpu->vgic_vmcr |= reg; + vgic_cpu->vgic_v2.vgic_vmcr &= ~mask; + vgic_cpu->vgic_v2.vgic_vmcr |= reg; } return updated; } From 9f92df7009267f1a105e1338def7e99da648417c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 3 Jun 2013 15:55:02 +0100 Subject: [PATCH 1269/1368] KVM: ARM: vgic: introduce vgic_ops and LR manipulation primitives In order to split the various register manipulation from the main vgic code, introduce a vgic_ops structure, and start by abstracting the LR manipulation code with a couple of accessors. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8d5c6b06a5d5f8ebcf40558e566781d572920740) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 18 +++++ virt/kvm/arm/vgic.c | 162 ++++++++++++++++++++++++++++------------- 2 files changed, 128 insertions(+), 52 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f738e5a69ee9..17bbe51b79a1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -68,6 +68,24 @@ struct vgic_bytemap { u32 shared[VGIC_NR_SHARED_IRQS / 4]; }; +struct kvm_vcpu; + +#define LR_STATE_PENDING (1 << 0) +#define LR_STATE_ACTIVE (1 << 1) +#define LR_STATE_MASK (3 << 0) +#define LR_EOI_INT (1 << 2) + +struct vgic_lr { + u16 irq; + u8 source; + u8 state; +}; + +struct vgic_ops { + struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); + void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); +}; + struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 0ba1ab0721fd..11408fee600e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -94,9 +94,12 @@ static struct device_node *vgic_node; #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); +static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); static void vgic_kick_vcpus(struct kvm *kvm); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); +static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); +static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static u32 vgic_nr_lr; static unsigned int vgic_maint_irq; @@ -593,18 +596,6 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) -#define LR_IRQID(lr) \ - ((lr) & GICH_LR_VIRTUALID) - -static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) -{ - clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE; - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; -} - /** * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs @@ -622,13 +613,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int vcpu_id = vcpu->vcpu_id; - int i, irq, source_cpu; - u32 *lr; + int i; for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - lr = &vgic_cpu->vgic_v2.vgic_lr[i]; - irq = LR_IRQID(*lr); - source_cpu = LR_CPUID(*lr); + struct vgic_lr lr = vgic_get_lr(vcpu, i); /* * There are three options for the state bits: @@ -640,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * If the LR holds only an active interrupt (not pending) then * just leave it alone. */ - if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT) + if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) continue; /* @@ -649,18 +637,19 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set(vcpu, irq); - if (irq < VGIC_NR_SGIS) - dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu; - *lr &= ~GICH_LR_PENDING_BIT; + vgic_dist_irq_set(vcpu, lr.irq); + if (lr.irq < VGIC_NR_SGIS) + dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; + lr.state &= ~LR_STATE_PENDING; + vgic_set_lr(vcpu, i, lr); /* * If there's no state left on the LR (it could still be * active), then the LR does not hold any useful info and can * be marked as free for other use. */ - if (!(*lr & GICH_LR_STATE)) - vgic_retire_lr(i, irq, vgic_cpu); + if (!(lr.state & LR_STATE_MASK)) + vgic_retire_lr(i, lr.irq, vcpu); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -989,8 +978,69 @@ static void vgic_update_state(struct kvm *kvm) } } -#define MK_LR_PEND(src, irq) \ - (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) +static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & GICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & GICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & GICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= GICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= GICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= GICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; +} + +static const struct vgic_ops vgic_ops = { + .get_lr = vgic_v2_get_lr, + .set_lr = vgic_v2_set_lr, +}; + +static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + return vgic_ops.get_lr(vcpu, lr); +} + +static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ + vgic_ops.set_lr(vcpu, lr, vlr); +} + +static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); + + vlr.state = 0; + vgic_set_lr(vcpu, lr_nr, vlr); + clear_bit(lr_nr, vgic_cpu->lr_used); + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; +} /* * An interrupt may have been disabled after being made pending on the @@ -1007,12 +1057,12 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int lr; for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_retire_lr(lr, irq, vgic_cpu); - if (vgic_irq_is_active(vcpu, irq)) - vgic_irq_clear_active(vcpu, irq); + if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { + vgic_retire_lr(lr, vlr.irq, vcpu); + if (vgic_irq_is_active(vcpu, vlr.irq)) + vgic_irq_clear_active(vcpu, vlr.irq); } } } @@ -1024,6 +1074,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_lr vlr; int lr; /* Sanitize the input... */ @@ -1036,13 +1087,15 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) lr = vgic_cpu->vgic_irq_lr_map[irq]; /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) { - kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT; - return true; + if (lr != LR_EMPTY) { + vlr = vgic_get_lr(vcpu, lr); + if (vlr.source == sgi_source_id) { + kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); + BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); + vlr.state |= LR_STATE_PENDING; + vgic_set_lr(vcpu, lr, vlr); + return true; + } } /* Try to use another LR for this interrupt */ @@ -1052,12 +1105,16 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); + vlr.irq = irq; + vlr.source = sgi_source_id; + vlr.state = LR_STATE_PENDING; if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI; + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr, vlr); return true; } @@ -1180,21 +1237,23 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Some level interrupts have been EOIed. Clear their * active bit. */ - int lr, irq; + int lr; for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI; + vgic_irq_clear_active(vcpu, vlr.irq); + WARN_ON(vlr.state & LR_STATE_MASK); + vlr.state = 0; + vgic_set_lr(vcpu, lr, vlr); /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, irq)) { - vgic_cpu_irq_set(vcpu, irq); + if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); level_pending = true; } else { - vgic_cpu_irq_clear(vcpu, irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); } /* @@ -1202,7 +1261,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * been marked as empty. */ set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); - vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } @@ -1228,15 +1286,15 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Clear mappings for empty LRs */ for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr) { - int irq; + struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) continue; - irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + vlr = vgic_get_lr(vcpu, lr); - BUG_ON(irq >= VGIC_NR_IRQS); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + BUG_ON(vlr.irq >= VGIC_NR_IRQS); + vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ From 32d98f25d20daedfa49c32a73e57630bdc0e74a0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 10:29:39 +0100 Subject: [PATCH 1270/1368] KVM: ARM: vgic: abstract access to the ELRSR bitmap Move the GICH_ELRSR access to its own functions, and add them to the vgic_ops structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 69bb2c9fbc11d9d4358fbb798db15c9092eb4d8c) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/vgic.c | 46 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 17bbe51b79a1..38864f5e47bc 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -84,6 +84,8 @@ struct vgic_lr { struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); + void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); + u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 11408fee600e..6dcc974fa2b4 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1015,9 +1015,32 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; } +static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); +} + +static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; +#endif + return val; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1031,6 +1054,17 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, vgic_ops.set_lr(vcpu, lr, vlr); } +static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ + vgic_ops.sync_lr_elrsr(vcpu, lr, vlr); +} + +static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_elrsr(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1260,7 +1294,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Despite being EOIed, the LR may not have * been marked as empty. */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); + vgic_sync_lr_elrsr(vcpu, lr, vlr); } } @@ -1278,14 +1312,17 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 elrsr; + unsigned long *elrsr_ptr; int lr, pending; bool level_pending; level_pending = vgic_process_maintenance(vcpu); + elrsr = vgic_get_elrsr(vcpu); + elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, - vgic_cpu->nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1298,8 +1335,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, - vgic_cpu->nr_lr); + pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr); if (level_pending || pending < vgic_cpu->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } From b874bb5cb4795de150d1157a0cf10138122cc093 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 10:33:43 +0100 Subject: [PATCH 1271/1368] KVM: ARM: vgic: abstract EISR bitmap access Move the GICH_EISR access to its own function. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8d6a0313c125c3c7b208b75695fe6ab00afab4c5) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic.c | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 38864f5e47bc..ccb9b59818f4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -86,6 +86,7 @@ struct vgic_ops { void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); + u64 (*get_eisr)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 6dcc974fa2b4..1e857e6e66b5 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1036,11 +1036,26 @@ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) return val; } +static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; +#endif + return val; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1065,6 +1080,11 @@ static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) return vgic_ops.get_elrsr(vcpu); } +static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_eisr(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1271,10 +1291,11 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Some level interrupts have been EOIed. Clear their * active bit. */ + u64 eisr = vgic_get_eisr(vcpu); + unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, - vgic_cpu->nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); From 3f79fb5a583821ca4c97d1b694285d36697e69c9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:02:10 +0100 Subject: [PATCH 1272/1368] KVM: ARM: vgic: abstract MISR decoding Instead of directly dealing with the GICH_MISR bits, move the code to its own function and use a couple of public flags to represent the actual state. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 495dd859f304689a7cd5ef413c439cb090dc25e6) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 4 ++++ virt/kvm/arm/vgic.c | 26 +++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ccb9b59818f4..4857508b12e7 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -87,6 +87,7 @@ struct vgic_ops { void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); + u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); }; struct vgic_dist { @@ -165,6 +166,9 @@ struct vgic_cpu { #define LR_EMPTY 0xff +#define INT_STATUS_EOI (1 << 0) +#define INT_STATUS_UNDERFLOW (1 << 1) + struct kvm; struct kvm_vcpu; struct kvm_run; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1e857e6e66b5..c0bcc9735424 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1050,12 +1050,26 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) return val; } +static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; + u32 ret = 0; + + if (misr & GICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & GICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1085,6 +1099,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) return vgic_ops.get_eisr(vcpu); } +static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_interrupt_status(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1282,11 +1301,12 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u32 status = vgic_get_interrupt_status(vcpu); bool level_pending = false; - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr); + kvm_debug("STATUS = %08x\n", status); - if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) { + if (status & INT_STATUS_EOI) { /* * Some level interrupts have been EOIed. Clear their * active bit. @@ -1319,7 +1339,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } } - if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U) + if (status & INT_STATUS_UNDERFLOW) vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; return level_pending; From 900f2c6eeaaa0daf752a06d654fed8d98d626f9f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:24:17 +0100 Subject: [PATCH 1273/1368] KVM: ARM: vgic: move underflow handling to vgic_ops Move the code dealing with LR underflow handling to its own functions, and make them accessible through vgic_ops. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 909d9b5025f149af6cfc304a76ad6218e6622cc0) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/vgic.c | 28 +++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4857508b12e7..cdfa5d9567c6 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -88,6 +88,8 @@ struct vgic_ops { u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); + void (*enable_underflow)(struct kvm_vcpu *vcpu); + void (*disable_underflow)(struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c0bcc9735424..6d618e0b08a1 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1063,6 +1063,16 @@ static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) return ret; } +static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; +} + +static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1070,6 +1080,8 @@ static const struct vgic_ops vgic_ops = { .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1104,6 +1116,16 @@ static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) return vgic_ops.get_interrupt_status(vcpu); } +static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) +{ + vgic_ops.enable_underflow(vcpu); +} + +static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) +{ + vgic_ops.disable_underflow(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1285,9 +1307,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) epilog: if (overflow) { - vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE; + vgic_enable_underflow(vcpu); } else { - vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; + vgic_disable_underflow(vcpu); /* * We're about to run this VCPU, and we've consumed * everything the distributor had in store for @@ -1340,7 +1362,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } if (status & INT_STATUS_UNDERFLOW) - vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; + vgic_disable_underflow(vcpu); return level_pending; } From 32a0e35ba5210fab6b6379971bba24886b69952a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2014 17:48:10 +0000 Subject: [PATCH 1274/1368] KVM: ARM: vgic: abstract VMCR access Instead of directly messing with with the GICH_VMCR bits for the CPU interface save/restore code, add accessors that encode/decode the entire set of registers exposed by VMCR. Not the most efficient thing, but given that this code is only used by the save/restore code, performance is far from being critical. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit beee38b9d0c0ea6cf2a7f35c3108f7d8281d4545) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 9 ++++++ virt/kvm/arm/vgic.c | 69 ++++++++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index cdfa5d9567c6..f51580043170 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -81,6 +81,13 @@ struct vgic_lr { u8 state; }; +struct vgic_vmcr { + u32 ctlr; + u32 abpr; + u32 bpr; + u32 pmr; +}; + struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); @@ -90,6 +97,8 @@ struct vgic_ops { u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); void (*enable_underflow)(struct kvm_vcpu *vcpu); void (*disable_underflow)(struct kvm_vcpu *vcpu); + void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 6d618e0b08a1..5c706393956d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -100,8 +100,10 @@ static void vgic_kick_vcpus(struct kvm *kvm); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); -static u32 vgic_nr_lr; +static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +static u32 vgic_nr_lr; static unsigned int vgic_maint_irq; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, @@ -1073,6 +1075,28 @@ static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; } +static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; +} + +static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1082,6 +1106,8 @@ static const struct vgic_ops vgic_ops = { .get_interrupt_status = vgic_v2_get_interrupt_status, .enable_underflow = vgic_v2_enable_underflow, .disable_underflow = vgic_v2_disable_underflow, + .get_vmcr = vgic_v2_get_vmcr, + .set_vmcr = vgic_v2_set_vmcr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1126,6 +1152,16 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) vgic_ops.disable_underflow(vcpu); } +static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + vgic_ops.get_vmcr(vcpu, vmcr); +} + +static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + vgic_ops.set_vmcr(vcpu, vmcr); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1879,39 +1915,40 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u32 reg, mask = 0, shift = 0; bool updated = false; + struct vgic_vmcr vmcr; + u32 *vmcr_field; + u32 reg; + + vgic_get_vmcr(vcpu, &vmcr); switch (offset & ~0x3) { case GIC_CPU_CTRL: - mask = GICH_VMCR_CTRL_MASK; - shift = GICH_VMCR_CTRL_SHIFT; + vmcr_field = &vmcr.ctlr; break; case GIC_CPU_PRIMASK: - mask = GICH_VMCR_PRIMASK_MASK; - shift = GICH_VMCR_PRIMASK_SHIFT; + vmcr_field = &vmcr.pmr; break; case GIC_CPU_BINPOINT: - mask = GICH_VMCR_BINPOINT_MASK; - shift = GICH_VMCR_BINPOINT_SHIFT; + vmcr_field = &vmcr.bpr; break; case GIC_CPU_ALIAS_BINPOINT: - mask = GICH_VMCR_ALIAS_BINPOINT_MASK; - shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcr_field = &vmcr.abpr; break; + default: + BUG(); } if (!mmio->is_write) { - reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift; + reg = *vmcr_field; mmio_data_write(mmio, ~0, reg); } else { reg = mmio_data_read(mmio, ~0); - reg = (reg << shift) & mask; - if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask)) + if (reg != *vmcr_field) { + *vmcr_field = reg; + vgic_set_vmcr(vcpu, &vmcr); updated = true; - vgic_cpu->vgic_v2.vgic_vmcr &= ~mask; - vgic_cpu->vgic_v2.vgic_vmcr |= reg; + } } return updated; } From 6d6775fa71660fd7c58741881d6bbbfd2be68f3a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:36:38 +0100 Subject: [PATCH 1275/1368] KVM: ARM: vgic: introduce vgic_enable Move the code dealing with enabling the VGIC on to vgic_ops. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit da8dafd1777cdd93091207952297d221a88e6479) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic.c | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f51580043170..2228973ea8e4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -99,6 +99,7 @@ struct vgic_ops { void (*disable_underflow)(struct kvm_vcpu *vcpu); void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + void (*enable)(struct kvm_vcpu *vcpu); }; struct vgic_dist { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5c706393956d..70f674bb13a1 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1097,6 +1097,19 @@ static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; } +static void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1108,6 +1121,7 @@ static const struct vgic_ops vgic_ops = { .disable_underflow = vgic_v2_disable_underflow, .get_vmcr = vgic_v2_get_vmcr, .set_vmcr = vgic_v2_set_vmcr, + .enable = vgic_v2_enable, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1162,6 +1176,11 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) vgic_ops.set_vmcr(vcpu, vmcr); } +static inline void vgic_enable(struct kvm_vcpu *vcpu) +{ + vgic_ops.enable(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1624,15 +1643,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; } - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_cpu->vgic_v2.vgic_vmcr = 0; - vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + + vgic_enable(vcpu); return 0; } From 6e707c119719a9261ac7305194c0004d11bb594a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Jun 2013 19:17:28 +0100 Subject: [PATCH 1276/1368] KVM: ARM: introduce vgic_params structure Move all the data specific to a given GIC implementation into its own little structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit ca85f623e37d096206e092ef037a145a60fa7f85) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 11 +++++++ virt/kvm/arm/vgic.c | 70 ++++++++++++++++++++---------------------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2228973ea8e4..ce2e14226dbf 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -102,6 +102,17 @@ struct vgic_ops { void (*enable)(struct kvm_vcpu *vcpu); }; +struct vgic_params { + /* Physical address of vgic virtual cpu interface */ + phys_addr_t vcpu_base; + /* Number of list registers */ + u32 nr_lr; + /* Interrupt number */ + unsigned int maint_irq; + /* Virtual control interface base address */ + void __iomem *vctrl_base; +}; + struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 70f674bb13a1..f3a996d0a100 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -76,14 +76,6 @@ #define IMPLEMENTER_ARM 0x43b #define GICC_ARCH_VERSION_V2 0x2 -/* Physical address of vgic virtual cpu interface */ -static phys_addr_t vgic_vcpu_base; - -/* Virtual control interface base address */ -static void __iomem *vgic_vctrl_base; - -static struct device_node *vgic_node; - #define ACCESS_READ_VALUE (1 << 0) #define ACCESS_READ_RAZ (0 << 0) #define ACCESS_READ_MASK(x) ((x) & (1 << 0)) @@ -103,8 +95,7 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static u32 vgic_nr_lr; -static unsigned int vgic_maint_irq; +static struct vgic_params vgic; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) @@ -1206,7 +1197,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1250,8 +1241,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Try to use another LR for this interrupt */ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic_cpu->nr_lr); - if (lr >= vgic_cpu->nr_lr) + vgic.nr_lr); + if (lr >= vgic.nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); @@ -1377,7 +1368,6 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; u32 status = vgic_get_interrupt_status(vcpu); bool level_pending = false; @@ -1392,7 +1382,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); @@ -1440,7 +1430,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1453,8 +1443,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr); - if (level_pending || pending < vgic_cpu->nr_lr) + pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr); + if (level_pending || pending < vgic.nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } @@ -1643,7 +1633,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; } - vgic_cpu->nr_lr = vgic_nr_lr; + /* + * Store the number of LRs per vcpu, so we don't have to go + * all the way to the distributor structure to find out. Only + * assembly code should use this one. + */ + vgic_cpu->nr_lr = vgic.nr_lr; vgic_enable(vcpu); @@ -1652,7 +1647,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) static void vgic_init_maintenance_interrupt(void *info) { - enable_percpu_irq(vgic_maint_irq, 0); + enable_percpu_irq(vgic.maint_irq, 0); } static int vgic_cpu_notify(struct notifier_block *self, @@ -1665,7 +1660,7 @@ static int vgic_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(vgic_maint_irq); + disable_percpu_irq(vgic.maint_irq); break; } @@ -1681,6 +1676,7 @@ int kvm_vgic_hyp_init(void) int ret; struct resource vctrl_res; struct resource vcpu_res; + struct device_node *vgic_node; vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); if (!vgic_node) { @@ -1688,17 +1684,17 @@ int kvm_vgic_hyp_init(void) return -ENODEV; } - vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic_maint_irq) { + vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic.maint_irq) { kvm_err("error getting vgic maintenance irq from DT\n"); ret = -ENXIO; goto out; } - ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, + ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); + kvm_err("Cannot register interrupt %d\n", vgic.maint_irq); goto out; } @@ -1714,18 +1710,18 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - vgic_vctrl_base = of_iomap(vgic_node, 2); - if (!vgic_vctrl_base) { + vgic.vctrl_base = of_iomap(vgic_node, 2); + if (!vgic.vctrl_base) { kvm_err("Cannot ioremap VCTRL\n"); ret = -ENOMEM; goto out_free_irq; } - vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); - vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; + vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR); + vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1; - ret = create_hyp_io_mappings(vgic_vctrl_base, - vgic_vctrl_base + resource_size(&vctrl_res), + ret = create_hyp_io_mappings(vgic.vctrl_base, + vgic.vctrl_base + resource_size(&vctrl_res), vctrl_res.start); if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); @@ -1733,7 +1729,7 @@ int kvm_vgic_hyp_init(void) } kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic_maint_irq); + vctrl_res.start, vgic.maint_irq); on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { @@ -1741,14 +1737,14 @@ int kvm_vgic_hyp_init(void) ret = -ENXIO; goto out_unmap; } - vgic_vcpu_base = vcpu_res.start; + vgic.vcpu_base = vcpu_res.start; goto out; out_unmap: - iounmap(vgic_vctrl_base); + iounmap(vgic.vctrl_base); out_free_irq: - free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); + free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus()); out: of_node_put(vgic_node); return ret; @@ -1783,7 +1779,7 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE); + vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; @@ -1829,7 +1825,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic_vctrl_base; + kvm->arch.vgic.vctrl_base = vgic.vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; From ab8bb40339927ab4e5875330461a9a9ffaf42a56 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2014 18:13:03 +0000 Subject: [PATCH 1277/1368] KVM: ARM: vgic: split GICv2 backend from the main vgic code Brutally hack the innocent vgic code, and move the GICv2 specific code to its own file, using vgic_ops and vgic_params as a way to pass information between the two blocks. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8f186d522c69bb18dd9b93a634da4953228c67d4) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Makefile | 1 + arch/arm64/kvm/Makefile | 2 +- include/kvm/arm_vgic.h | 11 +- virt/kvm/arm/vgic-v2.c | 248 +++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic.c | 267 +++++++--------------------------------- 5 files changed, 304 insertions(+), 225 deletions(-) create mode 100644 virt/kvm/arm/vgic-v2.c diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 789bca9e64a7..f7057ed045b6 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -21,4 +21,5 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 72a9fd583ad3..7e92952d139e 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,5 +19,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ce2e14226dbf..d8d52a9ca6a1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -32,7 +32,8 @@ #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) #define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) #define VGIC_MAX_CPUS KVM_MAX_VCPUS -#define VGIC_MAX_LRS (1 << 6) + +#define VGIC_V2_MAX_LRS (1 << 6) /* Sanity checks... */ #if (VGIC_MAX_CPUS > 8) @@ -162,7 +163,7 @@ struct vgic_v2_cpu_if { u32 vgic_eisr[2]; /* Saved only */ u32 vgic_elrsr[2]; /* Saved only */ u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; + u32 vgic_lr[VGIC_V2_MAX_LRS]; }; struct vgic_cpu { @@ -175,7 +176,7 @@ struct vgic_cpu { DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); /* Bitmap of used/free list registers */ - DECLARE_BITMAP( lr_used, VGIC_MAX_LRS); + DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); /* Number of list registers on this CPU */ int nr_lr; @@ -214,6 +215,10 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) #define vgic_initialized(k) ((k)->arch.vgic.ready) +int vgic_v2_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params); + #else static inline int kvm_vgic_hyp_init(void) { diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c new file mode 100644 index 000000000000..940418ebd0d0 --- /dev/null +++ b/virt/kvm/arm/vgic-v2.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & GICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & GICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & GICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= GICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= GICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= GICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; +} + +static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); +} + +static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; +#endif + return val; +} + +static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; +#endif + return val; +} + +static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; + u32 ret = 0; + + if (misr & GICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & GICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + +static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; +} + +static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; +} + +static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; +} + +static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; +} + +static void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + +static const struct vgic_ops vgic_v2_ops = { + .get_lr = vgic_v2_get_lr, + .set_lr = vgic_v2_set_lr, + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, + .get_vmcr = vgic_v2_get_vmcr, + .set_vmcr = vgic_v2_set_vmcr, + .enable = vgic_v2_enable, +}; + +static struct vgic_params vgic_v2_params; + +/** + * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT + * @node: pointer to the DT node + * @ops: address of a pointer to the GICv2 operations + * @params: address of a pointer to HW-specific parameters + * + * Returns 0 if a GICv2 has been found, with the low level operations + * in *ops and the HW parameters in *params. Returns an error code + * otherwise. + */ +int vgic_v2_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + int ret; + struct resource vctrl_res; + struct resource vcpu_res; + struct vgic_params *vgic = &vgic_v2_params; + + vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic->maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ret = of_address_to_resource(vgic_node, 2, &vctrl_res); + if (ret) { + kvm_err("Cannot obtain GICH resource\n"); + goto out; + } + + vgic->vctrl_base = of_iomap(vgic_node, 2); + if (!vgic->vctrl_base) { + kvm_err("Cannot ioremap GICH\n"); + ret = -ENOMEM; + goto out; + } + + vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR); + vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; + + ret = create_hyp_io_mappings(vgic->vctrl_base, + vgic->vctrl_base + resource_size(&vctrl_res), + vctrl_res.start); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out_unmap; + } + + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { + kvm_err("Cannot obtain GICV resource\n"); + ret = -ENXIO; + goto out_unmap; + } + vgic->vcpu_base = vcpu_res.start; + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vctrl_res.start, vgic->maint_irq); + + *ops = &vgic_v2_ops; + *params = vgic; + goto out; + +out_unmap: + iounmap(vgic->vctrl_base); +out: + of_node_put(vgic_node); + return ret; +} diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f3a996d0a100..e4b9cbbbee4c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -95,7 +95,8 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static struct vgic_params vgic; +static const struct vgic_ops *vgic_ops; +static const struct vgic_params *vgic; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) @@ -971,205 +972,61 @@ static void vgic_update_state(struct kvm *kvm) } } -static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - struct vgic_lr lr_desc; - u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; - - lr_desc.irq = val & GICH_LR_VIRTUALID; - if (lr_desc.irq <= 15) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; - else - lr_desc.source = 0; - lr_desc.state = 0; - - if (val & GICH_LR_PENDING_BIT) - lr_desc.state |= LR_STATE_PENDING; - if (val & GICH_LR_ACTIVE_BIT) - lr_desc.state |= LR_STATE_ACTIVE; - if (val & GICH_LR_EOI) - lr_desc.state |= LR_EOI_INT; - - return lr_desc; -} - -static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; - - if (lr_desc.state & LR_STATE_PENDING) - lr_val |= GICH_LR_PENDING_BIT; - if (lr_desc.state & LR_STATE_ACTIVE) - lr_val |= GICH_LR_ACTIVE_BIT; - if (lr_desc.state & LR_EOI_INT) - lr_val |= GICH_LR_EOI; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; -} - -static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - if (!(lr_desc.state & LR_STATE_MASK)) - set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); -} - -static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) -{ - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; -#endif - return val; -} - -static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) -{ - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; -#endif - return val; -} - -static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) -{ - u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; - u32 ret = 0; - - if (misr & GICH_MISR_EOI) - ret |= INT_STATUS_EOI; - if (misr & GICH_MISR_U) - ret |= INT_STATUS_UNDERFLOW; - - return ret; -} - -static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; -} - -static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; -} - -static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; - - vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; - vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; - vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; -} - -static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; - vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; - vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; -} - -static void vgic_v2_enable(struct kvm_vcpu *vcpu) -{ - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - - /* Get the show on the road... */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; -} - -static const struct vgic_ops vgic_ops = { - .get_lr = vgic_v2_get_lr, - .set_lr = vgic_v2_set_lr, - .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, - .get_elrsr = vgic_v2_get_elrsr, - .get_eisr = vgic_v2_get_eisr, - .get_interrupt_status = vgic_v2_get_interrupt_status, - .enable_underflow = vgic_v2_enable_underflow, - .disable_underflow = vgic_v2_disable_underflow, - .get_vmcr = vgic_v2_get_vmcr, - .set_vmcr = vgic_v2_set_vmcr, - .enable = vgic_v2_enable, -}; - static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) { - return vgic_ops.get_lr(vcpu, lr); + return vgic_ops->get_lr(vcpu, lr); } static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { - vgic_ops.set_lr(vcpu, lr, vlr); + vgic_ops->set_lr(vcpu, lr, vlr); } static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { - vgic_ops.sync_lr_elrsr(vcpu, lr, vlr); + vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); } static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) { - return vgic_ops.get_elrsr(vcpu); + return vgic_ops->get_elrsr(vcpu); } static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) { - return vgic_ops.get_eisr(vcpu); + return vgic_ops->get_eisr(vcpu); } static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) { - return vgic_ops.get_interrupt_status(vcpu); + return vgic_ops->get_interrupt_status(vcpu); } static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) { - vgic_ops.enable_underflow(vcpu); + vgic_ops->enable_underflow(vcpu); } static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) { - vgic_ops.disable_underflow(vcpu); + vgic_ops->disable_underflow(vcpu); } static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - vgic_ops.get_vmcr(vcpu, vmcr); + vgic_ops->get_vmcr(vcpu, vmcr); } static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - vgic_ops.set_vmcr(vcpu, vmcr); + vgic_ops->set_vmcr(vcpu, vmcr); } static inline void vgic_enable(struct kvm_vcpu *vcpu) { - vgic_ops.enable(vcpu); + vgic_ops->enable(vcpu); } static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) @@ -1197,7 +1054,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) { + for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1241,8 +1098,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Try to use another LR for this interrupt */ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic.nr_lr); - if (lr >= vgic.nr_lr) + vgic->nr_lr); + if (lr >= vgic->nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); @@ -1382,7 +1239,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); @@ -1430,7 +1287,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1443,8 +1300,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr); - if (level_pending || pending < vgic.nr_lr) + pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); + if (level_pending || pending < vgic->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } @@ -1638,7 +1495,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * all the way to the distributor structure to find out. Only * assembly code should use this one. */ - vgic_cpu->nr_lr = vgic.nr_lr; + vgic_cpu->nr_lr = vgic->nr_lr; vgic_enable(vcpu); @@ -1647,7 +1504,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) static void vgic_init_maintenance_interrupt(void *info) { - enable_percpu_irq(vgic.maint_irq, 0); + enable_percpu_irq(vgic->maint_irq, 0); } static int vgic_cpu_notify(struct notifier_block *self, @@ -1660,7 +1517,7 @@ static int vgic_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(vgic.maint_irq); + disable_percpu_irq(vgic->maint_irq); break; } @@ -1671,31 +1528,36 @@ static struct notifier_block vgic_cpu_nb = { .notifier_call = vgic_cpu_notify, }; +static const struct of_device_id vgic_ids[] = { + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + {}, +}; + int kvm_vgic_hyp_init(void) { - int ret; - struct resource vctrl_res; - struct resource vcpu_res; + const struct of_device_id *matched_id; + int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); struct device_node *vgic_node; + int ret; - vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); + vgic_node = of_find_matching_node_and_match(NULL, + vgic_ids, &matched_id); if (!vgic_node) { - kvm_err("error: no compatible vgic node in DT\n"); + kvm_err("error: no compatible GIC node found\n"); return -ENODEV; } - vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic.maint_irq) { - kvm_err("error getting vgic maintenance irq from DT\n"); - ret = -ENXIO; - goto out; - } + vgic_probe = matched_id->data; + ret = vgic_probe(vgic_node, &vgic_ops, &vgic); + if (ret) + return ret; - ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler, + ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic.maint_irq); - goto out; + kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); + return ret; } ret = __register_cpu_notifier(&vgic_cpu_nb); @@ -1704,49 +1566,12 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); - if (ret) { - kvm_err("Cannot obtain VCTRL resource\n"); - goto out_free_irq; - } - - vgic.vctrl_base = of_iomap(vgic_node, 2); - if (!vgic.vctrl_base) { - kvm_err("Cannot ioremap VCTRL\n"); - ret = -ENOMEM; - goto out_free_irq; - } - - vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR); - vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic.vctrl_base, - vgic.vctrl_base + resource_size(&vctrl_res), - vctrl_res.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic.maint_irq); on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { - kvm_err("Cannot obtain VCPU resource\n"); - ret = -ENXIO; - goto out_unmap; - } - vgic.vcpu_base = vcpu_res.start; + return 0; - goto out; - -out_unmap: - iounmap(vgic.vctrl_base); out_free_irq: - free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus()); -out: - of_node_put(vgic_node); + free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); return ret; } @@ -1779,7 +1604,7 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE); + vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; @@ -1825,7 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic.vctrl_base; + kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; From 79f17b6074e34191952a9249b061012a23f3888a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 May 2014 10:03:25 +0100 Subject: [PATCH 1278/1368] KVM: ARM: vgic: revisit implementation of irqchip_in_kernel So far, irqchip_in_kernel() was implemented by testing the value of vctrl_base, which worked fine with GICv2. With GICv3, this field is useless, as we're using system registers instead of a emmory mapped interface. To solve this, add a boolean flag indicating if the we're using a vgic or not. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit f982cf4e9c37b19478c7bc6e0484a43a7e78cf57) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 3 ++- virt/kvm/arm/vgic.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d8d52a9ca6a1..f6b9fec6fcac 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -117,6 +117,7 @@ struct vgic_params { struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; + bool in_kernel; bool ready; /* Virtual control interface mapping */ @@ -212,7 +213,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) +#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.ready) int vgic_v2_probe(struct device_node *vgic_node, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e4b9cbbbee4c..1348e74c4d11 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1650,6 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); + kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; From cab0beb93ff52f5a47cf9a1a03c9c73d473d28d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 26 Jun 2013 15:16:40 +0100 Subject: [PATCH 1279/1368] arm64: KVM: remove __kvm_hyp_code_{start,end} from hyp.S We already have __hyp_text_{start,end} to express the boundaries of the HYP text section, and __kvm_hyp_code_{start,end} are getting in the way of a more modular world switch code. Just turn __kvm_hyp_code_{start,end} into #defines mapping the linker-emited symbols. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 45451914c875bba44903ce4f1445e047b7992bf7) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 6 ++++-- arch/arm64/include/asm/virt.h | 4 ++++ arch/arm64/kvm/hyp.S | 6 ------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9fcd54b1e16d..d0bfc4ba82c0 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -18,6 +18,8 @@ #ifndef __ARM_KVM_ASM_H__ #define __ARM_KVM_ASM_H__ +#include + /* * 0 is reserved as an invalid value. * Order *must* be kept in sync with the hyp switch code. @@ -96,8 +98,8 @@ extern char __kvm_hyp_init_end[]; extern char __kvm_hyp_vector[]; -extern char __kvm_hyp_code_start[]; -extern char __kvm_hyp_code_end[]; +#define __kvm_hyp_code_start __hyp_text_start +#define __kvm_hyp_code_end __hyp_text_end extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 215ad4649dd7..7a5df5252dd7 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void) return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } +/* The section containing the hypervisor text */ +extern char __hyp_text_start[]; +extern char __hyp_text_end[]; + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 877d82a134bc..9c5d0acb3654 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -36,9 +36,6 @@ .pushsection .hyp.text, "ax" .align PAGE_SHIFT -__kvm_hyp_code_start: - .globl __kvm_hyp_code_start - .macro save_common_regs // x2: base address for cpu context // x3: tmp register @@ -880,7 +877,4 @@ ENTRY(__kvm_hyp_vector) ventry el1_error_invalid // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) -__kvm_hyp_code_end: - .globl __kvm_hyp_code_end - .popsection From fd17281069b65b1672718613d8c89cc9a409bfcd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 11:57:56 +0100 Subject: [PATCH 1280/1368] arm64: KVM: split GICv2 world switch from hyp code Move the GICv2 world switch code into its own file, and add the necessary indirection to the arm64 switch code. Also introduce a new type field to the vgic_params structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 1a9b13056dde7e3092304d6041ccc60a913042ea) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 5 ++ arch/arm64/include/asm/kvm_asm.h | 4 + arch/arm64/include/asm/kvm_host.h | 21 +++++ arch/arm64/kernel/asm-offsets.c | 3 + arch/arm64/kvm/Makefile | 4 +- arch/arm64/kvm/hyp.S | 104 ++++------------------- arch/arm64/kvm/vgic-v2-switch.S | 133 ++++++++++++++++++++++++++++++ include/kvm/arm_vgic.h | 7 +- virt/kvm/arm/vgic-v2.c | 1 + virt/kvm/arm/vgic.c | 3 + 10 files changed, 195 insertions(+), 90 deletions(-) create mode 100644 arch/arm64/kvm/vgic-v2-switch.S diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index dc4e3edf39cc..6dfb404f6c46 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,6 +225,11 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) return 0; } +static inline void vgic_arch_setup(const struct vgic_params *vgic) +{ + BUG_ON(vgic->type != VGIC_V2); +} + int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index d0bfc4ba82c0..6252264341c8 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -105,6 +105,10 @@ extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +extern char __save_vgic_v2_state[]; +extern char __restore_vgic_v2_state[]; + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 92242ce06309..4c182d0aae70 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -200,4 +200,25 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, hyp_stack_ptr, vector_ptr); } +struct vgic_sr_vectors { + void *save_vgic; + void *restore_vgic; +}; + +static inline void vgic_arch_setup(const struct vgic_params *vgic) +{ + extern struct vgic_sr_vectors __vgic_sr_vectors; + + switch(vgic->type) + { + case VGIC_V2: + __vgic_sr_vectors.save_vgic = __save_vgic_v2_state; + __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; + break; + + default: + BUG(); + } +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index dcfd8a616a94..9ff0b2b97392 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -127,6 +127,9 @@ int main(void) DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic)); + DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic)); + DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 7e92952d139e..daf24dc59e2c 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,5 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 9c5d0acb3654..56df9a352a81 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -16,7 +16,6 @@ */ #include -#include #include #include @@ -376,100 +375,23 @@ .endm /* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! + * Call into the vgic backend for state saving */ .macro save_vgic_state - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr w4, [x2, #GICH_HCR] - ldr w5, [x2, #GICH_VMCR] - ldr w6, [x2, #GICH_MISR] - ldr w7, [x2, #GICH_EISR0] - ldr w8, [x2, #GICH_EISR1] - ldr w9, [x2, #GICH_ELRSR0] - ldr w10, [x2, #GICH_ELRSR1] - ldr w11, [x2, #GICH_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) -CPU_BE( rev w9, w9 ) -CPU_BE( rev w10, w10 ) -CPU_BE( rev w11, w11 ) - - str w4, [x3, #VGIC_V2_CPU_HCR] - str w5, [x3, #VGIC_V2_CPU_VMCR] - str w6, [x3, #VGIC_V2_CPU_MISR] - str w7, [x3, #VGIC_V2_CPU_EISR] - str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] - str w9, [x3, #VGIC_V2_CPU_ELRSR] - str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] - str w11, [x3, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - str wzr, [x2, #GICH_HCR] - - /* Save list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x2], #4 -CPU_BE( rev w5, w5 ) - str w5, [x3], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: + adr x24, __vgic_sr_vectors + ldr x24, [x24, VGIC_SAVE_FN] + kern_hyp_va x24 + blr x24 .endm /* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct + * Call into the vgic backend for state restoring */ .macro restore_vgic_state - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_V2_CPU_HCR] - ldr w5, [x3, #VGIC_V2_CPU_VMCR] - ldr w6, [x3, #VGIC_V2_CPU_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) - - str w4, [x2, #GICH_HCR] - str w5, [x2, #GICH_VMCR] - str w6, [x2, #GICH_APR] - - /* Restore list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x3], #4 -CPU_BE( rev w5, w5 ) - str w5, [x2], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: + adr x24, __vgic_sr_vectors + ldr x24, [x24, #VGIC_RESTORE_FN] + kern_hyp_va x24 + blr x24 .endm .macro save_timer_state @@ -650,6 +572,12 @@ ENTRY(__kvm_flush_vm_context) ret ENDPROC(__kvm_flush_vm_context) + // struct vgic_sr_vectors __vgi_sr_vectors; + .align 3 +ENTRY(__vgic_sr_vectors) + .skip VGIC_SR_VECTOR_SZ +ENDPROC(__vgic_sr_vectors) + __kvm_hyp_panic: // Guess the context by looking at VTTBR: // If zero, then we're already a host. diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S new file mode 100644 index 000000000000..ae211772f991 --- /dev/null +++ b/arch/arm64/kvm/vgic-v2-switch.S @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + .text + .pushsection .hyp.text, "ax" + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +ENTRY(__save_vgic_v2_state) +__save_vgic_v2_state: + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr w4, [x2, #GICH_HCR] + ldr w5, [x2, #GICH_VMCR] + ldr w6, [x2, #GICH_MISR] + ldr w7, [x2, #GICH_EISR0] + ldr w8, [x2, #GICH_EISR1] + ldr w9, [x2, #GICH_ELRSR0] + ldr w10, [x2, #GICH_ELRSR1] + ldr w11, [x2, #GICH_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) +CPU_BE( rev w9, w9 ) +CPU_BE( rev w10, w10 ) +CPU_BE( rev w11, w11 ) + + str w4, [x3, #VGIC_V2_CPU_HCR] + str w5, [x3, #VGIC_V2_CPU_VMCR] + str w6, [x3, #VGIC_V2_CPU_MISR] + str w7, [x3, #VGIC_V2_CPU_EISR] + str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] + str w9, [x3, #VGIC_V2_CPU_ELRSR] + str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_V2_CPU_APR] + + /* Clear GICH_HCR */ + str wzr, [x2, #GICH_HCR] + + /* Save list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_V2_CPU_LR +1: ldr w5, [x2], #4 +CPU_BE( rev w5, w5 ) + str w5, [x3], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: + ret +ENDPROC(__save_vgic_v2_state) + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +ENTRY(__restore_vgic_v2_state) +__restore_vgic_v2_state: + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr w4, [x3, #VGIC_V2_CPU_HCR] + ldr w5, [x3, #VGIC_V2_CPU_VMCR] + ldr w6, [x3, #VGIC_V2_CPU_APR] +CPU_BE( rev w4, w4 ) +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) + + str w4, [x2, #GICH_HCR] + str w5, [x2, #GICH_VMCR] + str w6, [x2, #GICH_APR] + + /* Restore list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_V2_CPU_LR +1: ldr w5, [x3], #4 +CPU_BE( rev w5, w5 ) + str w5, [x2], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: + ret +ENDPROC(__restore_vgic_v2_state) + + .popsection diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f6b9fec6fcac..65f1121a3beb 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -24,7 +24,6 @@ #include #include #include -#include #define VGIC_NR_IRQS 256 #define VGIC_NR_SGIS 16 @@ -71,6 +70,10 @@ struct vgic_bytemap { struct kvm_vcpu; +enum vgic_type { + VGIC_V2, /* Good ol' GICv2 */ +}; + #define LR_STATE_PENDING (1 << 0) #define LR_STATE_ACTIVE (1 << 1) #define LR_STATE_MASK (3 << 0) @@ -104,6 +107,8 @@ struct vgic_ops { }; struct vgic_params { + /* vgic type */ + enum vgic_type type; /* Physical address of vgic virtual cpu interface */ phys_addr_t vcpu_base; /* Number of list registers */ diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 940418ebd0d0..d6c9c142f813 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -236,6 +236,7 @@ int vgic_v2_probe(struct device_node *vgic_node, kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, vgic->maint_irq); + vgic->type = VGIC_V2; *ops = &vgic_v2_ops; *params = vgic; goto out; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1348e74c4d11..7867b9a1f694 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1568,6 +1568,9 @@ int kvm_vgic_hyp_init(void) on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + /* Callback into for arch code for setup */ + vgic_arch_setup(vgic); + return 0; out_free_irq: From c292a45791af6d60cc7c644809f84810a38e1771 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Aug 2013 18:19:11 +0100 Subject: [PATCH 1281/1368] arm64: KVM: move HCR_EL2.{IMO,FMO} manipulation into the vgic switch code GICv3 requires the IMO and FMO bits to be tightly coupled with some of the interrupt controller's register switch. In order to have similar code paths, move the manipulation of these bits to the GICv2 switch code. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit ac3c3747e2db2f326ffc601651de544cdd33a8e9) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 5 +++-- arch/arm64/kvm/hyp.S | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3d6903006a8a..cc83520459ed 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -76,9 +76,10 @@ */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ - HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_SWIO | HCR_TIDCP | HCR_RW) + HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) +#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) + /* Hyp System Control Register (SCTLR_EL2) bits */ #define SCTLR_EL2_EE (1 << 25) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 56df9a352a81..5945f3bdea7a 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -335,11 +335,8 @@ .endm .macro activate_traps - ldr x2, [x0, #VCPU_IRQ_LINES] - ldr x1, [x0, #VCPU_HCR_EL2] - orr x2, x2, x1 - msr hcr_el2, x2 - + ldr x2, [x0, #VCPU_HCR_EL2] + msr hcr_el2, x2 ldr x2, =(CPTR_EL2_TTA) msr cptr_el2, x2 @@ -382,12 +379,22 @@ ldr x24, [x24, VGIC_SAVE_FN] kern_hyp_va x24 blr x24 + mrs x24, hcr_el2 + mov x25, #HCR_INT_OVERRIDE + neg x25, x25 + and x24, x24, x25 + msr hcr_el2, x24 .endm /* * Call into the vgic backend for state restoring */ .macro restore_vgic_state + mrs x24, hcr_el2 + ldr x25, [x0, #VCPU_IRQ_LINES] + orr x24, x24, #HCR_INT_OVERRIDE + orr x24, x24, x25 + msr hcr_el2, x24 adr x24, __vgic_sr_vectors ldr x24, [x24, #VGIC_RESTORE_FN] kern_hyp_va x24 From 9a35d57d6984382050d493e2fa1140c9a6b78671 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Jul 2013 15:15:23 +0100 Subject: [PATCH 1282/1368] KVM: ARM: vgic: add the GICv3 backend Introduce the support code for emulating a GICv2 on top of GICv3 hardware. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b2fb1c0d378399e1427a91bb991c094f2ca09a2f) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 2 + arch/arm64/kvm/vgic-v3-switch.S | 29 ++++ include/kvm/arm_vgic.h | 28 ++++ virt/kvm/arm/vgic-v3.c | 231 +++++++++++++++++++++++++++++++ 4 files changed, 290 insertions(+) create mode 100644 arch/arm64/kvm/vgic-v3-switch.S create mode 100644 virt/kvm/arm/vgic-v3.c diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6252264341c8..ed4987bf9ac7 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -106,6 +106,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +extern u64 __vgic_v3_get_ich_vtr_el2(void); + extern char __save_vgic_v2_state[]; extern char __restore_vgic_v2_state[]; diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S new file mode 100644 index 000000000000..9fbf27350c84 --- /dev/null +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + + .text + .pushsection .hyp.text, "ax" + +ENTRY(__vgic_v3_get_ich_vtr_el2) + mrs x0, ICH_VTR_EL2 + ret +ENDPROC(__vgic_v3_get_ich_vtr_el2) + + .popsection diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 65f1121a3beb..35b0c121bb65 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -33,6 +33,7 @@ #define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) +#define VGIC_V3_MAX_LRS 16 /* Sanity checks... */ #if (VGIC_MAX_CPUS > 8) @@ -72,6 +73,7 @@ struct kvm_vcpu; enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ + VGIC_V3, /* New fancy GICv3 */ }; #define LR_STATE_PENDING (1 << 0) @@ -172,6 +174,19 @@ struct vgic_v2_cpu_if { u32 vgic_lr[VGIC_V2_MAX_LRS]; }; +struct vgic_v3_cpu_if { +#ifdef CONFIG_ARM_GIC_V3 + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr; /* Saved only */ + u32 vgic_elrsr; /* Saved only */ + u32 vgic_ap0r[4]; + u32 vgic_ap1r[4]; + u64 vgic_lr[VGIC_V3_MAX_LRS]; +#endif +}; + struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ @@ -190,6 +205,7 @@ struct vgic_cpu { /* CPU vif control registers for world switch */ union { struct vgic_v2_cpu_if vgic_v2; + struct vgic_v3_cpu_if vgic_v3; }; #endif }; @@ -224,6 +240,18 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, int vgic_v2_probe(struct device_node *vgic_node, const struct vgic_ops **ops, const struct vgic_params **params); +#ifdef CONFIG_ARM_GIC_V3 +int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params); +#else +static inline int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + return -ENODEV; +} +#endif #else static inline int kvm_vgic_hyp_init(void) diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c new file mode 100644 index 000000000000..f01d44685720 --- /dev/null +++ b/virt/kvm/arm/vgic-v3.c @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2013 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +/* + * LRs are stored in reverse order in memory. make sure we index them + * correctly. + */ +#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) + +static u32 ich_vtr_el2; + +static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & ICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & ICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & ICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | + lr_desc.irq); + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= ICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= ICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= ICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; +} + +static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); +} + +static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr; +} + +static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; +} + +static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; + u32 ret = 0; + + if (misr & ICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & ICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + +static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; + + vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; + vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; +} + +static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE; +} + +static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE; +} + +static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; + vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; + vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; + vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; +} + +static void vgic_v3_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN; +} + +static const struct vgic_ops vgic_v3_ops = { + .get_lr = vgic_v3_get_lr, + .set_lr = vgic_v3_set_lr, + .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, + .get_elrsr = vgic_v3_get_elrsr, + .get_eisr = vgic_v3_get_eisr, + .get_interrupt_status = vgic_v3_get_interrupt_status, + .enable_underflow = vgic_v3_enable_underflow, + .disable_underflow = vgic_v3_disable_underflow, + .get_vmcr = vgic_v3_get_vmcr, + .set_vmcr = vgic_v3_set_vmcr, + .enable = vgic_v3_enable, +}; + +static struct vgic_params vgic_v3_params; + +/** + * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT + * @node: pointer to the DT node + * @ops: address of a pointer to the GICv3 operations + * @params: address of a pointer to HW-specific parameters + * + * Returns 0 if a GICv3 has been found, with the low level operations + * in *ops and the HW parameters in *params. Returns an error code + * otherwise. + */ +int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + int ret = 0; + u32 gicv_idx; + struct resource vcpu_res; + struct vgic_params *vgic = &vgic_v3_params; + + vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic->maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); + + /* + * The ListRegs field is 5 bits, but there is a architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; + + if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) + gicv_idx = 1; + + gicv_idx += 3; /* Also skip GICD, GICC, GICH */ + if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { + kvm_err("Cannot obtain GICV region\n"); + ret = -ENXIO; + goto out; + } + vgic->vcpu_base = vcpu_res.start; + vgic->vctrl_base = NULL; + vgic->type = VGIC_V3; + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vcpu_res.start, vgic->maint_irq); + + *ops = &vgic_v3_ops; + *params = vgic; + +out: + of_node_put(vgic_node); + return ret; +} From 825ae8cc53eeb522fb8c8bf9095fbee898cc1766 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Jul 2013 10:45:49 +0100 Subject: [PATCH 1283/1368] arm64: KVM: vgic: add GICv3 world switch Introduce the GICv3 world switch code used to save/restore the GICv3 context. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 754d37726010d872f1f714a8ce8920acdfa4978c) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 2 + arch/arm64/kernel/asm-offsets.c | 8 ++ arch/arm64/kvm/vgic-v3-switch.S | 238 +++++++++++++++++++++++++++++++ 3 files changed, 248 insertions(+) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ed4987bf9ac7..a28c35b337ec 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -110,6 +110,8 @@ extern u64 __vgic_v3_get_ich_vtr_el2(void); extern char __save_vgic_v2_state[]; extern char __restore_vgic_v2_state[]; +extern char __save_vgic_v3_state[]; +extern char __restore_vgic_v3_state[]; #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9ff0b2b97392..65ebb2ccde5f 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -137,6 +137,14 @@ int main(void) DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); + DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); + DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); + DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); + DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); + DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); + DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); + DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); + DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S index 9fbf27350c84..21e68f606a8f 100644 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -18,9 +18,247 @@ #include #include +#include +#include +#include +#include +#include +#include + .text .pushsection .hyp.text, "ax" +/* + * We store LRs in reverse order to let the CPU deal with streaming + * access. Use this macro to make it look saner... + */ +#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +.macro save_vgic_v3_state + // Compute the address of struct vgic_cpu + add x3, x0, #VCPU_VGIC_CPU + + // Make sure stores to the GIC via the memory mapped interface + // are now visible to the system register interface + dsb st + + // Save all interesting registers + mrs x4, ICH_HCR_EL2 + mrs x5, ICH_VMCR_EL2 + mrs x6, ICH_MISR_EL2 + mrs x7, ICH_EISR_EL2 + mrs x8, ICH_ELSR_EL2 + + str w4, [x3, #VGIC_V3_CPU_HCR] + str w5, [x3, #VGIC_V3_CPU_VMCR] + str w6, [x3, #VGIC_V3_CPU_MISR] + str w7, [x3, #VGIC_V3_CPU_EISR] + str w8, [x3, #VGIC_V3_CPU_ELRSR] + + msr ICH_HCR_EL2, xzr + + mrs x21, ICH_VTR_EL2 + mvn w22, w21 + ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + mrs x20, ICH_LR15_EL2 + mrs x19, ICH_LR14_EL2 + mrs x18, ICH_LR13_EL2 + mrs x17, ICH_LR12_EL2 + mrs x16, ICH_LR11_EL2 + mrs x15, ICH_LR10_EL2 + mrs x14, ICH_LR9_EL2 + mrs x13, ICH_LR8_EL2 + mrs x12, ICH_LR7_EL2 + mrs x11, ICH_LR6_EL2 + mrs x10, ICH_LR5_EL2 + mrs x9, ICH_LR4_EL2 + mrs x8, ICH_LR3_EL2 + mrs x7, ICH_LR2_EL2 + mrs x6, ICH_LR1_EL2 + mrs x5, ICH_LR0_EL2 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + str x20, [x3, #LR_OFFSET(15)] + str x19, [x3, #LR_OFFSET(14)] + str x18, [x3, #LR_OFFSET(13)] + str x17, [x3, #LR_OFFSET(12)] + str x16, [x3, #LR_OFFSET(11)] + str x15, [x3, #LR_OFFSET(10)] + str x14, [x3, #LR_OFFSET(9)] + str x13, [x3, #LR_OFFSET(8)] + str x12, [x3, #LR_OFFSET(7)] + str x11, [x3, #LR_OFFSET(6)] + str x10, [x3, #LR_OFFSET(5)] + str x9, [x3, #LR_OFFSET(4)] + str x8, [x3, #LR_OFFSET(3)] + str x7, [x3, #LR_OFFSET(2)] + str x6, [x3, #LR_OFFSET(1)] + str x5, [x3, #LR_OFFSET(0)] + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + mrs x20, ICH_AP0R3_EL2 + str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] + mrs x19, ICH_AP0R2_EL2 + str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] +6: mrs x18, ICH_AP0R1_EL2 + str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] +5: mrs x17, ICH_AP0R0_EL2 + str w17, [x3, #VGIC_V3_CPU_AP0R] + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + mrs x20, ICH_AP1R3_EL2 + str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] + mrs x19, ICH_AP1R2_EL2 + str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] +6: mrs x18, ICH_AP1R1_EL2 + str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] +5: mrs x17, ICH_AP1R0_EL2 + str w17, [x3, #VGIC_V3_CPU_AP1R] + + // Restore SRE_EL1 access and re-enable SRE at EL1. + mrs x5, ICC_SRE_EL2 + orr x5, x5, #ICC_SRE_EL2_ENABLE + msr ICC_SRE_EL2, x5 + isb + mov x5, #1 + msr ICC_SRE_EL1, x5 +.endm + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +.macro restore_vgic_v3_state + // Disable SRE_EL1 access. Necessary, otherwise + // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... + msr ICC_SRE_EL1, xzr + isb + + // Compute the address of struct vgic_cpu + add x3, x0, #VCPU_VGIC_CPU + + // Restore all interesting registers + ldr w4, [x3, #VGIC_V3_CPU_HCR] + ldr w5, [x3, #VGIC_V3_CPU_VMCR] + + msr ICH_HCR_EL2, x4 + msr ICH_VMCR_EL2, x5 + + mrs x21, ICH_VTR_EL2 + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] + msr ICH_AP1R3_EL2, x20 + ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] + msr ICH_AP1R2_EL2, x19 +6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] + msr ICH_AP1R1_EL2, x18 +5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] + msr ICH_AP1R0_EL2, x17 + + tbnz w21, #29, 6f // 6 bits + tbz w21, #30, 5f // 5 bits + // 7 bits + ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] + msr ICH_AP0R3_EL2, x20 + ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] + msr ICH_AP0R2_EL2, x19 +6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] + msr ICH_AP0R1_EL2, x18 +5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] + msr ICH_AP0R0_EL2, x17 + + and w22, w21, #0xf + mvn w22, w21 + ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + ldr x20, [x3, #LR_OFFSET(15)] + ldr x19, [x3, #LR_OFFSET(14)] + ldr x18, [x3, #LR_OFFSET(13)] + ldr x17, [x3, #LR_OFFSET(12)] + ldr x16, [x3, #LR_OFFSET(11)] + ldr x15, [x3, #LR_OFFSET(10)] + ldr x14, [x3, #LR_OFFSET(9)] + ldr x13, [x3, #LR_OFFSET(8)] + ldr x12, [x3, #LR_OFFSET(7)] + ldr x11, [x3, #LR_OFFSET(6)] + ldr x10, [x3, #LR_OFFSET(5)] + ldr x9, [x3, #LR_OFFSET(4)] + ldr x8, [x3, #LR_OFFSET(3)] + ldr x7, [x3, #LR_OFFSET(2)] + ldr x6, [x3, #LR_OFFSET(1)] + ldr x5, [x3, #LR_OFFSET(0)] + + adr x24, 1f + add x24, x24, x23 + br x24 + +1: + msr ICH_LR15_EL2, x20 + msr ICH_LR14_EL2, x19 + msr ICH_LR13_EL2, x18 + msr ICH_LR12_EL2, x17 + msr ICH_LR11_EL2, x16 + msr ICH_LR10_EL2, x15 + msr ICH_LR9_EL2, x14 + msr ICH_LR8_EL2, x13 + msr ICH_LR7_EL2, x12 + msr ICH_LR6_EL2, x11 + msr ICH_LR5_EL2, x10 + msr ICH_LR4_EL2, x9 + msr ICH_LR3_EL2, x8 + msr ICH_LR2_EL2, x7 + msr ICH_LR1_EL2, x6 + msr ICH_LR0_EL2, x5 + + // Ensure that the above will have reached the + // (re)distributors. This ensure the guest will read + // the correct values from the memory-mapped interface. + isb + dsb sy + + // Prevent the guest from touching the GIC system registers + mrs x5, ICC_SRE_EL2 + and x5, x5, #~ICC_SRE_EL2_ENABLE + msr ICC_SRE_EL2, x5 +.endm + +ENTRY(__save_vgic_v3_state) + save_vgic_v3_state + ret +ENDPROC(__save_vgic_v3_state) + +ENTRY(__restore_vgic_v3_state) + restore_vgic_v3_state + ret +ENDPROC(__restore_vgic_v3_state) + ENTRY(__vgic_v3_get_ich_vtr_el2) mrs x0, ICH_VTR_EL2 ret From 0e705de508452a4437225048231f8e7aa5f56d2c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Jul 2013 10:45:49 +0100 Subject: [PATCH 1284/1368] arm64: KVM: vgic: enable GICv2 emulation on top on GICv3 hardware Add the last missing bits that enable GICv2 emulation on top of GICv3 hardware. Signed-off-by: Marc Zyngier (cherry picked from commit 67b2abfedb7b861bead93400fa315c5c30879d51) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 7 +++++++ arch/arm64/kvm/Makefile | 2 ++ virt/kvm/arm/vgic.c | 1 + 3 files changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4c182d0aae70..4ae9213aa997 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -216,6 +216,13 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; break; +#ifdef CONFIG_ARM_GIC_V3 + case VGIC_V3: + __vgic_sr_vectors.save_vgic = __save_vgic_v3_state; + __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state; + break; +#endif + default: BUG(); } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index daf24dc59e2c..32a096174b94 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -22,4 +22,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7867b9a1f694..795ab482333d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1530,6 +1530,7 @@ static struct notifier_block vgic_cpu_nb = { static const struct of_device_id vgic_ids[] = { { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, {}, }; From ff815e50964d2593064d47c9dbd4a3bb03ea3e18 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:01 -0700 Subject: [PATCH 1285/1368] ARM: KVM: fix vgic V7 assembler code to work in BE image The vgic h/w registers are little endian; when BE asm code reads/writes from/to them, it needs to do byteswap after/before. Byteswap code uses ARM_BE8 wrapper to add swap only if CONFIG_CPU_BIG_ENDIAN is configured. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 64054c25cf7e060cd6780744fefe7ed3990e4f21) Signed-off-by: Christoffer Dall --- arch/arm/kvm/interrupts_head.S | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index e4eaf30205c5..68d99c69639c 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -1,4 +1,5 @@ #include +#include #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) #define VCPU_USR_SP (VCPU_USR_REG(13)) @@ -420,6 +421,14 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r8, [r2, #GICH_ELRSR0] ldr r9, [r2, #GICH_ELRSR1] ldr r10, [r2, #GICH_APR] +ARM_BE8(rev r3, r3 ) +ARM_BE8(rev r4, r4 ) +ARM_BE8(rev r5, r5 ) +ARM_BE8(rev r6, r6 ) +ARM_BE8(rev r7, r7 ) +ARM_BE8(rev r8, r8 ) +ARM_BE8(rev r9, r9 ) +ARM_BE8(rev r10, r10 ) str r3, [r11, #VGIC_V2_CPU_HCR] str r4, [r11, #VGIC_V2_CPU_VMCR] @@ -439,6 +448,7 @@ vcpu .req r0 @ vcpu pointer always in r0 add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r2], #4 +ARM_BE8(rev r6, r6 ) str r6, [r3], #4 subs r4, r4, #1 bne 1b @@ -466,6 +476,9 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r3, [r11, #VGIC_V2_CPU_HCR] ldr r4, [r11, #VGIC_V2_CPU_VMCR] ldr r8, [r11, #VGIC_V2_CPU_APR] +ARM_BE8(rev r3, r3 ) +ARM_BE8(rev r4, r4 ) +ARM_BE8(rev r8, r8 ) str r3, [r2, #GICH_HCR] str r4, [r2, #GICH_VMCR] @@ -476,6 +489,7 @@ vcpu .req r0 @ vcpu pointer always in r0 add r3, r11, #VGIC_V2_CPU_LR ldr r4, [r11, #VGIC_CPU_NR_LR] 1: ldr r6, [r3], #4 +ARM_BE8(rev r6, r6 ) str r6, [r2], #4 subs r4, r4, #1 bne 1b From af39d18ecef850d57705c56cda31da0d855424ea Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:02 -0700 Subject: [PATCH 1286/1368] ARM: KVM: handle 64bit values passed to mrcc or from mcrr instructions in BE case In some cases the mcrr and mrrc instructions in combination with the ldrd and strd instructions need to deal with 64bit value in memory. The ldrd and strd instructions already handle endianness within word (register) boundaries but to get effect of the whole 64bit value represented correctly, rr_lo_hi macro is introduced and is used to swap registers positions when the mcrr and mrrc instructions are used. That has the effect of swapping two words. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 19b0e60a63f758a28329aa40f4270a6c98c2dcb7) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_asm.h | 18 ++++++++++++++++++ arch/arm/kvm/init.S | 4 ++-- arch/arm/kvm/interrupts.S | 4 ++-- arch/arm/kvm/interrupts_head.S | 6 +++--- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 53b3c4a50d5c..3a67bec72d0c 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -61,6 +61,24 @@ #define ARM_EXCEPTION_FIQ 6 #define ARM_EXCEPTION_HVC 7 +/* + * The rr_lo_hi macro swaps a pair of registers depending on + * current endianness. It is used in conjunction with ldrd and strd + * instructions that load/store a 64-bit value from/to memory to/from + * a pair of registers which are used with the mrrc and mcrr instructions. + * If used with the ldrd/strd instructions, the a1 parameter is the first + * source/destination register and the a2 parameter is the second + * source/destination register. Note that the ldrd/strd instructions + * already swap the bytes within the words correctly according to the + * endianness setting, but the order of the registers need to be effectively + * swapped when used with the mrrc/mcrr instructions. + */ +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define rr_lo_hi(a1, a2) a2, a1 +#else +#define rr_lo_hi(a1, a2) a1, a2 +#endif + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 1b9844d369cc..2cc14dfad049 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -71,7 +71,7 @@ __do_hyp_init: bne phase2 @ Yes, second stage init @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, r2, r3, c2 + mcrr p15, 4, rr_lo_hi(r2, r3), c2 @ Set the HTCR and VTCR to the same shareability and cacheability @ settings as the non-secure TTBCR and with T0SZ == 0. @@ -137,7 +137,7 @@ phase2: mov pc, r0 target: @ We're now in the trampoline code, switch page tables - mcrr p15, 4, r2, r3, c2 + mcrr p15, 4, rr_lo_hi(r2, r3), c2 isb @ Invalidate the old TLBs diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 0d68d4073068..24d4e65806a7 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -52,7 +52,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) dsb ishst add r0, r0, #KVM_VTTBR ldrd r2, r3, [r0] - mcrr p15, 6, r2, r3, c2 @ Write VTTBR + mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR isb mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) dsb ish @@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run) ldr r1, [vcpu, #VCPU_KVM] add r1, r1, #KVM_VTTBR ldrd r2, r3, [r1] - mcrr p15, 6, r2, r3, c2 @ Write VTTBR + mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR @ We're all done, just restore the GPRs and go to the guest restore_guest_regs diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 68d99c69639c..98c8c5b9a87f 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -520,7 +520,7 @@ ARM_BE8(rev r6, r6 ) mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL isb - mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL + mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL ldr r4, =VCPU_TIMER_CNTV_CVAL add r5, vcpu, r4 strd r2, r3, [r5] @@ -560,12 +560,12 @@ ARM_BE8(rev r6, r6 ) ldr r2, [r4, #KVM_TIMER_CNTVOFF] ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)] - mcrr p15, 4, r2, r3, c14 @ CNTVOFF + mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF ldr r4, =VCPU_TIMER_CNTV_CVAL add r5, vcpu, r4 ldrd r2, r3, [r5] - mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL + mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL isb ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL] From b76b02ab6245e8641cc4acdc16b0050132bd9065 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:03 -0700 Subject: [PATCH 1287/1368] ARM: KVM: __kvm_vcpu_run function return result fix in BE case The __kvm_vcpu_run function returns a 64-bit result in two registers, which has to be adjusted for BE case. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 6d7311b520864531c81f0e0237e96146d8057d77) Signed-off-by: Christoffer Dall --- arch/arm/kvm/interrupts.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 24d4e65806a7..01dcb0e752d9 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -199,8 +199,13 @@ after_vfp_restore: restore_host_regs clrex @ Clear exclusive monitor +#ifndef CONFIG_CPU_ENDIAN_BE8 mov r0, r1 @ Return the return code mov r1, #0 @ Clear upper bits in return value +#else + @ r1 already has return code + mov r0, #0 @ Clear upper bits in return value +#endif /* CONFIG_CPU_ENDIAN_BE8 */ bx lr @ return to IOCTL /******************************************************************** From df5bce90aca5ced2e4ccee2a9ef913b064a47aea Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:04 -0700 Subject: [PATCH 1288/1368] ARM: KVM: vgic mmio should hold data as LE bytes array in BE case According to recent clarifications of mmio.data array meaning - the mmio.data array should hold bytes as they would appear in memory. Vgic is little endian device. And in case of BE image kernel side that emulates vgic, holds data in BE form. So we need to byteswap cpu<->le32 vgic registers when we read/write them from mmio.data[]. Change has no effect in LE case because cpu already runs in le32. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 1c9f04717ca8326e8df759d5dda9cd1b3d968b5b) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 795ab482333d..b0edc8c670f8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -238,12 +238,12 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) { - return *((u32 *)mmio->data) & mask; + return le32_to_cpu(*((u32 *)mmio->data)) & mask; } static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) { - *((u32 *)mmio->data) = value & mask; + *((u32 *)mmio->data) = cpu_to_le32(value) & mask; } /** From d6ff09058017a8dfd96fa24502691c83df6566e7 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:05 -0700 Subject: [PATCH 1289/1368] ARM: KVM: MMIO support BE host running LE code In case of status register E bit is not set (LE mode) and host runs in BE mode we need byteswap data, so read/write is emulated correctly. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 27f194fd360a96cc64bebb2d69dd5abd67984b8a) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 0fa90c962ac8..69b746955fca 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -185,9 +185,16 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, default: return be32_to_cpu(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return le16_to_cpu(data & 0xffff); + default: + return le32_to_cpu(data); + } } - - return data; /* Leave LE untouched */ } static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, @@ -203,9 +210,16 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, default: return cpu_to_be32(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_le16(data & 0xffff); + default: + return cpu_to_le32(data); + } } - - return data; /* Leave LE untouched */ } #endif /* __ARM_KVM_EMULATE_H__ */ From 1f5f8779f88483402671d58f0e307934218e22f6 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:06 -0700 Subject: [PATCH 1290/1368] ARM: KVM: one_reg coproc set and get BE fixes Fix code that handles KVM_SET_ONE_REG, KVM_GET_ONE_REG ioctls to work in BE image. Before this fix get/set_one_reg functions worked correctly only in LE case - reg_from_user was taking 'void *' kernel address that actually could be target/source memory of either 4 bytes size or 8 bytes size, and code copied from/to user memory that could hold either 4 bytes register, 8 byte register or pair of 4 bytes registers. In order to work in endian agnostic way reg_from_user to reg_to_user functions should copy register value only to kernel variable with size that matches register size. In few place where size mismatch existed fix issue on macro caller side. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 73891f72c414afff6da6f01e7af2ff5a44a8b823) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 88 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index c58a35116f63..37a0fe1bb9bb 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -44,6 +44,31 @@ static u32 cache_levels; /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ #define CSSELR_MAX 12 +/* + * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some + * of cp15 registers can be viewed either as couple of two u32 registers + * or one u64 register. Current u64 register encoding is that least + * significant u32 word is followed by most significant u32 word. + */ +static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu, + const struct coproc_reg *r, + u64 val) +{ + vcpu->arch.cp15[r->reg] = val & 0xffffffff; + vcpu->arch.cp15[r->reg + 1] = val >> 32; +} + +static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, + const struct coproc_reg *r) +{ + u64 val; + + val = vcpu->arch.cp15[r->reg + 1]; + val = val << 32; + val = val | vcpu->arch.cp15[r->reg]; + return val; +} + int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run) { kvm_inject_undefined(vcpu); @@ -682,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = { { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, }; +/* + * Reads a register value from a userspace address to a kernel + * variable. Make sure that register size matches sizeof(*__val). + */ static int reg_from_user(void *val, const void __user *uaddr, u64 id) { - /* This Just Works because we are little endian. */ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; } +/* + * Writes a register value to a userspace address from a kernel variable. + * Make sure that register size matches sizeof(*__val). + */ static int reg_to_user(void __user *uaddr, const void *val, u64 id) { - /* This Just Works because we are little endian. */ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; @@ -702,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr) { struct coproc_params params; const struct coproc_reg *r; + int ret; if (!index_to_params(id, ¶ms)) return -ENOENT; @@ -710,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr) if (!r) return -ENOENT; - return reg_to_user(uaddr, &r->val, id); + ret = -ENOENT; + if (KVM_REG_SIZE(id) == 4) { + u32 val = r->val; + + ret = reg_to_user(uaddr, &val, id); + } else if (KVM_REG_SIZE(id) == 8) { + ret = reg_to_user(uaddr, &r->val, id); + } + return ret; } static int set_invariant_cp15(u64 id, void __user *uaddr) @@ -718,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr) struct coproc_params params; const struct coproc_reg *r; int err; - u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ + u64 val; if (!index_to_params(id, ¶ms)) return -ENOENT; @@ -726,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr) if (!r) return -ENOENT; - err = reg_from_user(&val, uaddr, id); + err = -ENOENT; + if (KVM_REG_SIZE(id) == 4) { + u32 val32; + + err = reg_from_user(&val32, uaddr, id); + if (!err) + val = val32; + } else if (KVM_REG_SIZE(id) == 8) { + err = reg_from_user(&val, uaddr, id); + } if (err) return err; @@ -1004,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { const struct coproc_reg *r; void __user *uaddr = (void __user *)(long)reg->addr; + int ret; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_get(reg->id, uaddr); @@ -1015,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!r) return get_invariant_cp15(reg->id, uaddr); - /* Note: copies two regs if size is 64 bit. */ - return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + ret = -ENOENT; + if (KVM_REG_SIZE(reg->id) == 8) { + u64 val; + + val = vcpu_cp15_reg64_get(vcpu, r); + ret = reg_to_user(uaddr, &val, reg->id); + } else if (KVM_REG_SIZE(reg->id) == 4) { + ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); + } + + return ret; } int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { const struct coproc_reg *r; void __user *uaddr = (void __user *)(long)reg->addr; + int ret; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) return demux_c15_set(reg->id, uaddr); @@ -1034,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!r) return set_invariant_cp15(reg->id, uaddr); - /* Note: copies two regs if size is 64 bit */ - return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + ret = -ENOENT; + if (KVM_REG_SIZE(reg->id) == 8) { + u64 val; + + ret = reg_from_user(&val, uaddr, reg->id); + if (!ret) + vcpu_cp15_reg64_set(vcpu, r, val); + } else if (KVM_REG_SIZE(reg->id) == 4) { + ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); + } + + return ret; } static unsigned int num_demux_regs(void) From 7c1537fb45e64848a4c6f19b66fec1671d608d1b Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:07 -0700 Subject: [PATCH 1291/1368] ARM: KVM: enable KVM in Kconfig on big-endian systems Previous patches addresses ARMV7 big-endian virtualiztion, kvm related issues, so enable ARM_VIRT_EXT for big-endian now. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit f5aa462147a209dab40b02f0f70234784b913bf9) Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 4be5bb150bdd..466bd299b1a8 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,7 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST - depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN + depends on ARM_VIRT_EXT && ARM_LPAE ---help--- Support hosting virtualized guest machines. You will also need to select one or more of the processor modules below. From 5d129b5dba663c42acca6a4711cc8732f692a18a Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:08 -0700 Subject: [PATCH 1292/1368] ARM64: KVM: MMIO support BE host running LE code In case of guest CPU running in LE mode and host runs in BE mode we need byteswap data, so read/write is emulated correctly. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit b30070862edbdb252f9d0d3a1e61b8dc4c68e3d2) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index dd8ecfc3f995..fdc3e21abd8d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -213,6 +213,17 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, default: return be64_to_cpu(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return le16_to_cpu(data & 0xffff); + case 4: + return le32_to_cpu(data & 0xffffffff); + default: + return le64_to_cpu(data); + } } return data; /* Leave LE untouched */ @@ -233,6 +244,17 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, default: return cpu_to_be64(data); } + } else { + switch (len) { + case 1: + return data & 0xff; + case 2: + return cpu_to_le16(data & 0xffff); + case 4: + return cpu_to_le32(data & 0xffffffff); + default: + return cpu_to_le64(data); + } } return data; /* Leave LE untouched */ From e51ab0ff6436524131e948a9a31258bb859b1ec5 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:09 -0700 Subject: [PATCH 1293/1368] ARM64: KVM: store kvm_vcpu_fault_info est_el2 as word esr_el2 field of struct kvm_vcpu_fault_info has u32 type. It should be stored as word. Current code works in LE case because existing puts least significant word of x1 into esr_el2, and it puts most significant work of x1 into next field, which accidentally is OK because it is updated again by next instruction. But existing code breaks in BE case. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit ba083d20d8cfa9e999043cd89c4ebc964ccf8927) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 5945f3bdea7a..7874e022d077 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -762,7 +762,7 @@ el1_trap: mrs x2, far_el2 2: mrs x0, tpidr_el2 - str x1, [x0, #VCPU_ESR_EL2] + str w1, [x0, #VCPU_ESR_EL2] str x2, [x0, #VCPU_FAR_EL2] str x3, [x0, #VCPU_HPFAR_EL2] From df032d7ed86bcaf850b549698c62eb182c2cb4e7 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:10 -0700 Subject: [PATCH 1294/1368] ARM64: KVM: fix vgic_bitmap_get_reg function for BE 64bit case Fix vgic_bitmap_get_reg function to return 'right' word address of 'unsigned long' bitmap value in case of BE 64bit image. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 9662fb4854e1319b4affda47f279c3f210316def) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b0edc8c670f8..ede8f6466c95 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -98,14 +98,34 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; +/* + * struct vgic_bitmap contains unions that provide two views of + * the same data. In one case it is an array of registers of + * u32's, and in the other case it is a bitmap of unsigned + * longs. + * + * This does not work on 64-bit BE systems, because the bitmap access + * will store two consecutive 32-bit words with the higher-addressed + * register's bits at the lower index and the lower-addressed register's + * bits at the higher index. + * + * Therefore, swizzle the register index when accessing the 32-bit word + * registers to access the right register's value. + */ +#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64 +#define REG_OFFSET_SWIZZLE 1 +#else +#define REG_OFFSET_SWIZZLE 0 +#endif + static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { offset >>= 2; if (!offset) - return x->percpu[cpuid].reg; + return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); else - return x->shared.reg + offset - 1; + return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); } static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, From 21c40b1a24a48d76af181a1b18315a4f0e61b37e Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:12 -0700 Subject: [PATCH 1295/1368] ARM64: KVM: set and get of sys registers in BE case Since size of all sys registers is always 8 bytes. Current code is actually endian agnostic. Just clean it up a bit. Removed comment about little endian. Change type of pointer from 'void *' to 'u64 *' to enforce stronger type checking. Signed-off-by: Victor Kamensky Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 26c99af1018c35020cfad1d20f02acb224807655) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 34f25a590bd7..f0ceceffa95a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -776,17 +776,15 @@ static struct sys_reg_desc invariant_sys_regs[] = { NULL, get_ctr_el0 }, }; -static int reg_from_user(void *val, const void __user *uaddr, u64 id) +static int reg_from_user(u64 *val, const void __user *uaddr, u64 id) { - /* This Just Works because we are little endian. */ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; } -static int reg_to_user(void __user *uaddr, const void *val, u64 id) +static int reg_to_user(void __user *uaddr, const u64 *val, u64 id) { - /* This Just Works because we are little endian. */ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) return -EFAULT; return 0; From 5208e0ff0d5097acc955d60d4daa395893a772ce Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Wed, 2 Jul 2014 17:19:30 +0100 Subject: [PATCH 1296/1368] ARM64: KVM: fix big endian issue in access_vm_reg for 32bit guest Fix issue with 32bit guests running on top of BE KVM host. Indexes of high and low words of 64bit cp15 register are swapped in case of big endian code, since 64bit cp15 state is restored or saved with double word write or read instruction. Define helper macro to access low words of 64bit cp15 register. Signed-off-by: Victor Kamensky Signed-off-by: Marc Zyngier (cherry picked from commit f0a3eaff71b8bd5d5acfda1f0cf3eedf49755622) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/sys_regs.c | 10 ++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4ae9213aa997..503c70661636 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -140,6 +140,12 @@ struct kvm_vcpu_arch { #define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) #define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) +#ifdef CONFIG_CPU_BIG_ENDIAN +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)]) +#else +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)]) +#endif + struct kvm_vm_stat { u32 remote_tlb_flush; }; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f0ceceffa95a..56288f31c12d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -134,13 +134,11 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, BUG_ON(!p->is_write); val = *vcpu_reg(vcpu, p->Rt); - if (!p->is_aarch32) { + if (!p->is_aarch32 || !p->is_32bit) vcpu_sys_reg(vcpu, r->reg) = val; - } else { - vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL; - if (!p->is_32bit) - vcpu_cp15(vcpu, r->reg + 1) = val >> 32; - } + else + vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + return true; } From 129fb6b2b94c46e279406736a959cc39a23500c6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:21:16 +0100 Subject: [PATCH 1297/1368] arm64: KVM: rename pm_fake handler to trap_raz_wi pm_fake doesn't quite describe what the handler does (ignoring writes and returning 0 for reads). As we're about to use it (a lot) in a different context, rename it with a (admitedly cryptic) name that make sense for all users. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 7609c1251f9d8bbcd6a05ba22153e50cf4f88cff) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 83 ++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 56288f31c12d..492ba301e10c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -161,18 +161,9 @@ static bool access_sctlr(struct kvm_vcpu *vcpu, return true; } -/* - * We could trap ID_DFR0 and tell the guest we don't support performance - * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was - * NAKed, so it will read the PMCR anyway. - * - * Therefore we tell the guest we have 0 counters. Unfortunately, we - * must always support PMCCNTR (the cycle counter): we just RAZ/WI for - * all PM registers, which doesn't crash the guest kernel at least. - */ -static bool pm_fake(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool trap_raz_wi(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) { if (p->is_write) return ignore_write(vcpu, p); @@ -199,6 +190,17 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + * + * We could trap ID_DFR0 and tell the guest we don't support performance + * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was + * NAKed, so it will read the PMCR anyway. + * + * Therefore we tell the guest we have 0 counters. Unfortunately, we + * must always support PMCCNTR (the cycle counter): we just RAZ/WI for + * all PM registers, which doesn't crash the guest kernel at least. + * + * Same goes for the whole debug infrastructure, which probably breaks + * some guest functionnality. This should be fixed. */ static const struct sys_reg_desc sys_reg_descs[] = { /* DC ISW */ @@ -258,10 +260,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* PMINTENSET_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMINTENCLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* MAIR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), @@ -290,43 +292,43 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* PMCR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMCNTENSET_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMCNTENCLR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* PMOVSCLR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), - pm_fake }, + trap_raz_wi }, /* PMSWINC_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), - pm_fake }, + trap_raz_wi }, /* PMSELR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), - pm_fake }, + trap_raz_wi }, /* PMCEID0_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), - pm_fake }, + trap_raz_wi }, /* PMCEID1_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), - pm_fake }, + trap_raz_wi }, /* PMCCNTR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMXEVTYPER_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), - pm_fake }, + trap_raz_wi }, /* PMXEVCNTR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), - pm_fake }, + trap_raz_wi }, /* PMUSERENR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), - pm_fake }, + trap_raz_wi }, /* PMOVSSET_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), - pm_fake }, + trap_raz_wi }, /* TPIDR_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), @@ -372,19 +374,20 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, - { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake }, - { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, - { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, + /* PMU */ + { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi }, { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, From efacbc423cf057a9051ca0fd240e68f5596700e7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 12:13:14 +0100 Subject: [PATCH 1298/1368] arm64: move DBG_MDSCR_* to asm/debug-monitors.h In order to be able to use the DBG_MDSCR_* macros from the KVM code, move the relevant definitions to the obvious include file. Also move the debug_el enum to a portion of the file that is guarded by #ifndef __ASSEMBLY__ in order to use that file from assembly code. Acked-by: Will Deacon Reviewed-by: Anup Patel Signed-off-by: Marc Zyngier (cherry picked from commit 51ba248164d0eeb8b4f94d405430c18a56c6ac9a) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/debug-monitors.h | 19 ++++++++++++++----- arch/arm64/kernel/debug-monitors.c | 9 --------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7c951a510b54..aab72ce22348 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -18,6 +18,15 @@ #ifdef __KERNEL__ +/* Low-level stepping controls. */ +#define DBG_MDSCR_SS (1 << 0) +#define DBG_SPSR_SS (1 << 21) + +/* MDSCR_EL1 enabling bits */ +#define DBG_MDSCR_KDE (1 << 13) +#define DBG_MDSCR_MDE (1 << 15) +#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) + #define DBG_ESR_EVT(x) (((x) >> 27) & 0x7) /* AArch64 */ @@ -73,11 +82,6 @@ #define CACHE_FLUSH_IS_SAFE 1 -enum debug_el { - DBG_ACTIVE_EL0 = 0, - DBG_ACTIVE_EL1, -}; - /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 @@ -115,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook); u8 debug_monitors_arch(void); +enum debug_el { + DBG_ACTIVE_EL0 = 0, + DBG_ACTIVE_EL1, +}; + void enable_debug_monitors(enum debug_el el); void disable_debug_monitors(enum debug_el el); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 553a120fc838..fea84694fce4 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -31,15 +31,6 @@ #include #include -/* Low-level stepping controls. */ -#define DBG_MDSCR_SS (1 << 0) -#define DBG_SPSR_SS (1 << 21) - -/* MDSCR_EL1 enabling bits */ -#define DBG_MDSCR_KDE (1 << 13) -#define DBG_MDSCR_MDE (1 << 15) -#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) - /* Determine debug architecture. */ u8 debug_monitors_arch(void) { From ad4686e6b3c82d7cf3f79853cc12f655ce444668 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:24:46 +0100 Subject: [PATCH 1299/1368] arm64: KVM: add trap handlers for AArch64 debug registers Add handlers for all the AArch64 debug registers that are accessible from EL0 or EL1. The trapping code keeps track of the state of the debug registers, allowing for the switch code to implement a lazy switching strategy. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 0c557ed4983b7abe152212b5b1726c2a789b2c61) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 28 ++++-- arch/arm64/include/asm/kvm_host.h | 3 + arch/arm64/kvm/sys_regs.c | 150 +++++++++++++++++++++++++++++- 3 files changed, 172 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a28c35b337ec..660f75c48bbb 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -45,14 +45,25 @@ #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ #define PAR_EL1 21 /* Physical Address Register */ +#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ +#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ +#define DBGBCR15_EL1 38 +#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ +#define DBGBVR15_EL1 54 +#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ +#define DBGWCR15_EL1 70 +#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ +#define DBGWVR15_EL1 86 +#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ + /* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 22 /* Domain Access Control Register */ -#define IFSR32_EL2 23 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */ -#define TEECR32_EL1 26 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 28 +#define DACR32_EL2 88 /* Domain Access Control Register */ +#define IFSR32_EL2 89 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ +#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 94 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ @@ -89,6 +100,9 @@ #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 +#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 +#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 503c70661636..8e410f761918 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -101,6 +101,9 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; + /* Debug state */ + u64 debug_flags; + /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 492ba301e10c..d53ce430b178 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "sys_regs.h" @@ -171,6 +172,73 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + *vcpu_reg(vcpu, p->Rt) = (1 << 3); + return true; + } +} + +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + u32 val; + asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); + *vcpu_reg(vcpu, p->Rt) = val; + return true; + } +} + +/* + * We want to avoid world-switching all the DBG registers all the + * time: + * + * - If we've touched any debug register, it is likely that we're + * going to touch more of them. It then makes sense to disable the + * traps and start doing the save/restore dance + * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is + * then mandatory to save/restore the registers, as the guest + * depends on them. + * + * For this, we use a DIRTY bit, indicating the guest has modified the + * debug registers, used as follow: + * + * On guest entry: + * - If the dirty bit is set (because we're coming back from trapping), + * disable the traps, save host registers, restore guest registers. + * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), + * set the dirty bit, disable the traps, save host registers, + * restore guest registers. + * - Otherwise, enable the traps + * + * On guest exit: + * - If the dirty bit is set, save guest registers, restore host + * registers and clear the dirty bit. This ensure that the host can + * now use the debug registers. + */ +static bool trap_debug_regs(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); + } + + return true; +} + static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 amair; @@ -187,6 +255,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); } +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ +#define DBG_BCR_BVR_WCR_WVR_EL1(n) \ + /* DBGBVRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ + trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ + /* DBGBCRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ + trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ + /* DBGWVRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ + trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ + /* DBGWCRn_EL1 */ \ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ + trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -199,8 +282,12 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * must always support PMCCNTR (the cycle counter): we just RAZ/WI for * all PM registers, which doesn't crash the guest kernel at least. * - * Same goes for the whole debug infrastructure, which probably breaks - * some guest functionnality. This should be fixed. + * Debug handling: We do trap most, if not all debug related system + * registers. The implementation is good enough to ensure that a guest + * can use these with minimal performance degradation. The drawback is + * that we don't implement any of the external debug, none of the + * OSlock protocol. This should be revisited if we ever encounter a + * more demanding guest... */ static const struct sys_reg_desc sys_reg_descs[] = { /* DC ISW */ @@ -213,12 +300,71 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), access_dcsw }, + DBG_BCR_BVR_WCR_WVR_EL1(0), + DBG_BCR_BVR_WCR_WVR_EL1(1), + /* MDCCINT_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), + trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, + /* MDSCR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), + trap_debug_regs, reset_val, MDSCR_EL1, 0 }, + DBG_BCR_BVR_WCR_WVR_EL1(2), + DBG_BCR_BVR_WCR_WVR_EL1(3), + DBG_BCR_BVR_WCR_WVR_EL1(4), + DBG_BCR_BVR_WCR_WVR_EL1(5), + DBG_BCR_BVR_WCR_WVR_EL1(6), + DBG_BCR_BVR_WCR_WVR_EL1(7), + DBG_BCR_BVR_WCR_WVR_EL1(8), + DBG_BCR_BVR_WCR_WVR_EL1(9), + DBG_BCR_BVR_WCR_WVR_EL1(10), + DBG_BCR_BVR_WCR_WVR_EL1(11), + DBG_BCR_BVR_WCR_WVR_EL1(12), + DBG_BCR_BVR_WCR_WVR_EL1(13), + DBG_BCR_BVR_WCR_WVR_EL1(14), + DBG_BCR_BVR_WCR_WVR_EL1(15), + + /* MDRAR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), + trap_raz_wi }, + /* OSLAR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100), + trap_raz_wi }, + /* OSLSR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100), + trap_oslsr_el1 }, + /* OSDLR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100), + trap_raz_wi }, + /* DBGPRCR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100), + trap_raz_wi }, + /* DBGCLAIMSET_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110), + trap_raz_wi }, + /* DBGCLAIMCLR_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110), + trap_raz_wi }, + /* DBGAUTHSTATUS_EL1 */ + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), + trap_dbgauthstatus_el1 }, + /* TEECR32_EL1 */ { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), NULL, reset_val, TEECR32_EL1, 0 }, /* TEEHBR32_EL1 */ { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), NULL, reset_val, TEEHBR32_EL1, 0 }, + + /* MDCCSR_EL1 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), + trap_raz_wi }, + /* DBGDTR_EL0 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000), + trap_raz_wi }, + /* DBGDTR[TR]X_EL0 */ + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000), + trap_raz_wi }, + /* DBGVCR32_EL2 */ { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), NULL, reset_val, DBGVCR32_EL2, 0 }, From d62af886035cc34e73e82e06179951f6c4e72c6d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:27:13 +0100 Subject: [PATCH 1300/1368] arm64: KVM: common infrastructure for handling AArch32 CP14/CP15 As we're about to trap a bunch of CP14 registers, let's rework the CP15 handling so it can be generalized and work with multiple tables. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 72564016aae45f42e488f926bc803f9a2e1c771c) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/include/asm/kvm_coproc.h | 3 +- arch/arm64/include/asm/kvm_host.h | 13 ++- arch/arm64/kvm/handle_exit.c | 4 +- arch/arm64/kvm/sys_regs.c | 133 +++++++++++++++++++++++----- 5 files changed, 124 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 660f75c48bbb..69027ded5006 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -95,7 +95,7 @@ #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ -#define NR_CP15_REGS (NR_SYS_REGS * 2) +#define NR_COPRO_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 9a59301cd014..0b52377a6c11 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -39,7 +39,8 @@ void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table); int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8e410f761918..79812be4f25f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -86,7 +86,7 @@ struct kvm_cpu_context { struct kvm_regs gp_regs; union { u64 sys_regs[NR_SYS_REGS]; - u32 cp15[NR_CP15_REGS]; + u32 copro[NR_COPRO_REGS]; }; }; @@ -141,12 +141,17 @@ struct kvm_vcpu_arch { #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) #define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) +/* + * CP14 and CP15 live in the same array, as they are backed by the + * same system registers. + */ +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) #ifdef CONFIG_CPU_BIG_ENDIAN -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)]) +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)]) #else -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)]) +#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)]) #endif struct kvm_vm_stat { diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 182415e1a952..e28be510380c 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -73,9 +73,9 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_EL2_EC_WFI] = kvm_handle_wfx, [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, - [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32, [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, - [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64, [ESR_EL2_EC_HVC32] = handle_hvc, [ESR_EL2_EC_SMC32] = handle_smc, [ESR_EL2_EC_HVC64] = handle_hvc, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d53ce430b178..266afd972ad3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -494,6 +494,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_val, FPEXC32_EL2, 0x70 }, }; +/* Trapped cp14 registers */ +static const struct sys_reg_desc cp14_regs[] = { +}; + /* * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, * depending on the way they are accessed (as a 32bit or a 64bit @@ -601,26 +605,29 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) +/* + * emulate_cp -- tries to match a sys_reg access in a handling table, and + * call the corresponding trap handler. + * + * @params: pointer to the descriptor of the access + * @table: array of trap descriptors + * @num: size of the trap descriptor array + * + * Return 0 if the access has been handled, and -1 if not. + */ +static int emulate_cp(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params, + const struct sys_reg_desc *table, + size_t num) { - kvm_inject_undefined(vcpu); - return 1; -} + const struct sys_reg_desc *r; -static void emulate_cp15(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) -{ - size_t num; - const struct sys_reg_desc *table, *r; + if (!table) + return -1; /* Not handled */ - table = get_target_table(vcpu->arch.target, false, &num); - - /* Search target-specific then generic table. */ r = find_reg(params, table, num); - if (!r) - r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs)); - if (likely(r)) { + if (r) { /* * Not having an accessor means that we have * configured a trap that we don't know how to @@ -632,22 +639,51 @@ static void emulate_cp15(struct kvm_vcpu *vcpu, if (likely(r->access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return; } - /* If access function fails, it should complain. */ + + /* Handled */ + return 0; } - kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); + /* Not handled */ + return -1; +} + +static void unhandled_cp_access(struct kvm_vcpu *vcpu, + struct sys_reg_params *params) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + int cp; + + switch(hsr_ec) { + case ESR_EL2_EC_CP15_32: + case ESR_EL2_EC_CP15_64: + cp = 15; + break; + case ESR_EL2_EC_CP14_MR: + case ESR_EL2_EC_CP14_64: + cp = 14; + break; + default: + WARN_ON((cp = -1)); + } + + kvm_err("Unsupported guest CP%d access at: %08lx\n", + cp, *vcpu_pc(vcpu)); print_sys_reg_instr(params); kvm_inject_undefined(vcpu); } /** - * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access * @vcpu: The VCPU pointer * @run: The kvm_run struct */ -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *global, + size_t nr_global, + const struct sys_reg_desc *target_specific, + size_t nr_specific) { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -676,8 +712,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) *vcpu_reg(vcpu, params.Rt) = val; } - emulate_cp15(vcpu, ¶ms); + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) + goto out; + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + goto out; + unhandled_cp_access(vcpu, ¶ms); + +out: /* Do the opposite hack for the read side */ if (!params.is_write) { u64 val = *vcpu_reg(vcpu, params.Rt); @@ -693,7 +735,11 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) * @vcpu: The VCPU pointer * @run: The kvm_run struct */ -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *global, + size_t nr_global, + const struct sys_reg_desc *target_specific, + size_t nr_specific) { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -708,10 +754,51 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op1 = (hsr >> 14) & 0x7; params.Op2 = (hsr >> 17) & 0x7; - emulate_cp15(vcpu, ¶ms); + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) + return 1; + if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + return 1; + + unhandled_cp_access(vcpu, ¶ms); return 1; } +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + const struct sys_reg_desc *target_specific; + size_t num; + + target_specific = get_target_table(vcpu->arch.target, false, &num); + return kvm_handle_cp_64(vcpu, + cp15_regs, ARRAY_SIZE(cp15_regs), + target_specific, num); +} + +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + const struct sys_reg_desc *target_specific; + size_t num; + + target_specific = get_target_table(vcpu->arch.target, false, &num); + return kvm_handle_cp_32(vcpu, + cp15_regs, ARRAY_SIZE(cp15_regs), + target_specific, num); +} + +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return kvm_handle_cp_64(vcpu, + cp14_regs, ARRAY_SIZE(cp14_regs), + NULL, 0); +} + +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return kvm_handle_cp_32(vcpu, + cp14_regs, ARRAY_SIZE(cp14_regs), + NULL, 0); +} + static int emulate_sys_reg(struct kvm_vcpu *vcpu, const struct sys_reg_params *params) { From 693292e6a93a775a91421b16f87f632e5d7f077b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 14:11:48 +0100 Subject: [PATCH 1301/1368] arm64: KVM: use separate tables for AArch32 32 and 64bit traps An interesting "feature" of the CP14 encoding is that there is an overlap between 32 and 64bit registers, meaning they cannot live in the same table as we did for CP15. Create separate tables for 64bit CP14 and CP15 registers, and let the top level handler use the right one. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit a9866ba0cddfc497335fa02a175c4578b96722ff) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 266afd972ad3..499a351fd1b9 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -498,13 +498,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { static const struct sys_reg_desc cp14_regs[] = { }; +/* Trapped cp14 64bit registers */ +static const struct sys_reg_desc cp14_64_regs[] = { +}; + /* * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, * depending on the way they are accessed (as a 32bit or a 64bit * register). */ static const struct sys_reg_desc cp15_regs[] = { - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, @@ -545,6 +548,10 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, +}; + +static const struct sys_reg_desc cp15_64_regs[] = { + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, }; @@ -770,7 +777,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) target_specific = get_target_table(vcpu->arch.target, false, &num); return kvm_handle_cp_64(vcpu, - cp15_regs, ARRAY_SIZE(cp15_regs), + cp15_64_regs, ARRAY_SIZE(cp15_64_regs), target_specific, num); } @@ -788,7 +795,7 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { return kvm_handle_cp_64(vcpu, - cp14_regs, ARRAY_SIZE(cp14_regs), + cp14_64_regs, ARRAY_SIZE(cp14_64_regs), NULL, 0); } From 2c4bc5c6442250ebe7a1b08ed297fc48aef99547 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 13:43:39 +0100 Subject: [PATCH 1302/1368] arm64: KVM: check ordering of all system register tables We now have multiple tables for the various system registers we trap. Make sure we check the order of all of them, as it is critical that we get the order right (been there, done that...). Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit e6a9551760623d1703487e8a16bb9c3ea8a7e7a8) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 499a351fd1b9..8ab47c7326ed 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1308,14 +1308,32 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } +static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n) +{ + unsigned int i; + + for (i = 1; i < n; i++) { + if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) { + kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); + return 1; + } + } + + return 0; +} + void kvm_sys_reg_table_init(void) { unsigned int i; struct sys_reg_desc clidr; /* Make sure tables are unique and in order. */ - for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++) - BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0); + BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs))); + BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); + BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs))); + BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); + BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs))); + BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs))); /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) From c85f50a20b43754698af82b27279ca87cb04bd3a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:31:37 +0100 Subject: [PATCH 1303/1368] arm64: KVM: add trap handlers for AArch32 debug registers Add handlers for all the AArch32 debug registers that are accessible from EL0 or EL1. The code follow the same strategy as the AArch64 counterpart with regards to tracking the dirty state of the debug registers. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit bdfb4b389c8d8f07e2d5b8e1291e01c789ba4aad) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_asm.h | 9 ++ arch/arm64/kvm/sys_regs.c | 144 ++++++++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 69027ded5006..483842180f8f 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -95,6 +95,15 @@ #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ + +#define cp14_DBGDSCRext (MDSCR_EL1 * 2) +#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) +#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) +#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) +#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) +#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) +#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) + #define NR_COPRO_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ 0 diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8ab47c7326ed..a4fd5267c65b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -494,12 +494,153 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_val, FPEXC32_EL2, 0x70 }, }; -/* Trapped cp14 registers */ +static bool trap_dbgidr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + return ignore_write(vcpu, p); + } else { + u64 dfr = read_cpuid(ID_AA64DFR0_EL1); + u64 pfr = read_cpuid(ID_AA64PFR0_EL1); + u32 el3 = !!((pfr >> 12) & 0xf); + + *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) | + (((dfr >> 12) & 0xf) << 24) | + (((dfr >> 28) & 0xf) << 20) | + (6 << 16) | (el3 << 14) | (el3 << 12)); + return true; + } +} + +static bool trap_debug32(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg); + } + + return true; +} + +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ + NULL, (cp14_DBGBVR0 + (n) * 2) }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ + NULL, (cp14_DBGBCR0 + (n) * 2) }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ + NULL, (cp14_DBGWVR0 + (n) * 2) }, \ + /* DBGWCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ + NULL, (cp14_DBGWCR0 + (n) * 2) } + +#define DBGBXVR(n) \ + { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ + NULL, cp14_DBGBXVR0 + n * 2 } + +/* + * Trapped cp14 registers. We generally ignore most of the external + * debug, on the principle that they don't really make sense to a + * guest. Revisit this one day, whould this principle change. + */ static const struct sys_reg_desc cp14_regs[] = { + /* DBGIDR */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr }, + /* DBGDTRRXext */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi }, + + DBG_BCR_BVR_WCR_WVR(0), + /* DBGDSCRint */ + { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(1), + /* DBGDCCINT */ + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + /* DBGDSCRext */ + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + DBG_BCR_BVR_WCR_WVR(2), + /* DBGDTR[RT]Xint */ + { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, + /* DBGDTR[RT]Xext */ + { Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(3), + DBG_BCR_BVR_WCR_WVR(4), + DBG_BCR_BVR_WCR_WVR(5), + /* DBGWFAR */ + { Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi }, + /* DBGOSECCR */ + { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, + DBG_BCR_BVR_WCR_WVR(6), + /* DBGVCR */ + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + DBG_BCR_BVR_WCR_WVR(7), + DBG_BCR_BVR_WCR_WVR(8), + DBG_BCR_BVR_WCR_WVR(9), + DBG_BCR_BVR_WCR_WVR(10), + DBG_BCR_BVR_WCR_WVR(11), + DBG_BCR_BVR_WCR_WVR(12), + DBG_BCR_BVR_WCR_WVR(13), + DBG_BCR_BVR_WCR_WVR(14), + DBG_BCR_BVR_WCR_WVR(15), + + /* DBGDRAR (32bit) */ + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi }, + + DBGBXVR(0), + /* DBGOSLAR */ + { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi }, + DBGBXVR(1), + /* DBGOSLSR */ + { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 }, + DBGBXVR(2), + DBGBXVR(3), + /* DBGOSDLR */ + { Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi }, + DBGBXVR(4), + /* DBGPRCR */ + { Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi }, + DBGBXVR(5), + DBGBXVR(6), + DBGBXVR(7), + DBGBXVR(8), + DBGBXVR(9), + DBGBXVR(10), + DBGBXVR(11), + DBGBXVR(12), + DBGBXVR(13), + DBGBXVR(14), + DBGBXVR(15), + + /* DBGDSAR (32bit) */ + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi }, + + /* DBGDEVID2 */ + { Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi }, + /* DBGDEVID1 */ + { Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi }, + /* DBGDEVID */ + { Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi }, + /* DBGCLAIMSET */ + { Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi }, + /* DBGCLAIMCLR */ + { Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi }, + /* DBGAUTHSTATUS */ + { Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 }, }; /* Trapped cp14 64bit registers */ static const struct sys_reg_desc cp14_64_regs[] = { + /* DBGDRAR (64bit) */ + { Op1( 0), CRm( 1), .access = trap_raz_wi }, + + /* DBGDSAR (64bit) */ + { Op1( 0), CRm( 2), .access = trap_raz_wi }, }; /* @@ -547,7 +688,6 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, - }; static const struct sys_reg_desc cp15_64_regs[] = { From 44af263503e0e31b1f88ce86df73e467f18e3198 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 May 2014 13:44:49 +0100 Subject: [PATCH 1304/1368] arm64: KVM: implement lazy world switch for debug registers Implement switching of the debug registers. While the number of registers is massive, CPUs usually don't implement them all (A57 has 6 breakpoints and 4 watchpoints, which gives us a total of 22 registers "only"). Also, we only save/restore them when MDSCR_EL1 has debug enabled, or when we've flagged the debug registers as dirty. It means that most of the time, we only save/restore MDSCR_EL1. Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit b0e626b380872b663918230fafdac128c34fea56) Signed-off-by: Christoffer Dall --- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/hyp.S | 463 +++++++++++++++++++++++++++++++- 2 files changed, 458 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 65ebb2ccde5f..825d76c21d84 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -118,6 +118,7 @@ int main(void) DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 7874e022d077..100494b5c7d4 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,7 @@ mrs x22, amair_el1 mrs x23, cntkctl_el1 mrs x24, par_el1 + mrs x25, mdscr_el1 stp x4, x5, [x3] stp x6, x7, [x3, #16] @@ -222,7 +224,202 @@ stp x18, x19, [x3, #112] stp x20, x21, [x3, #128] stp x22, x23, [x3, #144] - str x24, [x3, #160] + stp x24, x25, [x3, #160] +.endm + +.macro save_debug + // x2: base address for cpu context + // x3: tmp register + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + mrs x20, dbgbcr15_el1 + mrs x19, dbgbcr14_el1 + mrs x18, dbgbcr13_el1 + mrs x17, dbgbcr12_el1 + mrs x16, dbgbcr11_el1 + mrs x15, dbgbcr10_el1 + mrs x14, dbgbcr9_el1 + mrs x13, dbgbcr8_el1 + mrs x12, dbgbcr7_el1 + mrs x11, dbgbcr6_el1 + mrs x10, dbgbcr5_el1 + mrs x9, dbgbcr4_el1 + mrs x8, dbgbcr3_el1 + mrs x7, dbgbcr2_el1 + mrs x6, dbgbcr1_el1 + mrs x5, dbgbcr0_el1 + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + mrs x20, dbgbvr15_el1 + mrs x19, dbgbvr14_el1 + mrs x18, dbgbvr13_el1 + mrs x17, dbgbvr12_el1 + mrs x16, dbgbvr11_el1 + mrs x15, dbgbvr10_el1 + mrs x14, dbgbvr9_el1 + mrs x13, dbgbvr8_el1 + mrs x12, dbgbvr7_el1 + mrs x11, dbgbvr6_el1 + mrs x10, dbgbvr5_el1 + mrs x9, dbgbvr4_el1 + mrs x8, dbgbvr3_el1 + mrs x7, dbgbvr2_el1 + mrs x6, dbgbvr1_el1 + mrs x5, dbgbvr0_el1 + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + mrs x20, dbgwcr15_el1 + mrs x19, dbgwcr14_el1 + mrs x18, dbgwcr13_el1 + mrs x17, dbgwcr12_el1 + mrs x16, dbgwcr11_el1 + mrs x15, dbgwcr10_el1 + mrs x14, dbgwcr9_el1 + mrs x13, dbgwcr8_el1 + mrs x12, dbgwcr7_el1 + mrs x11, dbgwcr6_el1 + mrs x10, dbgwcr5_el1 + mrs x9, dbgwcr4_el1 + mrs x8, dbgwcr3_el1 + mrs x7, dbgwcr2_el1 + mrs x6, dbgwcr1_el1 + mrs x5, dbgwcr0_el1 + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + mrs x20, dbgwvr15_el1 + mrs x19, dbgwvr14_el1 + mrs x18, dbgwvr13_el1 + mrs x17, dbgwvr12_el1 + mrs x16, dbgwvr11_el1 + mrs x15, dbgwvr10_el1 + mrs x14, dbgwvr9_el1 + mrs x13, dbgwvr8_el1 + mrs x12, dbgwvr7_el1 + mrs x11, dbgwvr6_el1 + mrs x10, dbgwvr5_el1 + mrs x9, dbgwvr4_el1 + mrs x8, dbgwvr3_el1 + mrs x7, dbgwvr2_el1 + mrs x6, dbgwvr1_el1 + mrs x5, dbgwvr0_el1 + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 + +1: + str x20, [x3, #(15 * 8)] + str x19, [x3, #(14 * 8)] + str x18, [x3, #(13 * 8)] + str x17, [x3, #(12 * 8)] + str x16, [x3, #(11 * 8)] + str x15, [x3, #(10 * 8)] + str x14, [x3, #(9 * 8)] + str x13, [x3, #(8 * 8)] + str x12, [x3, #(7 * 8)] + str x11, [x3, #(6 * 8)] + str x10, [x3, #(5 * 8)] + str x9, [x3, #(4 * 8)] + str x8, [x3, #(3 * 8)] + str x7, [x3, #(2 * 8)] + str x6, [x3, #(1 * 8)] + str x5, [x3, #(0 * 8)] + + mrs x21, mdccint_el1 + str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] .endm .macro restore_sysregs @@ -241,7 +438,7 @@ ldp x18, x19, [x3, #112] ldp x20, x21, [x3, #128] ldp x22, x23, [x3, #144] - ldr x24, [x3, #160] + ldp x24, x25, [x3, #160] msr vmpidr_el2, x4 msr csselr_el1, x5 @@ -264,6 +461,198 @@ msr amair_el1, x22 msr cntkctl_el1, x23 msr par_el1, x24 + msr mdscr_el1, x25 +.endm + +.macro restore_debug + // x2: base address for cpu context + // x3: tmp register + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + msr dbgbcr15_el1, x20 + msr dbgbcr14_el1, x19 + msr dbgbcr13_el1, x18 + msr dbgbcr12_el1, x17 + msr dbgbcr11_el1, x16 + msr dbgbcr10_el1, x15 + msr dbgbcr9_el1, x14 + msr dbgbcr8_el1, x13 + msr dbgbcr7_el1, x12 + msr dbgbcr6_el1, x11 + msr dbgbcr5_el1, x10 + msr dbgbcr4_el1, x9 + msr dbgbcr3_el1, x8 + msr dbgbcr2_el1, x7 + msr dbgbcr1_el1, x6 + msr dbgbcr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x24, lsl #2 + br x26 +1: + msr dbgbvr15_el1, x20 + msr dbgbvr14_el1, x19 + msr dbgbvr13_el1, x18 + msr dbgbvr12_el1, x17 + msr dbgbvr11_el1, x16 + msr dbgbvr10_el1, x15 + msr dbgbvr9_el1, x14 + msr dbgbvr8_el1, x13 + msr dbgbvr7_el1, x12 + msr dbgbvr6_el1, x11 + msr dbgbvr5_el1, x10 + msr dbgbvr4_el1, x9 + msr dbgbvr3_el1, x8 + msr dbgbvr2_el1, x7 + msr dbgbvr1_el1, x6 + msr dbgbvr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + msr dbgwcr15_el1, x20 + msr dbgwcr14_el1, x19 + msr dbgwcr13_el1, x18 + msr dbgwcr12_el1, x17 + msr dbgwcr11_el1, x16 + msr dbgwcr10_el1, x15 + msr dbgwcr9_el1, x14 + msr dbgwcr8_el1, x13 + msr dbgwcr7_el1, x12 + msr dbgwcr6_el1, x11 + msr dbgwcr5_el1, x10 + msr dbgwcr4_el1, x9 + msr dbgwcr3_el1, x8 + msr dbgwcr2_el1, x7 + msr dbgwcr1_el1, x6 + msr dbgwcr0_el1, x5 + + add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + ldr x20, [x3, #(15 * 8)] + ldr x19, [x3, #(14 * 8)] + ldr x18, [x3, #(13 * 8)] + ldr x17, [x3, #(12 * 8)] + ldr x16, [x3, #(11 * 8)] + ldr x15, [x3, #(10 * 8)] + ldr x14, [x3, #(9 * 8)] + ldr x13, [x3, #(8 * 8)] + ldr x12, [x3, #(7 * 8)] + ldr x11, [x3, #(6 * 8)] + ldr x10, [x3, #(5 * 8)] + ldr x9, [x3, #(4 * 8)] + ldr x8, [x3, #(3 * 8)] + ldr x7, [x3, #(2 * 8)] + ldr x6, [x3, #(1 * 8)] + ldr x5, [x3, #(0 * 8)] + + adr x26, 1f + add x26, x26, x25, lsl #2 + br x26 +1: + msr dbgwvr15_el1, x20 + msr dbgwvr14_el1, x19 + msr dbgwvr13_el1, x18 + msr dbgwvr12_el1, x17 + msr dbgwvr11_el1, x16 + msr dbgwvr10_el1, x15 + msr dbgwvr9_el1, x14 + msr dbgwvr8_el1, x13 + msr dbgwvr7_el1, x12 + msr dbgwvr6_el1, x11 + msr dbgwvr5_el1, x10 + msr dbgwvr4_el1, x9 + msr dbgwvr3_el1, x8 + msr dbgwvr2_el1, x7 + msr dbgwvr1_el1, x6 + msr dbgwvr0_el1, x5 + + ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] + msr mdccint_el1, x21 .endm .macro skip_32bit_state tmp, target @@ -278,6 +667,35 @@ tbz \tmp, #12, \target .endm +.macro skip_debug_state tmp, target + ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] + tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target +.endm + +.macro compute_debug_state target + // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY + // is set, we do a full save/restore cycle and disable trapping. + add x25, x0, #VCPU_CONTEXT + + // Check the state of MDSCR_EL1 + ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] + and x26, x25, #DBG_MDSCR_KDE + and x25, x25, #DBG_MDSCR_MDE + adds xzr, x25, x26 + b.eq 9998f // Nothing to see there + + // If any interesting bits was set, we must set the flag + mov x26, #KVM_ARM64_DEBUG_DIRTY + str x26, [x0, #VCPU_DEBUG_FLAGS] + b 9999f // Don't skip restore + +9998: + // Otherwise load the flags from memory in case we recently + // trapped + skip_debug_state x25, \target +9999: +.endm + .macro save_guest_32bit_state skip_32bit_state x3, 1f @@ -293,10 +711,13 @@ mrs x4, dacr32_el2 mrs x5, ifsr32_el2 mrs x6, fpexc32_el2 - mrs x7, dbgvcr32_el2 stp x4, x5, [x3] - stp x6, x7, [x3, #16] + str x6, [x3, #16] + skip_debug_state x8, 2f + mrs x7, dbgvcr32_el2 + str x7, [x3, #24] +2: skip_tee_state x8, 1f add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) @@ -319,12 +740,15 @@ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] + ldr x6, [x3, #16] msr dacr32_el2, x4 msr ifsr32_el2, x5 msr fpexc32_el2, x6 - msr dbgvcr32_el2, x7 + skip_debug_state x8, 2f + ldr x7, [x3, #24] + msr dbgvcr32_el2, x7 +2: skip_tee_state x8, 1f add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) @@ -463,6 +887,14 @@ __restore_sysregs: restore_sysregs ret +__save_debug: + save_debug + ret + +__restore_debug: + restore_debug + ret + __save_fpsimd: save_fpsimd ret @@ -494,6 +926,9 @@ ENTRY(__kvm_vcpu_run) bl __save_fpsimd bl __save_sysregs + compute_debug_state 1f + bl __save_debug +1: activate_traps activate_vm @@ -505,6 +940,10 @@ ENTRY(__kvm_vcpu_run) bl __restore_sysregs bl __restore_fpsimd + + skip_debug_state x3, 1f + bl __restore_debug +1: restore_guest_32bit_state restore_guest_regs @@ -521,6 +960,10 @@ __kvm_vcpu_return: save_guest_regs bl __save_fpsimd bl __save_sysregs + + skip_debug_state x3, 1f + bl __save_debug +1: save_guest_32bit_state save_timer_state @@ -535,6 +978,14 @@ __kvm_vcpu_return: bl __restore_sysregs bl __restore_fpsimd + + skip_debug_state x3, 1f + // Clear the dirty flag for the next run, as all the state has + // already been saved. Note that we nuke the whole 64bit word. + // If we ever add more flags, we'll have to be more careful... + str xzr, [x0, #VCPU_DEBUG_FLAGS] + bl __restore_debug +1: restore_host_regs mov x0, x1 From 99999d6ea6ff7b0b02a4ac6abef0d49ec1fd9fcb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Apr 2014 10:32:03 +0100 Subject: [PATCH 1305/1368] arm64: KVM: enable trapping of all debug registers Enable trapping of the debug registers, preventing the guests to mess with the host state (and allowing guests to use the debug infrastructure as well). Reviewed-by: Anup Patel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit d329de09333aeee127aaf22eb7cee9c2dc4cf475) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 100494b5c7d4..b72aa9f9215c 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -770,6 +770,14 @@ mrs x2, mdcr_el2 and x2, x2, #MDCR_EL2_HPMN_MASK orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) + orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) + + // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap + // if not dirty. + ldr x3, [x0, #VCPU_DEBUG_FLAGS] + tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f + orr x2, x2, #MDCR_EL2_TDA +1: msr mdcr_el2, x2 .endm From 38ca7b90842e554439e2291803cdb0eb850b4807 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 24 Jun 2014 19:43:15 +0100 Subject: [PATCH 1306/1368] ARM: make it easier to check the CPU part number correctly Ensure that platform maintainers check the CPU part number in the right manner: the CPU part number is meaningless without also checking the CPU implement(e|o)r (choose your preferred spelling!) Provide an interface which returns both the implementer and part number together, and update the definitions to include the implementer. Mark the old function as being deprecated... indeed, using the old function with the definitions will now always evaluate as false, so people must update their un-merged code to the new function. While this could be avoided by adding new definitions, we'd also have to create new names for them which would be awkward. Acked-by: Nicolas Pitre Signed-off-by: Russell King (cherry picked from commit af040ffc9ba1e079ee4c0748aff64fa3d4716fa5) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/cputype.h | 35 +++++++++++------ arch/arm/include/asm/smp_scu.h | 2 +- arch/arm/kernel/perf_event_cpu.c | 64 ++++++++++++++------------------ arch/arm/kvm/guest.c | 8 +--- 4 files changed, 53 insertions(+), 56 deletions(-) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index dba62cb1ad08..3392fe2d3174 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -43,15 +43,18 @@ #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_INTEL 0x69 -#define ARM_CPU_PART_ARM1136 0xB360 -#define ARM_CPU_PART_ARM1156 0xB560 -#define ARM_CPU_PART_ARM1176 0xB760 -#define ARM_CPU_PART_ARM11MPCORE 0xB020 -#define ARM_CPU_PART_CORTEX_A8 0xC080 -#define ARM_CPU_PART_CORTEX_A9 0xC090 -#define ARM_CPU_PART_CORTEX_A5 0xC050 -#define ARM_CPU_PART_CORTEX_A15 0xC0F0 -#define ARM_CPU_PART_CORTEX_A7 0xC070 +/* ARM implemented processors */ +#define ARM_CPU_PART_ARM1136 0x4100b360 +#define ARM_CPU_PART_ARM1156 0x4100b560 +#define ARM_CPU_PART_ARM1176 0x4100b760 +#define ARM_CPU_PART_ARM11MPCORE 0x4100b020 +#define ARM_CPU_PART_CORTEX_A8 0x4100c080 +#define ARM_CPU_PART_CORTEX_A9 0x4100c090 +#define ARM_CPU_PART_CORTEX_A5 0x4100c050 +#define ARM_CPU_PART_CORTEX_A7 0x4100c070 +#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0 +#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0 +#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 #define ARM_CPU_XSCALE_ARCH_MASK 0xe000 #define ARM_CPU_XSCALE_ARCH_V1 0x2000 @@ -122,14 +125,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void) return (read_cpuid_id() & 0xFF000000) >> 24; } -static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +/* + * The CPU part number is meaningless without referring to the CPU + * implementer: implementers are free to define their own part numbers + * which are permitted to clash with other implementer part numbers. + */ +static inline unsigned int __attribute_const__ read_cpuid_part(void) +{ + return read_cpuid_id() & 0xff00fff0; +} + +static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void) { return read_cpuid_id() & 0xFFF0; } static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void) { - return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK; + return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK; } static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h index 18d169373612..1a292d8be988 100644 --- a/arch/arm/include/asm/smp_scu.h +++ b/arch/arm/include/asm/smp_scu.h @@ -11,7 +11,7 @@ static inline bool scu_a9_has_base(void) { - return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; } static inline unsigned long scu_a9_get_base(void) diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 1f2740e3dbc0..0e9609657c79 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -201,49 +201,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { static int probe_current_pmu(struct arm_pmu *pmu) { int cpu = get_cpu(); - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); + switch (read_cpuid_part()) { /* ARM Ltd CPUs. */ - if (implementor == ARM_CPU_IMP_ARM) { - switch (part_number) { - case ARM_CPU_PART_ARM1136: - case ARM_CPU_PART_ARM1156: - case ARM_CPU_PART_ARM1176: - ret = armv6pmu_init(pmu); - break; - case ARM_CPU_PART_ARM11MPCORE: - ret = armv6mpcore_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A8: - ret = armv7_a8_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A9: - ret = armv7_a9_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A5: - ret = armv7_a5_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A15: - ret = armv7_a15_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A7: - ret = armv7_a7_pmu_init(pmu); - break; - } - /* Intel CPUs [xscale]. */ - } else if (implementor == ARM_CPU_IMP_INTEL) { - switch (xscale_cpu_arch_version()) { - case ARM_CPU_XSCALE_ARCH_V1: - ret = xscale1pmu_init(pmu); - break; - case ARM_CPU_XSCALE_ARCH_V2: - ret = xscale2pmu_init(pmu); - break; + case ARM_CPU_PART_ARM1136: + case ARM_CPU_PART_ARM1156: + case ARM_CPU_PART_ARM1176: + ret = armv6pmu_init(pmu); + break; + case ARM_CPU_PART_ARM11MPCORE: + ret = armv6mpcore_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A8: + ret = armv7_a8_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A9: + ret = armv7_a9_pmu_init(pmu); + break; + + default: + if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) { + switch (xscale_cpu_arch_version()) { + case ARM_CPU_XSCALE_ARCH_V1: + ret = xscale1pmu_init(pmu); + break; + case ARM_CPU_XSCALE_ARCH_V2: + ret = xscale2pmu_init(pmu); + break; + } } + break; } put_cpu(); diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 986e625b5dbd..813e49258690 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -264,13 +264,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int __attribute_const__ kvm_target_cpu(void) { - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); - - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; - - switch (part_number) { + switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A7: return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: From 2d05a876aea8c40db12b9b721be21773aad6057b Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 25 Jul 2014 06:27:03 -0700 Subject: [PATCH 1307/1368] kvm: Resolve missing-field-initializers warnings Resolve missing-field-initializers warnings seen in W=2 kernel builds by having macros generate more elaborated initializers. That is enough to silence the warnings. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Paolo Bonzini (cherry picked from commit 25f97ff451a4aab534afc1290af97d23ea0b4fb3) Signed-off-by: Christoffer Dall --- virt/kvm/irq_comm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ced4a542a031..a228ee82bad2 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -323,13 +323,13 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } + .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) #ifdef CONFIG_X86 # define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } + .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } # define ROUTING_ENTRY2(irq) \ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) #else From 7b6de249336851f2fb8515b2f428bc9e56a8dbb1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:27:35 +0200 Subject: [PATCH 1308/1368] KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini (cherry picked from commit 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 +- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 6 +++--- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e2c2bfd4da95..0e3d3dc8eea2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -174,7 +174,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 53f44bee9ebb..b48aa69e35e1 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 750835a4ef70..aacf3e35404f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -300,7 +300,7 @@ void kvm_arch_sync_events(struct kvm *kvm) { } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 54612d0e79dd..9133f869b070 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -127,7 +127,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 805c8e92cf66..46a8c74fd431 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2500,7 +2500,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a3c83491a791..1fb1bd4593a5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -578,7 +578,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); -int kvm_dev_ioctl_check_extension(long ext); +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3db56912caed..e3315356b69a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2564,7 +2564,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_dev_ioctl_check_extension_generic(long arg) +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -2588,7 +2588,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) default: break; } - return kvm_dev_ioctl_check_extension(arg); + return kvm_vm_ioctl_check_extension(kvm, arg); } static long kvm_dev_ioctl(struct file *filp, @@ -2607,7 +2607,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension_generic(arg); + r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; From c509fab667c6a02b0763a4343c73d134c6a09328 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:33:08 +0200 Subject: [PATCH 1309/1368] KVM: Allow KVM_CHECK_EXTENSION on the vm fd The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately on PPC some of the capabilities change depending on the way a VM was created. So instead we need a way to expose capabilities as VM ioctl, so that we can see which VM type we're using (HV or PR). To enable this, add the KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini (cherry picked from commit 92b591a4c46b103ebd3fc0d03a084e1efd331253) Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 7 ++-- include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 58 +++++++++++++++++-------------- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 8d135672b69a..257a1f1eecc7 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl. 4.4 KVM_CHECK_EXTENSION -Capability: basic +Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl Architectures: all -Type: system ioctl +Type: system ioctl, vm ioctl Parameters: extension identifier (KVM_CAP_*) Returns: 0 if unsupported; 1 (or some other positive integer) if supported @@ -160,6 +160,9 @@ receives an integer that describes the extension availability. Generally 0 means no and 1 means yes, but some extensions may report additional information in the integer return value. +Based on their initialization different VMs may have different capabilities. +It is thus encouraged to use the vm ioctl to query for capabilities (available +with KVM_CAP_CHECK_EXTENSION_VM on the vm fd) 4.5 KVM_GET_VCPU_MMAP_SIZE diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 521e4c0a08ac..1af686a82703 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -679,6 +679,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_EXT_EMUL_CPUID 95 #define KVM_CAP_ARM_PSCI_0_2 102 +#define KVM_CAP_CHECK_EXTENSION_VM 105 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e3315356b69a..1edb15da7acf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2317,6 +2317,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return 0; } +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +{ + switch (arg) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_CAP_SET_BOOT_CPU_ID: +#endif + case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif + case KVM_CAP_CHECK_EXTENSION_VM: + return 1; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQ_ROUTING: + return KVM_MAX_IRQ_ROUTES; +#endif + default: + break; + } + return kvm_vm_ioctl_check_extension(kvm, arg); +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2480,6 +2508,9 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(kvm, arg); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2564,33 +2595,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) -{ - switch (arg) { - case KVM_CAP_USER_MEMORY: - case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: - case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif - case KVM_CAP_INTERNAL_ERROR_DATA: -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_CAP_SIGNAL_MSI: -#endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQFD_RESAMPLE: -#endif - return 1; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQ_ROUTING: - return KVM_MAX_IRQ_ROUTES; -#endif - default: - break; - } - return kvm_vm_ioctl_check_extension(kvm, arg); -} - static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { From edff6ae9e04008800e518454cf7fa0efe3023252 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Jul 2014 16:29:12 +0100 Subject: [PATCH 1310/1368] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will map host physical region 0x2c020000 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c020000 in the above scenario. This patch fixes the problem by failing the vgic probe if the physical base address or the size of GICV aren't page-aligned. Note that this generated a warning in dmesg about freeing enabled IRQs, so I had to move the IRQ enabling later in the probe. Cc: Christoffer Dall Cc: Marc Zyngier Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Joel Schopp Cc: Don Dutile Acked-by: Peter Maydell Acked-by: Joel Schopp Acked-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 63afbe7a0ac184ef8485dac4914e87b211b5bfaa) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v2.c | 16 ++++++++++++++++ virt/kvm/arm/vgic.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index d6c9c142f813..01124ef3690a 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -231,6 +231,22 @@ int vgic_v2_probe(struct device_node *vgic_node, ret = -ENXIO; goto out_unmap; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out_unmap; + } + + if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + (unsigned long long)resource_size(&vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic->vcpu_base = vcpu_res.start; kvm_info("%s@%llx IRQ%d\n", vgic_node->name, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index ede8f6466c95..73eba793b17f 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1587,11 +1587,11 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - /* Callback into for arch code for setup */ vgic_arch_setup(vgic); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + return 0; out_free_irq: From 45104cfeec66c1314a2ae5e4cb436a9ace725e59 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Jul 2014 14:16:39 +0100 Subject: [PATCH 1311/1368] arm64: KVM: GICv3: move system register access to msr_s/mrs_s Commit 72c583951526 (arm64: gicv3: Allow GICv3 compilation with older binutils) changed the way we express the GICv3 system registers, but couldn't change the occurences used by KVM as the code wasn't merged yet. Just fix the accessors. Cc: Will Deacon Cc: Catalin Marinas Cc: Christoffer Dall Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit f4c321eb268e932786c112e0b902ee942d91a336) Signed-off-by: Christoffer Dall --- arch/arm64/kvm/vgic-v3-switch.S | 130 ++++++++++++++++---------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S index 21e68f606a8f..d16046999e06 100644 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -48,11 +48,11 @@ dsb st // Save all interesting registers - mrs x4, ICH_HCR_EL2 - mrs x5, ICH_VMCR_EL2 - mrs x6, ICH_MISR_EL2 - mrs x7, ICH_EISR_EL2 - mrs x8, ICH_ELSR_EL2 + mrs_s x4, ICH_HCR_EL2 + mrs_s x5, ICH_VMCR_EL2 + mrs_s x6, ICH_MISR_EL2 + mrs_s x7, ICH_EISR_EL2 + mrs_s x8, ICH_ELSR_EL2 str w4, [x3, #VGIC_V3_CPU_HCR] str w5, [x3, #VGIC_V3_CPU_VMCR] @@ -60,9 +60,9 @@ str w7, [x3, #VGIC_V3_CPU_EISR] str w8, [x3, #VGIC_V3_CPU_ELRSR] - msr ICH_HCR_EL2, xzr + msr_s ICH_HCR_EL2, xzr - mrs x21, ICH_VTR_EL2 + mrs_s x21, ICH_VTR_EL2 mvn w22, w21 ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 @@ -71,22 +71,22 @@ br x24 1: - mrs x20, ICH_LR15_EL2 - mrs x19, ICH_LR14_EL2 - mrs x18, ICH_LR13_EL2 - mrs x17, ICH_LR12_EL2 - mrs x16, ICH_LR11_EL2 - mrs x15, ICH_LR10_EL2 - mrs x14, ICH_LR9_EL2 - mrs x13, ICH_LR8_EL2 - mrs x12, ICH_LR7_EL2 - mrs x11, ICH_LR6_EL2 - mrs x10, ICH_LR5_EL2 - mrs x9, ICH_LR4_EL2 - mrs x8, ICH_LR3_EL2 - mrs x7, ICH_LR2_EL2 - mrs x6, ICH_LR1_EL2 - mrs x5, ICH_LR0_EL2 + mrs_s x20, ICH_LR15_EL2 + mrs_s x19, ICH_LR14_EL2 + mrs_s x18, ICH_LR13_EL2 + mrs_s x17, ICH_LR12_EL2 + mrs_s x16, ICH_LR11_EL2 + mrs_s x15, ICH_LR10_EL2 + mrs_s x14, ICH_LR9_EL2 + mrs_s x13, ICH_LR8_EL2 + mrs_s x12, ICH_LR7_EL2 + mrs_s x11, ICH_LR6_EL2 + mrs_s x10, ICH_LR5_EL2 + mrs_s x9, ICH_LR4_EL2 + mrs_s x8, ICH_LR3_EL2 + mrs_s x7, ICH_LR2_EL2 + mrs_s x6, ICH_LR1_EL2 + mrs_s x5, ICH_LR0_EL2 adr x24, 1f add x24, x24, x23 @@ -113,34 +113,34 @@ tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits - mrs x20, ICH_AP0R3_EL2 + mrs_s x20, ICH_AP0R3_EL2 str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - mrs x19, ICH_AP0R2_EL2 + mrs_s x19, ICH_AP0R2_EL2 str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] -6: mrs x18, ICH_AP0R1_EL2 +6: mrs_s x18, ICH_AP0R1_EL2 str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] -5: mrs x17, ICH_AP0R0_EL2 +5: mrs_s x17, ICH_AP0R0_EL2 str w17, [x3, #VGIC_V3_CPU_AP0R] tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits - mrs x20, ICH_AP1R3_EL2 + mrs_s x20, ICH_AP1R3_EL2 str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - mrs x19, ICH_AP1R2_EL2 + mrs_s x19, ICH_AP1R2_EL2 str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] -6: mrs x18, ICH_AP1R1_EL2 +6: mrs_s x18, ICH_AP1R1_EL2 str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] -5: mrs x17, ICH_AP1R0_EL2 +5: mrs_s x17, ICH_AP1R0_EL2 str w17, [x3, #VGIC_V3_CPU_AP1R] // Restore SRE_EL1 access and re-enable SRE at EL1. - mrs x5, ICC_SRE_EL2 + mrs_s x5, ICC_SRE_EL2 orr x5, x5, #ICC_SRE_EL2_ENABLE - msr ICC_SRE_EL2, x5 + msr_s ICC_SRE_EL2, x5 isb mov x5, #1 - msr ICC_SRE_EL1, x5 + msr_s ICC_SRE_EL1, x5 .endm /* @@ -150,7 +150,7 @@ .macro restore_vgic_v3_state // Disable SRE_EL1 access. Necessary, otherwise // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... - msr ICC_SRE_EL1, xzr + msr_s ICC_SRE_EL1, xzr isb // Compute the address of struct vgic_cpu @@ -160,34 +160,34 @@ ldr w4, [x3, #VGIC_V3_CPU_HCR] ldr w5, [x3, #VGIC_V3_CPU_VMCR] - msr ICH_HCR_EL2, x4 - msr ICH_VMCR_EL2, x5 + msr_s ICH_HCR_EL2, x4 + msr_s ICH_VMCR_EL2, x5 - mrs x21, ICH_VTR_EL2 + mrs_s x21, ICH_VTR_EL2 tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - msr ICH_AP1R3_EL2, x20 + msr_s ICH_AP1R3_EL2, x20 ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] - msr ICH_AP1R2_EL2, x19 + msr_s ICH_AP1R2_EL2, x19 6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] - msr ICH_AP1R1_EL2, x18 + msr_s ICH_AP1R1_EL2, x18 5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] - msr ICH_AP1R0_EL2, x17 + msr_s ICH_AP1R0_EL2, x17 tbnz w21, #29, 6f // 6 bits tbz w21, #30, 5f // 5 bits // 7 bits ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - msr ICH_AP0R3_EL2, x20 + msr_s ICH_AP0R3_EL2, x20 ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] - msr ICH_AP0R2_EL2, x19 + msr_s ICH_AP0R2_EL2, x19 6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] - msr ICH_AP0R1_EL2, x18 + msr_s ICH_AP0R1_EL2, x18 5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] - msr ICH_AP0R0_EL2, x17 + msr_s ICH_AP0R0_EL2, x17 and w22, w21, #0xf mvn w22, w21 @@ -220,22 +220,22 @@ br x24 1: - msr ICH_LR15_EL2, x20 - msr ICH_LR14_EL2, x19 - msr ICH_LR13_EL2, x18 - msr ICH_LR12_EL2, x17 - msr ICH_LR11_EL2, x16 - msr ICH_LR10_EL2, x15 - msr ICH_LR9_EL2, x14 - msr ICH_LR8_EL2, x13 - msr ICH_LR7_EL2, x12 - msr ICH_LR6_EL2, x11 - msr ICH_LR5_EL2, x10 - msr ICH_LR4_EL2, x9 - msr ICH_LR3_EL2, x8 - msr ICH_LR2_EL2, x7 - msr ICH_LR1_EL2, x6 - msr ICH_LR0_EL2, x5 + msr_s ICH_LR15_EL2, x20 + msr_s ICH_LR14_EL2, x19 + msr_s ICH_LR13_EL2, x18 + msr_s ICH_LR12_EL2, x17 + msr_s ICH_LR11_EL2, x16 + msr_s ICH_LR10_EL2, x15 + msr_s ICH_LR9_EL2, x14 + msr_s ICH_LR8_EL2, x13 + msr_s ICH_LR7_EL2, x12 + msr_s ICH_LR6_EL2, x11 + msr_s ICH_LR5_EL2, x10 + msr_s ICH_LR4_EL2, x9 + msr_s ICH_LR3_EL2, x8 + msr_s ICH_LR2_EL2, x7 + msr_s ICH_LR1_EL2, x6 + msr_s ICH_LR0_EL2, x5 // Ensure that the above will have reached the // (re)distributors. This ensure the guest will read @@ -244,9 +244,9 @@ dsb sy // Prevent the guest from touching the GIC system registers - mrs x5, ICC_SRE_EL2 + mrs_s x5, ICC_SRE_EL2 and x5, x5, #~ICC_SRE_EL2_ENABLE - msr ICC_SRE_EL2, x5 + msr_s ICC_SRE_EL2, x5 .endm ENTRY(__save_vgic_v3_state) @@ -260,7 +260,7 @@ ENTRY(__restore_vgic_v3_state) ENDPROC(__restore_vgic_v3_state) ENTRY(__vgic_v3_get_ich_vtr_el2) - mrs x0, ICH_VTR_EL2 + mrs_s x0, ICH_VTR_EL2 ret ENDPROC(__vgic_v3_get_ich_vtr_el2) From 099fcd90255453a5976a42c5f9421a6a7ad7ce3d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Jul 2014 11:42:18 +0100 Subject: [PATCH 1312/1368] KVM: arm64: GICv3: mandate page-aligned GICV region Just like GICv2 was fixed in 63afbe7a0ac1 (kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform), mandate the GICV region to be both aligned on a page boundary and its size to be a multiple of page size. This prevents a guest from being able to poke at regions where we have no idea what is sitting there. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit fb3ec67942e92e5713e05b7691b277d0a0c0575d) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v3.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index f01d44685720..1c2c8eef0599 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -215,6 +215,22 @@ int vgic_v3_probe(struct device_node *vgic_node, ret = -ENXIO; goto out; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out; + } + + if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + (unsigned long long)resource_size(&vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out; + } + vgic->vcpu_base = vcpu_res.start; vgic->vctrl_base = NULL; vgic->type = VGIC_V3; From dad44b58644918b98d32ed35f28e919e08b0f896 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Aug 2014 12:00:36 +0100 Subject: [PATCH 1313/1368] arm64: KVM: fix 64bit CP15 VM access for 32bit guests Commit f0a3eaff71b8 (ARM64: KVM: fix big endian issue in access_vm_reg for 32bit guest) changed the way we handle CP15 VM accesses, so that all 64bit accesses are done via vcpu_sys_reg. This looks like a good idea as it solves indianness issues in an elegant way, except for one small detail: the register index is doesn't refer to the same array! We end up corrupting some random data structure instead. Fix this by reverting to the original code, except for the introduction of a vcpu_cp15_64_high macro that deals with the endianness thing. Tested on Juno with 32bit SMP guests. Cc: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit dedf97e8ff2c7513b1370e36b56e08b6bd0f0290) Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 6 ++++-- arch/arm64/kvm/sys_regs.c | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 79812be4f25f..e10c45a578e3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -149,9 +149,11 @@ struct kvm_vcpu_arch { #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) #ifdef CONFIG_CPU_BIG_ENDIAN -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)]) +#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r)) +#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1) #else -#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)]) +#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1) +#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r)) #endif struct kvm_vm_stat { diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a4fd5267c65b..5805e7c4a4dd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -135,10 +135,13 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, BUG_ON(!p->is_write); val = *vcpu_reg(vcpu, p->Rt); - if (!p->is_aarch32 || !p->is_32bit) + if (!p->is_aarch32) { vcpu_sys_reg(vcpu, r->reg) = val; - else + } else { + if (!p->is_32bit) + vcpu_cp15_64_high(vcpu, r->reg) = val >> 32; vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + } return true; } From 74afc9fff6cdb9df6cd6ee43ddd59465f48aed6d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:09 +1000 Subject: [PATCH 1314/1368] KVM: Don't keep reference to irq routing table in irqfd struct This makes the irqfd code keep a copy of the irq routing table entry for each irqfd, rather than a reference to the copy in the actual irq routing table maintained in kvm/virt/irqchip.c. This will enable us to change the routing table structure in future, or even not have a routing table at all on some platforms. The synchronization that was previously achieved using srcu_dereference on the read side is now achieved using a seqcount_t structure. That ensures that we don't get a halfway-updated copy of the structure if we read it while another thread is updating it. We still use srcu_read_lock/unlock around the read side so that when changing the routing table we can be sure that after calling synchronize_srcu, nothing will be using the old routing. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 56f89f3629ffd1a21d38c3d0bea23deac0e284ce) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b51c19ffd8fd..16bf26183225 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "iodev.h" @@ -75,7 +76,8 @@ struct _irqfd { struct kvm *kvm; wait_queue_t wait; /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry __rcu *irq_entry; + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; /* Used for level IRQ fast-path */ int gsi; struct work_struct inject; @@ -223,16 +225,20 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); unsigned long flags = (unsigned long)key; - struct kvm_kernel_irq_routing_entry *irq; + struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; + unsigned seq; int idx; if (flags & POLLIN) { idx = srcu_read_lock(&kvm->irq_srcu); - irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); + do { + seq = read_seqcount_begin(&irqfd->irq_entry_sc); + irq = irqfd->irq_entry; + } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, + if (irq.type == KVM_IRQ_ROUTING_MSI) + kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); else schedule_work(&irqfd->inject); @@ -277,18 +283,20 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, { struct kvm_kernel_irq_routing_entry *e; - if (irqfd->gsi >= irq_rt->nr_rt_entries) { - rcu_assign_pointer(irqfd->irq_entry, NULL); - return; - } + write_seqcount_begin(&irqfd->irq_entry_sc); + + irqfd->irq_entry.type = 0; + if (irqfd->gsi >= irq_rt->nr_rt_entries) + goto out; hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) - rcu_assign_pointer(irqfd->irq_entry, e); - else - rcu_assign_pointer(irqfd->irq_entry, NULL); + irqfd->irq_entry = *e; } + + out: + write_seqcount_end(&irqfd->irq_entry_sc); } static int @@ -310,6 +318,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); + seqcount_init(&irqfd->irq_entry_sc); file = eventfd_fget(args->fd); if (IS_ERR(file)) { @@ -466,14 +475,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { /* - * This rcu_assign_pointer is needed for when + * This clearing of irq_entry.type is needed for when * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_srcu done by caller - * of that function. */ - rcu_assign_pointer(irqfd->irq_entry, NULL); + write_seqcount_begin(&irqfd->irq_entry_sc); + irqfd->irq_entry.type = 0; + write_seqcount_end(&irqfd->irq_entry_sc); irqfd_deactivate(irqfd); } } From 28bcfc22334c1214d603e5136b627d02f6eadb5e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:10 +1000 Subject: [PATCH 1315/1368] KVM: irqchip: Provide and use accessors for irq routing table This provides accessor functions for the KVM interrupt mappings, in order to reduce the amount of code that accesses the fields of the kvm_irq_routing_table struct, and restrict that code to one file, virt/kvm/irqchip.c. The new functions are kvm_irq_map_gsi(), which maps from a global interrupt number to a set of IRQ routing entries, and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to a global interrupt number. This also moves the update of kvm_irq_routing_table::chip[][] into irqchip.c, out of the various kvm_set_routing_entry implementations. That means that none of the kvm_set_routing_entry implementations need the kvm_irq_routing_table argument anymore, so this removes it. This does not change any locking or data lifetime rules. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 8ba918d488caded2c4368b0b922eb905fe3bb101) Signed-off-by: Christoffer Dall --- arch/powerpc/kvm/mpic.c | 4 +--- include/linux/kvm_host.h | 8 ++++++-- virt/kvm/eventfd.c | 10 ++++++---- virt/kvm/irq_comm.c | 20 +++++++++---------- virt/kvm/irqchip.c | 42 ++++++++++++++++++++++++++++++++-------- 5 files changed, 56 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 2861ae9eaae6..b58d61039015 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1822,8 +1822,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -1835,7 +1834,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin; if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1fb1bd4593a5..f91b5cda1d28 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -709,6 +709,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi); +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin); + int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); @@ -898,8 +903,7 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 16bf26183225..a75227f7d487 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + int i, n_entries; + + n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); irqfd->irq_entry.type = 0; - if (irqfd->gsi >= irq_rt->nr_rt_entries) - goto out; - hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { + e = entries; + for (i = 0; i < n_entries; ++i, ++e) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) irqfd->irq_entry = *e; } - out: write_seqcount_end(&irqfd->irq_entry_sc); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a228ee82bad2..175844593243 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, */ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) { + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; @@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) { - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; - break; - } + if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + e = &entries[0]; + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin + delta; if (e->irqchip.pin >= max_pin) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b43c275775cd..f4648dd94888 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,13 +31,37 @@ #include #include "irq.h" +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi) +{ + struct kvm_kernel_irq_routing_entry *e; + int n = 0; + + if (gsi < irq_rt->nr_rt_entries) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + entries[n] = *e; + ++n; + } + } + + return n; +} + +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin) +{ + return irq_rt->chip[irqchip][pin]; +} + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0, idx; + struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; + i = kvm_irq_map_gsi(irq_set, irq_rt, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, e->gsi = ue->gsi; e->type = ue->type; - r = kvm_set_routing_entry(rt, e, ue); + r = kvm_set_routing_entry(e, ue); if (r) goto out; + if (e->type == KVM_IRQ_ROUTING_IRQCHIP) + rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; From 7563524573fb8d7e723cec38329edf1b3968593c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:11 +1000 Subject: [PATCH 1316/1368] KVM: Move all accesses to kvm::irq_routing into irqchip.c Now that struct _irqfd does not keep a reference to storage pointed to by the irq_routing field of struct kvm, we can move the statement that updates it out from under the irqfds.lock and put it in kvm_set_irq_routing() instead. That means we then have to take a srcu_read_lock on kvm->irq_srcu around the irqfd_update call in kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer ensures that that the routing can't change. Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin() to take a struct kvm * argument instead of the pointer to the routing table, this allows us to to move all references to kvm->irq_routing into irqchip.c. That in turn allows us to move the definition of the kvm_irq_routing_table struct into irqchip.c as well. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 35 +++++++---------------------------- virt/kvm/eventfd.c | 22 +++++++++------------- virt/kvm/irq_comm.c | 6 ++---- virt/kvm/irqchip.c | 39 +++++++++++++++++++++++++-------------- 4 files changed, 43 insertions(+), 59 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f91b5cda1d28..65b6b8d793b5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -301,24 +301,7 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - struct kvm_kernel_irq_routing_entry *rt_entries; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. - */ - struct hlist_head map[0]; -}; - -#else - -struct kvm_irq_routing_table {}; - -#endif +struct kvm_irq_routing_table; #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 @@ -377,8 +360,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP /* - * Update side is protected by irq_lock and, - * if configured, irqfds.lock. + * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; @@ -709,10 +691,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi); -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin); +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi); +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); @@ -923,7 +904,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); -void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); +void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { @@ -945,10 +926,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) static inline void kvm_irqfd_release(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_IRQCHIP -static inline void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +static inline void kvm_irq_routing_update(struct kvm *kvm) { - rcu_assign_pointer(kvm->irq_routing, irq_rt); } #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a75227f7d487..71133644f465 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, - struct kvm_irq_routing_table *irq_rt) +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; int i, n_entries; - n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); + n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); @@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct kvm_irq_routing_table *irq_rt; struct _irqfd *irqfd, *tmp; struct file *file = NULL; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; unsigned int events; + int idx; irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) @@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) goto fail; } - irq_rt = rcu_dereference_protected(kvm->irq_routing, - lockdep_is_held(&kvm->irqfds.lock)); - irqfd_update(kvm, irqfd, irq_rt); + idx = srcu_read_lock(&kvm->irq_srcu); + irqfd_update(kvm, irqfd); + srcu_read_unlock(&kvm->irq_srcu, idx); events = file->f_op->poll(file, &irqfd->pt); @@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm) } /* - * Change irq_routing and irqfd. + * Take note of a change in irq routing. * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ -void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +void kvm_irq_routing_update(struct kvm *kvm) { struct _irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); - rcu_assign_pointer(kvm->irq_routing, irq_rt); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) - irqfd_update(kvm, irqfd, irq_rt); + irqfd_update(kvm, irqfd); spin_unlock_irq(&kvm->irqfds.lock); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 175844593243..963b8995a9e8 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; - struct kvm_irq_routing_table *irq_rt; int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * which is limited to 1:1 GSI mapping. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; if (likely(e->type == KVM_IRQ_ROUTING_MSI)) ret = kvm_set_msi_inatomic(e, kvm); @@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, int idx, gsi; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index f4648dd94888..04faac50cef5 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,12 +31,26 @@ #include #include "irq.h" -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi) +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + struct kvm_kernel_irq_routing_entry *rt_entries; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; +}; + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) { + struct kvm_irq_routing_table *irq_rt; struct kvm_kernel_irq_routing_entry *e; int n = 0; + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); if (gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; @@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, return n; } -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin) +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; + + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); return irq_rt->chip[irqchip][pin]; } bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i, idx; - struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * writes to the unused one. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - i = kvm_irq_map_gsi(irq_set, irq_rt, irq); + i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(&kvm->irq_lock); old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); + rcu_assign_pointer(kvm->irq_routing, new); + kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); synchronize_srcu_expedited(&kvm->irq_srcu); From c905880e253b8e2f77db26dcb647a1a3bb359b11 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:12 +1000 Subject: [PATCH 1317/1368] KVM: Move irq notifier implementation into eventfd.c This moves the functions kvm_irq_has_notifier(), kvm_notify_acked_irq(), kvm_register_irq_ack_notifier() and kvm_unregister_irq_ack_notifier() from irqchip.c to eventfd.c. The reason for doing this is that those functions are used in connection with IRQFDs, which are implemented in eventfd.c. In future we will want to use IRQFDs on platforms that don't implement the GSI routing implemented in irqchip.c, so we won't be compiling in irqchip.c, but we still need the irq notifiers. The implementation is unchanged. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit e4d57e1ee1ab59f0cef0272800ac6c52e0ec814a) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/irqchip.c | 61 -------------------------------------------- 2 files changed, 63 insertions(+), 61 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 71133644f465..d2b1653c97dd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -34,7 +34,9 @@ #include #include #include +#include +#include "irq.h" #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING @@ -830,3 +832,64 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 04faac50cef5..7f256f31df10 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -69,67 +69,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) return irq_rt->chip[irqchip][pin]; } -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; From f65b953a7232ae8bb497b226363629f9e49b4792 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:13 +1000 Subject: [PATCH 1318/1368] KVM: Give IRQFD its own separate enabling Kconfig option Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING. So that we can have the IRQFD code compiled in without having the IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING, and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING also select HAVE_KVM_IRQFD. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 297e21053a52f060944e9f0de4c64fad9bcd72fc) Signed-off-by: Christoffer Dall --- arch/ia64/kvm/Kconfig | 1 + arch/powerpc/kvm/Kconfig | 1 + arch/x86/kvm/Kconfig | 1 + include/linux/kvm_host.h | 8 ++++---- virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 6 +++--- virt/kvm/kvm_main.c | 2 +- 7 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 990b86420cc6..3d50ea955c4c 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -25,6 +25,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select KVM_APIC_ARCHITECTURE select KVM_MMIO diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index eb643f862579..60019a6fd6bb 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -155,6 +155,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b89c5db2b832..bdccfb62aa0d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,6 +27,7 @@ config KVM select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 65b6b8d793b5..35319fe693f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -413,7 +413,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -888,20 +888,20 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); - #else static inline void kvm_free_irq_routing(struct kvm *kvm) {} #endif +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); + #ifdef CONFIG_HAVE_KVM_EVENTFD void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); -#ifdef CONFIG_HAVE_KVM_IRQCHIP +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); void kvm_irq_routing_update(struct kvm *); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 13f2d19793e3..fc0c5e603eb4 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQFD + bool + config HAVE_KVM_IRQ_ROUTING bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index d2b1653c97dd..8be5b6545770 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -39,7 +39,7 @@ #include "irq.h" #include "iodev.h" -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -450,7 +450,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) void kvm_eventfd_init(struct kvm *kvm) { -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * shutdown any irqfd's that match fd+gsi */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1edb15da7acf..848a6afab165 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2330,7 +2330,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_CHECK_EXTENSION_VM: From 57f984d2c64c8c1e597d021567a379f4f3920b35 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 Aug 2014 14:24:45 +0200 Subject: [PATCH 1319/1368] KVM: Move more code under CONFIG_HAVE_KVM_IRQFD Commits e4d57e1ee1ab (KVM: Move irq notifier implementation into eventfd.c, 2014-06-30) included the irq notifier code unconditionally in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before. Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_IRQ_ROUTING to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be under CONFIG_HAVE_KVM_IRQCHIP. Together, this broke compilation without CONFIG_KVM_XICS. Fix by adding or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD. Signed-off-by: Paolo Bonzini (cherry picked from commit c77dcacb397519b6ade8f08201a4a90a7f4f751e) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 + include/trace/events/kvm.h | 8 +-- virt/kvm/eventfd.c | 122 ++++++++++++++++++------------------- virt/kvm/kvm_main.c | 2 + 4 files changed, 69 insertions(+), 65 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 35319fe693f8..75d911ca47bd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -364,6 +364,8 @@ struct kvm { */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD struct hlist_head irq_ack_notifier_list; #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 131a0bda7aec..908925ace776 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, __entry->errno < 0 ? -__entry->errno : __entry->reason) ); -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), @@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq, TP_printk("gsi %u level %d source %d", __entry->gsi, __entry->level, __entry->irq_source_id) ); -#endif +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ #if defined(__KVM_HAVE_IOAPIC) #define kvm_deliver_mode \ @@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq, #endif /* defined(__KVM_HAVE_IOAPIC) */ -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), @@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq, #endif ); -#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 8be5b6545770..67563284f7b9 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -445,6 +445,67 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) kfree(irqfd); return ret; } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} #endif void @@ -832,64 +893,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } - -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 848a6afab165..50f947301fa7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -464,6 +464,8 @@ static struct kvm *kvm_create_vm(unsigned long type) #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif From 7fdbe25673a0bcd4c38968ec2cd048a21f1fbb93 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 19 Aug 2014 16:45:56 +0200 Subject: [PATCH 1320/1368] KVM: avoid unnecessary synchronize_rcu We dont have to wait for a grace period if there is no oldpid that we are going to free. putpid also checks for NULL, so this patch only fences synchronize_rcu. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 7103f60de8bed21a0ad5d15d2ad5b7a333dda201) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 50f947301fa7..aa5057e7a1b9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu) struct pid *oldpid = vcpu->pid; struct pid *newpid = get_task_pid(current, PIDTYPE_PID); rcu_assign_pointer(vcpu->pid, newpid); - synchronize_rcu(); + if (oldpid) + synchronize_rcu(); put_pid(oldpid); } cpu = get_cpu(); From ae38cfa961f9605be43060001ccc41d39427cb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:05 +0200 Subject: [PATCH 1321/1368] KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. Advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini (cherry picked from commit e790d9ef6405633b007339d746b709aed43a928d) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 2 ++ 6 files changed, 20 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 0e3d3dc8eea2..d55786c579bf 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index aacf3e35404f..e4c719d49e16 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -533,6 +533,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9133f869b070..46392afd043d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -325,6 +325,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) /* Nothing todo */ } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_regs(&vcpu->arch.host_fpregs); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 46a8c74fd431..a348daad61fd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6809,6 +6809,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) static_key_slow_dec(&kvm_no_apic_vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (type) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 75d911ca47bd..4c12f314aab2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -600,6 +600,8 @@ void kvm_arch_exit(void); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aa5057e7a1b9..2659da911d99 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3063,6 +3063,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) if (vcpu->preempted) vcpu->preempted = false; + kvm_arch_sched_in(vcpu, cpu); + kvm_arch_vcpu_load(vcpu, cpu); } From 7ec68cffdc4807a95ae1db300bf857084f0c94da Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:15:00 +0200 Subject: [PATCH 1322/1368] KVM: Introduce gfn_to_hva_memslot_prot To support read-only memory regions on arm and arm64, we have a need to resolve a gfn to an hva given a pointer to a memslot to avoid looping through the memslots twice and to reuse the hva error checking of gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and refactor gfn_to_hva_prot() to use this function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4c12f314aab2..59d50379c2e4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -504,6 +504,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, + bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2659da911d99..84cee09da140 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); * If writable is set to false, the hva returned by this function is only * allowed to be read. */ -unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, + gfn_t gfn, bool *writable) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); if (!kvm_is_error_hva(hva) && writable) @@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return hva; } +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +{ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + + return gfn_to_hva_memslot_prot(slot, gfn, writable); +} + static int kvm_read_hva(void *data, void __user *hva, int len) { return __copy_from_user(data, hva, len); From 7e7ef8405025cd7af730fce40ace2a9c77ee9970 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:18:04 +0200 Subject: [PATCH 1323/1368] arm/arm64: KVM: Support KVM_CAP_READONLY_MEM When userspace loads code and data in a read-only memory regions, KVM needs to be able to handle this on arm and arm64. Specifically this is used when running code directly from a read-only flash device; the common scenario is a UEFI blob loaded with the -bios option in QEMU. Note that the MMIO exit on writes to a read-only memory is ABI and can be used to emulate block-erase style flash devices. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 98047888bb9fd57734028c44ec17413ddd623958) Signed-off-by: Christoffer Dall --- arch/arm/include/uapi/asm/kvm.h | 1 + arch/arm/kvm/arm.c | 1 + arch/arm/kvm/mmu.c | 22 ++++++++-------------- arch/arm64/include/uapi/asm/kvm.h | 1 + 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index e6ebdd3471e5..51257fda254b 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -25,6 +25,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d55786c579bf..35dc889df3d2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -188,6 +188,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_READONLY_MEM: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16e7994bf347..62f5642153f9 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -747,14 +747,13 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) { int ret; bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; gfn_t gfn = fault_ipa >> PAGE_SHIFT; - unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; @@ -863,7 +862,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; - bool is_iabt; + unsigned long hva; + bool is_iabt, write_fault, writable; gfn_t gfn; int ret, idx; @@ -884,7 +884,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) idx = srcu_read_lock(&vcpu->kvm->srcu); gfn = fault_ipa >> PAGE_SHIFT; - if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); @@ -892,13 +895,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - if (fault_status != FSC_FAULT) { - kvm_err("Unsupported fault status on io memory: %#lx\n", - fault_status); - ret = -EFAULT; - goto out_unlock; - } - /* * The IPA is reported as [MAX:12], so we need to * complement it with the bottom 12 bits from the @@ -910,9 +906,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - memslot = gfn_to_memslot(vcpu->kvm, gfn); - - ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; out_unlock: diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e633ff8cdec8..f4ec5a674d05 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -37,6 +37,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) From 276359920df8032e689497456c0a3c6dcc1b5100 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:20 +0100 Subject: [PATCH 1324/1368] KVM: ARM/arm64: fix non-const declaration of function returning const Sparse kicks up about a type mismatch for kvm_target_cpu: arch/arm64/kvm/guest.c:271:25: error: symbol 'kvm_target_cpu' redeclared with different type (originally declared at ./arch/arm64/include/asm/kvm_host.h:45) - different modifiers so fix this by adding the missing const attribute to the function declaration. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 6951e48bff0b55d2a8e825a953fc1f8e3a34bf1c) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..fcb12a6f7db5 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -42,7 +42,7 @@ struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_reset_coprocs(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..44094e559848 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -42,7 +42,7 @@ #define KVM_VCPU_MAX_FEATURES 3 struct kvm_vcpu; -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); From 0251cb8ae768874a988c39a908fbcc8dfd82429a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:21 +0100 Subject: [PATCH 1325/1368] KVM: ARM/arm64: fix broken __percpu annotation Running sparse results in a bunch of noisy address space mismatches thanks to the broken __percpu annotation on kvm_get_running_vcpus. This function returns a pcpu pointer to a pointer, not a pointer to a pcpu pointer. This patch fixes the annotation, which kills the warnings from sparse. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 4000be423cb01a8d09de878bb8184511c49d4238) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 35dc889df3d2..79268a12a49e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -82,7 +82,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void) /** * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. */ -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) { return &kvm_arm_running_vcpu; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 44094e559848..50431d36732b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -193,7 +193,7 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) } struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); From 563d813862bf2c44e1e9f0d59facf8e0ce303355 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:22 +0100 Subject: [PATCH 1326/1368] KVM: ARM/arm64: avoid returning negative error code as bool is_valid_cache returns true if the specified cache is valid. Unfortunately, if the parameter passed it out of range, we return -ENOENT, which ends up as true leading to potential hilarity. This patch returns false on the failure path instead. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 18d457661fb9fa69352822ab98d39331c3d0e571) Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 37a0fe1bb9bb..7928dbdf2102 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5805e7c4a4dd..4cc3b719208e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); From 1453b5c1052996b983f6d9ae043dec64e089c0c8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:23 +0100 Subject: [PATCH 1327/1368] KVM: ARM/arm64: return -EFAULT if copy_from_user fails in set_timer_reg We currently return the number of bytes not copied if set_timer_reg fails, which is almost certainly not what userspace would like. This patch returns -EFAULT instead. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit bd218bce92d3868ba4fe5e9e3eb8199d2aa614af) Signed-off-by: Christoffer Dall --- arch/arm/kvm/guest.c | 2 +- arch/arm64/kvm/guest.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 813e49258690..cc0b78769bd8 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); if (ret != 0) - return ret; + return -EFAULT; return kvm_arm_timer_set_reg(vcpu, reg->id, val); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 8d1ec2887a26..76794692c20b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); if (ret != 0) - return ret; + return -EFAULT; return kvm_arm_timer_set_reg(vcpu, reg->id, val); } From 02b1b15d1265067d246eb8017c27c75d2fac5dae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:24 +0100 Subject: [PATCH 1328/1368] KVM: vgic: return int instead of bool when checking I/O ranges vgic_ioaddr_overlap claims to return a bool, but in reality it returns an int. Shut sparse up by fixing the type signature. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 1fa451bcc67fa921a04c5fac8dbcde7844d54512) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 73eba793b17f..d1cfe672b9d7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1690,7 +1690,7 @@ int kvm_vgic_create(struct kvm *kvm) return ret; } -static bool vgic_ioaddr_overlap(struct kvm *kvm) +static int vgic_ioaddr_overlap(struct kvm *kvm) { phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; From 8ec715912ec282c0c560407fe72ae80b14c8d320 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:25 +0100 Subject: [PATCH 1329/1368] KVM: vgic: declare probe function pointer as const We extract the vgic probe function from the of_device_id data pointer, which is const. Kill the sparse warning by ensuring that the local function pointer is also marked as const. Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit de56fb1923ca11f428bf557870e0faa99f38762e) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d1cfe672b9d7..efe6eee2e7eb 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1557,8 +1557,8 @@ static const struct of_device_id vgic_ids[] = { int kvm_vgic_hyp_init(void) { const struct of_device_id *matched_id; - int (*vgic_probe)(struct device_node *,const struct vgic_ops **, - const struct vgic_params **); + const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); struct device_node *vgic_node; int ret; From d3b49fbe5764b06372df1dea9c9bea5b8373f49c Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 26 Aug 2014 14:00:37 +0200 Subject: [PATCH 1330/1368] KVM: Unconditionally export KVM_CAP_READONLY_MEM The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that userspace can, at run-time, determine if a feature is supported or not. This allows KVM to being supporting a new feature with a new kernel version without any need to update user space. Unfortunately, since the definition of KVM_CAP_READONLY_MEM was guarded by #ifdef __KVM_HAVE_READONLY_MEM, such discovery still required a user space update. Therefore, unconditionally export KVM_CAP_READONLY_MEM and change the in-kernel conditional to rely on __KVM_HAVE_READONLY_MEM. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini (cherry picked from commit 0f8a4de3e088797576ac76200b634b802e5c7781) Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 2 -- virt/kvm/kvm_main.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1af686a82703..023ec6132966 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -662,9 +662,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_S390_COW 79 #define KVM_CAP_PPC_ALLOC_HTAB 80 -#ifdef __KVM_HAVE_READONLY_MEM #define KVM_CAP_READONLY_MEM 81 -#endif #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 84cee09da140..0cb02c749622 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -709,7 +709,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) { u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; -#ifdef KVM_CAP_READONLY_MEM +#ifdef __KVM_HAVE_READONLY_MEM valid_flags |= KVM_MEM_READONLY; #endif From 177e51e89403b6843d068a1dae873650053b32a4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 26 Aug 2014 14:00:38 +0200 Subject: [PATCH 1331/1368] KVM: Unconditionally export KVM_CAP_USER_NMI The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that userspace can, at run-time, determine if a feature is supported or not. This allows KVM to being supporting a new feature with a new kernel version without any need to update user space. Unfortunately, since the definition of KVM_CAP_USER_NMI was guarded by #ifdef __KVM_HAVE_USER_NMI, such discovery still required a user space update. Therefore, unconditionally export KVM_CAP_USER_NMI and change the the typo in the comment for the IOCTL number definition as well. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini (cherry picked from commit 44b5ce73c99c389817be71b9161bceb197d40ecb) Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 023ec6132966..bbc1f8a09eb8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -578,9 +578,7 @@ struct kvm_ppc_smmu_info { #endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 -#endif #ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 #endif @@ -995,7 +993,7 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) -/* Available with KVM_CAP_NMI */ +/* Available with KVM_CAP_USER_NMI */ #define KVM_NMI _IO(KVMIO, 0x9a) /* Available with KVM_CAP_SET_GUEST_DEBUG */ #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) From 48bc31d532d18a5e64c5ef1f20be161d0b834d11 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: [PATCH 1332/1368] KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. Signed-off-by: Paolo Bonzini (cherry picked from commit 656473003bc7e056c3bbd4a4d9832dad01e86f76) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 7 ++----- arch/arm64/include/asm/kvm_host.h | 6 ++---- arch/ia64/include/asm/kvm_host.h | 3 --- arch/mips/include/asm/kvm_host.h | 5 ----- arch/powerpc/include/asm/kvm_host.h | 5 ----- arch/s390/include/asm/kvm_host.h | 3 +++ arch/x86/include/asm/kvm_host.h | 4 ---- include/linux/kvm_host.h | 6 ------ include/linux/kvm_types.h | 14 ++++++++++++++ 9 files changed, 21 insertions(+), 32 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index fcb12a6f7db5..0bc5295bbfdf 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #include -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -149,20 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 50431d36732b..ac99093d004e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -22,6 +22,8 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include +#include #include #include #include @@ -41,7 +43,6 @@ #define KVM_VCPU_MAX_FEATURES 3 -struct kvm_vcpu; int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); @@ -164,18 +165,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 989dd3fe8de1..65088aa016c0 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -238,9 +238,6 @@ struct kvm_vm_data { #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) 1 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 4d6fa0bf1305..d56ee50d6885 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -71,11 +71,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index af326cde7cb6..0a3238026fd7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,7 +53,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -81,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 16bd5d169cdb..42913475fc6c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 53db582487c9..bd6f3529453d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -103,10 +103,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 59d50379c2e4..b8cb3c2d893e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -129,8 +129,6 @@ static inline bool is_error_page(struct page *page) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -301,8 +299,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -994,8 +990,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + #include /* From 3dcac226202ec154b5c29bcd968e7b894e61c031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 28 Aug 2014 15:13:02 +0200 Subject: [PATCH 1333/1368] KVM: static inline empty kvm_arch functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using static inline is going to save few bytes and cycles. For example on powerpc, the difference is 700 B after stripping. (5 kB before) This patch also deals with two overlooked empty functions: kvm_arch_flush_shadow was not removed from arch/mips/kvm/mips.c 2df72e9bc KVM: split kvm_arch_flush_shadow and kvm_arch_sched_in never made it into arch/ia64/kvm/kvm-ia64.c. e790d9ef6 KVM: add kvm_arch_sched_in Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini (cherry picked from commit 0865e636aef751966e6e0f8950a26bc7391e923c) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 6 ++++ arch/arm/kvm/arm.c | 19 ------------- arch/arm64/include/asm/kvm_host.h | 6 ++++ arch/ia64/include/asm/kvm_host.h | 12 ++++++++ arch/ia64/kvm/kvm-ia64.c | 30 -------------------- arch/mips/include/asm/kvm_host.h | 11 ++++++++ arch/powerpc/include/asm/kvm_host.h | 8 ++++++ arch/powerpc/kvm/powerpc.c | 28 ------------------- arch/s390/include/asm/kvm_host.h | 14 ++++++++++ arch/s390/kvm/kvm-s390.c | 43 ----------------------------- 10 files changed, 57 insertions(+), 120 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0bc5295bbfdf..2b17f6ab9642 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,4 +230,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 79268a12a49e..882f7e856b6b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = 0; } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} /** * kvm_arch_init_vm - initializes a VM data structure @@ -285,14 +274,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ -} - -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac99093d004e..eaf689c48a59 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,4 +242,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 65088aa016c0..cf03097176b1 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -596,6 +596,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) {} +static inline void kvm_arch_hardware_unsetup(void) {} + #endif /* __ASSEMBLY__*/ #endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index b48aa69e35e1..b1e074d299ea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1363,10 +1363,6 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); @@ -1375,10 +1371,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_release_vm_pages(kvm); } -void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { if (cpu != vcpu->cpu) { @@ -1467,7 +1459,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kfree(vcpu->arch.apic); } - long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1550,21 +1541,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -1596,14 +1578,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, return 0; } -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ - return; -} - void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_flush_remote_tlbs(kvm); @@ -1852,10 +1826,6 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { return __apic_accept_irq(vcpu, irq->vector); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index d56ee50d6885..279376e90c7f 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -654,5 +654,16 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size); extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0a3238026fd7..90a9c0e4952c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -623,4 +623,12 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_exit(void) {} + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e4c719d49e16..7a75d6dbf610 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -251,19 +251,11 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = kvmppc_core_check_processor_compat(); @@ -296,10 +288,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -421,10 +409,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return kvmppc_core_create_memslot(kvm, slot, npages); } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -441,10 +425,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvmppc_core_commit_memory_region(kvm, mem, old); } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -533,10 +513,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE @@ -1134,7 +1110,3 @@ int kvm_arch_init(void *opaque) { return 0; } - -void kvm_arch_exit(void) -{ -} diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 42913475fc6c..d45e11dac5cf 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -269,4 +269,18 @@ struct kvm_arch{ }; extern int sie64a(struct kvm_s390_sie_block *, u64 *); + +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_check_processor_compat(void *rtn) {} +static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} + #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 46392afd043d..1fe3bc10c87f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -92,10 +92,6 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; @@ -105,19 +101,11 @@ void kvm_arch_hardware_unsetup(void) { } -void kvm_arch_check_processor_compat(void *rtn) -{ -} - int kvm_arch_init(void *opaque) { return 0; } -void kvm_arch_exit(void) -{ -} - /* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -289,10 +277,6 @@ static void kvm_free_vcpus(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); @@ -320,15 +304,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - /* Nothing todo */ -} - -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_regs(&vcpu->arch.host_fpregs); @@ -975,21 +950,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1035,15 +1001,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} - static int __init kvm_s390_init(void) { int ret; From df6fef6a404701ff6249330215c31f2a2b041d7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: [PATCH 1334/1368] KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beggining was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini (cherry picked from commit 13a34e067eab24fec882e1834fbf2cc31911d474) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/mips/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 15 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 2b17f6ab9642..46e5d4da1989 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 882f7e856b6b..c8ff64b54459 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -87,7 +87,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index eaf689c48a59..bcde41905746 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index b1e074d299ea..c9aa236dc29b 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { long status; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 279376e90c7f..5e3f4b0f18c8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -654,7 +654,7 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size); extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 90a9c0e4952c..f391f3fbde8b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -623,7 +623,7 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7a75d6dbf610..ea4cfdc991da 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -246,7 +246,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d45e11dac5cf..99971dfc6b9a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -270,7 +270,7 @@ struct kvm_arch{ extern int sie64a(struct kvm_s390_sie_block *, u64 *); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1fe3bc10c87f..412fbc5dc688 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -86,7 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { static unsigned long long *facilities; /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bd6f3529453d..5137dca9e9d6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -634,8 +634,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int (*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a14a6eaf871d..934befea3e36 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -606,7 +606,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -617,7 +617,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5402c94ab768..af423252c265 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2566,7 +2566,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2630,7 +2630,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a348daad61fd..37d9503b81ea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -239,7 +239,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -6603,7 +6603,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6614,7 +6614,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -6694,10 +6694,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8cb3c2d893e..ce4cf2f1c419 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -608,8 +608,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0cb02c749622..556049f120b7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk) cpumask_set_cpu(cpu, cpus_hardware_enabled); - r = kvm_arch_hardware_enable(NULL); + r = kvm_arch_hardware_enable(); if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); @@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk) if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; cpumask_clear_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_disable(NULL); + kvm_arch_hardware_disable(); } static void hardware_disable(void) From 25523c743ec5bf8ebe707ac8a8112eac0a2d42b6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:31 +0200 Subject: [PATCH 1335/1368] KVM: remove redundant check of in_spin_loop The expression `vcpu->spin_loop.in_spin_loop' is always true, because it is evaluated only when the condition `!vcpu->spin_loop.in_spin_loop' is false. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 34656113182b704682e23d1363417536addfec97) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 556049f120b7..025f8300b6f0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1774,8 +1774,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) bool eligible; eligible = !vcpu->spin_loop.in_spin_loop || - (vcpu->spin_loop.in_spin_loop && - vcpu->spin_loop.dy_eligible); + vcpu->spin_loop.dy_eligible; if (vcpu->spin_loop.in_spin_loop) kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); From c4e4c7bf26b78b6cb8fa985c19854c8f6e9abc20 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:32 +0200 Subject: [PATCH 1336/1368] KVM: remove redundant assigment of return value in kvm_dev_ioctl The first statement of kvm_dev_ioctl is long r = -EINVAL; No need to reassign the same value. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit a13f533b2f1d53a7c0baa7490498caeab7bc8ba5) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 025f8300b6f0..d201466921bf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2611,7 +2611,6 @@ static long kvm_dev_ioctl(struct file *filp, switch (ioctl) { case KVM_GET_API_VERSION: - r = -EINVAL; if (arg) goto out; r = KVM_API_VERSION; @@ -2623,7 +2622,6 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case KVM_GET_VCPU_MMAP_SIZE: - r = -EINVAL; if (arg) goto out; r = PAGE_SIZE; /* struct kvm_run */ From e665e3aae1e8c2e47e467c13c10e5b92c4be61a5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:33 +0200 Subject: [PATCH 1337/1368] KVM: remove redundant assignments in __kvm_set_memory_region __kvm_set_memory_region sets r to EINVAL very early. Doing it again is not necessary. The same is true later on, where r is assigned -ENOMEM twice. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit f2a25160887e00434ce1361007009120e1fecbda) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d201466921bf..3ed27192a849 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -778,7 +778,6 @@ int __kvm_set_memory_region(struct kvm *kvm, base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; - r = -EINVAL; if (npages > KVM_MEM_MAX_NR_PAGES) goto out; @@ -792,7 +791,6 @@ int __kvm_set_memory_region(struct kvm *kvm, new.npages = npages; new.flags = mem->flags; - r = -EINVAL; if (npages) { if (!old.npages) change = KVM_MR_CREATE; @@ -848,7 +846,6 @@ int __kvm_set_memory_region(struct kvm *kvm, } if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { - r = -ENOMEM; slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) From cf0dfca545678c6dc257a75094f13515e0bf6c69 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 9 Sep 2014 11:27:09 +0100 Subject: [PATCH 1338/1368] ARM/arm64: KVM: fix use of WnR bit in kvm_is_write_fault() The ISS encoding for an exception from a Data Abort has a WnR bit[6] that indicates whether the Data Abort was caused by a read or a write instruction. While there are several fields in the encoding that are only valid if the ISV bit[24] is set, WnR is not one of them, so we can read it unconditionally. Instead of fixing both implementations of kvm_is_write_fault() in place, reimplement it just once using kvm_vcpu_dabt_iswrite(), which already does the right thing with respect to the WnR bit. Also fix up the callers to pass 'vcpu' Acked-by: Laszlo Ersek Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier (cherry picked from commit a7d079cea2dffb112e26da2566dd84c0ef1fce97) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 11 ----------- arch/arm/kvm/mmu.c | 12 ++++++++++-- arch/arm64/include/asm/kvm_mmu.h | 13 ------------- 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f5f72f..3f688b458143 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) flush_pmd_entry(pte); } -static inline bool kvm_is_write_fault(unsigned long hsr) -{ - unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; - if (hsr_ec == HSR_EC_IABT) - return false; - else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR)) - return false; - else - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) { clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 62f5642153f9..bb06f76a8f89 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -746,6 +746,14 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) return false; } +static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -760,7 +768,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pfn_t pfn; pgprot_t mem_type = PAGE_S2; - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; @@ -886,7 +894,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn = fault_ipa >> PAGE_SHIFT; memslot = gfn_to_memslot(vcpu->kvm, gfn); hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + write_fault = kvm_is_write_fault(vcpu); if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8e138c7c53ac..737da742b293 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -93,19 +93,6 @@ void kvm_clear_hyp_idmap(void); #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) -static inline bool kvm_is_write_fault(unsigned long esr) -{ - unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; - - if (esr_ec == ESR_EL2_EC_IABT) - return false; - - if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) - return false; - - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) {} static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} From cdead48c82db0e501edfce2c85b7834de2960d6f Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 1 Sep 2014 09:36:08 +0100 Subject: [PATCH 1339/1368] KVM: EVENTFD: remove inclusion of irq.h No more needed. irq.h would be void on ARM. Acked-by: Paolo Bonzini Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier (cherry picked from commit 0ba09511ddc3ff0b462f37b4fe4b9c4dccc054ec) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 67563284f7b9..71ed39941b9c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,7 +36,6 @@ #include #include -#include "irq.h" #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQFD From 86318858caaa5a5e5b58f7bd2624c2a953eddf76 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:33 +0100 Subject: [PATCH 1340/1368] KVM: device: add simple registration mechanism for kvm_device_ops kvm_ioctl_create_device currently has knowledge of all the device types and their associated ops. This is fairly inflexible when adding support for new in-kernel device emulations, so move what we currently have out into a table, which can support dynamic registration of ops by new drivers for virtual hardware. Cc: Alex Williamson Cc: Alex Graf Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Marc Zyngier Acked-by: Cornelia Huck Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit d60eacb07053142bfb9b41582074a89a790a9d46) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 21 ++++++++++---- virt/kvm/kvm_main.c | 60 ++++++++++++++++++++++++---------------- 3 files changed, 53 insertions(+), 29 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ce4cf2f1c419..fc7b4270bc4a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1022,6 +1022,7 @@ struct kvm_device_ops { void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); +int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index bbc1f8a09eb8..00d2c69a3cb6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -848,14 +848,25 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; -#define KVM_DEV_TYPE_FSL_MPIC_20 1 -#define KVM_DEV_TYPE_FSL_MPIC_42 2 -#define KVM_DEV_TYPE_XICS 3 -#define KVM_DEV_TYPE_VFIO 4 #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 -#define KVM_DEV_TYPE_ARM_VGIC_V2 5 + +enum kvm_device_type { + KVM_DEV_TYPE_FSL_MPIC_20 = 1, +#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20 + KVM_DEV_TYPE_FSL_MPIC_42, +#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42 + KVM_DEV_TYPE_XICS, +#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS + KVM_DEV_TYPE_VFIO, +#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO + KVM_DEV_TYPE_ARM_VGIC_V2, +#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 + KVM_DEV_TYPE_FLIC, +#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC + KVM_DEV_TYPE_MAX, +}; /* * ioctls for VM fds diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3ed27192a849..df006d12c14b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2261,6 +2261,37 @@ struct kvm_device *kvm_device_from_filp(struct file *filp) return filp->private_data; } +static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { +#ifdef CONFIG_KVM_MPIC + [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, + [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, +#endif + +#ifdef CONFIG_KVM_XICS + [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, +#endif + +#ifdef CONFIG_KVM_VFIO + [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, +#endif + +#ifdef CONFIG_KVM_ARM_VGIC + [KVM_DEV_TYPE_ARM_VGIC_V2] = &kvm_arm_vgic_v2_ops, +#endif +}; + +int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) +{ + if (type >= ARRAY_SIZE(kvm_device_ops_table)) + return -ENOSPC; + + if (kvm_device_ops_table[type] != NULL) + return -EEXIST; + + kvm_device_ops_table[type] = ops; + return 0; +} + static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_create_device *cd) { @@ -2269,31 +2300,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm, bool test = cd->flags & KVM_CREATE_DEVICE_TEST; int ret; - switch (cd->type) { -#ifdef CONFIG_KVM_MPIC - case KVM_DEV_TYPE_FSL_MPIC_20: - case KVM_DEV_TYPE_FSL_MPIC_42: - ops = &kvm_mpic_ops; - break; -#endif -#ifdef CONFIG_KVM_XICS - case KVM_DEV_TYPE_XICS: - ops = &kvm_xics_ops; - break; -#endif -#ifdef CONFIG_KVM_VFIO - case KVM_DEV_TYPE_VFIO: - ops = &kvm_vfio_ops; - break; -#endif -#ifdef CONFIG_KVM_ARM_VGIC - case KVM_DEV_TYPE_ARM_VGIC_V2: - ops = &kvm_arm_vgic_v2_ops; - break; -#endif - default: + if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) + return -ENODEV; + + ops = kvm_device_ops_table[cd->type]; + if (ops == NULL) return -ENODEV; - } if (test) return 0; From 9e95eca3b761209d08e97144de3962f355ee2078 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:34 +0100 Subject: [PATCH 1341/1368] KVM: ARM: vgic: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the ARM VGIC, instead of relying on the static table. Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit c06a841bf36340e9e917ce60d11a6425ac85d0bd) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 - virt/kvm/arm/vgic.c | 157 ++++++++++++++++++++------------------- virt/kvm/kvm_main.c | 4 - 3 files changed, 79 insertions(+), 83 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fc7b4270bc4a..ed20bc0d209b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1027,7 +1027,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; -extern struct kvm_device_ops kvm_arm_vgic_v2_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index efe6eee2e7eb..8c95b2468cd8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1522,83 +1522,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -static void vgic_init_maintenance_interrupt(void *info) -{ - enable_percpu_irq(vgic->maint_irq, 0); -} - -static int vgic_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - vgic_init_maintenance_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(vgic->maint_irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block vgic_cpu_nb = { - .notifier_call = vgic_cpu_notify, -}; - -static const struct of_device_id vgic_ids[] = { - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, - {}, -}; - -int kvm_vgic_hyp_init(void) -{ - const struct of_device_id *matched_id; - const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, - const struct vgic_params **); - struct device_node *vgic_node; - int ret; - - vgic_node = of_find_matching_node_and_match(NULL, - vgic_ids, &matched_id); - if (!vgic_node) { - kvm_err("error: no compatible GIC node found\n"); - return -ENODEV; - } - - vgic_probe = matched_id->data; - ret = vgic_probe(vgic_node, &vgic_ops, &vgic); - if (ret) - return ret; - - ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); - return ret; - } - - ret = __register_cpu_notifier(&vgic_cpu_nb); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - - /* Callback into for arch code for setup */ - vgic_arch_setup(vgic); - - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - - return 0; - -out_free_irq: - free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); - return ret; -} - /** * kvm_vgic_init - Initialize global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct @@ -2062,7 +1985,7 @@ static int vgic_create(struct kvm_device *dev, u32 type) return kvm_vgic_create(dev->kvm); } -struct kvm_device_ops kvm_arm_vgic_v2_ops = { +static struct kvm_device_ops kvm_arm_vgic_v2_ops = { .name = "kvm-arm-vgic", .create = vgic_create, .destroy = vgic_destroy, @@ -2070,3 +1993,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = { .get_attr = vgic_get_attr, .has_attr = vgic_has_attr, }; + +static void vgic_init_maintenance_interrupt(void *info) +{ + enable_percpu_irq(vgic->maint_irq, 0); +} + +static int vgic_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + vgic_init_maintenance_interrupt(NULL); + break; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_percpu_irq(vgic->maint_irq); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block vgic_cpu_nb = { + .notifier_call = vgic_cpu_notify, +}; + +static const struct of_device_id vgic_ids[] = { + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, + {}, +}; + +int kvm_vgic_hyp_init(void) +{ + const struct of_device_id *matched_id; + int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); + struct device_node *vgic_node; + int ret; + + vgic_node = of_find_matching_node_and_match(NULL, + vgic_ids, &matched_id); + if (!vgic_node) { + kvm_err("error: no compatible GIC node found\n"); + return -ENODEV; + } + + vgic_probe = matched_id->data; + ret = vgic_probe(vgic_node, &vgic_ops, &vgic); + if (ret) + return ret; + + ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); + return ret; + } + + ret = __register_cpu_notifier(&vgic_cpu_nb); + if (ret) { + kvm_err("Cannot register vgic CPU notifier\n"); + goto out_free_irq; + } + + /* Callback into for arch code for setup */ + vgic_arch_setup(vgic); + + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + + return kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); + +out_free_irq: + free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); + return ret; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index df006d12c14b..111d560418f3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2274,10 +2274,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { #ifdef CONFIG_KVM_VFIO [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, #endif - -#ifdef CONFIG_KVM_ARM_VGIC - [KVM_DEV_TYPE_ARM_VGIC_V2] = &kvm_arm_vgic_v2_ops, -#endif }; int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) From 7bd2f48101c8f43cd36e9920c7315df8575b61a8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:36 +0100 Subject: [PATCH 1342/1368] KVM: VFIO: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the VFIO kvm device, instead of relying on the static table. This is achieved by a module_init call to register the ops with KVM. Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Alex Williamson Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit 80ce1639727e9d38729c34f162378508c307ca25) Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 4 ---- virt/kvm/vfio.c | 22 +++++++++++++++------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ed20bc0d209b..f64e941a4213 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1026,7 +1026,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; -extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 111d560418f3..f019669674a5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2270,10 +2270,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { #ifdef CONFIG_KVM_XICS [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, #endif - -#ifdef CONFIG_KVM_VFIO - [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, -#endif }; int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 597c258245ea..475487e238e1 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -189,6 +189,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ } +static int kvm_vfio_create(struct kvm_device *dev, u32 type); + +static struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; + static int kvm_vfio_create(struct kvm_device *dev, u32 type) { struct kvm_device *tmp; @@ -211,10 +221,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) return 0; } -struct kvm_device_ops kvm_vfio_ops = { - .name = "kvm-vfio", - .create = kvm_vfio_create, - .destroy = kvm_vfio_destroy, - .set_attr = kvm_vfio_set_attr, - .has_attr = kvm_vfio_has_attr, -}; +static int __init kvm_vfio_ops_init(void) +{ + return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); +} +module_init(kvm_vfio_ops_init); From fa0603dfed451534fda7856d1301a6f81ae5de82 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jun 2014 12:27:18 +0200 Subject: [PATCH 1343/1368] arm/arm64: KVM: Rename irq_state to irq_pending The irq_state field on the distributor struct is ambiguous in its meaning; the comment says it's the level of the input put, but that doesn't make much sense for edge-triggered interrupts. The code actually uses this state variable to check if the interrupt is in the pending state on the distributor so clarify the comment and rename the actual variable and accessor methods. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 227844f53864077ccaefe01d0960fcccc03445ce) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 4 ++-- virt/kvm/arm/vgic.c | 52 +++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 35b0c121bb65..388d442eecb5 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,8 +140,8 @@ struct vgic_dist { /* Interrupt enabled (one bit per IRQ) */ struct vgic_bitmap irq_enabled; - /* Interrupt 'pin' level */ - struct vgic_bitmap irq_state; + /* Interrupt state is pending on the distributor */ + struct vgic_bitmap irq_pending; /* Level-triggered interrupt in progress */ struct vgic_bitmap irq_active; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8c95b2468cd8..3d04a6de6f23 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -37,7 +37,7 @@ * * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_state vgic + * - VGIC pending interrupts are stored on the vgic.irq_pending vgic * bitmap (this bitmap is updated by both user land ioctls and guest * mmio ops, and other in-kernel peripherals such as the * arch. timers) and indicate the 'wire' state. @@ -45,8 +45,8 @@ * recalculated * - To calculate the oracle, we need info for each cpu from * compute_pending_for_cpu, which considers: - * - PPI: dist->irq_state & dist->irq_enable - * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target + * - PPI: dist->irq_pending & dist->irq_enable + * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target * - irq_spi_target is a 'formatted' version of the GICD_ICFGR * registers, stored on each vcpu. We only keep one bit of * information per interrupt, making sure that only one vcpu can @@ -221,21 +221,21 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); + return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); } -static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) +static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); + vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); } -static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) +static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); + vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0); } static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) @@ -409,7 +409,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); @@ -425,7 +425,7 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); @@ -651,7 +651,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set(vcpu, lr.irq); + vgic_dist_irq_set_pending(vcpu, lr.irq); if (lr.irq < VGIC_NR_SGIS) dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; lr.state &= ~LR_STATE_PENDING; @@ -932,7 +932,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) kvm_for_each_vcpu(c, vcpu, kvm) { if (target_cpus & 1) { /* Flag the SGI as pending */ - vgic_dist_irq_set(vcpu, sgi); + vgic_dist_irq_set_pending(vcpu, sgi); dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); } @@ -952,11 +952,11 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; pend_shared = vcpu->arch.vgic_cpu.pending_shared; - pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); + pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); - pending = vgic_bitmap_get_shared_map(&dist->irq_state); + pending = vgic_bitmap_get_shared_map(&dist->irq_pending); enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); bitmap_and(pend_shared, pend_shared, @@ -1160,7 +1160,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) * our emulated gic and can get rid of them. */ if (!sources) { - vgic_dist_irq_clear(vcpu, irq); + vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); return true; } @@ -1175,7 +1175,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) if (vgic_queue_irq(vcpu, 0, irq)) { if (vgic_irq_is_edge(vcpu, irq)) { - vgic_dist_irq_clear(vcpu, irq); + vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); } else { vgic_irq_set_active(vcpu, irq); @@ -1376,7 +1376,7 @@ static void vgic_kick_vcpus(struct kvm *kvm) static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) { - int is_edge = vgic_irq_is_edge(vcpu, irq); + int edge_triggered = vgic_irq_is_edge(vcpu, irq); int state = vgic_dist_irq_is_pending(vcpu, irq); /* @@ -1384,26 +1384,26 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) * - edge triggered and we have a rising edge * - level triggered and we change level */ - if (is_edge) + if (edge_triggered) return level > state; else return level != state; } -static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, +static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int is_edge, is_level; + int edge_triggered, level_triggered; int enabled; bool ret = true; spin_lock(&dist->lock); vcpu = kvm_get_vcpu(kvm, cpuid); - is_edge = vgic_irq_is_edge(vcpu, irq_num); - is_level = !is_edge; + edge_triggered = vgic_irq_is_edge(vcpu, irq_num); + level_triggered = !edge_triggered; if (!vgic_validate_injection(vcpu, irq_num, level)) { ret = false; @@ -1418,9 +1418,9 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); if (level) - vgic_dist_irq_set(vcpu, irq_num); + vgic_dist_irq_set_pending(vcpu, irq_num); else - vgic_dist_irq_clear(vcpu, irq_num); + vgic_dist_irq_clear_pending(vcpu, irq_num); enabled = vgic_irq_is_enabled(vcpu, irq_num); @@ -1429,7 +1429,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, goto out; } - if (is_level && vgic_irq_is_active(vcpu, irq_num)) { + if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) { /* * Level interrupt in progress, will be picked up * when EOId. @@ -1466,7 +1466,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) + if (vgic_update_irq_pending(kvm, cpuid, irq_num, level)) vgic_kick_vcpus(kvm); return 0; From 25b3fb8068b572c2b9f549b17f513092309aa610 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jun 2014 12:55:13 +0200 Subject: [PATCH 1344/1368] arm/arm64: KVM: Rename irq_active to irq_queued We have a special bitmap on the distributor struct to keep track of when level-triggered interrupts are queued on the list registers. This was named irq_active, which is confusing, because the active state of an interrupt as per the GIC spec is a different thing, not specifically related to edge-triggered/level-triggered configurations but rather indicates an interrupt which has been ack'ed but not yet eoi'ed. Rename the bitmap and the corresponding accessor functions to irq_queued to clarify what this is actually used for. Signed-off-by: Christoffer Dall (cherry picked from commit dbf20f9d8105cca531614c8bff9a74351e8e67e7) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 4 ++-- virt/kvm/arm/vgic.c | 33 +++++++++++++++++++-------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 388d442eecb5..7d8e61fa9928 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -143,8 +143,8 @@ struct vgic_dist { /* Interrupt state is pending on the distributor */ struct vgic_bitmap irq_pending; - /* Level-triggered interrupt in progress */ - struct vgic_bitmap irq_active; + /* Level-triggered interrupt queued on VCPU interface */ + struct vgic_bitmap irq_queued; /* Interrupt priority. Not used yet. */ struct vgic_bytemap irq_priority; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 3d04a6de6f23..769cc7177f10 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -60,12 +60,12 @@ * the 'line' again. This is achieved as such: * * - When a level interrupt is moved onto a vcpu, the corresponding - * bit in irq_active is set. As long as this bit is set, the line + * bit in irq_queued is set. As long as this bit is set, the line * will be ignored for further interrupts. The interrupt is injected * into the vcpu with the GICH_LR_EOI bit set (generate a * maintenance interrupt on EOI). * - When the interrupt is EOIed, the maintenance interrupt fires, - * and clears the corresponding bit in irq_active. This allow the + * and clears the corresponding bit in irq_queued. This allows the * interrupt line to be sampled again. */ @@ -196,25 +196,25 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); } -static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); + return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); } -static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); + vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1); } -static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); + vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) @@ -256,6 +256,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) vcpu->arch.vgic_cpu.pending_shared); } +static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) +{ + return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); +} + static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) { return le32_to_cpu(*((u32 *)mmio->data)) & mask; @@ -1079,8 +1084,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { vgic_retire_lr(lr, vlr.irq, vcpu); - if (vgic_irq_is_active(vcpu, vlr.irq)) - vgic_irq_clear_active(vcpu, vlr.irq); + if (vgic_irq_is_queued(vcpu, vlr.irq)) + vgic_irq_clear_queued(vcpu, vlr.irq); } } } @@ -1170,7 +1175,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) { - if (vgic_irq_is_active(vcpu, irq)) + if (!vgic_can_sample_irq(vcpu, irq)) return true; /* level interrupt, already queued */ if (vgic_queue_irq(vcpu, 0, irq)) { @@ -1178,7 +1183,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); } else { - vgic_irq_set_active(vcpu, irq); + vgic_irq_set_queued(vcpu, irq); } return true; @@ -1262,7 +1267,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - vgic_irq_clear_active(vcpu, vlr.irq); + vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; vgic_set_lr(vcpu, lr, vlr); @@ -1429,7 +1434,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, goto out; } - if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) { + if (!vgic_can_sample_irq(vcpu, irq_num)) { /* * Level interrupt in progress, will be picked up * when EOId. From 520445af8ef7b1e8d17b692c0957b3802b90d98a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:37:33 +0200 Subject: [PATCH 1345/1368] arm/arm64: KVM: vgic: Clear queued flags on unqueue If we unqueue a level-triggered interrupt completely, and the LR does not stick around in the active state (and will therefore no longer generate a maintenance interrupt), then we should clear the queued flag so that the vgic can actually queue this level-triggered interrupt at a later time and deal with its pending state then. Note: This should actually be properly fixed to handle the active state on the distributor. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit cced50c9280ef7ca1af48080707a170efa1adfa0) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 769cc7177f10..c7d068976069 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -667,8 +667,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * active), then the LR does not hold any useful info and can * be marked as free for other use. */ - if (!(lr.state & LR_STATE_MASK)) + if (!(lr.state & LR_STATE_MASK)) { vgic_retire_lr(i, lr.irq, vcpu); + vgic_irq_clear_queued(vcpu, lr.irq); + } /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); From fe8a7fe10d123135d9f8fee2eb0b0c14b31d609d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 21:54:51 +0200 Subject: [PATCH 1346/1368] arm/arm64: KVM: vgic: Improve handling of GICD_I{CS}PENDRn Writes to GICD_ISPENDRn and GICD_ICPENDRn are currently not handled correctly for level-triggered interrupts. The spec states that for level-triggered interrupts, writes to the GICD_ISPENDRn activate the output of a flip-flop which is in turn or'ed with the actual input interrupt signal. Correspondingly, writes to GICD_ICPENDRn simply deactivates the output of that flip-flop, but does not (of course) affect the external input signal. Reads from GICC_IAR will also deactivate the flip-flop output. This requires us to track the state of the level-input separately from the state in the flip-flop. We therefore introduce two new variables on the distributor struct to track these two states. Astute readers may notice that this is introducing more state than required (because an OR of the two states gives you the pending state), but the remaining vgic code uses the pending bitmap for optimized operations to figure out, at the end of the day, if an interrupt is pending or not on the distributor side. Refactoring the code to consider the two state variables all the places where we currently access the precomputed pending value, did not look pretty. Signed-off-by: Christoffer Dall (cherry picked from commit faa1b46c3e9f4d40359aee04ff275eea5f4cae3a) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 16 +++++- virt/kvm/arm/vgic.c | 119 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 123 insertions(+), 12 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7d8e61fa9928..f074539c6ac5 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,9 +140,23 @@ struct vgic_dist { /* Interrupt enabled (one bit per IRQ) */ struct vgic_bitmap irq_enabled; - /* Interrupt state is pending on the distributor */ + /* Level-triggered interrupt external input is asserted */ + struct vgic_bitmap irq_level; + + /* + * Interrupt state is pending on the distributor + */ struct vgic_bitmap irq_pending; + /* + * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered + * interrupts. Essentially holds the state of the flip-flop in + * Figure 4-10 on page 4-101 in ARM IHI 0048B.b. + * Once set, it is only cleared for level-triggered interrupts on + * guest ACKs (when we queue it) or writes to GICD_ICPENDRn. + */ + struct vgic_bitmap irq_soft_pend; + /* Level-triggered interrupt queued on VCPU interface */ struct vgic_bitmap irq_queued; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c7d068976069..07e97de1851b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -67,6 +67,11 @@ * - When the interrupt is EOIed, the maintenance interrupt fires, * and clears the corresponding bit in irq_queued. This allows the * interrupt line to be sampled again. + * - Note that level-triggered interrupts can also be set to pending from + * writes to GICD_ISPENDRn and lowering the external input line does not + * cause the interrupt to become inactive in such a situation. + * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become + * inactive as long as the external input line is held high. */ #define VGIC_ADDR_UNDEF (-1) @@ -217,6 +222,41 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } +static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq); +} + +static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1); +} + +static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0); +} + +static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq); +} + +static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); +} + static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -414,11 +454,26 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, - vcpu->vcpu_id, offset); + u32 *reg; + u32 level_mask; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); + level_mask = (~(*reg)); + + /* Mark both level and edge triggered irqs as pending */ + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + if (mmio->is_write) { + /* Set the soft-pending flag only for level-triggered irqs */ + reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + *reg &= level_mask; + vgic_update_state(vcpu->kvm); return true; } @@ -430,11 +485,27 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, - vcpu->vcpu_id, offset); + u32 *level_active; + u32 *reg; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { + /* Re-set level triggered level-active interrupts */ + level_active = vgic_bitmap_get_reg(&dist->irq_level, + vcpu->vcpu_id, offset); + reg = vgic_bitmap_get_reg(&dist->irq_pending, + vcpu->vcpu_id, offset); + *reg |= *level_active; + + /* Clear soft-pending flags */ + reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + vgic_update_state(vcpu->kvm); return true; } @@ -1268,17 +1339,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; vgic_set_lr(vcpu, lr, vlr); + /* + * If the IRQ was EOIed it was also ACKed and we we + * therefore assume we can clear the soft pending + * state (should it had been set) for this interrupt. + * + * Note: if the IRQ soft pending state was set after + * the IRQ was acked, it actually shouldn't be + * cleared, but we have no way of knowing that unless + * we start trapping ACKs when the soft-pending state + * is set. + */ + vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { vgic_cpu_irq_set(vcpu, vlr.irq); level_pending = true; } else { + vgic_dist_irq_clear_pending(vcpu, vlr.irq); vgic_cpu_irq_clear(vcpu, vlr.irq); } @@ -1384,17 +1470,19 @@ static void vgic_kick_vcpus(struct kvm *kvm) static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) { int edge_triggered = vgic_irq_is_edge(vcpu, irq); - int state = vgic_dist_irq_is_pending(vcpu, irq); /* * Only inject an interrupt if: * - edge triggered and we have a rising edge * - level triggered and we change level */ - if (edge_triggered) + if (edge_triggered) { + int state = vgic_dist_irq_is_pending(vcpu, irq); return level > state; - else + } else { + int state = vgic_dist_irq_get_level(vcpu, irq); return level != state; + } } static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, @@ -1424,10 +1512,19 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); - if (level) + if (level) { + if (level_triggered) + vgic_dist_irq_set_level(vcpu, irq_num); vgic_dist_irq_set_pending(vcpu, irq_num); - else - vgic_dist_irq_clear_pending(vcpu, irq_num); + } else { + if (level_triggered) { + vgic_dist_irq_clear_level(vcpu, irq_num); + if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) + vgic_dist_irq_clear_pending(vcpu, irq_num); + } else { + vgic_dist_irq_clear_pending(vcpu, irq_num); + } + } enabled = vgic_irq_is_enabled(vcpu, irq_num); From dd684bca5c33f00bdb31d2fb9819d6581489104f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:30:45 +0200 Subject: [PATCH 1347/1368] arm/arm64: KVM: vgic: Fix SGI writes to GICD_I{CS}PENDR0 Writes to GICD_ISPENDR0 and GICD_ICPENDR0 ignore all settings of the pending state for SGIs. Make sure the implementation handles this correctly. Signed-off-by: Christoffer Dall (cherry picked from commit 9da48b5502622f9f0e49df957521ec43a0c9f4c1) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 07e97de1851b..4936a68d4b9b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -454,7 +454,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg; + u32 *reg, orig; u32 level_mask; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -463,6 +463,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, /* Mark both level and edge triggered irqs as pending */ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); + orig = *reg; vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); @@ -474,6 +475,12 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); *reg &= level_mask; + /* Ignore writes to SGIs */ + if (offset < 2) { + *reg &= ~0xffff; + *reg |= orig & 0xffff; + } + vgic_update_state(vcpu->kvm); return true; } @@ -486,10 +493,11 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, phys_addr_t offset) { u32 *level_active; - u32 *reg; + u32 *reg, orig; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); + orig = *reg; vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { @@ -500,6 +508,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id, offset); *reg |= *level_active; + /* Ignore writes to SGIs */ + if (offset < 2) { + *reg &= ~0xffff; + *reg |= orig & 0xffff; + } + /* Clear soft-pending flags */ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, vcpu->vcpu_id, offset); From ce492b1937416dc0f6da42a887b5190b1ca9b4fe Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:34:04 +0200 Subject: [PATCH 1348/1368] arm/arm64: KVM: vgic: Clarify and correct vgic documentation The VGIC virtual distributor implementation documentation was written a very long time ago, before the true nature of the beast had been partially absorbed into my bloodstream. Clarify the docs. Plus, it fixes an actual bug. ICFRn, pfff. Signed-off-by: Christoffer Dall (cherry picked from commit 7e362919a59e6fc60e08ad1cf0b047291d1ca2e9) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 4936a68d4b9b..ff88dbcacc29 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -36,21 +36,22 @@ * How the whole thing works (courtesy of Christoffer Dall): * * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if - * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_pending vgic - * bitmap (this bitmap is updated by both user land ioctls and guest - * mmio ops, and other in-kernel peripherals such as the - * arch. timers) and indicate the 'wire' state. + * something is pending on the CPU interface. + * - Interrupts that are pending on the distributor are stored on the + * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land + * ioctls and guest mmio ops, and other in-kernel peripherals such as the + * arch. timers). * - Every time the bitmap changes, the irq_pending_on_cpu oracle is * recalculated * - To calculate the oracle, we need info for each cpu from * compute_pending_for_cpu, which considers: * - PPI: dist->irq_pending & dist->irq_enable * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target - * - irq_spi_target is a 'formatted' version of the GICD_ICFGR + * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn * registers, stored on each vcpu. We only keep one bit of * information per interrupt, making sure that only one vcpu can * accept the interrupt. + * - If any of the above state changes, we must recalculate the oracle. * - The same is true when injecting an interrupt, except that we only * consider a single interrupt at a time. The irq_spi_cpu array * contains the target CPU for each SPI. From aef14f44d897ca82917d6e5d5ad847c4a9e24c40 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:00 +0100 Subject: [PATCH 1349/1368] KVM: ARM: vgic: plug irq injection race As it stands, nothing prevents userspace from injecting an interrupt before the guest's GIC is actually initialized. This goes unnoticed so far (as everything is pretty much statically allocated), but ends up exploding in a spectacular way once we switch to a more dynamic allocation (the GIC data structure isn't there yet). The fix is to test for the "ready" flag in the VGIC distributor before trying to inject the interrupt. Note that in order to avoid breaking userspace, we have to ignore what is essentially an error. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall (cherry picked from commit 71afaba4a2e98bb7bdeba5078370ab43d46e67a1) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index ff88dbcacc29..5744a49d7680 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1585,7 +1585,8 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - if (vgic_update_irq_pending(kvm, cpuid, irq_num, level)) + if (likely(vgic_initialized(kvm)) && + vgic_update_irq_pending(kvm, cpuid, irq_num, level)) vgic_kick_vcpus(kvm); return 0; From 38cb2f7b9a9ea6312429da05575a4bf6e697b573 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:01 +0100 Subject: [PATCH 1350/1368] arm/arm64: KVM: vgic: switch to dynamic allocation So far, all the VGIC data structures are statically defined by the *maximum* number of vcpus and interrupts it supports. It means that we always have to oversize it to cater for the worse case. Start by changing the data structures to be dynamically sizeable, and allocate them at runtime. The sizes are still very static though. Signed-off-by: Marc Zyngier (cherry picked from commit c1bfb577addd4867a82c4f235824a315d5afb94a) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 3 + include/kvm/arm_vgic.h | 76 ++++++++++--- virt/kvm/arm/vgic.c | 243 +++++++++++++++++++++++++++++++++++------ 3 files changed, 269 insertions(+), 53 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c8ff64b54459..9e374158363a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -161,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; } } + + kvm_vgic_destroy(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -243,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); + kvm_vgic_vcpu_destroy(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f074539c6ac5..fd1b8f252da1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -54,19 +54,33 @@ * - a bunch of shared interrupts (SPI) */ struct vgic_bitmap { - union { - u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); - } percpu[VGIC_MAX_CPUS]; - union { - u32 reg[VGIC_NR_SHARED_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); - } shared; + /* + * - One UL per VCPU for private interrupts (assumes UL is at + * least 32 bits) + * - As many UL as necessary for shared interrupts. + * + * The private interrupts are accessed via the "private" + * field, one UL per vcpu (the state for vcpu n is in + * private[n]). The shared interrupts are accessed via the + * "shared" pointer (IRQn state is at bit n-32 in the bitmap). + */ + unsigned long *private; + unsigned long *shared; }; struct vgic_bytemap { - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; - u32 shared[VGIC_NR_SHARED_IRQS / 4]; + /* + * - 8 u32 per VCPU for private interrupts + * - As many u32 as necessary for shared interrupts. + * + * The private interrupts are accessed via the "private" + * field, (the state for vcpu n is in private[n*8] to + * private[n*8 + 7]). The shared interrupts are accessed via + * the "shared" pointer (IRQn state is at byte (n-32)%4 of the + * shared[(n-32)/4] word). + */ + u32 *private; + u32 *shared; }; struct kvm_vcpu; @@ -127,6 +141,9 @@ struct vgic_dist { bool in_kernel; bool ready; + int nr_cpus; + int nr_irqs; + /* Virtual control interface mapping */ void __iomem *vctrl_base; @@ -166,15 +183,36 @@ struct vgic_dist { /* Level/edge triggered */ struct vgic_bitmap irq_cfg; - /* Source CPU per SGI and target CPU */ - u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; + /* + * Source CPU per SGI and target CPU: + * + * Each byte represent a SGI observable on a VCPU, each bit of + * this byte indicating if the corresponding VCPU has + * generated this interrupt. This is a GICv2 feature only. + * + * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are + * the SGIs observable on VCPUn. + */ + u8 *irq_sgi_sources; - /* Target CPU for each IRQ */ - u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; - struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; + /* + * Target CPU for each SPI: + * + * Array of available SPI, each byte indicating the target + * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32]. + */ + u8 *irq_spi_cpu; + + /* + * Reverse lookup of irq_spi_cpu for faster compute pending: + * + * Array of bitmaps, one per VCPU, describing if IRQn is + * routed to a particular VCPU. + */ + struct vgic_bitmap *irq_spi_target; /* Bitmap indicating which CPU has something pending */ - unsigned long irq_pending_on_cpu; + unsigned long *irq_pending_on_cpu; #endif }; @@ -204,11 +242,11 @@ struct vgic_v3_cpu_if { struct vgic_cpu { #ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ - u8 vgic_irq_lr_map[VGIC_NR_IRQS]; + u8 *vgic_irq_lr_map; /* Pending interrupts on this VCPU */ DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); + unsigned long *pending_shared; /* Bitmap of used/free list registers */ DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); @@ -239,7 +277,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); +void kvm_vgic_destroy(struct kvm *kvm); int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5744a49d7680..102dde58c549 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -95,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); static void vgic_kick_vcpus(struct kvm *kvm); +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); @@ -105,10 +106,8 @@ static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; /* - * struct vgic_bitmap contains unions that provide two views of - * the same data. In one case it is an array of registers of - * u32's, and in the other case it is a bitmap of unsigned - * longs. + * struct vgic_bitmap contains a bitmap made of unsigned longs, but + * extracts u32s out of them. * * This does not work on 64-bit BE systems, because the bitmap access * will store two consecutive 32-bit words with the higher-addressed @@ -124,23 +123,45 @@ static const struct vgic_params *vgic; #define REG_OFFSET_SWIZZLE 0 #endif +static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs) +{ + int nr_longs; + + nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); + + b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL); + if (!b->private) + return -ENOMEM; + + b->shared = b->private + nr_cpus; + + return 0; +} + +static void vgic_free_bitmap(struct vgic_bitmap *b) +{ + kfree(b->private); + b->private = NULL; + b->shared = NULL; +} + static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { offset >>= 2; if (!offset) - return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); + return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE; else - return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); + return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE); } static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, int cpuid, int irq) { if (irq < VGIC_NR_PRIVATE_IRQS) - return test_bit(irq, x->percpu[cpuid].reg_ul); + return test_bit(irq, x->private + cpuid); - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); + return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); } static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, @@ -149,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, unsigned long *reg; if (irq < VGIC_NR_PRIVATE_IRQS) { - reg = x->percpu[cpuid].reg_ul; + reg = x->private + cpuid; } else { - reg = x->shared.reg_ul; + reg = x->shared; irq -= VGIC_NR_PRIVATE_IRQS; } @@ -163,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) { - if (unlikely(cpuid >= VGIC_MAX_CPUS)) - return NULL; - return x->percpu[cpuid].reg_ul; + return x->private + cpuid; } static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) { - return x->shared.reg_ul; + return x->shared; +} + +static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs) +{ + int size; + + size = nr_cpus * VGIC_NR_PRIVATE_IRQS; + size += nr_irqs - VGIC_NR_PRIVATE_IRQS; + + x->private = kzalloc(size, GFP_KERNEL); + if (!x->private) + return -ENOMEM; + + x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32); + return 0; +} + +static void vgic_free_bytemap(struct vgic_bytemap *b) +{ + kfree(b->private); + b->private = NULL; + b->shared = NULL; } static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) { - offset >>= 2; - BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 8) - return x->percpu[cpuid] + offset; - else - return x->shared + offset - 8; + u32 *reg; + + if (offset < VGIC_NR_PRIVATE_IRQS) { + reg = x->private; + offset += cpuid * VGIC_NR_PRIVATE_IRQS; + } else { + reg = x->shared; + offset -= VGIC_NR_PRIVATE_IRQS; + } + + return reg + (offset / sizeof(u32)); } #define VGIC_CFG_LEVEL 0 @@ -744,7 +790,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) */ vgic_dist_irq_set_pending(vcpu, lr.irq); if (lr.irq < VGIC_NR_SGIS) - dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; + *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; lr.state &= ~LR_STATE_PENDING; vgic_set_lr(vcpu, i, lr); @@ -778,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, /* Copy source SGIs from distributor side */ for (sgi = min_sgi; sgi <= max_sgi; sgi++) { int shift = 8 * (sgi - min_sgi); - reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; + reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift; } mmio_data_write(mmio, ~0, reg); @@ -802,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, /* Clear pending SGIs on the distributor */ for (sgi = min_sgi; sgi <= max_sgi; sgi++) { u8 mask = reg >> (8 * (sgi - min_sgi)); + u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); if (set) { - if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) + if ((*src & mask) != mask) updated = true; - dist->irq_sgi_sources[vcpu_id][sgi] |= mask; + *src |= mask; } else { - if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) + if (*src & mask) updated = true; - dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; + *src &= ~mask; } } @@ -993,6 +1040,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) +{ + return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; +} + static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu->kvm; @@ -1026,7 +1078,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) if (target_cpus & 1) { /* Flag the SGI as pending */ vgic_dist_irq_set_pending(vcpu, sgi); - dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; + *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); } @@ -1073,14 +1125,14 @@ static void vgic_update_state(struct kvm *kvm) int c; if (!dist->enabled) { - set_bit(0, &dist->irq_pending_on_cpu); + set_bit(0, dist->irq_pending_on_cpu); return; } kvm_for_each_vcpu(c, vcpu, kvm) { if (compute_pending_for_cpu(vcpu)) { pr_debug("CPU%d has pending interrupts\n", c); - set_bit(c, &dist->irq_pending_on_cpu); + set_bit(c, dist->irq_pending_on_cpu); } } } @@ -1237,14 +1289,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) int vcpu_id = vcpu->vcpu_id; int c; - sources = dist->irq_sgi_sources[vcpu_id][irq]; + sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { if (vgic_queue_irq(vcpu, c, irq)) clear_bit(c, &sources); } - dist->irq_sgi_sources[vcpu_id][irq] = sources; + *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; /* * If the sources bitmap has been cleared it means that we @@ -1332,7 +1384,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * us. Claim we don't have anything pending. We'll * adjust that if needed while exiting. */ - clear_bit(vcpu_id, &dist->irq_pending_on_cpu); + clear_bit(vcpu_id, dist->irq_pending_on_cpu); } } @@ -1430,7 +1482,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Check if we still have something up our sleeve... */ pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); if (level_pending || pending < vgic->nr_lr) - set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); + set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) @@ -1464,7 +1516,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) if (!irqchip_in_kernel(vcpu->kvm)) return 0; - return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); + return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } static void vgic_kick_vcpus(struct kvm *kvm) @@ -1559,7 +1611,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, if (level) { vgic_cpu_irq_set(vcpu, irq_num); - set_bit(cpuid, &dist->irq_pending_on_cpu); + set_bit(cpuid, dist->irq_pending_on_cpu); } out: @@ -1603,6 +1655,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + kfree(vgic_cpu->pending_shared); + kfree(vgic_cpu->vgic_irq_lr_map); + vgic_cpu->pending_shared = NULL; + vgic_cpu->vgic_irq_lr_map = NULL; +} + +static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; + vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); + + if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { + kvm_vgic_vcpu_destroy(vcpu); + return -ENOMEM; + } + + return 0; +} + /** * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state * @vcpu: pointer to the vcpu struct @@ -1642,6 +1720,97 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +void kvm_vgic_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_destroy(vcpu); + + vgic_free_bitmap(&dist->irq_enabled); + vgic_free_bitmap(&dist->irq_level); + vgic_free_bitmap(&dist->irq_pending); + vgic_free_bitmap(&dist->irq_soft_pend); + vgic_free_bitmap(&dist->irq_queued); + vgic_free_bitmap(&dist->irq_cfg); + vgic_free_bytemap(&dist->irq_priority); + if (dist->irq_spi_target) { + for (i = 0; i < dist->nr_cpus; i++) + vgic_free_bitmap(&dist->irq_spi_target[i]); + } + kfree(dist->irq_sgi_sources); + kfree(dist->irq_spi_cpu); + kfree(dist->irq_spi_target); + kfree(dist->irq_pending_on_cpu); + dist->irq_sgi_sources = NULL; + dist->irq_spi_cpu = NULL; + dist->irq_spi_target = NULL; + dist->irq_pending_on_cpu = NULL; +} + +/* + * Allocate and initialize the various data structures. Must be called + * with kvm->lock held! + */ +static int vgic_init_maps(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int nr_cpus, nr_irqs; + int ret, i; + + nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS; + nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; + + ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); + ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); + + if (ret) + goto out; + + dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL); + dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL); + dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus, + GFP_KERNEL); + dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), + GFP_KERNEL); + if (!dist->irq_sgi_sources || + !dist->irq_spi_cpu || + !dist->irq_spi_target || + !dist->irq_pending_on_cpu) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < nr_cpus; i++) + ret |= vgic_init_bitmap(&dist->irq_spi_target[i], + nr_cpus, nr_irqs); + + if (ret) + goto out; + + kvm_for_each_vcpu(i, vcpu, kvm) { + ret = vgic_vcpu_init_maps(vcpu, nr_irqs); + if (ret) { + kvm_err("VGIC: Failed to allocate vcpu memory\n"); + break; + } + } + +out: + if (ret) + kvm_vgic_destroy(kvm); + + return ret; +} + /** * kvm_vgic_init - Initialize global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct @@ -1722,6 +1891,10 @@ int kvm_vgic_create(struct kvm *kvm) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + ret = vgic_init_maps(kvm); + if (ret) + kvm_err("Unable to allocate maps\n"); + out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); From ac2409584bc7bb3b8cff23884e65cf341860d674 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:02 +0100 Subject: [PATCH 1351/1368] arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS Having a dynamic number of supported interrupts means that we cannot relly on VGIC_NR_SHARED_IRQS being fixed anymore. Instead, make it take the distributor structure as a parameter, so it can return the right value. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit fb65ab63b8cae510ea1e43e68b5da2f9980aa6d5) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 - virt/kvm/arm/vgic.c | 16 +++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index fd1b8f252da1..b2f9936df319 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -29,7 +29,6 @@ #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) #define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 102dde58c549..a24fdacf5381 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1086,11 +1086,17 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) } } +static int vgic_nr_shared_irqs(struct vgic_dist *dist) +{ + return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; +} + static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; unsigned long *pending, *enabled, *pend_percpu, *pend_shared; unsigned long pending_private, pending_shared; + int nr_shared = vgic_nr_shared_irqs(dist); int vcpu_id; vcpu_id = vcpu->vcpu_id; @@ -1103,15 +1109,15 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) pending = vgic_bitmap_get_shared_map(&dist->irq_pending); enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); + bitmap_and(pend_shared, pending, enabled, nr_shared); bitmap_and(pend_shared, pend_shared, vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - VGIC_NR_SHARED_IRQS); + nr_shared); pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); - pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); + pending_shared = find_first_bit(pend_shared, nr_shared); return (pending_private < VGIC_NR_PRIVATE_IRQS || - pending_shared < VGIC_NR_SHARED_IRQS); + pending_shared < vgic_nr_shared_irqs(dist)); } /* @@ -1368,7 +1374,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) } /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { + for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) overflow = 1; } From 3e0fb55b4af81f2364b1969bdbf31bf67883a4d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:03 +0100 Subject: [PATCH 1352/1368] arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS We now have the information about the number of CPU interfaces in the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just rely on KVM_MAX_VCPUS where we don't have the choice. Yet. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit fc675e355e705a046df7b635d3f3330c0ad94569) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 3 +-- virt/kvm/arm/vgic.c | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b2f9936df319..3b73d7845124 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -29,13 +29,12 @@ #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_MAX_CPUS KVM_MAX_VCPUS #define VGIC_V2_MAX_LRS (1 << 6) #define VGIC_V3_MAX_LRS 16 /* Sanity checks... */ -#if (VGIC_MAX_CPUS > 8) +#if (KVM_MAX_VCPUS > 8) #error Invalid number of CPU interfaces #endif diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a24fdacf5381..df0700bd0f23 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1297,7 +1297,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); - for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { + for_each_set_bit(c, &sources, dist->nr_cpus) { if (vgic_queue_irq(vcpu, c, irq)) clear_bit(c, &sources); } @@ -1700,7 +1700,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) + if (vcpu->vcpu_id >= dist->nr_cpus) return -EBUSY; for (i = 0; i < VGIC_NR_IRQS; i++) { @@ -1767,7 +1767,7 @@ static int vgic_init_maps(struct kvm *kvm) int nr_cpus, nr_irqs; int ret, i; - nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS; + nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); From 35a089f06fa6d1f02265be8b1e31e3775003d5c8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:04 +0100 Subject: [PATCH 1353/1368] arm/arm64: KVM: vgic: handle out-of-range MMIO accesses Now that we can (almost) dynamically size the number of interrupts, we're facing an interesting issue: We have to evaluate at runtime whether or not an access hits a valid register, based on the sizing of this particular instance of the distributor. Furthermore, the GIC spec says that accessing a reserved register is RAZ/WI. For this, add a new field to our range structure, indicating the number of bits a single interrupts uses. That allows us to find out whether or not the access is in range. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit c3c918361adcceb816c92b21dd95d2b46fb96a8f) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 3 ++- virt/kvm/arm/vgic.c | 56 +++++++++++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3b73d7845124..2767f939f47c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -32,6 +32,7 @@ #define VGIC_V2_MAX_LRS (1 << 6) #define VGIC_V3_MAX_LRS 16 +#define VGIC_MAX_IRQS 1024 /* Sanity checks... */ #if (KVM_MAX_VCPUS > 8) @@ -42,7 +43,7 @@ #error "VGIC_NR_IRQS must be a multiple of 32" #endif -#if (VGIC_NR_IRQS > 1024) +#if (VGIC_NR_IRQS > VGIC_MAX_IRQS) #error "VGIC_NR_IRQS must be <= 1024" #endif diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index df0700bd0f23..de975c908301 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -895,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, struct mmio_range { phys_addr_t base; unsigned long len; + int bits_per_irq; bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset); }; @@ -903,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, + .bits_per_irq = 0, .handle_mmio = handle_mmio_misc, }, { .base = GIC_DIST_IGROUP, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_ENABLE_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_set_enable_reg, }, { .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_enable_reg, }, { .base = GIC_DIST_PENDING_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_set_pending_reg, }, { .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_pending_reg, }, { .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_PRI, - .len = VGIC_NR_IRQS, + .len = VGIC_MAX_IRQS, + .bits_per_irq = 8, .handle_mmio = handle_mmio_priority_reg, }, { .base = GIC_DIST_TARGET, - .len = VGIC_NR_IRQS, + .len = VGIC_MAX_IRQS, + .bits_per_irq = 8, .handle_mmio = handle_mmio_target_reg, }, { .base = GIC_DIST_CONFIG, - .len = VGIC_NR_IRQS / 4, + .len = VGIC_MAX_IRQS / 4, + .bits_per_irq = 2, .handle_mmio = handle_mmio_cfg_reg, }, { @@ -990,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges, return NULL; } +static bool vgic_validate_access(const struct vgic_dist *dist, + const struct mmio_range *range, + unsigned long offset) +{ + int irq; + + if (!range->bits_per_irq) + return true; /* Not an irq-based access */ + + irq = offset * 8 / range->bits_per_irq; + if (irq >= dist->nr_irqs) + return false; + + return true; +} + /** * vgic_handle_mmio - handle an in-kernel MMIO access * @vcpu: pointer to the vcpu performing the access @@ -1029,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, spin_lock(&vcpu->kvm->arch.vgic.lock); offset = mmio->phys_addr - range->base - base; - updated_state = range->handle_mmio(vcpu, mmio, offset); + if (vgic_validate_access(dist, range, offset)) { + updated_state = range->handle_mmio(vcpu, mmio, offset); + } else { + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + updated_state = false; + } spin_unlock(&vcpu->kvm->arch.vgic.lock); kvm_prepare_mmio(run, mmio); kvm_handle_mmio_return(vcpu, run); From ddfab003a2dbff1e226a24860e8f4f91a7d3d131 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:05 +0100 Subject: [PATCH 1354/1368] arm/arm64: KVM: vgic: kill VGIC_NR_IRQS Nuke VGIC_NR_IRQS entierly, now that the distributor instance contains the number of IRQ allocated to this GIC. Also add VGIC_NR_IRQS_LEGACY to preserve the current API. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 5fb66da64064d0cb8dcce4cc8bf4cb1b921b13a0) Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 6 +++--- virt/kvm/arm/vgic.c | 17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2767f939f47c..aa20d4a7242f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -25,7 +25,7 @@ #include #include -#define VGIC_NR_IRQS 256 +#define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) @@ -39,11 +39,11 @@ #error Invalid number of CPU interfaces #endif -#if (VGIC_NR_IRQS & 31) +#if (VGIC_NR_IRQS_LEGACY & 31) #error "VGIC_NR_IRQS must be a multiple of 32" #endif -#if (VGIC_NR_IRQS > VGIC_MAX_IRQS) +#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS) #error "VGIC_NR_IRQS must be <= 1024" #endif diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index de975c908301..49501bbc2709 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -439,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, case 4: /* GICD_TYPER */ reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (VGIC_NR_IRQS >> 5) - 1; + reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; @@ -1277,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_lr vlr; int lr; /* Sanitize the input... */ BUG_ON(sgi_source_id & ~7); BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); - BUG_ON(irq >= VGIC_NR_IRQS); + BUG_ON(irq >= dist->nr_irqs); kvm_debug("Queue IRQ%d\n", irq); @@ -1515,7 +1516,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) vlr = vgic_get_lr(vcpu, lr); - BUG_ON(vlr.irq >= VGIC_NR_IRQS); + BUG_ON(vlr.irq >= dist->nr_irqs); vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } @@ -1737,7 +1738,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) if (vcpu->vcpu_id >= dist->nr_cpus) return -EBUSY; - for (i = 0; i < VGIC_NR_IRQS; i++) { + for (i = 0; i < dist->nr_irqs; i++) { if (i < VGIC_NR_PPIS) vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); @@ -1802,7 +1803,11 @@ static int vgic_init_maps(struct kvm *kvm) int ret, i; nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; - nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; + + if (!dist->nr_irqs) + dist->nr_irqs = VGIC_NR_IRQS_LEGACY; + + nr_irqs = dist->nr_irqs; ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); @@ -1886,7 +1891,7 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) + for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) vgic_set_target_reg(kvm, 0, i); kvm->arch.vgic.ready = true; From b9ca28a414c2e5dfb707bcba625eb76f334c787f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:06 +0100 Subject: [PATCH 1355/1368] arm/arm64: KVM: vgic: delay vgic allocation until init time It is now quite easy to delay the allocation of the vgic tables until we actually require it to be up and running (when the first vcpu is kicking around, or someones tries to access the GIC registers). This allow us to allocate memory for the exact number of CPUs we have. As nobody configures the number of interrupts just yet, use a fallback to VGIC_NR_IRQS_LEGACY. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 4956f2bc1fdee4bc336532f3f34635a8534cedfd) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 ------- include/kvm/arm_vgic.h | 1 - virt/kvm/arm/vgic.c | 42 +++++++++++++++++++++++++++++------------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9e374158363a..072a2084005c 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -261,16 +261,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { - int ret; - /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; - /* Set up VGIC */ - ret = kvm_vgic_vcpu_init(vcpu); - if (ret) - return ret; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index aa20d4a7242f..2f2aac8448a4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -277,7 +277,6 @@ int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); void kvm_vgic_destroy(struct kvm *kvm); -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 49501bbc2709..e7bca4bb7fd1 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1729,15 +1729,12 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to * this vcpu and enable the VGIC for this VCPU */ -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (vcpu->vcpu_id >= dist->nr_cpus) - return -EBUSY; - for (i = 0; i < dist->nr_irqs; i++) { if (i < VGIC_NR_PPIS) vgic_bitmap_set_irq_val(&dist->irq_enabled, @@ -1757,8 +1754,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->nr_lr = vgic->nr_lr; vgic_enable(vcpu); - - return 0; } void kvm_vgic_destroy(struct kvm *kvm) @@ -1802,8 +1797,17 @@ static int vgic_init_maps(struct kvm *kvm) int nr_cpus, nr_irqs; int ret, i; - nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; + if (dist->nr_cpus) /* Already allocated */ + return 0; + nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); + if (!nr_cpus) /* No vcpus? Can't be good... */ + return -EINVAL; + + /* + * If nobody configured the number of interrupts, use the + * legacy one. + */ if (!dist->nr_irqs) dist->nr_irqs = VGIC_NR_IRQS_LEGACY; @@ -1849,6 +1853,9 @@ static int vgic_init_maps(struct kvm *kvm) } } + for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) + vgic_set_target_reg(kvm, 0, i); + out: if (ret) kvm_vgic_destroy(kvm); @@ -1867,6 +1874,7 @@ static int vgic_init_maps(struct kvm *kvm) */ int kvm_vgic_init(struct kvm *kvm) { + struct kvm_vcpu *vcpu; int ret = 0, i; if (!irqchip_in_kernel(kvm)) @@ -1884,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } + ret = vgic_init_maps(kvm); + if (ret) { + kvm_err("Unable to allocate maps\n"); + goto out; + } + ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { @@ -1891,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_init(vcpu); kvm->arch.vgic.ready = true; out: + if (ret) + kvm_vgic_destroy(kvm); mutex_unlock(&kvm->lock); return ret; } @@ -1936,10 +1952,6 @@ int kvm_vgic_create(struct kvm *kvm) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - ret = vgic_init_maps(kvm); - if (ret) - kvm_err("Unable to allocate maps\n"); - out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); @@ -2140,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + ret = vgic_init_maps(dev->kvm); + if (ret) + goto out; + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { ret = -EINVAL; goto out; From e1fde0a1e746c70d026c1388c9d1c0bce278ae01 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:07 +0100 Subject: [PATCH 1356/1368] arm/arm64: KVM: vgic: make number of irqs a configurable attribute In order to make the number of interrupts configurable, use the new fancy device management API to add KVM_DEV_ARM_VGIC_GRP_NR_IRQS as a VGIC configurable attribute. Userspace can now specify the exact size of the GIC (by increments of 32 interrupts). Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit a98f26f183801685ef57333de4bafd4bbc692c7c) Signed-off-by: Christoffer Dall --- .../virtual/kvm/devices/arm-vgic.txt | 10 +++++ arch/arm/include/uapi/asm/kvm.h | 1 + arch/arm64/include/uapi/asm/kvm.h | 1 + virt/kvm/arm/vgic.c | 37 +++++++++++++++++++ 4 files changed, 49 insertions(+) diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 7f4e91b1316b..df8b0c7540b6 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -71,3 +71,13 @@ Groups: Errors: -ENODEV: Getting or setting this register is not yet supported -EBUSY: One or more VCPUs are running + + KVM_DEV_ARM_VGIC_GRP_NR_IRQS + Attributes: + A value describing the number of interrupts (SGI, PPI and SPI) for + this GIC instance, ranging from 64 to 1024, in increments of 32. + + Errors: + -EINVAL: Value set is out of the expected range + -EBUSY: Value has already be set, or GIC has already been initialized + with default values. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 51257fda254b..09ee408c1a67 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -174,6 +174,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f4ec5a674d05..8e38878c87c6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -160,6 +160,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e7bca4bb7fd1..43b56c696752 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2253,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return vgic_attr_regs_access(dev, attr, ®, true); } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 val; + int ret = 0; + + if (get_user(val, uaddr)) + return -EFAULT; + + /* + * We require: + * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs + * - at most 1024 interrupts + * - a multiple of 32 interrupts + */ + if (val < (VGIC_NR_PRIVATE_IRQS + 32) || + val > VGIC_MAX_IRQS || + (val & 31)) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) + ret = -EBUSY; + else + dev->kvm->arch.vgic.nr_irqs = val; + + mutex_unlock(&dev->kvm->lock); + + return ret; + } } @@ -2289,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = put_user(reg, uaddr); break; } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); + break; + } } @@ -2325,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; return vgic_has_attr_regs(vgic_cpu_ranges, offset); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; } return -ENXIO; } From b6c20297368fc6c782c0e5709f0d261bfdee2f6e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 25 Sep 2014 18:41:07 +0200 Subject: [PATCH 1357/1368] arm/arm64: KVM: Fix set_clear_sgi_pend_reg offset The sgi values calculated in read_set_clear_sgi_pend_reg() and write_set_clear_sgi_pend_reg() were horribly incorrectly multiplied by 4 with catastrophic results in that subfunctions ended up overwriting memory not allocated for the expected purpose. This showed up as bugs in kfree() and the kernel complaining a lot of you turn on memory debugging. This addresses: http://marc.info/?l=kvm&m=141164910007868&w=2 Reported-by: Shannon Zhao Signed-off-by: Christoffer Dall (cherry picked from commit 0fea6d7628ed6e25a9ee1b67edf7c859718d39e8) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 43b56c696752..506693152e47 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -816,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int sgi; - int min_sgi = (offset & ~0x3) * 4; + int min_sgi = (offset & ~0x3); int max_sgi = min_sgi + 3; int vcpu_id = vcpu->vcpu_id; u32 reg = 0; @@ -837,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int sgi; - int min_sgi = (offset & ~0x3) * 4; + int min_sgi = (offset & ~0x3); int max_sgi = min_sgi + 3; int vcpu_id = vcpu->vcpu_id; u32 reg; From 17434ac66579bb31d6a129edb7dec7989b0a7a37 Mon Sep 17 00:00:00 2001 From: Joel Schopp Date: Wed, 9 Jul 2014 11:17:04 -0500 Subject: [PATCH 1358/1368] arm/arm64: KVM: Fix VTTBR_BADDR_MASK and pgd alloc The current aarch64 calculation for VTTBR_BADDR_MASK masks only 39 bits and not all the bits in the PA range. This is clearly a bug that manifests itself on systems that allocate memory in the higher address space range. [ Modified from Joel's original patch to be based on PHYS_MASK_SHIFT instead of a hard-coded value and to move the alignment check of the allocation to mmu.c. Also added a comment explaining why we hardcode the IPA range and changed the stage-2 pgd allocation to be based on the 40 bit IPA range instead of the maximum possible 48 bit PA range. - Christoffer ] Reviewed-by: Catalin Marinas Signed-off-by: Joel Schopp Signed-off-by: Christoffer Dall (cherry picked from commit dbff124e29fa24aff9705b354b5f4648cd96e0bb) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 4 ++-- arch/arm64/include/asm/kvm_arm.h | 13 ++++++++++++- arch/arm64/include/asm/kvm_mmu.h | 5 ++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 072a2084005c..15111ca6fe30 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -410,9 +410,9 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm->arch.pgd); + BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; - kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK; - kvm->arch.vttbr |= vmid; + kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); } diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index cc83520459ed..7fd3e27e3ccc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -122,6 +122,17 @@ #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 +/* + * We configure the Stage-2 page tables to always restrict the IPA space to be + * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are + * not known to exist and will break with this configuration. + * + * Note that when using 4K pages, we concatenate two first level page tables + * together. + * + * The magic numbers used for VTTBR_X in this patch can be found in Tables + * D4-23 and D4-25 in ARM DDI 0487A.b. + */ #ifdef CONFIG_ARM64_64K_PAGES /* * Stage2 translation configuration: @@ -149,7 +160,7 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (48LLU) #define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 737da742b293..a030d163840b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -59,10 +59,9 @@ #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) /* - * Align KVM with the kernel's view of physical memory. Should be - * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. + * We currently only support a 40bit IPA. */ -#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT +#define KVM_PHYS_SHIFT (40) #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) From 7f79ef21e90a97b535ca0f181929754cd72bda10 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 26 Sep 2014 12:29:34 +0200 Subject: [PATCH 1359/1368] arm/arm64: KVM: Report correct FSC for unsupported fault types When we catch something that's not a permission fault or a translation fault, we log the unsupported FSC in the kernel log, but we were masking off the bottom bits of the FSC which was not very helpful. Also correctly report the FSC for data and instruction faults rather than telling people it was a DFCS, which doesn't exist in the ARM ARM. Reviewed-by: Peter Maydell Signed-off-by: Christoffer Dall (cherry picked from commit 0496daa5cf99741ce8db82686b4c7446a37feabb) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/kvm/mmu.c | 8 +++++--- arch/arm64/include/asm/kvm_emulate.h | 5 +++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 69b746955fca..b9db269c6e61 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -148,6 +148,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) } static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; } diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index bb06f76a8f89..eea03069161b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -882,10 +882,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = kvm_vcpu_trap_get_fault(vcpu); + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { - kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", - kvm_vcpu_trap_get_class(vcpu), fault_status); + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), + (unsigned long)kvm_vcpu_get_hsr(vcpu)); return -EFAULT; } diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fdc3e21abd8d..5674a55b5518 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -173,6 +173,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) } static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; } From 2dc5e2cf90915059fbc39ef380605aef6adaef47 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 22 Sep 2014 15:52:48 +0100 Subject: [PATCH 1360/1368] arm: kvm: fix CPU hotplug On some platforms with no power management capabilities, the hotplug implementation is allowed to return from a smp_ops.cpu_die() call as a function return. Upon a CPU onlining event, the KVM CPU notifier tries to reinstall the hyp stub, which fails on platform where no reset took place following a hotplug event, with the message: CPU1: smp_ops.cpu_die() returned, trying to resuscitate CPU1: Booted secondary processor Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x80409540 unexpected data abort in Hyp mode at: 0x80401fe8 unexpected HVC/SVC trap in Hyp mode at: 0x805c6170 since KVM code is trying to reinstall the stub on a system where it is already configured. To prevent this issue, this patch adds a check in the KVM hotplug notifier that detects if the HYP stub really needs re-installing when a CPU is onlined and skips the installation call if the stub is already in place, which means that the CPU has not been reset. Signed-off-by: Vladimir Murzin Acked-by: Lorenzo Pieralisi Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 37a34ac1d4775aafbc73b9db53c7daebbbc67e6a) Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 15111ca6fe30..d0c8ee654bbf 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -808,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self, switch (action) { case CPU_STARTING: case CPU_STARTING_FROZEN: - cpu_init_hyp_mode(NULL); + if (__hyp_get_vectors() == hyp_default_vectors) + cpu_init_hyp_mode(NULL); break; } From ad4ef3e73e40166a585d71f36a521f9f6d5db3c4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 11:53:28 +0200 Subject: [PATCH 1361/1368] Revert "arm, kvm: fix double lock on cpu_add_remove_lock" This reverts commit d77503eadd2f16f2900b9be79a1dc6f37e8cd579. The whole register cpu hotplug fix series has not been applied, so LSK is released without this fix. If we ever include that series in LSK later, then this can be fixed later too. Signed-off-by: Ming Lei Reviewed-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki (cherry picked from commit 553f809e23f00976caea7a1ebdabaa58a6383e7d) Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 2 +- virt/kvm/arm/vgic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 22fa819a9b6a..5081e809821f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void) host_vtimer_irq = ppi; - err = __register_cpu_notifier(&kvm_timer_cpu_nb); + err = register_cpu_notifier(&kvm_timer_cpu_nb); if (err) { kvm_err("Cannot register timer CPU notifier\n"); goto out_free; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 506693152e47..8e1dc03342c3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2444,7 +2444,7 @@ int kvm_vgic_hyp_init(void) return ret; } - ret = __register_cpu_notifier(&vgic_cpu_nb); + ret = register_cpu_notifier(&vgic_cpu_nb); if (ret) { kvm_err("Cannot register vgic CPU notifier\n"); goto out_free_irq; From 7ebda0194ff1872ef36d93cf309cc4043ffba4ee Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jul 2013 15:02:11 -0700 Subject: [PATCH 1362/1368] include/linux/mm.h: add PAGE_ALIGNED() helper To test whether an address is aligned to PAGE_SIZE. Cc: HATAYAMA Daisuke Cc: "Eric W. Biederman" , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 0fa73b86ef0797ca4fde5334117ca0b330f08030) Signed-off-by: Christoffer Dall --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index e0c8528a41a4..f42c5baa47cc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -52,6 +52,9 @@ extern unsigned long sysctl_admin_reserve_kbytes; /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) +/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ +#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE) + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way From 8032ebeefc762abdcc8f14f08e9e1d3839996063 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 25 Feb 2014 08:41:09 +0100 Subject: [PATCH 1363/1368] ARM: 7990/1: asm: rename logical shift macros push pull into lspush lspull Renames logical shift macros, 'push' and 'pull', defined in arch/arm/include/asm/assembler.h, into 'lspush' and 'lspull'. That eliminates name conflict between 'push' logical shift macro and 'push' instruction mnemonic. That allows assembler.h to be included in .S files that use 'push' instruction. Suggested-by: Will Deacon Signed-off-by: Victor Kamensky Acked-by: Nicolas Pitre Signed-off-by: Russell King (cherry picked from commit d98b90ea22b0a28d9d787769704a9cf1ea5a513a) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/assembler.h | 8 +- arch/arm/lib/copy_template.S | 36 ++--- arch/arm/lib/csumpartialcopygeneric.S | 96 ++++++------- arch/arm/lib/io-readsl.S | 12 +- arch/arm/lib/io-writesl.S | 12 +- arch/arm/lib/memmove.S | 36 ++--- arch/arm/lib/uaccess.S | 192 +++++++++++++------------- 7 files changed, 196 insertions(+), 196 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 05ee9eebad6b..9271457adac8 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -30,8 +30,8 @@ * Endian independent macros for shifting bytes within registers. */ #ifndef __ARMEB__ -#define pull lsr -#define push lsl +#define lspull lsr +#define lspush lsl #define get_byte_0 lsl #0 #define get_byte_1 lsr #8 #define get_byte_2 lsr #16 @@ -41,8 +41,8 @@ #define put_byte_2 lsl #16 #define put_byte_3 lsl #24 #else -#define pull lsl -#define push lsr +#define lspull lsl +#define lspush lsr #define get_byte_0 lsr #24 #define get_byte_1 lsr #16 #define get_byte_2 lsr #8 diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 805e3f8fb007..3bc8eb811a73 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -197,24 +197,24 @@ 12: PLD( pld [r1, #124] ) 13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, pull #\pull + mov r3, lr, lspull #\pull subs r2, r2, #32 ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push + orr r3, r3, r4, lspush #\push + mov r4, r4, lspull #\pull + orr r4, r4, r5, lspush #\push + mov r5, r5, lspull #\pull + orr r5, r5, r6, lspush #\push + mov r6, r6, lspull #\pull + orr r6, r6, r7, lspush #\push + mov r7, r7, lspull #\pull + orr r7, r7, r8, lspush #\push + mov r8, r8, lspull #\pull + orr r8, r8, r9, lspush #\push + mov r9, r9, lspull #\pull + orr r9, r9, ip, lspush #\push + mov ip, ip, lspull #\pull + orr ip, ip, lr, lspush #\push str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f bge 12b PLD( cmn r2, #96 ) @@ -225,10 +225,10 @@ 14: ands ip, r2, #28 beq 16f -15: mov r3, lr, pull #\pull +15: mov r3, lr, lspull #\pull ldr1w r1, lr, abort=21f subs ip, ip, #4 - orr r3, r3, lr, push #\push + orr r3, r3, lr, lspush #\push str1w r0, r3, abort=21f bgt 15b CALGN( cmp r2, #0 ) diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d620a5f22a09..d6e742d24007 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -141,7 +141,7 @@ FN_ENTRY tst len, #2 mov r5, r4, get_byte_0 beq .Lexit - adcs sum, sum, r4, push #16 + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 @@ -171,23 +171,23 @@ FN_ENTRY cmp ip, #2 beq .Lsrc2_aligned bhi .Lsrc3_aligned - mov r4, r5, pull #8 @ C = 0 + mov r4, r5, lspull #8 @ C = 0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 - mov r7, r7, pull #8 - orr r7, r7, r8, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 + mov r7, r7, lspull #8 + orr r7, r7, r8, lspush #24 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #8 + mov r4, r8, lspull #8 sub ip, ip, #16 teq ip, #0 bne 1b @@ -196,50 +196,50 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #8 + mov r4, r6, lspull #8 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #24 + orr r4, r4, r5, lspush #24 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #8 + mov r4, r5, lspull #8 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 tst len, #2 beq .Lexit - adcs sum, sum, r4, push #16 + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 mov r5, r4, get_byte_2 b .Lexit -.Lsrc2_aligned: mov r4, r5, pull #16 +.Lsrc2_aligned: mov r4, r5, lspull #16 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 - mov r7, r7, pull #16 - orr r7, r7, r8, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 + mov r7, r7, lspull #16 + orr r7, r7, r8, lspush #16 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #16 + mov r4, r8, lspull #16 sub ip, ip, #16 teq ip, #0 bne 1b @@ -248,20 +248,20 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #16 + mov r4, r6, lspull #16 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #16 + orr r4, r4, r5, lspush #16 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #16 + mov r4, r5, lspull #16 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 @@ -276,24 +276,24 @@ FN_ENTRY load1b r5 b .Lexit -.Lsrc3_aligned: mov r4, r5, pull #24 +.Lsrc3_aligned: mov r4, r5, lspull #24 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 - mov r7, r7, pull #24 - orr r7, r7, r8, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 + mov r7, r7, lspull #24 + orr r7, r7, r8, lspush #8 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #24 + mov r4, r8, lspull #24 sub ip, ip, #16 teq ip, #0 bne 1b @@ -302,20 +302,20 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #24 + mov r4, r6, lspull #24 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #8 + orr r4, r4, r5, lspush #8 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #24 + mov r4, r5, lspull #24 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 @@ -326,7 +326,7 @@ FN_ENTRY load1l r4 mov r5, r4, get_byte_0 strb r5, [dst], #1 - adcs sum, sum, r4, push #24 + adcs sum, sum, r4, lspush #24 mov r5, r4, get_byte_1 b .Lexit FN_EXIT diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 5fb97e7f9f4b..7a7430950c79 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -47,25 +47,25 @@ ENTRY(__raw_readsl) strb ip, [r1], #1 4: subs r2, r2, #1 - mov ip, r3, pull #24 + mov ip, r3, lspull #24 ldrne r3, [r0] - orrne ip, ip, r3, push #8 + orrne ip, ip, r3, lspush #8 strne ip, [r1], #4 bne 4b b 8f 5: subs r2, r2, #1 - mov ip, r3, pull #16 + mov ip, r3, lspull #16 ldrne r3, [r0] - orrne ip, ip, r3, push #16 + orrne ip, ip, r3, lspush #16 strne ip, [r1], #4 bne 5b b 7f 6: subs r2, r2, #1 - mov ip, r3, pull #8 + mov ip, r3, lspull #8 ldrne r3, [r0] - orrne ip, ip, r3, push #24 + orrne ip, ip, r3, lspush #24 strne ip, [r1], #4 bne 6b diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index 8d3b7813725c..d0d104a0dd11 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -41,26 +41,26 @@ ENTRY(__raw_writesl) blt 5f bgt 6f -4: mov ip, r3, pull #16 +4: mov ip, r3, lspull #16 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #16 + orr ip, ip, r3, lspush #16 str ip, [r0] bne 4b mov pc, lr -5: mov ip, r3, pull #8 +5: mov ip, r3, lspull #8 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #24 + orr ip, ip, r3, lspush #24 str ip, [r0] bne 5b mov pc, lr -6: mov ip, r3, pull #24 +6: mov ip, r3, lspull #24 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #8 + orr ip, ip, r3, lspush #8 str ip, [r0] bne 6b mov pc, lr diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 938fc14f962d..d1fc0c0c342c 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -147,24 +147,24 @@ ENTRY(memmove) 12: PLD( pld [r1, #-128] ) 13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push + mov lr, r3, lspush #\push subs r2, r2, #32 ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull + orr lr, lr, ip, lspull #\pull + mov ip, ip, lspush #\push + orr ip, ip, r9, lspull #\pull + mov r9, r9, lspush #\push + orr r9, r9, r8, lspull #\pull + mov r8, r8, lspush #\push + orr r8, r8, r7, lspull #\pull + mov r7, r7, lspush #\push + orr r7, r7, r6, lspull #\pull + mov r6, r6, lspush #\push + orr r6, r6, r5, lspull #\pull + mov r5, r5, lspush #\push + orr r5, r5, r4, lspull #\pull + mov r4, r4, lspush #\push + orr r4, r4, r3, lspull #\pull stmdb r0!, {r4 - r9, ip, lr} bge 12b PLD( cmn r2, #96 ) @@ -175,10 +175,10 @@ ENTRY(memmove) 14: ands ip, r2, #28 beq 16f -15: mov lr, r3, push #\push +15: mov lr, r3, lspush #\push ldr r3, [r1, #-4]! subs ip, ip, #4 - orr lr, lr, r3, pull #\pull + orr lr, lr, r3, lspull #\pull str lr, [r0, #-4]! bgt 15b CALGN( cmp r2, #0 ) diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index 5c908b1cb8ed..e50520904b76 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -117,9 +117,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_1fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_1nowords - mov r3, r7, pull #8 + mov r3, r7, lspull #8 ldr r7, [r1], #4 - orr r3, r3, r7, push #24 + orr r3, r3, r7, lspush #24 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -131,30 +131,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_1rem8lp -.Lc2u_1cpy8lp: mov r3, r7, pull #8 +.Lc2u_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_1cpy8lp .Lc2u_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #24 + orrne r3, r3, r7, lspush #24 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_1fupi @@ -172,9 +172,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_2fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_2nowords - mov r3, r7, pull #16 + mov r3, r7, lspull #16 ldr r7, [r1], #4 - orr r3, r3, r7, push #16 + orr r3, r3, r7, lspush #16 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -186,30 +186,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_2rem8lp -.Lc2u_2cpy8lp: mov r3, r7, pull #16 +.Lc2u_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_2cpy8lp .Lc2u_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #16 + orrne r3, r3, r7, lspush #16 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_2fupi @@ -227,9 +227,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault .Lc2u_3fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_3nowords - mov r3, r7, pull #24 + mov r3, r7, lspull #24 ldr r7, [r1], #4 - orr r3, r3, r7, push #8 + orr r3, r3, r7, lspush #8 USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -241,30 +241,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_3rem8lp -.Lc2u_3cpy8lp: mov r3, r7, pull #24 +.Lc2u_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_3cpy8lp .Lc2u_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #8 + orrne r3, r3, r7, lspush #8 TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_3fupi @@ -382,9 +382,9 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault .Lcfu_1fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_1nowords - mov r3, r7, pull #8 + mov r3, r7, lspull #8 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #24 + orr r3, r3, r7, lspush #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -396,30 +396,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault subs ip, ip, #16 blt .Lcfu_1rem8lp -.Lcfu_1cpy8lp: mov r3, r7, pull #8 +.Lcfu_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} bpl .Lcfu_1cpy8lp .Lcfu_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #24 + orrne r3, r3, r7, lspush #24 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_1fupi @@ -437,9 +437,9 @@ USER( TUSER( ldrne) r7, [r1], #4) @ May fault .Lcfu_2fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_2nowords - mov r3, r7, pull #16 + mov r3, r7, lspull #16 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #16 + orr r3, r3, r7, lspush #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -452,30 +452,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault blt .Lcfu_2rem8lp -.Lcfu_2cpy8lp: mov r3, r7, pull #16 +.Lcfu_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} bpl .Lcfu_2cpy8lp .Lcfu_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #16 + orrne r3, r3, r7, lspush #16 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_2fupi @@ -493,9 +493,9 @@ USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault .Lcfu_3fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_3nowords - mov r3, r7, pull #24 + mov r3, r7, lspull #24 USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #8 + orr r3, r3, r7, lspush #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -507,30 +507,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault subs ip, ip, #16 blt .Lcfu_3rem8lp -.Lcfu_3cpy8lp: mov r3, r7, pull #24 +.Lcfu_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} @ Shouldnt fault - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} subs ip, ip, #16 bpl .Lcfu_3cpy8lp .Lcfu_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #8 + orrne r3, r3, r7, lspush #8 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_3fupi From 0db5306859a69c38c695400c34bde7b7df5f1806 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 May 2013 18:07:19 +0100 Subject: [PATCH 1364/1368] ARM: barrier: allow options to be passed to memory barrier instructions On ARMv7, the memory barrier instructions take an optional `option' field which can be used to constrain the effects of a memory barrier based on shareability and access type. This patch allows the caller to pass these options if required, and updates the smp_*() barriers to request inner-shareable barriers, affecting only stores for the _wmb variant. wmb() is also changed to use the -st version of dsb. Reported-by: Albin Tonnerre Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon (cherry picked from commit 3ea128065ed20d33bd02ff6dab689f88e38000be) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/assembler.h | 4 ++-- arch/arm/include/asm/barrier.h | 32 ++++++++++++++++---------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 9271457adac8..cab788045029 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -212,9 +212,9 @@ #ifdef CONFIG_SMP #if __LINUX_ARM_ARCH__ >= 7 .ifeqs "\mode","arm" - ALT_SMP(dmb) + ALT_SMP(dmb ish) .else - ALT_SMP(W(dmb)) + ALT_SMP(W(dmb) ish) .endif #elif __LINUX_ARM_ARCH__ == 6 ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 8dcd9c702d90..60f15e274e6d 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -14,27 +14,27 @@ #endif #if __LINUX_ARM_ARCH__ >= 7 -#define isb() __asm__ __volatile__ ("isb" : : : "memory") -#define dsb() __asm__ __volatile__ ("dsb" : : : "memory") -#define dmb() __asm__ __volatile__ ("dmb" : : : "memory") +#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory") +#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory") +#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6 -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ +#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ +#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ : : "r" (0) : "memory") #elif defined(CONFIG_CPU_FA526) -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ +#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") #else -#define isb() __asm__ __volatile__ ("" : : : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ +#define isb(x) __asm__ __volatile__ ("" : : : "memory") +#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") #endif #ifdef CONFIG_ARCH_HAS_BARRIERS @@ -42,7 +42,7 @@ #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) #define mb() do { dsb(); outer_sync(); } while (0) #define rmb() dsb() -#define wmb() mb() +#define wmb() do { dsb(st); outer_sync(); } while (0) #else #define mb() barrier() #define rmb() barrier() @@ -54,9 +54,9 @@ #define smp_rmb() barrier() #define smp_wmb() barrier() #else -#define smp_mb() dmb() -#define smp_rmb() dmb() -#define smp_wmb() dmb() +#define smp_mb() dmb(ish) +#define smp_rmb() smp_mb() +#define smp_wmb() dmb(ishst) #endif #define read_barrier_depends() do { } while(0) From 63d832d826b75fec2ed062e5a5dc107c5a5db158 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Wed, 31 Jul 2013 12:44:42 -0400 Subject: [PATCH 1365/1368] ARM: mm: Introduce virt_to_idmap() with an arch hook On some PAE systems (e.g. TI Keystone), memory is above the 32-bit addressable limit, and the interconnect provides an aliased view of parts of physical memory in the 32-bit addressable space. This alias is strictly for boot time usage, and is not otherwise usable because of coherency limitations. On such systems, the idmap mechanism needs to take this aliased mapping into account. This patch introduces virt_to_idmap() and a arch function pointer which can be populated by platform which needs it. Also populate necessary idmap spots with now available virt_to_idmap(). Avoided #ifdef approach to be compatible with multi-platform builds. Most architecture won't touch it and in that case virt_to_idmap() fall-back to existing virt_to_phys() macro. Cc: Russell King Acked-by: Nicolas Pitre Signed-off-by: Santosh Shilimkar (cherry picked from commit 4dc9a81715973cb137a14399420bb35b0ed7d6ef) Signed-off-by: Christoffer Dall --- arch/arm/include/asm/memory.h | 16 ++++++++++++++++ arch/arm/kernel/smp.c | 4 ++-- arch/arm/mm/idmap.c | 5 +++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 57870ab313c5..21b458e6b0b8 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -157,6 +157,7 @@ */ #define __PV_BITS_31_24 0x81000000 +extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x); extern unsigned long __pv_phys_offset; #define PHYS_OFFSET __pv_phys_offset @@ -232,6 +233,21 @@ static inline void *phys_to_virt(phys_addr_t x) #define __va(x) ((void *)__phys_to_virt((unsigned long)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +/* + * These are for systems that have a hardware interconnect supported alias of + * physical memory for idmap purposes. Most cases should leave these + * untouched. + */ +static inline phys_addr_t __virt_to_idmap(unsigned long x) +{ + if (arch_virt_to_idmap) + return arch_virt_to_idmap(x); + else + return __virt_to_phys(x); +} + +#define virt_to_idmap(x) __virt_to_idmap((unsigned long)(x)) + /* * Virtual <-> DMA view memory address translations * Again, these are *only* valid on the kernel direct mapped RAM diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5919eb451bb9..5a8ad2c8eda0 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -87,8 +87,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; - secondary_data.pgdir = virt_to_phys(idmap_pgd); - secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); + secondary_data.pgdir = virt_to_idmap(idmap_pgd); + secondary_data.swapper_pg_dir = virt_to_idmap(swapper_pg_dir); __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 83cb3ac27095..c0a1e48f6733 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -10,6 +10,7 @@ #include pgd_t *idmap_pgd; +phys_addr_t (*arch_virt_to_idmap) (unsigned long x); #ifdef CONFIG_ARM_LPAE static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, @@ -67,8 +68,8 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, unsigned long addr, end; unsigned long next; - addr = virt_to_phys(text_start); - end = virt_to_phys(text_end); + addr = virt_to_idmap(text_start); + end = virt_to_idmap(text_end); prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; From c4673ca7850dae7146ecc35c69e8f17f2337c7d2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Sep 2014 22:17:23 +0200 Subject: [PATCH 1366/1368] mm: export symbol dependencies of is_zero_pfn() In order to make the static inline function is_zero_pfn() callable by modules, export its symbol dependencies 'zero_pfn' and (for s390 and mips) 'zero_page_mask'. We need this for KVM, as CONFIG_KVM is a tristate for all supported architectures except ARM and arm64, and testing a pfn whether it refers to the zero page is required to correctly distinguish the zero page from other special RAM ranges that may also have the PG_reserved bit set, but need to be treated as MMIO memory. Signed-off-by: Ard Biesheuvel Acked-by: Andrew Morton Signed-off-by: Paolo Bonzini (cherry picked from commit 0b70068e47e8f0c813a902dc3d6def601fd15acb) Signed-off-by: Christoffer Dall --- arch/mips/mm/init.c | 1 + arch/s390/mm/init.c | 1 + mm/memory.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9b973e0af9cb..d340d53c345b 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -74,6 +74,7 @@ */ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); /* * Not static inline because used by IP27 special magic initialization code diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index eba15f18fd38..a4dfc0bd05db 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); static void __init setup_zero_pages(void) { diff --git a/mm/memory.c b/mm/memory.c index 5a35443c01ad..20bb9e901781 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; unsigned long highest_memmap_pfn __read_mostly; +EXPORT_SYMBOL(zero_pfn); + /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() */ From 331a9a729183832840d656036c5bc2b81942c294 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Sep 2014 15:16:00 +0200 Subject: [PATCH 1367/1368] KVM: check for !is_zero_pfn() in kvm_is_mmio_pfn() Read-only memory ranges may be backed by the zero page, so avoid misidentifying it a a MMIO pfn. This fixes another issue I identified when testing QEMU+KVM_UEFI, where a read to an uninitialized emulated NOR flash brought in the zero page, but mapped as a read-write device region, because kvm_is_mmio_pfn() misidentifies it as a MMIO pfn due to its PG_reserved bit being set. Signed-off-by: Ard Biesheuvel Fixes: b88657674d39 ("ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping") Signed-off-by: Paolo Bonzini (cherry picked from commit 85c8555ff07ef09261bd50d603cd4290cff5a8cc) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f019669674a5..b64d44219f27 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -110,7 +110,7 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); return true; } From b8a669d29702a8fb529f4fae450a86b8676b0e42 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 19 Sep 2014 09:40:41 +1000 Subject: [PATCH 1368/1368] KVM: correct null pid check in kvm_vcpu_yield_to() Correct a simple mistake of checking the wrong variable before a dereference, resulting in the dereference not being properly protected by rcu_dereference(). Signed-off-by: Sam Bobroff Signed-off-by: Paolo Bonzini (cherry picked from commit 27fbe64bfa63cfb9da025975b59d96568caa2d53) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b64d44219f27..9cae94206f41 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1728,7 +1728,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) rcu_read_lock(); pid = rcu_dereference(target->pid); if (pid) - task = get_pid_task(target->pid, PIDTYPE_PID); + task = get_pid_task(pid, PIDTYPE_PID); rcu_read_unlock(); if (!task) return ret;