From 83595f56588d5f5d677e62a576b6248328862b34 Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Tue, 9 Feb 2016 12:33:35 -0700 Subject: [PATCH 01/61] block: fix module reference leak on put_disk() call for cgroups throttle get_disk(),get_gendisk() calls have non explicit side effect: they increase the reference on the disk owner module. The following is the correct sequence how to get a disk reference and to put it: disk = get_gendisk(...); /* use disk */ owner = disk->fops->owner; put_disk(disk); module_put(owner); fs/block_dev.c is aware of this required module_put() call, but f.e. blkg_conf_finish(), which is located in block/blk-cgroup.c, does not put a module reference. To see a leakage in action cgroups throttle config can be used. In the following script I'm removing throttle for /dev/ram0 (actually this is NOP, because throttle was never set for this device): # lsmod | grep brd brd 5175 0 # i=100; while [ $i -gt 0 ]; do echo "1:0 0" > \ /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device; i=$(($i - 1)); \ done # lsmod | grep brd brd 5175 100 Now brd module has 100 references. The issue is fixed by calling module_put() just right away put_disk(). Signed-off-by: Roman Pen Cc: Gi-Oh Kim Cc: Tejun Heo Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jens Axboe (cherry picked from commit 39a169b62b415390398291080dafe63aec751e0a) Signed-off-by: Alex Shi --- block/blk-cgroup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 9d359e05fad7..8161090a1970 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -788,6 +788,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, { struct gendisk *disk; struct blkcg_gq *blkg; + struct module *owner; unsigned int major, minor; int key_len, part, ret; char *body; @@ -804,7 +805,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, if (!disk) return -ENODEV; if (part) { + owner = disk->fops->owner; put_disk(disk); + module_put(owner); return -ENODEV; } @@ -820,7 +823,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, ret = PTR_ERR(blkg); rcu_read_unlock(); spin_unlock_irq(disk->queue->queue_lock); + owner = disk->fops->owner; put_disk(disk); + module_put(owner); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue @@ -851,9 +856,13 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep); void blkg_conf_finish(struct blkg_conf_ctx *ctx) __releases(ctx->disk->queue->queue_lock) __releases(rcu) { + struct module *owner; + spin_unlock_irq(ctx->disk->queue->queue_lock); rcu_read_unlock(); + owner = ctx->disk->fops->owner; put_disk(ctx->disk); + module_put(owner); } EXPORT_SYMBOL_GPL(blkg_conf_finish); From aaaf9e59c00aa2ff9c2bd1da05c0cf1ba2a9634a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:20:15 -0800 Subject: [PATCH 02/61] mm: page_alloc: generalize the dirty balance reserve The dirty balance reserve that dirty throttling has to consider is merely memory not available to userspace allocations. There is nothing writeback-specific about it. Generalize the name so that it's reusable outside of that context. Signed-off-by: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit a8d0143730d7b42c9fe6d1435d92ecce6863a62a) Signed-off-by: Alex Shi --- include/linux/mmzone.h | 6 +++--- include/linux/swap.h | 1 - mm/page-writeback.c | 14 ++++++++++++-- mm/page_alloc.c | 21 +++------------------ 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e23a9e704536..9134ae3f61ff 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -361,10 +361,10 @@ struct zone { struct per_cpu_pageset __percpu *pageset; /* - * This is a per-zone reserve of pages that should not be - * considered dirtyable memory. + * This is a per-zone reserve of pages that are not available + * to userspace allocations. */ - unsigned long dirty_balance_reserve; + unsigned long totalreserve_pages; #ifndef CONFIG_SPARSEMEM /* diff --git a/include/linux/swap.h b/include/linux/swap.h index d8ca2eaa3a8b..f1a52c11de0e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -289,7 +289,6 @@ static inline void workingset_node_shadows_dec(struct radix_tree_node *node) /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; -extern unsigned long dirty_balance_reserve; extern unsigned long nr_free_buffer_pages(void); extern unsigned long nr_free_pagecache_pages(void); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index fd51ebfc423f..1e6769449ac2 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -278,7 +278,12 @@ static unsigned long zone_dirtyable_memory(struct zone *zone) unsigned long nr_pages; nr_pages = zone_page_state(zone, NR_FREE_PAGES); - nr_pages -= min(nr_pages, zone->dirty_balance_reserve); + /* + * Pages reserved for the kernel should not be considered + * dirtyable, to prevent a situation where reclaim has to + * clean pages in order to balance the zones. + */ + nr_pages -= min(nr_pages, zone->totalreserve_pages); nr_pages += zone_page_state(zone, NR_INACTIVE_FILE); nr_pages += zone_page_state(zone, NR_ACTIVE_FILE); @@ -332,7 +337,12 @@ static unsigned long global_dirtyable_memory(void) unsigned long x; x = global_page_state(NR_FREE_PAGES); - x -= min(x, dirty_balance_reserve); + /* + * Pages reserved for the kernel should not be considered + * dirtyable, to prevent a situation where reclaim has to + * clean pages in order to balance the zones. + */ + x -= min(x, totalreserve_pages); x += global_page_state(NR_INACTIVE_FILE); x += global_page_state(NR_ACTIVE_FILE); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2bcdfbf8c36d..b0ca09f607b4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -114,13 +114,6 @@ static DEFINE_SPINLOCK(managed_page_count_lock); unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; unsigned long totalcma_pages __read_mostly; -/* - * When calculating the number of globally allowed dirty pages, there - * is a certain number of per-zone reserves that should not be - * considered dirtyable memory. This is the sum of those reserves - * over all existing zones that contribute dirtyable memory. - */ -unsigned long dirty_balance_reserve __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; @@ -5978,20 +5971,12 @@ static void calculate_totalreserve_pages(void) if (max > zone->managed_pages) max = zone->managed_pages; + + zone->totalreserve_pages = max; + reserve_pages += max; - /* - * Lowmem reserves are not available to - * GFP_HIGHUSER page cache allocations and - * kswapd tries to balance zones to their high - * watermark. As a result, neither should be - * regarded as dirtyable memory, to prevent a - * situation where reclaim has to clean pages - * in order to balance the zones. - */ - zone->dirty_balance_reserve = max; } } - dirty_balance_reserve = reserve_pages; totalreserve_pages = reserve_pages; } From 1f565de67d6f2991f10ab591d819135ba4d3fe0f Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Tue, 16 Feb 2016 13:34:39 -0800 Subject: [PATCH 03/61] writeback: initialize inode members that track writeback history inode struct members that track cgroup writeback information should be reinitialized when inode gets allocated from kmem_cache. Otherwise, their values remain and get used by the new inode. Signed-off-by: Tahsin Erdogan Acked-by: Tejun Heo Fixes: d10c80955265 ("writeback: implement foreign cgroup inode bdi_writeback switching") Signed-off-by: Jens Axboe (cherry picked from commit 3d65ae4634ed8350aee98a4e6f4e41fe40c7d282) Signed-off-by: Alex Shi --- fs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/inode.c b/fs/inode.c index b0edef500590..2c16b758831d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_rdev = 0; inode->dirtied_when = 0; +#ifdef CONFIG_CGROUP_WRITEBACK + inode->i_wb_frn_winner = 0; + inode->i_wb_frn_avg_time = 0; + inode->i_wb_frn_history = 0; +#endif + if (security_inode_alloc(inode)) goto out; spin_lock_init(&inode->i_lock); From cd5367ae02a488450b714a5d5f47afb014ea6541 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Nov 2015 11:13:34 -0500 Subject: [PATCH 04/61] cgroup: replace __DEVEL__sane_behavior with cgroup2 fs type With major controllers - cpu, memory and io - shaping up for the unified hierarchy, cgroup2 is about ready to be, gradually, released into the wild. Replace __DEVEL__sane_behavior flag which was used to select the unified hierarchy with a separate filesystem type "cgroup2" so that unified hierarchy can be mounted as follows. mount -t cgroup2 none $MOUNT_POINT The cgroup2 fs has its own magic number - 0x63677270 ("cgrp"). v2: Assign a different magic number to cgroup2 fs. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner (cherry picked from commit 67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff) Signed-off-by: Alex Shi --- Documentation/cgroups/unified-hierarchy.txt | 6 +-- include/linux/cgroup-defs.h | 1 - include/uapi/linux/magic.h | 1 + kernel/cgroup.c | 47 ++++++++++----------- 4 files changed, 26 insertions(+), 29 deletions(-) diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index 781b1d475bcf..c1f0e8780960 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -94,11 +94,9 @@ the process. 2-1. Mounting -Currently, unified hierarchy can be mounted with the following mount -command. Note that this is still under development and scheduled to -change soon. +Unified hierarchy can be mounted with the following mount command. - mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT + mount -t cgroup2 none $MOUNT_POINT All controllers which support the unified hierarchy and are not bound to other hierarchies are automatically bound to unified hierarchy and diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8da263299754..4cd5c95d1ca0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -66,7 +66,6 @@ enum { /* cgroup_root->flags */ enum { - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ }; diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index accb036bbc9c..b283d56c1db9 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -54,6 +54,7 @@ #define SMB_SUPER_MAGIC 0x517B #define CGROUP_SUPER_MAGIC 0x27e0eb +#define CGROUP2_SUPER_MAGIC 0x63677270 #define STACK_END_MAGIC 0x57AC6E9D diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 127c63e02d52..b5946676f84e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -211,6 +211,7 @@ static unsigned long have_free_callback __read_mostly; /* Ditto for the can_fork callback. */ static unsigned long have_canfork_callback __read_mostly; +static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; @@ -1650,10 +1651,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) all_ss = true; continue; } - if (!strcmp(token, "__DEVEL__sane_behavior")) { - opts->flags |= CGRP_ROOT_SANE_BEHAVIOR; - continue; - } if (!strcmp(token, "noprefix")) { opts->flags |= CGRP_ROOT_NOPREFIX; continue; @@ -1720,15 +1717,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) return -ENOENT; } - if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) { - pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n"); - if (nr_opts != 1) { - pr_err("sane_behavior: no other mount options allowed\n"); - return -EINVAL; - } - return 0; - } - /* * If the 'all' option was specified select all the subsystems, * otherwise if 'none', 'name=' and a subsystem name options were @@ -2007,6 +1995,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, int flags, const char *unused_dev_name, void *data) { + bool is_v2 = fs_type == &cgroup2_fs_type; struct super_block *pinned_sb = NULL; struct cgroup_subsys *ss; struct cgroup_root *root; @@ -2023,6 +2012,17 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (!use_task_css_set_links) cgroup_enable_task_cg_lists(); + if (is_v2) { + if (data) { + pr_err("cgroup2: unknown option \"%s\"\n", (char *)data); + return ERR_PTR(-EINVAL); + } + cgrp_dfl_root_visible = true; + root = &cgrp_dfl_root; + cgroup_get(&root->cgrp); + goto out_mount; + } + mutex_lock(&cgroup_mutex); /* First find the desired set of subsystems */ @@ -2030,15 +2030,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (ret) goto out_unlock; - /* look for a matching existing root */ - if (opts.flags & CGRP_ROOT_SANE_BEHAVIOR) { - cgrp_dfl_root_visible = true; - root = &cgrp_dfl_root; - cgroup_get(&root->cgrp); - ret = 0; - goto out_unlock; - } - /* * Destruction of cgroup root is asynchronous, so subsystems may * still be dying after the previous unmount. Let's drain the @@ -2149,9 +2140,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (ret) return ERR_PTR(ret); - +out_mount: dentry = kernfs_mount(fs_type, flags, root->kf_root, - CGROUP_SUPER_MAGIC, &new_sb); + is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC, + &new_sb); if (IS_ERR(dentry) || !new_sb) cgroup_put(&root->cgrp); @@ -2194,6 +2186,12 @@ static struct file_system_type cgroup_fs_type = { .kill_sb = cgroup_kill_sb, }; +static struct file_system_type cgroup2_fs_type = { + .name = "cgroup2", + .mount = cgroup_mount, + .kill_sb = cgroup_kill_sb, +}; + /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -5383,6 +5381,7 @@ int __init cgroup_init(void) WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup")); WARN_ON(register_filesystem(&cgroup_fs_type)); + WARN_ON(register_filesystem(&cgroup2_fs_type)); WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations)); return 0; From 07d48acbfee2ac01687514ab6bf3144bbd2c0e13 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Nov 2015 11:13:34 -0500 Subject: [PATCH 05/61] cgroup: rename Documentation/cgroups/ to Documentation/cgroup-legacy/ In preparation for adding cgroup2 documentation, rename Documentation/cgroups/ to Documentation/cgroup-legacy/. Signed-off-by: Tejun Heo Acked-by: Li Zefan (cherry picked from commit 0d942766453f3d23a51e0a2d430340a178b0903e) Signed-off-by: Alex Shi --- Documentation/{cgroups => cgroup-legacy}/00-INDEX | 0 Documentation/{cgroups => cgroup-legacy}/blkio-controller.txt | 0 Documentation/{cgroups => cgroup-legacy}/cgroups.txt | 0 Documentation/{cgroups => cgroup-legacy}/cpuacct.txt | 0 Documentation/{cgroups => cgroup-legacy}/cpusets.txt | 0 Documentation/{cgroups => cgroup-legacy}/devices.txt | 0 Documentation/{cgroups => cgroup-legacy}/freezer-subsystem.txt | 0 Documentation/{cgroups => cgroup-legacy}/hugetlb.txt | 0 Documentation/{cgroups => cgroup-legacy}/memcg_test.txt | 0 Documentation/{cgroups => cgroup-legacy}/memory.txt | 0 Documentation/{cgroups => cgroup-legacy}/net_cls.txt | 0 Documentation/{cgroups => cgroup-legacy}/net_prio.txt | 0 Documentation/{cgroups => cgroup-legacy}/pids.txt | 0 Documentation/{cgroups => cgroup-legacy}/unified-hierarchy.txt | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename Documentation/{cgroups => cgroup-legacy}/00-INDEX (100%) rename Documentation/{cgroups => cgroup-legacy}/blkio-controller.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/cgroups.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/cpuacct.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/cpusets.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/devices.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/freezer-subsystem.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/hugetlb.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/memcg_test.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/memory.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/net_cls.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/net_prio.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/pids.txt (100%) rename Documentation/{cgroups => cgroup-legacy}/unified-hierarchy.txt (100%) diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroup-legacy/00-INDEX similarity index 100% rename from Documentation/cgroups/00-INDEX rename to Documentation/cgroup-legacy/00-INDEX diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroup-legacy/blkio-controller.txt similarity index 100% rename from Documentation/cgroups/blkio-controller.txt rename to Documentation/cgroup-legacy/blkio-controller.txt diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroup-legacy/cgroups.txt similarity index 100% rename from Documentation/cgroups/cgroups.txt rename to Documentation/cgroup-legacy/cgroups.txt diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroup-legacy/cpuacct.txt similarity index 100% rename from Documentation/cgroups/cpuacct.txt rename to Documentation/cgroup-legacy/cpuacct.txt diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroup-legacy/cpusets.txt similarity index 100% rename from Documentation/cgroups/cpusets.txt rename to Documentation/cgroup-legacy/cpusets.txt diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroup-legacy/devices.txt similarity index 100% rename from Documentation/cgroups/devices.txt rename to Documentation/cgroup-legacy/devices.txt diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroup-legacy/freezer-subsystem.txt similarity index 100% rename from Documentation/cgroups/freezer-subsystem.txt rename to Documentation/cgroup-legacy/freezer-subsystem.txt diff --git a/Documentation/cgroups/hugetlb.txt b/Documentation/cgroup-legacy/hugetlb.txt similarity index 100% rename from Documentation/cgroups/hugetlb.txt rename to Documentation/cgroup-legacy/hugetlb.txt diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroup-legacy/memcg_test.txt similarity index 100% rename from Documentation/cgroups/memcg_test.txt rename to Documentation/cgroup-legacy/memcg_test.txt diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroup-legacy/memory.txt similarity index 100% rename from Documentation/cgroups/memory.txt rename to Documentation/cgroup-legacy/memory.txt diff --git a/Documentation/cgroups/net_cls.txt b/Documentation/cgroup-legacy/net_cls.txt similarity index 100% rename from Documentation/cgroups/net_cls.txt rename to Documentation/cgroup-legacy/net_cls.txt diff --git a/Documentation/cgroups/net_prio.txt b/Documentation/cgroup-legacy/net_prio.txt similarity index 100% rename from Documentation/cgroups/net_prio.txt rename to Documentation/cgroup-legacy/net_prio.txt diff --git a/Documentation/cgroups/pids.txt b/Documentation/cgroup-legacy/pids.txt similarity index 100% rename from Documentation/cgroups/pids.txt rename to Documentation/cgroup-legacy/pids.txt diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroup-legacy/unified-hierarchy.txt similarity index 100% rename from Documentation/cgroups/unified-hierarchy.txt rename to Documentation/cgroup-legacy/unified-hierarchy.txt From fe10f4e69509a4c384e550e3404b2606f4fd8b87 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Nov 2015 11:13:34 -0500 Subject: [PATCH 06/61] cgroup: replace unified-hierarchy.txt with a proper cgroup v2 documentation Now that cgroup v2 is almost out of the door, replace the development documentation unified-hierarchy.txt with Documentation/cgroup.txt which is a superset of unified-hierarchy.txt and authoritatively describes all userland-visible aspects of cgroup. v2: Updated to include all information from blkio-controller.txt and list filesystems which support cgroup writeback as suggested by Vivek. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Vivek Goyal (cherry picked from commit 6c2920926b10e8303378408e3c2b8952071d4344) Signed-off-by: Alex Shi --- .../cgroup-legacy/blkio-controller.txt | 79 - .../cgroup-legacy/unified-hierarchy.txt | 645 -------- Documentation/cgroup.txt | 1293 +++++++++++++++++ 3 files changed, 1293 insertions(+), 724 deletions(-) delete mode 100644 Documentation/cgroup-legacy/unified-hierarchy.txt create mode 100644 Documentation/cgroup.txt diff --git a/Documentation/cgroup-legacy/blkio-controller.txt b/Documentation/cgroup-legacy/blkio-controller.txt index 52fa9f353342..4ecc954a3063 100644 --- a/Documentation/cgroup-legacy/blkio-controller.txt +++ b/Documentation/cgroup-legacy/blkio-controller.txt @@ -374,82 +374,3 @@ One can experience an overall throughput drop if you have created multiple groups and put applications in that group which are not driving enough IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle on individual groups and throughput should improve. - -Writeback -========= - -Page cache is dirtied through buffered writes and shared mmaps and -written asynchronously to the backing filesystem by the writeback -mechanism. Writeback sits between the memory and IO domains and -regulates the proportion of dirty memory by balancing dirtying and -write IOs. - -On traditional cgroup hierarchies, relationships between different -controllers cannot be established making it impossible for writeback -to operate accounting for cgroup resource restrictions and all -writeback IOs are attributed to the root cgroup. - -If both the blkio and memory controllers are used on the v2 hierarchy -and the filesystem supports cgroup writeback, writeback operations -correctly follow the resource restrictions imposed by both memory and -blkio controllers. - -Writeback examines both system-wide and per-cgroup dirty memory status -and enforces the more restrictive of the two. Also, writeback control -parameters which are absolute values - vm.dirty_bytes and -vm.dirty_background_bytes - are distributed across cgroups according -to their current writeback bandwidth. - -There's a peculiarity stemming from the discrepancy in ownership -granularity between memory controller and writeback. While memory -controller tracks ownership per page, writeback operates on inode -basis. cgroup writeback bridges the gap by tracking ownership by -inode but migrating ownership if too many foreign pages, pages which -don't match the current inode ownership, have been encountered while -writing back the inode. - -This is a conscious design choice as writeback operations are -inherently tied to inodes making strictly following page ownership -complicated and inefficient. The only use case which suffers from -this compromise is multiple cgroups concurrently dirtying disjoint -regions of the same inode, which is an unlikely use case and decided -to be unsupported. Note that as memory controller assigns page -ownership on the first use and doesn't update it until the page is -released, even if cgroup writeback strictly follows page ownership, -multiple cgroups dirtying overlapping areas wouldn't work as expected. -In general, write-sharing an inode across multiple cgroups is not well -supported. - -Filesystem support for cgroup writeback ---------------------------------------- - -A filesystem can make writeback IOs cgroup-aware by updating -address_space_operations->writepage[s]() to annotate bio's using the -following two functions. - -* wbc_init_bio(@wbc, @bio) - - Should be called for each bio carrying writeback data and associates - the bio with the inode's owner cgroup. Can be called anytime - between bio allocation and submission. - -* wbc_account_io(@wbc, @page, @bytes) - - Should be called for each data segment being written out. While - this function doesn't care exactly when it's called during the - writeback session, it's the easiest and most natural to call it as - data segments are added to a bio. - -With writeback bio's annotated, cgroup support can be enabled per -super_block by setting MS_CGROUPWB in ->s_flags. This allows for -selective disabling of cgroup writeback support which is helpful when -certain filesystem features, e.g. journaled data mode, are -incompatible. - -wbc_init_bio() binds the specified bio to its cgroup. Depending on -the configuration, the bio may be executed at a lower priority and if -the writeback session is holding shared resources, e.g. a journal -entry, may lead to priority inversion. There is no one easy solution -for the problem. Filesystems can try to work around specific problem -cases by skipping wbc_init_bio() or using bio_associate_blkcg() -directly. diff --git a/Documentation/cgroup-legacy/unified-hierarchy.txt b/Documentation/cgroup-legacy/unified-hierarchy.txt deleted file mode 100644 index c1f0e8780960..000000000000 --- a/Documentation/cgroup-legacy/unified-hierarchy.txt +++ /dev/null @@ -1,645 +0,0 @@ - -Cgroup unified hierarchy - -April, 2014 Tejun Heo - -This document describes the changes made by unified hierarchy and -their rationales. It will eventually be merged into the main cgroup -documentation. - -CONTENTS - -1. Background -2. Basic Operation - 2-1. Mounting - 2-2. cgroup.subtree_control - 2-3. cgroup.controllers -3. Structural Constraints - 3-1. Top-down - 3-2. No internal tasks -4. Delegation - 4-1. Model of delegation - 4-2. Common ancestor rule -5. Other Changes - 5-1. [Un]populated Notification - 5-2. Other Core Changes - 5-3. Controller File Conventions - 5-3-1. Format - 5-3-2. Control Knobs - 5-4. Per-Controller Changes - 5-4-1. io - 5-4-2. cpuset - 5-4-3. memory -6. Planned Changes - 6-1. CAP for resource control - - -1. Background - -cgroup allows an arbitrary number of hierarchies and each hierarchy -can host any number of controllers. While this seems to provide a -high level of flexibility, it isn't quite useful in practice. - -For example, as there is only one instance of each controller, utility -type controllers such as freezer which can be useful in all -hierarchies can only be used in one. The issue is exacerbated by the -fact that controllers can't be moved around once hierarchies are -populated. Another issue is that all controllers bound to a hierarchy -are forced to have exactly the same view of the hierarchy. It isn't -possible to vary the granularity depending on the specific controller. - -In practice, these issues heavily limit which controllers can be put -on the same hierarchy and most configurations resort to putting each -controller on its own hierarchy. Only closely related ones, such as -the cpu and cpuacct controllers, make sense to put on the same -hierarchy. This often means that userland ends up managing multiple -similar hierarchies repeating the same steps on each hierarchy -whenever a hierarchy management operation is necessary. - -Unfortunately, support for multiple hierarchies comes at a steep cost. -Internal implementation in cgroup core proper is dazzlingly -complicated but more importantly the support for multiple hierarchies -restricts how cgroup is used in general and what controllers can do. - -There's no limit on how many hierarchies there may be, which means -that a task's cgroup membership can't be described in finite length. -The key may contain any varying number of entries and is unlimited in -length, which makes it highly awkward to handle and leads to addition -of controllers which exist only to identify membership, which in turn -exacerbates the original problem. - -Also, as a controller can't have any expectation regarding what shape -of hierarchies other controllers would be on, each controller has to -assume that all other controllers are operating on completely -orthogonal hierarchies. This makes it impossible, or at least very -cumbersome, for controllers to cooperate with each other. - -In most use cases, putting controllers on hierarchies which are -completely orthogonal to each other isn't necessary. What usually is -called for is the ability to have differing levels of granularity -depending on the specific controller. In other words, hierarchy may -be collapsed from leaf towards root when viewed from specific -controllers. For example, a given configuration might not care about -how memory is distributed beyond a certain level while still wanting -to control how CPU cycles are distributed. - -Unified hierarchy is the next version of cgroup interface. It aims to -address the aforementioned issues by having more structure while -retaining enough flexibility for most use cases. Various other -general and controller-specific interface issues are also addressed in -the process. - - -2. Basic Operation - -2-1. Mounting - -Unified hierarchy can be mounted with the following mount command. - - mount -t cgroup2 none $MOUNT_POINT - -All controllers which support the unified hierarchy and are not bound -to other hierarchies are automatically bound to unified hierarchy and -show up at the root of it. Controllers which are enabled only in the -root of unified hierarchy can be bound to other hierarchies. This -allows mixing unified hierarchy with the traditional multiple -hierarchies in a fully backward compatible way. - -A controller can be moved across hierarchies only after the controller -is no longer referenced in its current hierarchy. Because per-cgroup -controller states are destroyed asynchronously and controllers may -have lingering references, a controller may not show up immediately on -the unified hierarchy after the final umount of the previous -hierarchy. Similarly, a controller should be fully disabled to be -moved out of the unified hierarchy and it may take some time for the -disabled controller to become available for other hierarchies; -furthermore, due to dependencies among controllers, other controllers -may need to be disabled too. - -While useful for development and manual configurations, dynamically -moving controllers between the unified and other hierarchies is -strongly discouraged for production use. It is recommended to decide -the hierarchies and controller associations before starting using the -controllers. - - -2-2. cgroup.subtree_control - -All cgroups on unified hierarchy have a "cgroup.subtree_control" file -which governs which controllers are enabled on the children of the -cgroup. Let's assume a hierarchy like the following. - - root - A - B - C - \ D - -root's "cgroup.subtree_control" file determines which controllers are -enabled on A. A's on B. B's on C and D. This coincides with the -fact that controllers on the immediate sub-level are used to -distribute the resources of the parent. In fact, it's natural to -assume that resource control knobs of a child belong to its parent. -Enabling a controller in a "cgroup.subtree_control" file declares that -distribution of the respective resources of the cgroup will be -controlled. Note that this means that controller enable states are -shared among siblings. - -When read, the file contains a space-separated list of currently -enabled controllers. A write to the file should contain a -space-separated list of controllers with '+' or '-' prefixed (without -the quotes). Controllers prefixed with '+' are enabled and '-' -disabled. If a controller is listed multiple times, the last entry -wins. The specific operations are executed atomically - either all -succeed or fail. - - -2-3. cgroup.controllers - -Read-only "cgroup.controllers" file contains a space-separated list of -controllers which can be enabled in the cgroup's -"cgroup.subtree_control" file. - -In the root cgroup, this lists controllers which are not bound to -other hierarchies and the content changes as controllers are bound to -and unbound from other hierarchies. - -In non-root cgroups, the content of this file equals that of the -parent's "cgroup.subtree_control" file as only controllers enabled -from the parent can be used in its children. - - -3. Structural Constraints - -3-1. Top-down - -As it doesn't make sense to nest control of an uncontrolled resource, -all non-root "cgroup.subtree_control" files can only contain -controllers which are enabled in the parent's "cgroup.subtree_control" -file. A controller can be enabled only if the parent has the -controller enabled and a controller can't be disabled if one or more -children have it enabled. - - -3-2. No internal tasks - -One long-standing issue that cgroup faces is the competition between -tasks belonging to the parent cgroup and its children cgroups. This -is inherently nasty as two different types of entities compete and -there is no agreed-upon obvious way to handle it. Different -controllers are doing different things. - -The cpu controller considers tasks and cgroups as equivalents and maps -nice levels to cgroup weights. This works for some cases but falls -flat when children should be allocated specific ratios of CPU cycles -and the number of internal tasks fluctuates - the ratios constantly -change as the number of competing entities fluctuates. There also are -other issues. The mapping from nice level to weight isn't obvious or -universal, and there are various other knobs which simply aren't -available for tasks. - -The io controller implicitly creates a hidden leaf node for each -cgroup to host the tasks. The hidden leaf has its own copies of all -the knobs with "leaf_" prefixed. While this allows equivalent control -over internal tasks, it's with serious drawbacks. It always adds an -extra layer of nesting which may not be necessary, makes the interface -messy and significantly complicates the implementation. - -The memory controller currently doesn't have a way to control what -happens between internal tasks and child cgroups and the behavior is -not clearly defined. There have been attempts to add ad-hoc behaviors -and knobs to tailor the behavior to specific workloads. Continuing -this direction will lead to problems which will be extremely difficult -to resolve in the long term. - -Multiple controllers struggle with internal tasks and came up with -different ways to deal with it; unfortunately, all the approaches in -use now are severely flawed and, furthermore, the widely different -behaviors make cgroup as whole highly inconsistent. - -It is clear that this is something which needs to be addressed from -cgroup core proper in a uniform way so that controllers don't need to -worry about it and cgroup as a whole shows a consistent and logical -behavior. To achieve that, unified hierarchy enforces the following -structural constraint: - - Except for the root, only cgroups which don't contain any task may - have controllers enabled in their "cgroup.subtree_control" files. - -Combined with other properties, this guarantees that, when a -controller is looking at the part of the hierarchy which has it -enabled, tasks are always only on the leaves. This rules out -situations where child cgroups compete against internal tasks of the -parent. - -There are two things to note. Firstly, the root cgroup is exempt from -the restriction. Root contains tasks and anonymous resource -consumption which can't be associated with any other cgroup and -requires special treatment from most controllers. How resource -consumption in the root cgroup is governed is up to each controller. - -Secondly, the restriction doesn't take effect if there is no enabled -controller in the cgroup's "cgroup.subtree_control" file. This is -important as otherwise it wouldn't be possible to create children of a -populated cgroup. To control resource distribution of a cgroup, the -cgroup must create children and transfer all its tasks to the children -before enabling controllers in its "cgroup.subtree_control" file. - - -4. Delegation - -4-1. Model of delegation - -A cgroup can be delegated to a less privileged user by granting write -access of the directory and its "cgroup.procs" file to the user. Note -that the resource control knobs in a given directory concern the -resources of the parent and thus must not be delegated along with the -directory. - -Once delegated, the user can build sub-hierarchy under the directory, -organize processes as it sees fit and further distribute the resources -it got from the parent. The limits and other settings of all resource -controllers are hierarchical and regardless of what happens in the -delegated sub-hierarchy, nothing can escape the resource restrictions -imposed by the parent. - -Currently, cgroup doesn't impose any restrictions on the number of -cgroups in or nesting depth of a delegated sub-hierarchy; however, -this may in the future be limited explicitly. - - -4-2. Common ancestor rule - -On the unified hierarchy, to write to a "cgroup.procs" file, in -addition to the usual write permission to the file and uid match, the -writer must also have write access to the "cgroup.procs" file of the -common ancestor of the source and destination cgroups. This prevents -delegatees from smuggling processes across disjoint sub-hierarchies. - -Let's say cgroups C0 and C1 have been delegated to user U0 who created -C00, C01 under C0 and C10 under C1 as follows. - - ~~~~~~~~~~~~~ - C0 - C00 - ~ cgroup ~ \ C01 - ~ hierarchy ~ - ~~~~~~~~~~~~~ - C1 - C10 - -C0 and C1 are separate entities in terms of resource distribution -regardless of their relative positions in the hierarchy. The -resources the processes under C0 are entitled to are controlled by -C0's ancestors and may be completely different from C1. It's clear -that the intention of delegating C0 to U0 is allowing U0 to organize -the processes under C0 and further control the distribution of C0's -resources. - -On traditional hierarchies, if a task has write access to "tasks" or -"cgroup.procs" file of a cgroup and its uid agrees with the target, it -can move the target to the cgroup. In the above example, U0 will not -only be able to move processes in each sub-hierarchy but also across -the two sub-hierarchies, effectively allowing it to violate the -organizational and resource restrictions implied by the hierarchical -structure above C0 and C1. - -On the unified hierarchy, let's say U0 wants to write the pid of a -process which has a matching uid and is currently in C10 into -"C00/cgroup.procs". U0 obviously has write access to the file and -migration permission on the process; however, the common ancestor of -the source cgroup C10 and the destination cgroup C00 is above the -points of delegation and U0 would not have write access to its -"cgroup.procs" and thus be denied with -EACCES. - - -5. Other Changes - -5-1. [Un]populated Notification - -cgroup users often need a way to determine when a cgroup's -subhierarchy becomes empty so that it can be cleaned up. cgroup -currently provides release_agent for it; unfortunately, this mechanism -is riddled with issues. - -- It delivers events by forking and execing a userland binary - specified as the release_agent. This is a long deprecated method of - notification delivery. It's extremely heavy, slow and cumbersome to - integrate with larger infrastructure. - -- There is single monitoring point at the root. There's no way to - delegate management of a subtree. - -- The event isn't recursive. It triggers when a cgroup doesn't have - any tasks or child cgroups. Events for internal nodes trigger only - after all children are removed. This again makes it impossible to - delegate management of a subtree. - -- Events are filtered from the kernel side. A "notify_on_release" - file is used to subscribe to or suppress release events. This is - unnecessarily complicated and probably done this way because event - delivery itself was expensive. - -Unified hierarchy implements "populated" field in "cgroup.events" -interface file which can be used to monitor whether the cgroup's -subhierarchy has tasks in it or not. Its value is 0 if there is no -task in the cgroup and its descendants; otherwise, 1. poll and -[id]notify events are triggered when the value changes. - -This is significantly lighter and simpler and trivially allows -delegating management of subhierarchy - subhierarchy monitoring can -block further propagation simply by putting itself or another process -in the subhierarchy and monitor events that it's interested in from -there without interfering with monitoring higher in the tree. - -In unified hierarchy, the release_agent mechanism is no longer -supported and the interface files "release_agent" and -"notify_on_release" do not exist. - - -5-2. Other Core Changes - -- None of the mount options is allowed. - -- remount is disallowed. - -- rename(2) is disallowed. - -- The "tasks" file is removed. Everything should at process - granularity. Use the "cgroup.procs" file instead. - -- The "cgroup.procs" file is not sorted. pids will be unique unless - they got recycled in-between reads. - -- The "cgroup.clone_children" file is removed. - -- /proc/PID/cgroup keeps reporting the cgroup that a zombie belonged - to before exiting. If the cgroup is removed before the zombie is - reaped, " (deleted)" is appeneded to the path. - - -5-3. Controller File Conventions - -5-3-1. Format - -In general, all controller files should be in one of the following -formats whenever possible. - -- Values only files - - VAL0 VAL1...\n - -- Flat keyed files - - KEY0 VAL0\n - KEY1 VAL1\n - ... - -- Nested keyed files - - KEY0 SUB_KEY0=VAL00 SUB_KEY1=VAL01... - KEY1 SUB_KEY0=VAL10 SUB_KEY1=VAL11... - ... - -For a writeable file, the format for writing should generally match -reading; however, controllers may allow omitting later fields or -implement restricted shortcuts for most common use cases. - -For both flat and nested keyed files, only the values for a single key -can be written at a time. For nested keyed files, the sub key pairs -may be specified in any order and not all pairs have to be specified. - - -5-3-2. Control Knobs - -- Settings for a single feature should generally be implemented in a - single file. - -- In general, the root cgroup should be exempt from resource control - and thus shouldn't have resource control knobs. - -- If a controller implements ratio based resource distribution, the - control knob should be named "weight" and have the range [1, 10000] - and 100 should be the default value. The values are chosen to allow - enough and symmetric bias in both directions while keeping it - intuitive (the default is 100%). - -- If a controller implements an absolute resource guarantee and/or - limit, the control knobs should be named "min" and "max" - respectively. If a controller implements best effort resource - gurantee and/or limit, the control knobs should be named "low" and - "high" respectively. - - In the above four control files, the special token "max" should be - used to represent upward infinity for both reading and writing. - -- If a setting has configurable default value and specific overrides, - the default settings should be keyed with "default" and appear as - the first entry in the file. Specific entries can use "default" as - its value to indicate inheritance of the default value. - -- For events which are not very high frequency, an interface file - "events" should be created which lists event key value pairs. - Whenever a notifiable event happens, file modified event should be - generated on the file. - - -5-4. Per-Controller Changes - -5-4-1. io - -- blkio is renamed to io. The interface is overhauled anyway. The - new name is more in line with the other two major controllers, cpu - and memory, and better suited given that it may be used for cgroup - writeback without involving block layer. - -- Everything including stat is always hierarchical making separate - recursive stat files pointless and, as no internal node can have - tasks, leaf weights are meaningless. The operation model is - simplified and the interface is overhauled accordingly. - - io.stat - - The stat file. The reported stats are from the point where - bio's are issued to request_queue. The stats are counted - independent of which policies are enabled. Each line in the - file follows the following format. More fields may later be - added at the end. - - $MAJ:$MIN rbytes=$RBYTES wbytes=$WBYTES rios=$RIOS wrios=$WIOS - - io.weight - - The weight setting, currently only available and effective if - cfq-iosched is in use for the target device. The weight is - between 1 and 10000 and defaults to 100. The first line - always contains the default weight in the following format to - use when per-device setting is missing. - - default $WEIGHT - - Subsequent lines list per-device weights of the following - format. - - $MAJ:$MIN $WEIGHT - - Writing "$WEIGHT" or "default $WEIGHT" changes the default - setting. Writing "$MAJ:$MIN $WEIGHT" sets per-device weight - while "$MAJ:$MIN default" clears it. - - This file is available only on non-root cgroups. - - io.max - - The maximum bandwidth and/or iops setting, only available if - blk-throttle is enabled. The file is of the following format. - - $MAJ:$MIN rbps=$RBPS wbps=$WBPS riops=$RIOPS wiops=$WIOPS - - ${R|W}BPS are read/write bytes per second and ${R|W}IOPS are - read/write IOs per second. "max" indicates no limit. Writing - to the file follows the same format but the individual - settings may be omitted or specified in any order. - - This file is available only on non-root cgroups. - - -5-4-2. cpuset - -- Tasks are kept in empty cpusets after hotplug and take on the masks - of the nearest non-empty ancestor, instead of being moved to it. - -- A task can be moved into an empty cpuset, and again it takes on the - masks of the nearest non-empty ancestor. - - -5-4-3. memory - -- use_hierarchy is on by default and the cgroup file for the flag is - not created. - -- The original lower boundary, the soft limit, is defined as a limit - that is per default unset. As a result, the set of cgroups that - global reclaim prefers is opt-in, rather than opt-out. The costs - for optimizing these mostly negative lookups are so high that the - implementation, despite its enormous size, does not even provide the - basic desirable behavior. First off, the soft limit has no - hierarchical meaning. All configured groups are organized in a - global rbtree and treated like equal peers, regardless where they - are located in the hierarchy. This makes subtree delegation - impossible. Second, the soft limit reclaim pass is so aggressive - that it not just introduces high allocation latencies into the - system, but also impacts system performance due to overreclaim, to - the point where the feature becomes self-defeating. - - The memory.low boundary on the other hand is a top-down allocated - reserve. A cgroup enjoys reclaim protection when it and all its - ancestors are below their low boundaries, which makes delegation of - subtrees possible. Secondly, new cgroups have no reserve per - default and in the common case most cgroups are eligible for the - preferred reclaim pass. This allows the new low boundary to be - efficiently implemented with just a minor addition to the generic - reclaim code, without the need for out-of-band data structures and - reclaim passes. Because the generic reclaim code considers all - cgroups except for the ones running low in the preferred first - reclaim pass, overreclaim of individual groups is eliminated as - well, resulting in much better overall workload performance. - -- The original high boundary, the hard limit, is defined as a strict - limit that can not budge, even if the OOM killer has to be called. - But this generally goes against the goal of making the most out of - the available memory. The memory consumption of workloads varies - during runtime, and that requires users to overcommit. But doing - that with a strict upper limit requires either a fairly accurate - prediction of the working set size or adding slack to the limit. - Since working set size estimation is hard and error prone, and - getting it wrong results in OOM kills, most users tend to err on the - side of a looser limit and end up wasting precious resources. - - The memory.high boundary on the other hand can be set much more - conservatively. When hit, it throttles allocations by forcing them - into direct reclaim to work off the excess, but it never invokes the - OOM killer. As a result, a high boundary that is chosen too - aggressively will not terminate the processes, but instead it will - lead to gradual performance degradation. The user can monitor this - and make corrections until the minimal memory footprint that still - gives acceptable performance is found. - - In extreme cases, with many concurrent allocations and a complete - breakdown of reclaim progress within the group, the high boundary - can be exceeded. But even then it's mostly better to satisfy the - allocation from the slack available in other groups or the rest of - the system than killing the group. Otherwise, memory.max is there - to limit this type of spillover and ultimately contain buggy or even - malicious applications. - -- The original control file names are unwieldy and inconsistent in - many different ways. For example, the upper boundary hit count is - exported in the memory.failcnt file, but an OOM event count has to - be manually counted by listening to memory.oom_control events, and - lower boundary / soft limit events have to be counted by first - setting a threshold for that value and then counting those events. - Also, usage and limit files encode their units in the filename. - That makes the filenames very long, even though this is not - information that a user needs to be reminded of every time they type - out those names. - - To address these naming issues, as well as to signal clearly that - the new interface carries a new configuration model, the naming - conventions in it necessarily differ from the old interface. - -- The original limit files indicate the state of an unset limit with a - Very High Number, and a configured limit can be unset by echoing -1 - into those files. But that very high number is implementation and - architecture dependent and not very descriptive. And while -1 can - be understood as an underflow into the highest possible value, -2 or - -10M etc. do not work, so it's not consistent. - - memory.low, memory.high, and memory.max will use the string "max" to - indicate and set the highest possible value. - -6. Planned Changes - -6-1. CAP for resource control - -Unified hierarchy will require one of the capabilities(7), which is -yet to be decided, for all resource control related knobs. Process -organization operations - creation of sub-cgroups and migration of -processes in sub-hierarchies may be delegated by changing the -ownership and/or permissions on the cgroup directory and -"cgroup.procs" interface file; however, all operations which affect -resource control - writes to a "cgroup.subtree_control" file or any -controller-specific knobs - will require an explicit CAP privilege. - -This, in part, is to prevent the cgroup interface from being -inadvertently promoted to programmable API used by non-privileged -binaries. cgroup exposes various aspects of the system in ways which -aren't properly abstracted for direct consumption by regular programs. -This is an administration interface much closer to sysctl knobs than -system calls. Even the basic access model, being filesystem path -based, isn't suitable for direct consumption. There's no way to -access "my cgroup" in a race-free way or make multiple operations -atomic against migration to another cgroup. - -Another aspect is that, for better or for worse, the cgroup interface -goes through far less scrutiny than regular interfaces for -unprivileged userland. The upside is that cgroup is able to expose -useful features which may not be suitable for general consumption in a -reasonable time frame. It provides a relatively short path between -internal details and userland-visible interface. Of course, this -shortcut comes with high risk. We go through what we go through for -general kernel APIs for good reasons. It may end up leaking internal -details in a way which can exert significant pain by locking the -kernel into a contract that can't be maintained in a reasonable -manner. - -Also, due to the specific nature, cgroup and its controllers don't -tend to attract attention from a wide scope of developers. cgroup's -short history is already fraught with severely mis-designed -interfaces, unnecessary commitments to and exposing of internal -details, broken and dangerous implementations of various features. - -Keeping cgroup as an administration interface is both advantageous for -its role and imperative given its nature. Some of the cgroup features -may make sense for unprivileged access. If deemed justified, those -must be further abstracted and implemented as a different interface, -be it a system call or process-private filesystem, and survive through -the scrutiny that any interface for general consumption is required to -go through. - -Requiring CAP is not a complete solution but should serve as a -significant deterrent against spraying cgroup usages in non-privileged -programs. diff --git a/Documentation/cgroup.txt b/Documentation/cgroup.txt new file mode 100644 index 000000000000..31d1f7bf12a1 --- /dev/null +++ b/Documentation/cgroup.txt @@ -0,0 +1,1293 @@ + +Control Group v2 + +October, 2015 Tejun Heo + +This is the authoritative documentation on the design, interface and +conventions of cgroup v2. It describes all userland-visible aspects +of cgroup including core and specific controller behaviors. All +future changes must be reflected in this document. Documentation for +v1 is available under Documentation/cgroup-legacy/. + +CONTENTS + +1. Introduction + 1-1. Terminology + 1-2. What is cgroup? +2. Basic Operations + 2-1. Mounting + 2-2. Organizing Processes + 2-3. [Un]populated Notification + 2-4. Controlling Controllers + 2-4-1. Enabling and Disabling + 2-4-2. Top-down Constraint + 2-4-3. No Internal Process Constraint + 2-5. Delegation + 2-5-1. Model of Delegation + 2-5-2. Delegation Containment + 2-6. Guidelines + 2-6-1. Organize Once and Control + 2-6-2. Avoid Name Collisions +3. Resource Distribution Models + 3-1. Weights + 3-2. Limits + 3-3. Protections + 3-4. Allocations +4. Interface Files + 4-1. Format + 4-2. Conventions + 4-3. Core Interface Files +5. Controllers + 5-1. CPU + 5-1-1. CPU Interface Files + 5-2. Memory + 5-2-1. Memory Interface Files + 5-2-2. Usage Guidelines + 5-2-3. Memory Ownership + 5-3. IO + 5-3-1. IO Interface Files + 5-3-2. Writeback +P. Information on Kernel Programming + P-1. Filesystem Support for Writeback +D. Deprecated v1 Core Features +R. Issues with v1 and Rationales for v2 + R-1. Multiple Hierarchies + R-2. Thread Granularity + R-3. Competition Between Inner Nodes and Threads + R-4. Other Interface Issues + R-5. Controller Issues and Remedies + R-5-1. Memory + + +1. Introduction + +1-1. Terminology + +"cgroup" stands for "control group" and is never capitalized. The +singular form is used to designate the whole feature and also as a +qualifier as in "cgroup controllers". When explicitly referring to +multiple individual control groups, the plural form "cgroups" is used. + + +1-2. What is cgroup? + +cgroup is a mechanism to organize processes hierarchically and +distribute system resources along the hierarchy in a controlled and +configurable manner. + +cgroup is largely composed of two parts - the core and controllers. +cgroup core is primarily responsible for hierarchically organizing +processes. A cgroup controller is usually responsible for +distributing a specific type of system resource along the hierarchy +although there are utility controllers which serve purposes other than +resource distribution. + +cgroups form a tree structure and every process in the system belongs +to one and only one cgroup. All threads of a process belong to the +same cgroup. On creation, all processes are put in the cgroup that +the parent process belongs to at the time. A process can be migrated +to another cgroup. Migration of a process doesn't affect already +existing descendant processes. + +Following certain structural constraints, controllers may be enabled or +disabled selectively on a cgroup. All controller behaviors are +hierarchical - if a controller is enabled on a cgroup, it affects all +processes which belong to the cgroups consisting the inclusive +sub-hierarchy of the cgroup. When a controller is enabled on a nested +cgroup, it always restricts the resource distribution further. The +restrictions set closer to the root in the hierarchy can not be +overridden from further away. + + +2. Basic Operations + +2-1. Mounting + +Unlike v1, cgroup v2 has only single hierarchy. The cgroup v2 +hierarchy can be mounted with the following mount command. + + # mount -t cgroup2 none $MOUNT_POINT + +cgroup2 filesystem has the magic number 0x63677270 ("cgrp"). All +controllers which support v2 and are not bound to a v1 hierarchy are +automatically bound to the v2 hierarchy and show up at the root. +Controllers which are not in active use in the v2 hierarchy can be +bound to other hierarchies. This allows mixing v2 hierarchy with the +legacy v1 multiple hierarchies in a fully backward compatible way. + +A controller can be moved across hierarchies only after the controller +is no longer referenced in its current hierarchy. Because per-cgroup +controller states are destroyed asynchronously and controllers may +have lingering references, a controller may not show up immediately on +the v2 hierarchy after the final umount of the previous hierarchy. +Similarly, a controller should be fully disabled to be moved out of +the unified hierarchy and it may take some time for the disabled +controller to become available for other hierarchies; furthermore, due +to inter-controller dependencies, other controllers may need to be +disabled too. + +While useful for development and manual configurations, moving +controllers dynamically between the v2 and other hierarchies is +strongly discouraged for production use. It is recommended to decide +the hierarchies and controller associations before starting using the +controllers after system boot. + + +2-2. Organizing Processes + +Initially, only the root cgroup exists to which all processes belong. +A child cgroup can be created by creating a sub-directory. + + # mkdir $CGROUP_NAME + +A given cgroup may have multiple child cgroups forming a tree +structure. Each cgroup has a read-writable interface file +"cgroup.procs". When read, it lists the PIDs of all processes which +belong to the cgroup one-per-line. The PIDs are not ordered and the +same PID may show up more than once if the process got moved to +another cgroup and then back or the PID got recycled while reading. + +A process can be migrated into a cgroup by writing its PID to the +target cgroup's "cgroup.procs" file. Only one process can be migrated +on a single write(2) call. If a process is composed of multiple +threads, writing the PID of any thread migrates all threads of the +process. + +When a process forks a child process, the new process is born into the +cgroup that the forking process belongs to at the time of the +operation. After exit, a process stays associated with the cgroup +that it belonged to at the time of exit until it's reaped; however, a +zombie process does not appear in "cgroup.procs" and thus can't be +moved to another cgroup. + +A cgroup which doesn't have any children or live processes can be +destroyed by removing the directory. Note that a cgroup which doesn't +have any children and is associated only with zombie processes is +considered empty and can be removed. + + # rmdir $CGROUP_NAME + +"/proc/$PID/cgroup" lists a process's cgroup membership. If legacy +cgroup is in use in the system, this file may contain multiple lines, +one for each hierarchy. The entry for cgroup v2 is always in the +format "0::$PATH". + + # cat /proc/842/cgroup + ... + 0::/test-cgroup/test-cgroup-nested + +If the process becomes a zombie and the cgroup it was associated with +is removed subsequently, " (deleted)" is appended to the path. + + # cat /proc/842/cgroup + ... + 0::/test-cgroup/test-cgroup-nested (deleted) + + +2-3. [Un]populated Notification + +Each non-root cgroup has a "cgroup.events" file which contains +"populated" field indicating whether the cgroup's sub-hierarchy has +live processes in it. Its value is 0 if there is no live process in +the cgroup and its descendants; otherwise, 1. poll and [id]notify +events are triggered when the value changes. This can be used, for +example, to start a clean-up operation after all processes of a given +sub-hierarchy have exited. The populated state updates and +notifications are recursive. Consider the following sub-hierarchy +where the numbers in the parentheses represent the numbers of processes +in each cgroup. + + A(4) - B(0) - C(1) + \ D(0) + +A, B and C's "populated" fields would be 1 while D's 0. After the one +process in C exits, B and C's "populated" fields would flip to "0" and +file modified events will be generated on the "cgroup.events" files of +both cgroups. + + +2-4. Controlling Controllers + +2-4-1. Enabling and Disabling + +Each cgroup has a "cgroup.controllers" file which lists all +controllers available for the cgroup to enable. + + # cat cgroup.controllers + cpu io memory + +No controller is enabled by default. Controllers can be enabled and +disabled by writing to the "cgroup.subtree_control" file. + + # echo "+cpu +memory -io" > cgroup.subtree_control + +Only controllers which are listed in "cgroup.controllers" can be +enabled. When multiple operations are specified as above, either they +all succeed or fail. If multiple operations on the same controller +are specified, the last one is effective. + +Enabling a controller in a cgroup indicates that the distribution of +the target resource across its immediate children will be controlled. +Consider the following sub-hierarchy. The enabled controllers are +listed in parentheses. + + A(cpu,memory) - B(memory) - C() + \ D() + +As A has "cpu" and "memory" enabled, A will control the distribution +of CPU cycles and memory to its children, in this case, B. As B has +"memory" enabled but not "CPU", C and D will compete freely on CPU +cycles but their division of memory available to B will be controlled. + +As a controller regulates the distribution of the target resource to +the cgroup's children, enabling it creates the controller's interface +files in the child cgroups. In the above example, enabling "cpu" on B +would create the "cpu." prefixed controller interface files in C and +D. Likewise, disabling "memory" from B would remove the "memory." +prefixed controller interface files from C and D. This means that the +controller interface files - anything which doesn't start with +"cgroup." are owned by the parent rather than the cgroup itself. + + +2-4-2. Top-down Constraint + +Resources are distributed top-down and a cgroup can further distribute +a resource only if the resource has been distributed to it from the +parent. This means that all non-root "cgroup.subtree_control" files +can only contain controllers which are enabled in the parent's +"cgroup.subtree_control" file. A controller can be enabled only if +the parent has the controller enabled and a controller can't be +disabled if one or more children have it enabled. + + +2-4-3. No Internal Process Constraint + +Non-root cgroups can only distribute resources to their children when +they don't have any processes of their own. In other words, only +cgroups which don't contain any processes can have controllers enabled +in their "cgroup.subtree_control" files. + +This guarantees that, when a controller is looking at the part of the +hierarchy which has it enabled, processes are always only on the +leaves. This rules out situations where child cgroups compete against +internal processes of the parent. + +The root cgroup is exempt from this restriction. Root contains +processes and anonymous resource consumption which can't be associated +with any other cgroups and requires special treatment from most +controllers. How resource consumption in the root cgroup is governed +is up to each controller. + +Note that the restriction doesn't get in the way if there is no +enabled controller in the cgroup's "cgroup.subtree_control". This is +important as otherwise it wouldn't be possible to create children of a +populated cgroup. To control resource distribution of a cgroup, the +cgroup must create children and transfer all its processes to the +children before enabling controllers in its "cgroup.subtree_control" +file. + + +2-5. Delegation + +2-5-1. Model of Delegation + +A cgroup can be delegated to a less privileged user by granting write +access of the directory and its "cgroup.procs" file to the user. Note +that resource control interface files in a given directory control the +distribution of the parent's resources and thus must not be delegated +along with the directory. + +Once delegated, the user can build sub-hierarchy under the directory, +organize processes as it sees fit and further distribute the resources +it received from the parent. The limits and other settings of all +resource controllers are hierarchical and regardless of what happens +in the delegated sub-hierarchy, nothing can escape the resource +restrictions imposed by the parent. + +Currently, cgroup doesn't impose any restrictions on the number of +cgroups in or nesting depth of a delegated sub-hierarchy; however, +this may be limited explicitly in the future. + + +2-5-2. Delegation Containment + +A delegated sub-hierarchy is contained in the sense that processes +can't be moved into or out of the sub-hierarchy by the delegatee. For +a process with a non-root euid to migrate a target process into a +cgroup by writing its PID to the "cgroup.procs" file, the following +conditions must be met. + +- The writer's euid must match either uid or suid of the target process. + +- The writer must have write access to the "cgroup.procs" file. + +- The writer must have write access to the "cgroup.procs" file of the + common ancestor of the source and destination cgroups. + +The above three constraints ensure that while a delegatee may migrate +processes around freely in the delegated sub-hierarchy it can't pull +in from or push out to outside the sub-hierarchy. + +For an example, let's assume cgroups C0 and C1 have been delegated to +user U0 who created C00, C01 under C0 and C10 under C1 as follows and +all processes under C0 and C1 belong to U0. + + ~~~~~~~~~~~~~ - C0 - C00 + ~ cgroup ~ \ C01 + ~ hierarchy ~ + ~~~~~~~~~~~~~ - C1 - C10 + +Let's also say U0 wants to write the PID of a process which is +currently in C10 into "C00/cgroup.procs". U0 has write access to the +file and uid match on the process; however, the common ancestor of the +source cgroup C10 and the destination cgroup C00 is above the points +of delegation and U0 would not have write access to its "cgroup.procs" +files and thus the write will be denied with -EACCES. + + +2-6. Guidelines + +2-6-1. Organize Once and Control + +Migrating a process across cgroups is a relatively expensive operation +and stateful resources such as memory are not moved together with the +process. This is an explicit design decision as there often exist +inherent trade-offs between migration and various hot paths in terms +of synchronization cost. + +As such, migrating processes across cgroups frequently as a means to +apply different resource restrictions is discouraged. A workload +should be assigned to a cgroup according to the system's logical and +resource structure once on start-up. Dynamic adjustments to resource +distribution can be made by changing controller configuration through +the interface files. + + +2-6-2. Avoid Name Collisions + +Interface files for a cgroup and its children cgroups occupy the same +directory and it is possible to create children cgroups which collide +with interface files. + +All cgroup core interface files are prefixed with "cgroup." and each +controller's interface files are prefixed with the controller name and +a dot. A controller's name is composed of lower case alphabets and +'_'s but never begins with an '_' so it can be used as the prefix +character for collision avoidance. Also, interface file names won't +start or end with terms which are often used in categorizing workloads +such as job, service, slice, unit or workload. + +cgroup doesn't do anything to prevent name collisions and it's the +user's responsibility to avoid them. + + +3. Resource Distribution Models + +cgroup controllers implement several resource distribution schemes +depending on the resource type and expected use cases. This section +describes major schemes in use along with their expected behaviors. + + +3-1. Weights + +A parent's resource is distributed by adding up the weights of all +active children and giving each the fraction matching the ratio of its +weight against the sum. As only children which can make use of the +resource at the moment participate in the distribution, this is +work-conserving. Due to the dynamic nature, this model is usually +used for stateless resources. + +All weights are in the range [1, 10000] with the default at 100. This +allows symmetric multiplicative biases in both directions at fine +enough granularity while staying in the intuitive range. + +As long as the weight is in range, all configuration combinations are +valid and there is no reason to reject configuration changes or +process migrations. + +"cpu.weight" proportionally distributes CPU cycles to active children +and is an example of this type. + + +3-2. Limits + +A child can only consume upto the configured amount of the resource. +Limits can be over-committed - the sum of the limits of children can +exceed the amount of resource available to the parent. + +Limits are in the range [0, max] and defaults to "max", which is noop. + +As limits can be over-committed, all configuration combinations are +valid and there is no reason to reject configuration changes or +process migrations. + +"io.max" limits the maximum BPS and/or IOPS that a cgroup can consume +on an IO device and is an example of this type. + + +3-3. Protections + +A cgroup is protected to be allocated upto the configured amount of +the resource if the usages of all its ancestors are under their +protected levels. Protections can be hard guarantees or best effort +soft boundaries. Protections can also be over-committed in which case +only upto the amount available to the parent is protected among +children. + +Protections are in the range [0, max] and defaults to 0, which is +noop. + +As protections can be over-committed, all configuration combinations +are valid and there is no reason to reject configuration changes or +process migrations. + +"memory.low" implements best-effort memory protection and is an +example of this type. + + +3-4. Allocations + +A cgroup is exclusively allocated a certain amount of a finite +resource. Allocations can't be over-committed - the sum of the +allocations of children can not exceed the amount of resource +available to the parent. + +Allocations are in the range [0, max] and defaults to 0, which is no +resource. + +As allocations can't be over-committed, some configuration +combinations are invalid and should be rejected. Also, if the +resource is mandatory for execution of processes, process migrations +may be rejected. + +"cpu.rt.max" hard-allocates realtime slices and is an example of this +type. + + +4. Interface Files + +4-1. Format + +All interface files should be in one of the following formats whenever +possible. + + New-line separated values + (when only one value can be written at once) + + VAL0\n + VAL1\n + ... + + Space separated values + (when read-only or multiple values can be written at once) + + VAL0 VAL1 ...\n + + Flat keyed + + KEY0 VAL0\n + KEY1 VAL1\n + ... + + Nested keyed + + KEY0 SUB_KEY0=VAL00 SUB_KEY1=VAL01... + KEY1 SUB_KEY0=VAL10 SUB_KEY1=VAL11... + ... + +For a writable file, the format for writing should generally match +reading; however, controllers may allow omitting later fields or +implement restricted shortcuts for most common use cases. + +For both flat and nested keyed files, only the values for a single key +can be written at a time. For nested keyed files, the sub key pairs +may be specified in any order and not all pairs have to be specified. + + +4-2. Conventions + +- Settings for a single feature should be contained in a single file. + +- The root cgroup should be exempt from resource control and thus + shouldn't have resource control interface files. Also, + informational files on the root cgroup which end up showing global + information available elsewhere shouldn't exist. + +- If a controller implements weight based resource distribution, its + interface file should be named "weight" and have the range [1, + 10000] with 100 as the default. The values are chosen to allow + enough and symmetric bias in both directions while keeping it + intuitive (the default is 100%). + +- If a controller implements an absolute resource guarantee and/or + limit, the interface files should be named "min" and "max" + respectively. If a controller implements best effort resource + guarantee and/or limit, the interface files should be named "low" + and "high" respectively. + + In the above four control files, the special token "max" should be + used to represent upward infinity for both reading and writing. + +- If a setting has a configurable default value and keyed specific + overrides, the default entry should be keyed with "default" and + appear as the first entry in the file. + + The default value can be updated by writing either "default $VAL" or + "$VAL". + + When writing to update a specific override, "default" can be used as + the value to indicate removal of the override. Override entries + with "default" as the value must not appear when read. + + For example, a setting which is keyed by major:minor device numbers + with integer values may look like the following. + + # cat cgroup-example-interface-file + default 150 + 8:0 300 + + The default value can be updated by + + # echo 125 > cgroup-example-interface-file + + or + + # echo "default 125" > cgroup-example-interface-file + + An override can be set by + + # echo "8:16 170" > cgroup-example-interface-file + + and cleared by + + # echo "8:0 default" > cgroup-example-interface-file + # cat cgroup-example-interface-file + default 125 + 8:16 170 + +- For events which are not very high frequency, an interface file + "events" should be created which lists event key value pairs. + Whenever a notifiable event happens, file modified event should be + generated on the file. + + +4-3. Core Interface Files + +All cgroup core files are prefixed with "cgroup." + + cgroup.procs + + A read-write new-line separated values file which exists on + all cgroups. + + When read, it lists the PIDs of all processes which belong to + the cgroup one-per-line. The PIDs are not ordered and the + same PID may show up more than once if the process got moved + to another cgroup and then back or the PID got recycled while + reading. + + A PID can be written to migrate the process associated with + the PID to the cgroup. The writer should match all of the + following conditions. + + - Its euid is either root or must match either uid or suid of + the target process. + + - It must have write access to the "cgroup.procs" file. + + - It must have write access to the "cgroup.procs" file of the + common ancestor of the source and destination cgroups. + + When delegating a sub-hierarchy, write access to this file + should be granted along with the containing directory. + + cgroup.controllers + + A read-only space separated values file which exists on all + cgroups. + + It shows space separated list of all controllers available to + the cgroup. The controllers are not ordered. + + cgroup.subtree_control + + A read-write space separated values file which exists on all + cgroups. Starts out empty. + + When read, it shows space separated list of the controllers + which are enabled to control resource distribution from the + cgroup to its children. + + Space separated list of controllers prefixed with '+' or '-' + can be written to enable or disable controllers. A controller + name prefixed with '+' enables the controller and '-' + disables. If a controller appears more than once on the list, + the last one is effective. When multiple enable and disable + operations are specified, either all succeed or all fail. + + cgroup.events + + A read-only flat-keyed file which exists on non-root cgroups. + The following entries are defined. Unless specified + otherwise, a value change in this file generates a file + modified event. + + populated + + 1 if the cgroup or its descendants contains any live + processes; otherwise, 0. + + +5. Controllers + +5-1. CPU + +[NOTE: The interface for the cpu controller hasn't been merged yet] + +The "cpu" controllers regulates distribution of CPU cycles. This +controller implements weight and absolute bandwidth limit models for +normal scheduling policy and absolute bandwidth allocation model for +realtime scheduling policy. + + +5-1-1. CPU Interface Files + +All time durations are in microseconds. + + cpu.stat + + A read-only flat-keyed file which exists on non-root cgroups. + + It reports the following six stats. + + usage_usec + user_usec + system_usec + nr_periods + nr_throttled + throttled_usec + + cpu.weight + + A read-write single value file which exists on non-root + cgroups. The default is "100". + + The weight in the range [1, 10000]. + + cpu.max + + A read-write two value file which exists on non-root cgroups. + The default is "max 100000". + + The maximum bandwidth limit. It's in the following format. + + $MAX $PERIOD + + which indicates that the group may consume upto $MAX in each + $PERIOD duration. "max" for $MAX indicates no limit. If only + one number is written, $MAX is updated. + + cpu.rt.max + + [NOTE: The semantics of this file is still under discussion and the + interface hasn't been merged yet] + + A read-write two value file which exists on all cgroups. + The default is "0 100000". + + The maximum realtime runtime allocation. Over-committing + configurations are disallowed and process migrations are + rejected if not enough bandwidth is available. It's in the + following format. + + $MAX $PERIOD + + which indicates that the group may consume upto $MAX in each + $PERIOD duration. If only one number is written, $MAX is + updated. + + +5-2. Memory + +The "memory" controller regulates distribution of memory. Memory is +stateful and implements both limit and protection models. Due to the +intertwining between memory usage and reclaim pressure and the +stateful nature of memory, the distribution model is relatively +complex. + +While not completely water-tight, all major memory usages by a given +cgroup are tracked so that the total memory consumption can be +accounted and controlled to a reasonable extent. Currently, the +following types of memory usages are tracked. + +- Userland memory - page cache and anonymous memory. + +- Kernel data structures such as dentries and inodes. + +- TCP socket buffers. + +The above list may expand in the future for better coverage. + + +5-2-1. Memory Interface Files + +All memory amounts are in bytes. If a value which is not aligned to +PAGE_SIZE is written, the value may be rounded up to the closest +PAGE_SIZE multiple when read back. + + memory.current + + A read-only single value file which exists on non-root + cgroups. + + The total amount of memory currently being used by the cgroup + and its descendants. + + memory.low + + A read-write single value file which exists on non-root + cgroups. The default is "0". + + Best-effort memory protection. If the memory usages of a + cgroup and all its ancestors are below their low boundaries, + the cgroup's memory won't be reclaimed unless memory can be + reclaimed from unprotected cgroups. + + Putting more memory than generally available under this + protection is discouraged. + + memory.high + + A read-write single value file which exists on non-root + cgroups. The default is "max". + + Memory usage throttle limit. This is the main mechanism to + control memory usage of a cgroup. If a cgroup's usage goes + over the high boundary, the processes of the cgroup are + throttled and put under heavy reclaim pressure. + + Going over the high limit never invokes the OOM killer and + under extreme conditions the limit may be breached. + + memory.max + + A read-write single value file which exists on non-root + cgroups. The default is "max". + + Memory usage hard limit. This is the final protection + mechanism. If a cgroup's memory usage reaches this limit and + can't be reduced, the OOM killer is invoked in the cgroup. + Under certain circumstances, the usage may go over the limit + temporarily. + + This is the ultimate protection mechanism. As long as the + high limit is used and monitored properly, this limit's + utility is limited to providing the final safety net. + + memory.events + + A read-only flat-keyed file which exists on non-root cgroups. + The following entries are defined. Unless specified + otherwise, a value change in this file generates a file + modified event. + + low + + The number of times the cgroup is reclaimed due to + high memory pressure even though its usage is under + the low boundary. This usually indicates that the low + boundary is over-committed. + + high + + The number of times processes of the cgroup are + throttled and routed to perform direct memory reclaim + because the high memory boundary was exceeded. For a + cgroup whose memory usage is capped by the high limit + rather than global memory pressure, this event's + occurrences are expected. + + max + + The number of times the cgroup's memory usage was + about to go over the max boundary. If direct reclaim + fails to bring it down, the OOM killer is invoked. + + oom + + The number of times the OOM killer has been invoked in + the cgroup. This may not exactly match the number of + processes killed but should generally be close. + + +5-2-2. General Usage + +"memory.high" is the main mechanism to control memory usage. +Over-committing on high limit (sum of high limits > available memory) +and letting global memory pressure to distribute memory according to +usage is a viable strategy. + +Because breach of the high limit doesn't trigger the OOM killer but +throttles the offending cgroup, a management agent has ample +opportunities to monitor and take appropriate actions such as granting +more memory or terminating the workload. + +Determining whether a cgroup has enough memory is not trivial as +memory usage doesn't indicate whether the workload can benefit from +more memory. For example, a workload which writes data received from +network to a file can use all available memory but can also operate as +performant with a small amount of memory. A measure of memory +pressure - how much the workload is being impacted due to lack of +memory - is necessary to determine whether a workload needs more +memory; unfortunately, memory pressure monitoring mechanism isn't +implemented yet. + + +5-2-3. Memory Ownership + +A memory area is charged to the cgroup which instantiated it and stays +charged to the cgroup until the area is released. Migrating a process +to a different cgroup doesn't move the memory usages that it +instantiated while in the previous cgroup to the new cgroup. + +A memory area may be used by processes belonging to different cgroups. +To which cgroup the area will be charged is in-deterministic; however, +over time, the memory area is likely to end up in a cgroup which has +enough memory allowance to avoid high reclaim pressure. + +If a cgroup sweeps a considerable amount of memory which is expected +to be accessed repeatedly by other cgroups, it may make sense to use +POSIX_FADV_DONTNEED to relinquish the ownership of memory areas +belonging to the affected files to ensure correct memory ownership. + + +5-3. IO + +The "io" controller regulates the distribution of IO resources. This +controller implements both weight based and absolute bandwidth or IOPS +limit distribution; however, weight based distribution is available +only if cfq-iosched is in use and neither scheme is available for +blk-mq devices. + + +5-3-1. IO Interface Files + + io.stat + + A read-only nested-keyed file which exists on non-root + cgroups. + + Lines are keyed by $MAJ:$MIN device numbers and not ordered. + The following nested keys are defined. + + rbytes Bytes read + wbytes Bytes written + rios Number of read IOs + wios Number of write IOs + + An example read output follows. + + 8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 + 8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 + + io.weight + + A read-write flat-keyed file which exists on non-root cgroups. + The default is "default 100". + + The first line is the default weight applied to devices + without specific override. The rest are overrides keyed by + $MAJ:$MIN device numbers and not ordered. The weights are in + the range [1, 10000] and specifies the relative amount IO time + the cgroup can use in relation to its siblings. + + The default weight can be updated by writing either "default + $WEIGHT" or simply "$WEIGHT". Overrides can be set by writing + "$MAJ:$MIN $WEIGHT" and unset by writing "$MAJ:$MIN default". + + An example read output follows. + + default 100 + 8:16 200 + 8:0 50 + + io.max + + A read-write nested-keyed file which exists on non-root + cgroups. + + BPS and IOPS based IO limit. Lines are keyed by $MAJ:$MIN + device numbers and not ordered. The following nested keys are + defined. + + rbps Max read bytes per second + wbps Max write bytes per second + riops Max read IO operations per second + wiops Max write IO operations per second + + When writing, any number of nested key-value pairs can be + specified in any order. "max" can be specified as the value + to remove a specific limit. If the same key is specified + multiple times, the outcome is undefined. + + BPS and IOPS are measured in each IO direction and IOs are + delayed if limit is reached. Temporary bursts are allowed. + + Setting read limit at 2M BPS and write at 120 IOPS for 8:16. + + echo "8:16 rbps=2097152 wiops=120" > io.max + + Reading returns the following. + + 8:16 rbps=2097152 wbps=max riops=max wiops=120 + + Write IOPS limit can be removed by writing the following. + + echo "8:16 wiops=max" > io.max + + Reading now returns the following. + + 8:16 rbps=2097152 wbps=max riops=max wiops=max + + +5-3-2. Writeback + +Page cache is dirtied through buffered writes and shared mmaps and +written asynchronously to the backing filesystem by the writeback +mechanism. Writeback sits between the memory and IO domains and +regulates the proportion of dirty memory by balancing dirtying and +write IOs. + +The io controller, in conjunction with the memory controller, +implements control of page cache writeback IOs. The memory controller +defines the memory domain that dirty memory ratio is calculated and +maintained for and the io controller defines the io domain which +writes out dirty pages for the memory domain. Both system-wide and +per-cgroup dirty memory states are examined and the more restrictive +of the two is enforced. + +cgroup writeback requires explicit support from the underlying +filesystem. Currently, cgroup writeback is implemented on ext2, ext4 +and btrfs. On other filesystems, all writeback IOs are attributed to +the root cgroup. + +There are inherent differences in memory and writeback management +which affects how cgroup ownership is tracked. Memory is tracked per +page while writeback per inode. For the purpose of writeback, an +inode is assigned to a cgroup and all IO requests to write dirty pages +from the inode are attributed to that cgroup. + +As cgroup ownership for memory is tracked per page, there can be pages +which are associated with different cgroups than the one the inode is +associated with. These are called foreign pages. The writeback +constantly keeps track of foreign pages and, if a particular foreign +cgroup becomes the majority over a certain period of time, switches +the ownership of the inode to that cgroup. + +While this model is enough for most use cases where a given inode is +mostly dirtied by a single cgroup even when the main writing cgroup +changes over time, use cases where multiple cgroups write to a single +inode simultaneously are not supported well. In such circumstances, a +significant portion of IOs are likely to be attributed incorrectly. +As memory controller assigns page ownership on the first use and +doesn't update it until the page is released, even if writeback +strictly follows page ownership, multiple cgroups dirtying overlapping +areas wouldn't work as expected. It's recommended to avoid such usage +patterns. + +The sysctl knobs which affect writeback behavior are applied to cgroup +writeback as follows. + + vm.dirty_background_ratio + vm.dirty_ratio + + These ratios apply the same to cgroup writeback with the + amount of available memory capped by limits imposed by the + memory controller and system-wide clean memory. + + vm.dirty_background_bytes + vm.dirty_bytes + + For cgroup writeback, this is calculated into ratio against + total available memory and applied the same way as + vm.dirty[_background]_ratio. + + +P. Information on Kernel Programming + +This section contains kernel programming information in the areas +where interacting with cgroup is necessary. cgroup core and +controllers are not covered. + + +P-1. Filesystem Support for Writeback + +A filesystem can support cgroup writeback by updating +address_space_operations->writepage[s]() to annotate bio's using the +following two functions. + + wbc_init_bio(@wbc, @bio) + + Should be called for each bio carrying writeback data and + associates the bio with the inode's owner cgroup. Can be + called anytime between bio allocation and submission. + + wbc_account_io(@wbc, @page, @bytes) + + Should be called for each data segment being written out. + While this function doesn't care exactly when it's called + during the writeback session, it's the easiest and most + natural to call it as data segments are added to a bio. + +With writeback bio's annotated, cgroup support can be enabled per +super_block by setting SB_I_CGROUPWB in ->s_iflags. This allows for +selective disabling of cgroup writeback support which is helpful when +certain filesystem features, e.g. journaled data mode, are +incompatible. + +wbc_init_bio() binds the specified bio to its cgroup. Depending on +the configuration, the bio may be executed at a lower priority and if +the writeback session is holding shared resources, e.g. a journal +entry, may lead to priority inversion. There is no one easy solution +for the problem. Filesystems can try to work around specific problem +cases by skipping wbc_init_bio() or using bio_associate_blkcg() +directly. + + +D. Deprecated v1 Core Features + +- Multiple hierarchies including named ones are not supported. + +- All mount options and remounting are not supported. + +- The "tasks" file is removed and "cgroup.procs" is not sorted. + +- "cgroup.clone_children" is removed. + +- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file + at the root instead. + + +R. Issues with v1 and Rationales for v2 + +R-1. Multiple Hierarchies + +cgroup v1 allowed an arbitrary number of hierarchies and each +hierarchy could host any number of controllers. While this seemed to +provide a high level of flexibility, it wasn't useful in practice. + +For example, as there is only one instance of each controller, utility +type controllers such as freezer which can be useful in all +hierarchies could only be used in one. The issue is exacerbated by +the fact that controllers couldn't be moved to another hierarchy once +hierarchies were populated. Another issue was that all controllers +bound to a hierarchy were forced to have exactly the same view of the +hierarchy. It wasn't possible to vary the granularity depending on +the specific controller. + +In practice, these issues heavily limited which controllers could be +put on the same hierarchy and most configurations resorted to putting +each controller on its own hierarchy. Only closely related ones, such +as the cpu and cpuacct controllers, made sense to be put on the same +hierarchy. This often meant that userland ended up managing multiple +similar hierarchies repeating the same steps on each hierarchy +whenever a hierarchy management operation was necessary. + +Furthermore, support for multiple hierarchies came at a steep cost. +It greatly complicated cgroup core implementation but more importantly +the support for multiple hierarchies restricted how cgroup could be +used in general and what controllers was able to do. + +There was no limit on how many hierarchies there might be, which meant +that a thread's cgroup membership couldn't be described in finite +length. The key might contain any number of entries and was unlimited +in length, which made it highly awkward to manipulate and led to +addition of controllers which existed only to identify membership, +which in turn exacerbated the original problem of proliferating number +of hierarchies. + +Also, as a controller couldn't have any expectation regarding the +topologies of hierarchies other controllers might be on, each +controller had to assume that all other controllers were attached to +completely orthogonal hierarchies. This made it impossible, or at +least very cumbersome, for controllers to cooperate with each other. + +In most use cases, putting controllers on hierarchies which are +completely orthogonal to each other isn't necessary. What usually is +called for is the ability to have differing levels of granularity +depending on the specific controller. In other words, hierarchy may +be collapsed from leaf towards root when viewed from specific +controllers. For example, a given configuration might not care about +how memory is distributed beyond a certain level while still wanting +to control how CPU cycles are distributed. + + +R-2. Thread Granularity + +cgroup v1 allowed threads of a process to belong to different cgroups. +This didn't make sense for some controllers and those controllers +ended up implementing different ways to ignore such situations but +much more importantly it blurred the line between API exposed to +individual applications and system management interface. + +Generally, in-process knowledge is available only to the process +itself; thus, unlike service-level organization of processes, +categorizing threads of a process requires active participation from +the application which owns the target process. + +cgroup v1 had an ambiguously defined delegation model which got abused +in combination with thread granularity. cgroups were delegated to +individual applications so that they can create and manage their own +sub-hierarchies and control resource distributions along them. This +effectively raised cgroup to the status of a syscall-like API exposed +to lay programs. + +First of all, cgroup has a fundamentally inadequate interface to be +exposed this way. For a process to access its own knobs, it has to +extract the path on the target hierarchy from /proc/self/cgroup, +construct the path by appending the name of the knob to the path, open +and then read and/or write to it. This is not only extremely clunky +and unusual but also inherently racy. There is no conventional way to +define transaction across the required steps and nothing can guarantee +that the process would actually be operating on its own sub-hierarchy. + +cgroup controllers implemented a number of knobs which would never be +accepted as public APIs because they were just adding control knobs to +system-management pseudo filesystem. cgroup ended up with interface +knobs which were not properly abstracted or refined and directly +revealed kernel internal details. These knobs got exposed to +individual applications through the ill-defined delegation mechanism +effectively abusing cgroup as a shortcut to implementing public APIs +without going through the required scrutiny. + +This was painful for both userland and kernel. Userland ended up with +misbehaving and poorly abstracted interfaces and kernel exposing and +locked into constructs inadvertently. + + +R-3. Competition Between Inner Nodes and Threads + +cgroup v1 allowed threads to be in any cgroups which created an +interesting problem where threads belonging to a parent cgroup and its +children cgroups competed for resources. This was nasty as two +different types of entities competed and there was no obvious way to +settle it. Different controllers did different things. + +The cpu controller considered threads and cgroups as equivalents and +mapped nice levels to cgroup weights. This worked for some cases but +fell flat when children wanted to be allocated specific ratios of CPU +cycles and the number of internal threads fluctuated - the ratios +constantly changed as the number of competing entities fluctuated. +There also were other issues. The mapping from nice level to weight +wasn't obvious or universal, and there were various other knobs which +simply weren't available for threads. + +The io controller implicitly created a hidden leaf node for each +cgroup to host the threads. The hidden leaf had its own copies of all +the knobs with "leaf_" prefixed. While this allowed equivalent +control over internal threads, it was with serious drawbacks. It +always added an extra layer of nesting which wouldn't be necessary +otherwise, made the interface messy and significantly complicated the +implementation. + +The memory controller didn't have a way to control what happened +between internal tasks and child cgroups and the behavior was not +clearly defined. There were attempts to add ad-hoc behaviors and +knobs to tailor the behavior to specific workloads which would have +led to problems extremely difficult to resolve in the long term. + +Multiple controllers struggled with internal tasks and came up with +different ways to deal with it; unfortunately, all the approaches were +severely flawed and, furthermore, the widely different behaviors +made cgroup as a whole highly inconsistent. + +This clearly is a problem which needs to be addressed from cgroup core +in a uniform way. + + +R-4. Other Interface Issues + +cgroup v1 grew without oversight and developed a large number of +idiosyncrasies and inconsistencies. One issue on the cgroup core side +was how an empty cgroup was notified - a userland helper binary was +forked and executed for each event. The event delivery wasn't +recursive or delegatable. The limitations of the mechanism also led +to in-kernel event delivery filtering mechanism further complicating +the interface. + +Controller interfaces were problematic too. An extreme example is +controllers completely ignoring hierarchical organization and treating +all cgroups as if they were all located directly under the root +cgroup. Some controllers exposed a large amount of inconsistent +implementation details to userland. + +There also was no consistency across controllers. When a new cgroup +was created, some controllers defaulted to not imposing extra +restrictions while others disallowed any resource usage until +explicitly configured. Configuration knobs for the same type of +control used widely differing naming schemes and formats. Statistics +and information knobs were named arbitrarily and used different +formats and units even in the same controller. + +cgroup v2 establishes common conventions where appropriate and updates +controllers so that they expose minimal and consistent interfaces. + + +R-5. Controller Issues and Remedies + +R-5-1. Memory + +The original lower boundary, the soft limit, is defined as a limit +that is per default unset. As a result, the set of cgroups that +global reclaim prefers is opt-in, rather than opt-out. The costs for +optimizing these mostly negative lookups are so high that the +implementation, despite its enormous size, does not even provide the +basic desirable behavior. First off, the soft limit has no +hierarchical meaning. All configured groups are organized in a global +rbtree and treated like equal peers, regardless where they are located +in the hierarchy. This makes subtree delegation impossible. Second, +the soft limit reclaim pass is so aggressive that it not just +introduces high allocation latencies into the system, but also impacts +system performance due to overreclaim, to the point where the feature +becomes self-defeating. + +The memory.low boundary on the other hand is a top-down allocated +reserve. A cgroup enjoys reclaim protection when it and all its +ancestors are below their low boundaries, which makes delegation of +subtrees possible. Secondly, new cgroups have no reserve per default +and in the common case most cgroups are eligible for the preferred +reclaim pass. This allows the new low boundary to be efficiently +implemented with just a minor addition to the generic reclaim code, +without the need for out-of-band data structures and reclaim passes. +Because the generic reclaim code considers all cgroups except for the +ones running low in the preferred first reclaim pass, overreclaim of +individual groups is eliminated as well, resulting in much better +overall workload performance. + +The original high boundary, the hard limit, is defined as a strict +limit that can not budge, even if the OOM killer has to be called. +But this generally goes against the goal of making the most out of the +available memory. The memory consumption of workloads varies during +runtime, and that requires users to overcommit. But doing that with a +strict upper limit requires either a fairly accurate prediction of the +working set size or adding slack to the limit. Since working set size +estimation is hard and error prone, and getting it wrong results in +OOM kills, most users tend to err on the side of a looser limit and +end up wasting precious resources. + +The memory.high boundary on the other hand can be set much more +conservatively. When hit, it throttles allocations by forcing them +into direct reclaim to work off the excess, but it never invokes the +OOM killer. As a result, a high boundary that is chosen too +aggressively will not terminate the processes, but instead it will +lead to gradual performance degradation. The user can monitor this +and make corrections until the minimal memory footprint that still +gives acceptable performance is found. + +In extreme cases, with many concurrent allocations and a complete +breakdown of reclaim progress within the group, the high boundary can +be exceeded. But even then it's mostly better to satisfy the +allocation from the slack available in other groups or the rest of the +system than killing the group. Otherwise, memory.max is there to +limit this type of spillover and ultimately contain buggy or even +malicious applications. From a40aba04f22a274b0f6e8f6b2e051704997208b4 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 11 Aug 2015 12:34:45 +0530 Subject: [PATCH 07/61] usb: gadget: f_mtp: simplify ptp NULL pointer check Simplify MTP/PTP dev NULL pointer check introduced in Change-Id: Ic44a699d96df2e13467fc081bff88b97dcc5afb2 and restrict it to MTP/PTP function level only. Return ERR_PTR() instead of NULL from mtp_ptp function to skip doing NULL pointer checks all the way up to configfs.c Fixes: Change-Id: Ic44a699d96df2e13467fc081bff88b97dcc5afb2 ("usb: gadget: fix NULL ptr derefer while symlinking PTP func") Change-Id: Iab7c55089c115550c3506f6cca960a07ae52713d Signed-off-by: Amit Pundir --- drivers/usb/gadget/configfs.c | 5 ----- drivers/usb/gadget/function/f_mtp.c | 2 +- drivers/usb/gadget/functions.c | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 8df96cb3bb58..54849fe9cb01 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -425,11 +425,6 @@ static int config_usb_cfg_link( } f = usb_get_function(fi); - if (f == NULL) { - /* Are we trying to symlink PTP without MTP function? */ - ret = -EINVAL; /* Invalid Configuration */ - goto out; - } if (IS_ERR(f)) { ret = PTR_ERR(f); goto out; diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 74195be9b054..b9e3e97c57c4 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -1498,7 +1498,7 @@ struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi, pr_err("\t2: Create MTP function\n"); pr_err("\t3: Create and symlink PTP function" " with a gadget configuration\n"); - return NULL; + return ERR_PTR(-EINVAL); /* Invalid Configuration */ } dev = fi_mtp->dev; diff --git a/drivers/usb/gadget/functions.c b/drivers/usb/gadget/functions.c index 389c1f3d0fee..b13f839e7368 100644 --- a/drivers/usb/gadget/functions.c +++ b/drivers/usb/gadget/functions.c @@ -58,7 +58,7 @@ struct usb_function *usb_get_function(struct usb_function_instance *fi) struct usb_function *f; f = fi->fd->alloc_func(fi); - if ((f == NULL) || IS_ERR(f)) + if (IS_ERR(f)) return f; f->fi = fi; return f; From d4a5b037e0af89c315c16aeee941799ae0ff9ced Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 15 Nov 2016 11:58:52 +0530 Subject: [PATCH 08/61] cpufreq: sched: Fix kernel crash on accessing sysfs file If the cpufreq driver hasn't set the CPUFREQ_HAVE_GOVERNOR_PER_POLICY flag, then the kernel will crash on accessing sysfs files for the sched governor. CPUFreq governors we can have the governor specific sysfs files in two places: A. /sys/devices/system/cpu/cpuX/cpufreq/ B. /sys/devices/system/cpu/cpufreq/ The case A. is for governor per policy case, where we can control the governor tunables for each policy separately. The case B. is for system wide tunable values. The schedfreq governor only implements the case A. and not B. The sysfs files in case B will still be present in /sys/devices/system/cpu/cpufreq/, but accessing them will crash kernel as the governor doesn't support that. Moreover the sched governor is pretty new and will be used only for the ARM platforms and there is no need to support the case B at all. Hence use policy->kobj instead of get_governor_parent_kobj(), so that we always create the sysfs files in path A. Signed-off-by: Viresh Kumar --- kernel/sched/cpufreq_sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index f6f9b9b3a4a8..d751bc2d0d6e 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -289,7 +289,7 @@ static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) pr_debug("%s: throttle threshold = %u [ns]\n", __func__, gd->up_throttle_nsec); - rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr()); + rc = sysfs_create_group(&policy->kobj, get_sysfs_attr()); if (rc) { pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc); goto err; @@ -332,7 +332,7 @@ static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy) put_task_struct(gd->task); } - sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr()); + sysfs_remove_group(&policy->kobj, get_sysfs_attr()); policy->governor_data = NULL; From 3ad8b04226b44c86496fc1f3645d5bd05c1eab24 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Fri, 11 Nov 2016 01:10:04 -0500 Subject: [PATCH 09/61] ANDROID: usb: gadget: function: cleanup: Add blank line after declaration Fix warning generated by checkpatch.pl: Missing a blank line after declarations Change-Id: Id129bb8cc8fa37c67a647e2e5996bb2817020e65 Signed-off-by: Anson Jacob --- drivers/usb/gadget/function/f_accessory.c | 2 ++ drivers/usb/gadget/function/f_audio_source.c | 1 + drivers/usb/gadget/function/f_mtp.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index 2ca16a577542..9d3ec0e37475 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -211,6 +211,7 @@ static inline struct acc_dev *func_to_dev(struct usb_function *f) static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; @@ -1021,6 +1022,7 @@ acc_function_unbind(struct usb_configuration *c, struct usb_function *f) static void acc_start_work(struct work_struct *data) { char *envp[2] = { "ACCESSORY=START", NULL }; + kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp); } diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c index 2489a5fa2685..db7903d19c43 100644 --- a/drivers/usb/gadget/function/f_audio_source.c +++ b/drivers/usb/gadget/function/f_audio_source.c @@ -310,6 +310,7 @@ static struct device_attribute *audio_source_function_attributes[] = { static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index b9e3e97c57c4..d8b69af6e335 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -361,6 +361,7 @@ static inline struct mtp_dev *func_to_mtp(struct usb_function *f) static struct usb_request *mtp_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; @@ -1121,6 +1122,7 @@ static int mtp_ctrlrequest(struct usb_composite_dev *cdev, } else if (ctrl->bRequest == MTP_REQ_GET_DEVICE_STATUS && w_index == 0 && w_value == 0) { struct mtp_device_status *status = cdev->req->buf; + status->wLength = __constant_cpu_to_le16(sizeof(*status)); @@ -1143,6 +1145,7 @@ static int mtp_ctrlrequest(struct usb_composite_dev *cdev, /* respond with data transfer or status phase? */ if (value >= 0) { int rc; + cdev->req->zero = value < w_length; cdev->req->length = value; rc = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC); @@ -1378,6 +1381,7 @@ static struct mtp_instance *to_mtp_instance(struct config_item *item) static void mtp_attr_release(struct config_item *item) { struct mtp_instance *fi_mtp = to_mtp_instance(item); + usb_put_function_instance(&fi_mtp->func_inst); } From 8d8394ca6f6e60cf03a1a4b016a130df506beeb1 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Fri, 26 Feb 2016 19:00:03 +0000 Subject: [PATCH 10/61] BACKPORT: staging: goldfish: audio: add devicetree bindings Introduce devicetree bindings to the Goldfish staging audio driver. Signed-off-by: Greg Hackmann Signed-off-by: Jin Qian Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 283ded10312a3b75e384313f6f529ec2c636cf2c) Change-Id: Ib75d3a4cac7353084a8da18a96fb298a759bacc0 --- .../devicetree/bindings/goldfish/audio.txt | 17 +++++++++++++++++ drivers/staging/goldfish/goldfish_audio.c | 9 ++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/goldfish/audio.txt diff --git a/Documentation/devicetree/bindings/goldfish/audio.txt b/Documentation/devicetree/bindings/goldfish/audio.txt new file mode 100644 index 000000000000..d043fda433ba --- /dev/null +++ b/Documentation/devicetree/bindings/goldfish/audio.txt @@ -0,0 +1,17 @@ +Android Goldfish Audio + +Android goldfish audio device generated by android emulator. + +Required properties: + +- compatible : should contain "google,goldfish-audio" to match emulator +- reg : +- interrupts : + +Example: + + goldfish_audio@9030000 { + compatible = "google,goldfish-audio"; + reg = <0x9030000 0x100>; + interrupts = <0x4>; + }; diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c index b0927e49d0a8..8841680ced9f 100644 --- a/drivers/staging/goldfish/goldfish_audio.c +++ b/drivers/staging/goldfish/goldfish_audio.c @@ -344,11 +344,18 @@ static int goldfish_audio_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id goldfish_audio_of_match[] = { + { .compatible = "google,goldfish-audio", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_audio_of_match); + static struct platform_driver goldfish_audio_driver = { .probe = goldfish_audio_probe, .remove = goldfish_audio_remove, .driver = { - .name = "goldfish_audio" + .name = "goldfish_audio", + .of_match_table = goldfish_audio_of_match, } }; From 8e5837d271b381c2d3a5b4880ab31331d851f90c Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Fri, 26 Feb 2016 18:45:30 +0000 Subject: [PATCH 11/61] BACKPORT: power: goldfish_battery: add devicetree bindings Add device tree bindings to the Goldfish virtual platform battery drivers. Signed-off-by: Greg Hackmann Signed-off-by: Jin Qian Signed-off-by: Alan Cox Signed-off-by: Sebastian Reichel (cherry picked from commit 65d687a7b7d6f27e4306fe8cc8a1ca66a1a760f6) Change-Id: If947ea3341ff0cb713c56e14d18d51a3f5912b64 --- .../devicetree/bindings/goldfish/battery.txt | 17 +++++++++++++++++ drivers/power/goldfish_battery.c | 9 ++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/goldfish/battery.txt diff --git a/Documentation/devicetree/bindings/goldfish/battery.txt b/Documentation/devicetree/bindings/goldfish/battery.txt new file mode 100644 index 000000000000..4fb613933214 --- /dev/null +++ b/Documentation/devicetree/bindings/goldfish/battery.txt @@ -0,0 +1,17 @@ +Android Goldfish Battery + +Android goldfish battery device generated by android emulator. + +Required properties: + +- compatible : should contain "google,goldfish-battery" to match emulator +- reg : +- interrupts : + +Example: + + goldfish_battery@9020000 { + compatible = "google,goldfish-battery"; + reg = <0x9020000 0x1000>; + interrupts = <0x3>; + }; diff --git a/drivers/power/goldfish_battery.c b/drivers/power/goldfish_battery.c index a50bb988c69a..7510796e190c 100644 --- a/drivers/power/goldfish_battery.c +++ b/drivers/power/goldfish_battery.c @@ -227,11 +227,18 @@ static int goldfish_battery_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id goldfish_battery_of_match[] = { + { .compatible = "google,goldfish-battery", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_battery_of_match); + static struct platform_driver goldfish_battery_device = { .probe = goldfish_battery_probe, .remove = goldfish_battery_remove, .driver = { - .name = "goldfish-battery" + .name = "goldfish-battery", + .of_match_table = goldfish_battery_of_match, } }; module_platform_driver(goldfish_battery_device); From 5f07edb06fc080f495924a8d03410dab207e2dbf Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Fri, 26 Feb 2016 12:05:02 -0800 Subject: [PATCH 12/61] BACKPORT: Input: goldfish_events - add devicetree bindings Add device tree bindings to the Goldfish virtual platform event driver. Signed-off-by: Greg Hackmann Signed-off-by: Jin Qian Signed-off-by: Alan Signed-off-by: Dmitry Torokhov (cherry picked from commit 8c5dc5a1ada2b79259e55a4bd150135d23529c6a) Change-Id: I677d8e0d92294f53f7cc5a79300b6462b65e8aad --- .../devicetree/bindings/goldfish/events.txt | 17 +++++++++++++++++ drivers/input/keyboard/goldfish_events.c | 7 +++++++ 2 files changed, 24 insertions(+) create mode 100644 Documentation/devicetree/bindings/goldfish/events.txt diff --git a/Documentation/devicetree/bindings/goldfish/events.txt b/Documentation/devicetree/bindings/goldfish/events.txt new file mode 100644 index 000000000000..5babf46317a4 --- /dev/null +++ b/Documentation/devicetree/bindings/goldfish/events.txt @@ -0,0 +1,17 @@ +Android Goldfish Events Keypad + +Android goldfish events keypad device generated by android emulator. + +Required properties: + +- compatible : should contain "google,goldfish-events-keypad" to match emulator +- reg : +- interrupts : + +Example: + + goldfish-events@9040000 { + compatible = "google,goldfish-events-keypad"; + reg = <0x9040000 0x1000>; + interrupts = <0x5>; + }; diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c index 907e4e278fce..b11d218604a7 100644 --- a/drivers/input/keyboard/goldfish_events.c +++ b/drivers/input/keyboard/goldfish_events.c @@ -178,10 +178,17 @@ static int events_probe(struct platform_device *pdev) return 0; } +static const struct of_device_id goldfish_events_of_match[] = { + { .compatible = "google,goldfish-events-keypad", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_events_of_match); + static struct platform_driver events_driver = { .probe = events_probe, .driver = { .name = "goldfish_events", + .of_match_table = goldfish_events_of_match, }, }; From 44a4608e6a9848af3e641ea5cb63b5353eda22e2 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Fri, 26 Feb 2016 19:01:05 +0000 Subject: [PATCH 13/61] BACKPORT: tty: goldfish: support platform_device with id -1 When the platform bus sets the platform_device id to -1 (PLATFORM_DEVID_NONE), use an incrementing counter for the TTY index instead Signed-off-by: Greg Hackmann Signed-off-by: Jin Qian Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 465893e18878e119d8d0255439fad8debbd646fd) Change-Id: Ifec5ee9d71c7c076e59bb7af77c0184d1b1383cb --- drivers/tty/goldfish.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index 0f82c0b146f6..4356a23c588f 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -68,8 +68,7 @@ static void goldfish_tty_do_write(int line, const char *buf, unsigned count) static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) { - struct platform_device *pdev = dev_id; - struct goldfish_tty *qtty = &goldfish_ttys[pdev->id]; + struct goldfish_tty *qtty = dev_id; void __iomem *base = qtty->base; unsigned long irq_flags; unsigned char *buf; @@ -233,6 +232,7 @@ static int goldfish_tty_probe(struct platform_device *pdev) struct device *ttydev; void __iomem *base; u32 irq; + unsigned int line; r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (r == NULL) @@ -248,10 +248,16 @@ static int goldfish_tty_probe(struct platform_device *pdev) irq = r->start; - if (pdev->id >= goldfish_tty_line_count) - goto err_unmap; - mutex_lock(&goldfish_tty_lock); + + if (pdev->id == PLATFORM_DEVID_NONE) + line = goldfish_tty_current_line_count; + else + line = pdev->id; + + if (line >= goldfish_tty_line_count) + goto err_create_driver_failed; + if (goldfish_tty_current_line_count == 0) { ret = goldfish_tty_create_driver(); if (ret) @@ -259,7 +265,7 @@ static int goldfish_tty_probe(struct platform_device *pdev) } goldfish_tty_current_line_count++; - qtty = &goldfish_ttys[pdev->id]; + qtty = &goldfish_ttys[line]; spin_lock_init(&qtty->lock); tty_port_init(&qtty->port); qtty->port.ops = &goldfish_port_ops; @@ -269,13 +275,13 @@ static int goldfish_tty_probe(struct platform_device *pdev) writel(GOLDFISH_TTY_CMD_INT_DISABLE, base + GOLDFISH_TTY_CMD); ret = request_irq(irq, goldfish_tty_interrupt, IRQF_SHARED, - "goldfish_tty", pdev); + "goldfish_tty", qtty); if (ret) goto err_request_irq_failed; ttydev = tty_port_register_device(&qtty->port, goldfish_tty_driver, - pdev->id, &pdev->dev); + line, &pdev->dev); if (IS_ERR(ttydev)) { ret = PTR_ERR(ttydev); goto err_tty_register_device_failed; @@ -286,8 +292,9 @@ static int goldfish_tty_probe(struct platform_device *pdev) qtty->console.device = goldfish_tty_console_device; qtty->console.setup = goldfish_tty_console_setup; qtty->console.flags = CON_PRINTBUFFER; - qtty->console.index = pdev->id; + qtty->console.index = line; register_console(&qtty->console); + platform_set_drvdata(pdev, qtty); mutex_unlock(&goldfish_tty_lock); return 0; @@ -307,13 +314,12 @@ static int goldfish_tty_probe(struct platform_device *pdev) static int goldfish_tty_remove(struct platform_device *pdev) { - struct goldfish_tty *qtty; + struct goldfish_tty *qtty = platform_get_drvdata(pdev); mutex_lock(&goldfish_tty_lock); - qtty = &goldfish_ttys[pdev->id]; unregister_console(&qtty->console); - tty_unregister_device(goldfish_tty_driver, pdev->id); + tty_unregister_device(goldfish_tty_driver, qtty->console.index); iounmap(qtty->base); qtty->base = NULL; free_irq(qtty->irq, pdev); From 7912004ee2521cc393583958fee86cbd1f82c589 Mon Sep 17 00:00:00 2001 From: Miodrag Dinic Date: Fri, 26 Feb 2016 19:00:44 +0000 Subject: [PATCH 14/61] BACKPORT: drivers: tty: goldfish: Add device tree bindings Enable support for registering this device using the device tree. Device tree node example for registering Goldfish TTY device : goldfish_tty@1f004000 { interrupts = <0xc>; reg = <0x1f004000 0x1000>; compatible = "google,goldfish-tty"; }; Signed-off-by: Miodrag Dinic Signed-off-by: Jin Qian Signed-off-by: Alan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9b883eea26ccf043b608e398cf6a26231d44f5fb) Change-Id: Idbe1bbac4f371e2feb6730712b08b66be1188ea7 --- .../devicetree/bindings/goldfish/tty.txt | 17 +++++++++++++++++ drivers/tty/goldfish.c | 10 +++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/goldfish/tty.txt diff --git a/Documentation/devicetree/bindings/goldfish/tty.txt b/Documentation/devicetree/bindings/goldfish/tty.txt new file mode 100644 index 000000000000..82648278da77 --- /dev/null +++ b/Documentation/devicetree/bindings/goldfish/tty.txt @@ -0,0 +1,17 @@ +Android Goldfish TTY + +Android goldfish tty device generated by android emulator. + +Required properties: + +- compatible : should contain "google,goldfish-tty" to match emulator +- reg : +- interrupts : + +Example: + + goldfish_tty@1f004000 { + compatible = "google,goldfish-tty"; + reg = <0x1f004000 0x1000>; + interrupts = <0xc>; + }; diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index 4356a23c588f..1e332855b933 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -330,11 +330,19 @@ static int goldfish_tty_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id goldfish_tty_of_match[] = { + { .compatible = "google,goldfish-tty", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, goldfish_tty_of_match); + static struct platform_driver goldfish_tty_platform_driver = { .probe = goldfish_tty_probe, .remove = goldfish_tty_remove, .driver = { - .name = "goldfish_tty" + .name = "goldfish_tty", + .of_match_table = goldfish_tty_of_match, } }; From fe0c9ae17d7b4a71b920709c41a93c72cec38449 Mon Sep 17 00:00:00 2001 From: Yu Ning Date: Tue, 1 Mar 2016 23:46:10 +0000 Subject: [PATCH 15/61] BACKPORT: goldfish: Enable ACPI-based enumeration for goldfish battery Add the ACPI bindings to the goldfish battery driver. Signed-off-by: Yu Ning Signed-off-by: Jin Qian Signed-off-by: Alan Cox Signed-off-by: Sebastian Reichel (cherry picked from commit fdb2f37a54470473c6b7c9d680c4c114dd9bc434) Change-Id: I3b53481b5868b0b26848397420c9ba16a747819f --- drivers/power/goldfish_battery.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/power/goldfish_battery.c b/drivers/power/goldfish_battery.c index 7510796e190c..f5c525e4482a 100644 --- a/drivers/power/goldfish_battery.c +++ b/drivers/power/goldfish_battery.c @@ -24,6 +24,7 @@ #include #include #include +#include struct goldfish_battery_data { void __iomem *reg_base; @@ -233,12 +234,19 @@ static const struct of_device_id goldfish_battery_of_match[] = { }; MODULE_DEVICE_TABLE(of, goldfish_battery_of_match); +static const struct acpi_device_id goldfish_battery_acpi_match[] = { + { "GFSH0001", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_battery_acpi_match); + static struct platform_driver goldfish_battery_device = { .probe = goldfish_battery_probe, .remove = goldfish_battery_remove, .driver = { .name = "goldfish-battery", .of_match_table = goldfish_battery_of_match, + .acpi_match_table = ACPI_PTR(goldfish_battery_acpi_match), } }; module_platform_driver(goldfish_battery_device); From e9c20dcc91f4979024277432aea83418add7d716 Mon Sep 17 00:00:00 2001 From: Jason Hu Date: Fri, 26 Feb 2016 12:06:47 -0800 Subject: [PATCH 16/61] BACKPORT: Input: goldfish_events - enable ACPI-based enumeration for goldfish events Add ACPI binding to the goldfish events driver. Signed-off-by: Jason Hu Signed-off-by: Jin Qian Signed-off-by: Alan Signed-off-by: Dmitry Torokhov (cherry picked from commit 0581ce09fd2c976125a20791268d7206db156d2f) Change-Id: Ic3e4f1cffb111ea6c69977e63dd598e3fcb55f19 --- drivers/input/keyboard/goldfish_events.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c index b11d218604a7..f6e643b589b6 100644 --- a/drivers/input/keyboard/goldfish_events.c +++ b/drivers/input/keyboard/goldfish_events.c @@ -22,6 +22,7 @@ #include #include #include +#include enum { REG_READ = 0x00, @@ -184,11 +185,20 @@ static const struct of_device_id goldfish_events_of_match[] = { }; MODULE_DEVICE_TABLE(of, goldfish_events_of_match); +#ifdef CONFIG_ACPI +static const struct acpi_device_id goldfish_events_acpi_match[] = { + { "GFSH0002", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_events_acpi_match); +#endif + static struct platform_driver events_driver = { .probe = events_probe, .driver = { .name = "goldfish_events", .of_match_table = goldfish_events_of_match, + .acpi_match_table = ACPI_PTR(goldfish_events_acpi_match), }, }; From d5bc0499075d7b989f91179fd64177ad36ea2b5b Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Fri, 26 Feb 2016 19:00:18 +0000 Subject: [PATCH 17/61] BACKPORT: staging: goldfish: audio: fix compiliation on arm We do actually need slab.h, by luck we get it on other platforms but not always on ARM. Include it properly. Signed-off-by: Greg Hackmann Signed-off-by: Jin Qian Signed-off-by: Alan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4532150762ceb0d6fd765ebcb3ba6966fbb8faab) Change-Id: I93a0c35da40f26aaa7c253e3c0cefaa883ea3391 --- drivers/staging/goldfish/goldfish_audio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c index 8841680ced9f..4191054b878e 100644 --- a/drivers/staging/goldfish/goldfish_audio.c +++ b/drivers/staging/goldfish/goldfish_audio.c @@ -26,6 +26,7 @@ #include #include #include +#include #include MODULE_AUTHOR("Google, Inc."); From 65813792fe44bd0fcbccd5452c976d5daa4c2e2d Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 28 Oct 2013 15:33:33 -0700 Subject: [PATCH 18/61] ANDROID: video: goldfishfb: add devicetree bindings Change-Id: I5f4ba861b981edf39af537001f8ac72202927031 Signed-off-by: Greg Hackmann --- drivers/video/fbdev/goldfishfb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 7f6c9e6cfc6c..f0e651bbbd61 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -304,12 +304,19 @@ static int goldfish_fb_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id goldfish_fb_of_match[] = { + { .compatible = "google,goldfish-fb", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_fb_of_match); static struct platform_driver goldfish_fb_driver = { .probe = goldfish_fb_probe, .remove = goldfish_fb_remove, .driver = { - .name = "goldfish_fb" + .name = "goldfish_fb", + .owner = THIS_MODULE, + .of_match_table = goldfish_fb_of_match, } }; From cda03df20bb3de4544825193be42b2ca38b5742d Mon Sep 17 00:00:00 2001 From: Yu Ning Date: Thu, 12 Feb 2015 11:44:40 +0800 Subject: [PATCH 19/61] ANDROID: goldfish: Enable ACPI-based enumeration for goldfish framebuffer Follow the same way in which ACPI was enabled for goldfish battery. See commit d3be10e for details. Note that this patch also depends on commit af33cac. Change-Id: Ic63b6e7e0a4b9896ef9a9d0ed135a7796a4c1fdb Signed-off-by: Yu Ning --- drivers/video/fbdev/goldfishfb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index f0e651bbbd61..58b33e4af16e 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -26,6 +26,7 @@ #include #include #include +#include enum { FB_GET_WIDTH = 0x00, @@ -310,6 +311,12 @@ static const struct of_device_id goldfish_fb_of_match[] = { }; MODULE_DEVICE_TABLE(of, goldfish_fb_of_match); +static const struct acpi_device_id goldfish_fb_acpi_match[] = { + { "GFSH0004", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_fb_acpi_match); + static struct platform_driver goldfish_fb_driver = { .probe = goldfish_fb_probe, .remove = goldfish_fb_remove, @@ -317,6 +324,7 @@ static struct platform_driver goldfish_fb_driver = { .name = "goldfish_fb", .owner = THIS_MODULE, .of_match_table = goldfish_fb_of_match, + .acpi_match_table = ACPI_PTR(goldfish_fb_acpi_match), } }; From f20ff468b73fb64488e2313444aade46d32b256e Mon Sep 17 00:00:00 2001 From: Yu Ning Date: Tue, 31 Mar 2015 14:41:48 +0800 Subject: [PATCH 20/61] ANDROID: goldfish: Enable ACPI-based enumeration for goldfish audio Follow the same way in which ACPI was enabled for goldfish battery. See commit d3be10e for details. Change-Id: I6ffe38ebc80fb8af8322152370b9d1fd227eaf50 Signed-off-by: Yu Ning --- drivers/staging/goldfish/goldfish_audio.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c index 4191054b878e..83bbe459b93a 100644 --- a/drivers/staging/goldfish/goldfish_audio.c +++ b/drivers/staging/goldfish/goldfish_audio.c @@ -28,6 +28,7 @@ #include #include #include +#include MODULE_AUTHOR("Google, Inc."); MODULE_DESCRIPTION("Android QEMU Audio Driver"); @@ -351,12 +352,19 @@ static const struct of_device_id goldfish_audio_of_match[] = { }; MODULE_DEVICE_TABLE(of, goldfish_audio_of_match); +static const struct acpi_device_id goldfish_audio_acpi_match[] = { + { "GFSH0005", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_audio_acpi_match); + static struct platform_driver goldfish_audio_driver = { .probe = goldfish_audio_probe, .remove = goldfish_audio_remove, .driver = { .name = "goldfish_audio", .of_match_table = goldfish_audio_of_match, + .acpi_match_table = ACPI_PTR(goldfish_audio_acpi_match), } }; From 29aae1d26153764199bcc312c4bd5e27bf8def9c Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 19 Jun 2014 16:24:04 +0200 Subject: [PATCH 21/61] ANDROID: goldfish_fb: Set pixclock = 0 User space Android code identifies pixclock == 0 as a sign for emulation and will set the frame rate to 60 fps when reading this value, which is the desired outcome. Change-Id: I759bf518bf6683446bc786bf1be3cafa02dd8d42 Signed-off-by: Christoffer Dall Signed-off-by: Peter Maydell --- drivers/video/fbdev/goldfishfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 58b33e4af16e..131fee0341c3 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -235,7 +235,7 @@ static int goldfish_fb_probe(struct platform_device *pdev) fb->fb.var.activate = FB_ACTIVATE_NOW; fb->fb.var.height = readl(fb->reg_base + FB_GET_PHYS_HEIGHT); fb->fb.var.width = readl(fb->reg_base + FB_GET_PHYS_WIDTH); - fb->fb.var.pixclock = 10000; + fb->fb.var.pixclock = 0; fb->fb.var.red.offset = 11; fb->fb.var.red.length = 5; From 47a14e2dd11bf7f8b386aac8ece9d047903c8c62 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Fri, 18 Dec 2015 12:04:43 -0800 Subject: [PATCH 22/61] ANDROID: goldfish_events: no extra EV_SYN; register goldfish If we send SYN_REPORT on every single multitouch event, it breaks the multitouch. The multitouch becomes janky and having to click 2-3 times to do stuff (plus randomly activating notification bars when not clicking) If we suppress these SYN_REPORTS, multitouch will work fine, plus the events will have a protocol that looks nice. In addition, we need to register Goldfish Events as a multitouch device by issuing input_mt_init_slots, otherwise input_handle_abs_event in drivers/input/input.c will silently drop all ABS_MT_SLOT events, making it so that touches with more than 1 finger do not work properly. Signed-off-by: "Lingfeng Yang" Change-Id: Ib2350f7d1732449d246f6f0d9b7b08f02cc7c2dd (cherry picked from commit 6cf40d0a16330e1ef42bdf07d9aba6c16ee11fbc) --- drivers/input/keyboard/goldfish_events.c | 28 +++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c index f6e643b589b6..c877e56a9bd5 100644 --- a/drivers/input/keyboard/goldfish_events.c +++ b/drivers/input/keyboard/goldfish_events.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,8 @@ #include #include +#define GOLDFISH_MAX_FINGERS 5 + enum { REG_READ = 0x00, REG_SET_PAGE = 0x00, @@ -52,7 +55,21 @@ static irqreturn_t events_interrupt(int irq, void *dev_id) value = __raw_readl(edev->addr + REG_READ); input_event(edev->input, type, code, value); - input_sync(edev->input); + // Send an extra (EV_SYN, SYN_REPORT, 0x0) event + // if a key was pressed. Some keyboard device + // drivers may only send the EV_KEY event and + // not EV_SYN. + // Note that sending an extra SYN_REPORT is not + // necessary nor correct protocol with other + // devices such as touchscreens, which will send + // their own SYN_REPORT's when sufficient event + // information has been collected (e.g., for + // touchscreens, when pressure and X/Y coordinates + // have been received). Hence, we will only send + // this extra SYN_REPORT if type == EV_KEY. + if (type == EV_KEY) { + input_sync(edev->input); + } return IRQ_HANDLED; } @@ -154,6 +171,15 @@ static int events_probe(struct platform_device *pdev) input_dev->name = edev->name; input_dev->id.bustype = BUS_HOST; + // Set the Goldfish Device to be multi-touch. + // In the Ranchu kernel, there is multi-touch-specific + // code for handling ABS_MT_SLOT events. + // See drivers/input/input.c:input_handle_abs_event. + // If we do not issue input_mt_init_slots, + // the kernel will filter out needed ABS_MT_SLOT + // events when we touch the screen in more than one place, + // preventing multi-touch with more than one finger from working. + input_mt_init_slots(input_dev, GOLDFISH_MAX_FINGERS, 0); events_import_bits(edev, input_dev->evbit, EV_SYN, EV_MAX); events_import_bits(edev, input_dev->keybit, EV_KEY, KEY_MAX); From dd760c864fea2b8c91b247b43ffe39a4a6e7740a Mon Sep 17 00:00:00 2001 From: Joshua Lang Date: Fri, 17 Jun 2016 17:30:55 -0700 Subject: [PATCH 23/61] ANDROID: goldfish_audio: Clear audio read buffer status after each read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer_status field is interrupt updated. After every read request, the buffer_status read field should be reset so that on the next loop iteration we don't read a stale value and read data before the device is ready. Signed-off-by: “Joshua Lang” Change-Id: I4943d5aaada1cad9c7e59a94a87c387578dabe86 --- drivers/staging/goldfish/goldfish_audio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c index 83bbe459b93a..63b79c09b41b 100644 --- a/drivers/staging/goldfish/goldfish_audio.c +++ b/drivers/staging/goldfish/goldfish_audio.c @@ -117,6 +117,7 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf, size_t count, loff_t *pos) { struct goldfish_audio *data = fp->private_data; + unsigned long irq_flags; int length; int result = 0; @@ -130,6 +131,10 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf, wait_event_interruptible(data->wait, data->buffer_status & AUDIO_INT_READ_BUFFER_FULL); + spin_lock_irqsave(&data->lock, irq_flags); + data->buffer_status &= ~AUDIO_INT_READ_BUFFER_FULL; + spin_unlock_irqrestore(&data->lock, irq_flags); + length = AUDIO_READ(data, AUDIO_READ_BUFFER_AVAILABLE); /* copy data to user space */ From 3c2ce87867a0c580e72596022a6f380fcbb6f26c Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 7 Oct 2016 16:20:47 -0700 Subject: [PATCH 24/61] ANDROID: goldfish: add ranchu defconfigs Change-Id: I73ef1b132b6203ae921a1e1d4850eaadf58f8926 --- arch/arm/configs/ranchu_defconfig | 315 +++++++++++++++++ arch/arm64/configs/ranchu_defconfig | 311 +++++++++++++++++ arch/x86/configs/i386_ranchu_defconfig | 422 +++++++++++++++++++++++ arch/x86/configs/x86_64_ranchu_defconfig | 417 ++++++++++++++++++++++ 4 files changed, 1465 insertions(+) create mode 100644 arch/arm/configs/ranchu_defconfig create mode 100644 arch/arm64/configs/ranchu_defconfig create mode 100644 arch/x86/configs/i386_ranchu_defconfig create mode 100644 arch/x86/configs/x86_64_ranchu_defconfig diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig new file mode 100644 index 000000000000..35a90af941a4 --- /dev/null +++ b/arch/arm/configs/ranchu_defconfig @@ -0,0 +1,315 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_ARCH_MMAP_RND_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARCH_VIRT=y +CONFIG_ARM_KERNMEM_PERMS=y +CONFIG_SMP=y +CONFIG_PREEMPT=y +CONFIG_AEABI=y +CONFIG_HIGHMEM=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_CMDLINE="console=ttyAMA0" +CONFIG_VFP=y +CONFIG_NEON=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMSC911X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_AMBAKMI=y +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_VIRTUALIZATION=y diff --git a/arch/arm64/configs/ranchu_defconfig b/arch/arm64/configs/ranchu_defconfig new file mode 100644 index 000000000000..00eb346e0928 --- /dev/null +++ b/arch/arm64/configs/ranchu_defconfig @@ -0,0 +1,311 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_ARCH_MMAP_RND_BITS=24 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_VEXPRESS=y +CONFIG_NR_CPUS=4 +CONFIG_PREEMPT=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_SWP_EMULATION=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_SETEND_EMULATION=y +CONFIG_CMDLINE="console=ttyAMA0" +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_RPFILTER=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_TARGET_ECN=y +CONFIG_IP_NF_TARGET_TTL=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_AH=y +CONFIG_IP6_NF_MATCH_EUI64=y +CONFIG_IP6_NF_MATCH_FRAG=y +CONFIG_IP6_NF_MATCH_OPTS=y +CONFIG_IP6_NF_MATCH_HL=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_MATCH_MH=y +CONFIG_IP6_NF_MATCH_RT=y +CONFIG_IP6_NF_TARGET_HL=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMC91X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +CONFIG_BATTERY_GOLDFISH=y +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +# CONFIG_USB_SUPPORT is not set +CONFIG_RTC_CLASS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT2_FS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_FTRACE is not set +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DEBUG_RODATA=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig new file mode 100644 index 000000000000..b0e4e0ed4b11 --- /dev/null +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -0,0 +1,422 @@ +# CONFIG_64BIT is not set +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ARCH_MMAP_RND_BITS=16 +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_SMP=y +CONFIG_X86_BIGSMP=y +CONFIG_MCORE2=y +CONFIG_X86_GENERIC=y +CONFIG_HPET_TIMER=y +CONFIG_NR_CPUS=512 +CONFIG_PREEMPT=y +# CONFIG_X86_MCE is not set +CONFIG_X86_REBOOTFIXUPS=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_CMA=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_100=y +CONFIG_PHYSICAL_START=0x100000 +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=16 +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_NVRAM=y +CONFIG_I2C_I801=y +CONFIG_BATTERY_GOLDFISH=y +CONFIG_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +CONFIG_FB_GOLDFISH=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_SND_HDA_INTEL=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_FUSE_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_AES_586=y +CONFIG_CRYPTO_TWOFISH=y +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_PKCS7_TEST_KEY=y +# CONFIG_VIRTUALIZATION is not set +CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig new file mode 100644 index 000000000000..8dae21ed3ede --- /dev/null +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -0,0 +1,417 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ARCH_MMAP_RND_BITS=32 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_SMP=y +CONFIG_MCORE2=y +CONFIG_MAXSMP=y +CONFIG_PREEMPT=y +# CONFIG_X86_MCE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_CMA=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_100=y +CONFIG_PHYSICAL_START=0x100000 +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_IA32_EMULATION=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DMA_CMA=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_NVRAM=y +CONFIG_I2C_I801=y +CONFIG_BATTERY_GOLDFISH=y +CONFIG_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +CONFIG_FB_GOLDFISH=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_SND_HDA_INTEL=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_FUSE_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_TWOFISH=y +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_PKCS7_TEST_KEY=y +# CONFIG_VIRTUALIZATION is not set +CONFIG_CRC_T10DIF=y From 598f10d649a8d866ff39defbfc0c264dc9cd5294 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Mon, 13 Jun 2016 09:24:07 -0700 Subject: [PATCH 25/61] ANDROID: goldfish: Add goldfish sync driver This is kernel driver for controlling the Goldfish sync device on the host. It is used to maintain ordering in critical OpenGL state changes while using GPU emulation. The guest open()'s the Goldfish sync device to create a context for possibly maintaining sync timeline and fences. There is a 1:1 correspondence between such sync contexts and OpenGL contexts in the guest that need synchronization (which in turn, is anything involving swapping buffers, SurfaceFlinger, or Hardware Composer). The ioctl QUEUE_WORK takes a handle to a sync object and attempts to tell the host GPU to wait on the sync object and deal with signaling it. It possibly outputs a fence FD on which the Android systems that use them (GLConsumer, SurfaceFlinger, anything employing EGL_ANDROID_native_fence_sync) can use to wait. Design decisions and work log: - New approach is to have the guest issue ioctls that trigger host wait, and then host increments timeline. - We need the host's sync object handle and sync thread handle as the necessary information for that. - ioctl() from guest can work simultaneously with the interrupt handling for commands from host. - optimization: don't write back on timeline inc - Change spin lock design to be much more lightweight; do not call sw_sync functions or loop too long anywhere. - Send read/write commands in batches to minimize guest/host transitions. - robustness: BUG if we will overrun the cmd buffer. - robustness: return fd -1 if we cannot get an unused fd. - correctness: remove global mutex - cleanup pass done, incl. but not limited to: - removal of clear_upto and - switching to devm_*** This is part of a sequential, multi-CL change: external/qemu: https://android-review.googlesource.com/239442 <- host-side device's host interface https://android-review.googlesource.com/221593 https://android-review.googlesource.com/248563 https://android-review.googlesource.com/248564 https://android-review.googlesource.com/223032 external/qemu-android: https://android-review.googlesource.com/238790 <- host-side device implementation kernel/goldfish: https://android-review.googlesource.com/232631 <- needed https://android-review.googlesource.com/238399 <- this CL Also squash following bug fixes from android-goldfish-3.18 branch. b44d486 goldfish_sync: provide a signal to detect reboot ad1f597 goldfish_sync: fix stalls by avoiding early kfree() de208e8 [goldfish-sync] Fix possible race between kernel and user space Change-Id: I22f8a0e824717a7e751b1b0e1b461455501502b6 --- arch/x86/configs/i386_ranchu_defconfig | 1 + arch/x86/configs/x86_64_ranchu_defconfig | 1 + drivers/staging/goldfish/Kconfig | 6 + drivers/staging/goldfish/Makefile | 5 + drivers/staging/goldfish/goldfish_sync.c | 987 +++++++++++++++++++++++ 5 files changed, 1000 insertions(+) create mode 100644 drivers/staging/goldfish/goldfish_sync.c diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig index b0e4e0ed4b11..0206eb8cfb61 100644 --- a/arch/x86/configs/i386_ranchu_defconfig +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -363,6 +363,7 @@ CONFIG_SYNC=y CONFIG_SW_SYNC=y CONFIG_ION=y CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH_SYNC=y CONFIG_SND_HDA_INTEL=y CONFIG_GOLDFISH=y CONFIG_GOLDFISH_PIPE=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig index 8dae21ed3ede..dd389774bacb 100644 --- a/arch/x86/configs/x86_64_ranchu_defconfig +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -360,6 +360,7 @@ CONFIG_SYNC=y CONFIG_SW_SYNC=y CONFIG_ION=y CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH_SYNC=y CONFIG_SND_HDA_INTEL=y CONFIG_GOLDFISH=y CONFIG_GOLDFISH_PIPE=y diff --git a/drivers/staging/goldfish/Kconfig b/drivers/staging/goldfish/Kconfig index 4e094602437c..c579141a7bed 100644 --- a/drivers/staging/goldfish/Kconfig +++ b/drivers/staging/goldfish/Kconfig @@ -4,6 +4,12 @@ config GOLDFISH_AUDIO ---help--- Emulated audio channel for the Goldfish Android Virtual Device +config GOLDFISH_SYNC + tristate "Goldfish AVD Sync Driver" + depends on GOLDFISH + ---help--- + Emulated sync fences for the Goldfish Android Virtual Device + config MTD_GOLDFISH_NAND tristate "Goldfish NAND device" depends on GOLDFISH diff --git a/drivers/staging/goldfish/Makefile b/drivers/staging/goldfish/Makefile index dec34ad58162..0cf525588210 100644 --- a/drivers/staging/goldfish/Makefile +++ b/drivers/staging/goldfish/Makefile @@ -4,3 +4,8 @@ obj-$(CONFIG_GOLDFISH_AUDIO) += goldfish_audio.o obj-$(CONFIG_MTD_GOLDFISH_NAND) += goldfish_nand.o + +# and sync + +ccflags-y := -Idrivers/staging/android +obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync.o diff --git a/drivers/staging/goldfish/goldfish_sync.c b/drivers/staging/goldfish/goldfish_sync.c new file mode 100644 index 000000000000..ba8def29901e --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync.c @@ -0,0 +1,987 @@ +/* + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "sw_sync.h" +#include "sync.h" + +#define ERR(...) printk(KERN_ERR __VA_ARGS__); + +#define INFO(...) printk(KERN_INFO __VA_ARGS__); + +#define DPRINT(...) pr_debug(__VA_ARGS__); + +#define DTRACE() DPRINT("%s: enter", __func__) + +/* The Goldfish sync driver is designed to provide a interface + * between the underlying host's sync device and the kernel's + * sw_sync. + * The purpose of the device/driver is to enable lightweight + * creation and signaling of timelines and fences + * in order to synchronize the guest with host-side graphics events. + * + * Each time the interrupt trips, the driver + * may perform a sw_sync operation. + */ + +/* The operations are: */ + +/* Ready signal - used to mark when irq should lower */ +#define CMD_SYNC_READY 0 + +/* Create a new timeline. writes timeline handle */ +#define CMD_CREATE_SYNC_TIMELINE 1 + +/* Create a fence object. reads timeline handle and time argument. + * Writes fence fd to the SYNC_REG_HANDLE register. */ +#define CMD_CREATE_SYNC_FENCE 2 + +/* Increments timeline. reads timeline handle and time argument */ +#define CMD_SYNC_TIMELINE_INC 3 + +/* Destroys a timeline. reads timeline handle */ +#define CMD_DESTROY_SYNC_TIMELINE 4 + +/* Starts a wait on the host with + * the given glsync object and sync thread handle. */ +#define CMD_TRIGGER_HOST_WAIT 5 + +/* The register layout is: */ + +#define SYNC_REG_BATCH_COMMAND 0x00 /* host->guest batch commands */ +#define SYNC_REG_BATCH_GUESTCOMMAND 0x04 /* guest->host batch commands */ +#define SYNC_REG_BATCH_COMMAND_ADDR 0x08 /* communicate physical address of host->guest batch commands */ +#define SYNC_REG_BATCH_COMMAND_ADDR_HIGH 0x0c /* 64-bit part */ +#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR 0x10 /* communicate physical address of guest->host commands */ +#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH 0x14 /* 64-bit part */ +#define SYNC_REG_INIT 0x18 /* signals that the device has been probed */ + +/* There is an ioctl associated with goldfish sync driver. + * Make it conflict with ioctls that are not likely to be used + * in the emulator. + * + * '@' 00-0F linux/radeonfb.h conflict! + * '@' 00-0F drivers/video/aty/aty128fb.c conflict! + */ +#define GOLDFISH_SYNC_IOC_MAGIC '@' + +#define GOLDFISH_SYNC_IOC_QUEUE_WORK _IOWR(GOLDFISH_SYNC_IOC_MAGIC, 0, struct goldfish_sync_ioctl_info) + +/* The above definitions (command codes, register layout, ioctl definitions) + * need to be in sync with the following files: + * + * Host-side (emulator): + * external/qemu/android/emulation/goldfish_sync.h + * external/qemu-android/hw/misc/goldfish_sync.c + * + * Guest-side (system image): + * device/generic/goldfish-opengl/system/egl/goldfish_sync.h + * device/generic/goldfish/ueventd.ranchu.rc + * platform/build/target/board/generic/sepolicy/file_contexts + */ +struct goldfish_sync_hostcmd { + /* sorted for alignment */ + uint64_t handle; + uint64_t hostcmd_handle; + uint32_t cmd; + uint32_t time_arg; +}; + +struct goldfish_sync_guestcmd { + uint64_t host_command; /* uint64_t for alignment */ + uint64_t glsync_handle; + uint64_t thread_handle; + uint64_t guest_timeline_handle; +}; + +#define GOLDFISH_SYNC_MAX_CMDS 64 + +struct goldfish_sync_state { + char __iomem *reg_base; + int irq; + + /* Spinlock protects |to_do| / |to_do_end|. */ + spinlock_t lock; + /* |mutex_lock| protects all concurrent access + * to timelines for both kernel and user space. */ + struct mutex mutex_lock; + + /* Buffer holding commands issued from host. */ + struct goldfish_sync_hostcmd to_do[GOLDFISH_SYNC_MAX_CMDS]; + uint32_t to_do_end; + + /* Addresses for the reading or writing + * of individual commands. The host can directly write + * to |batch_hostcmd| (and then this driver immediately + * copies contents to |to_do|). This driver either replies + * through |batch_hostcmd| or simply issues a + * guest->host command through |batch_guestcmd|. + */ + struct goldfish_sync_hostcmd *batch_hostcmd; + struct goldfish_sync_guestcmd *batch_guestcmd; + + /* Used to give this struct itself to a work queue + * function for executing actual sync commands. */ + struct work_struct work_item; +}; + +static struct goldfish_sync_state global_sync_state[1]; + +struct goldfish_sync_timeline_obj { + struct sw_sync_timeline *sw_sync_tl; + uint32_t current_time; + /* We need to be careful about when we deallocate + * this |goldfish_sync_timeline_obj| struct. + * In order to ensure proper cleanup, we need to + * consider the triggered host-side wait that may + * still be in flight when the guest close()'s a + * goldfish_sync device's sync context fd (and + * destroys the |sw_sync_tl| field above). + * The host-side wait may raise IRQ + * and tell the kernel to increment the timeline _after_ + * the |sw_sync_tl| has already been set to null. + * + * From observations on OpenGL apps and CTS tests, this + * happens at some very low probability upon context + * destruction or process close, but it does happen + * and it needs to be handled properly. Otherwise, + * if we clean up the surrounding |goldfish_sync_timeline_obj| + * too early, any |handle| field of any host->guest command + * might not even point to a null |sw_sync_tl| field, + * but to garbage memory or even a reclaimed |sw_sync_tl|. + * If we do not count such "pending waits" and kfree the object + * immediately upon |goldfish_sync_timeline_destroy|, + * we might get mysterous RCU stalls after running a long + * time because the garbage memory that is being read + * happens to be interpretable as a |spinlock_t| struct + * that is currently in the locked state. + * + * To track when to free the |goldfish_sync_timeline_obj| + * itself, we maintain a kref. + * The kref essentially counts the timeline itself plus + * the number of waits in flight. kref_init/kref_put + * are issued on + * |goldfish_sync_timeline_create|/|goldfish_sync_timeline_destroy| + * and kref_get/kref_put are issued on + * |goldfish_sync_fence_create|/|goldfish_sync_timeline_inc|. + * + * The timeline is destroyed after reference count + * reaches zero, which would happen after + * |goldfish_sync_timeline_destroy| and all pending + * |goldfish_sync_timeline_inc|'s are fulfilled. + * + * NOTE (1): We assume that |fence_create| and + * |timeline_inc| calls are 1:1, otherwise the kref scheme + * will not work. This is a valid assumption as long + * as the host-side virtual device implementation + * does not insert any timeline increments + * that we did not trigger from here. + * + * NOTE (2): The use of kref by itself requires no locks, + * but this does not mean everything works without locks. + * Related timeline operations do require a lock of some sort, + * or at least are not proven to work without it. + * In particualr, we assume that all the operations + * done on the |kref| field above are done in contexts where + * |global_sync_state->mutex_lock| is held. Do not + * remove that lock until everything is proven to work + * without it!!! */ + struct kref kref; +}; + +/* We will call |delete_timeline_obj| when the last reference count + * of the kref is decremented. This deletes the sw_sync + * timeline object along with the wrapper itself. */ +static void delete_timeline_obj(struct kref* kref) { + struct goldfish_sync_timeline_obj* obj = + container_of(kref, struct goldfish_sync_timeline_obj, kref); + + sync_timeline_destroy(&obj->sw_sync_tl->obj); + obj->sw_sync_tl = NULL; + kfree(obj); +} + +static uint64_t gensym_ctr; +static void gensym(char *dst) +{ + sprintf(dst, "goldfish_sync:gensym:%llu", gensym_ctr); + gensym_ctr++; +} + +/* |goldfish_sync_timeline_create| assumes that |global_sync_state->mutex_lock| + * is held. */ +static struct goldfish_sync_timeline_obj* +goldfish_sync_timeline_create(void) +{ + + char timeline_name[256]; + struct sw_sync_timeline *res_sync_tl = NULL; + struct goldfish_sync_timeline_obj *res; + + DTRACE(); + + gensym(timeline_name); + + res_sync_tl = sw_sync_timeline_create(timeline_name); + if (!res_sync_tl) { + ERR("Failed to create sw_sync timeline."); + return NULL; + } + + res = kzalloc(sizeof(struct goldfish_sync_timeline_obj), GFP_KERNEL); + res->sw_sync_tl = res_sync_tl; + res->current_time = 0; + kref_init(&res->kref); + + DPRINT("new timeline_obj=0x%p", res); + return res; +} + +/* |goldfish_sync_fence_create| assumes that |global_sync_state->mutex_lock| + * is held. */ +static int +goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj, + uint32_t val) +{ + + int fd; + char fence_name[256]; + struct sync_pt *syncpt = NULL; + struct sync_fence *sync_obj = NULL; + struct sw_sync_timeline *tl; + + DTRACE(); + + if (!obj) return -1; + + tl = obj->sw_sync_tl; + + syncpt = sw_sync_pt_create(tl, val); + if (!syncpt) { + ERR("could not create sync point! " + "sync_timeline=0x%p val=%d", + tl, val); + return -1; + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + ERR("could not get unused fd for sync fence. " + "errno=%d", fd); + goto err_cleanup_pt; + } + + gensym(fence_name); + + sync_obj = sync_fence_create(fence_name, syncpt); + if (!sync_obj) { + ERR("could not create sync fence! " + "sync_timeline=0x%p val=%d sync_pt=0x%p", + tl, val, syncpt); + goto err_cleanup_fd_pt; + } + + DPRINT("installing sync fence into fd %d sync_obj=0x%p", fd, sync_obj); + sync_fence_install(sync_obj, fd); + kref_get(&obj->kref); + + return fd; + +err_cleanup_fd_pt: + put_unused_fd(fd); +err_cleanup_pt: + sync_pt_free(syncpt); + return -1; +} + +/* |goldfish_sync_timeline_inc| assumes that |global_sync_state->mutex_lock| + * is held. */ +static void +goldfish_sync_timeline_inc(struct goldfish_sync_timeline_obj *obj, uint32_t inc) +{ + DTRACE(); + /* Just give up if someone else nuked the timeline. + * Whoever it was won't care that it doesn't get signaled. */ + if (!obj) return; + + DPRINT("timeline_obj=0x%p", obj); + sw_sync_timeline_inc(obj->sw_sync_tl, inc); + DPRINT("incremented timeline. increment max_time"); + obj->current_time += inc; + + /* Here, we will end up deleting the timeline object if it + * turns out that this call was a pending increment after + * |goldfish_sync_timeline_destroy| was called. */ + kref_put(&obj->kref, delete_timeline_obj); + DPRINT("done"); +} + +/* |goldfish_sync_timeline_destroy| assumes + * that |global_sync_state->mutex_lock| is held. */ +static void +goldfish_sync_timeline_destroy(struct goldfish_sync_timeline_obj *obj) +{ + DTRACE(); + /* See description of |goldfish_sync_timeline_obj| for why we + * should not immediately destroy |obj| */ + kref_put(&obj->kref, delete_timeline_obj); +} + +static inline void +goldfish_sync_cmd_queue(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t handle, + uint32_t time_arg, + uint64_t hostcmd_handle) +{ + struct goldfish_sync_hostcmd *to_add; + + DTRACE(); + + BUG_ON(sync_state->to_do_end == GOLDFISH_SYNC_MAX_CMDS); + + to_add = &sync_state->to_do[sync_state->to_do_end]; + + to_add->cmd = cmd; + to_add->handle = handle; + to_add->time_arg = time_arg; + to_add->hostcmd_handle = hostcmd_handle; + + sync_state->to_do_end += 1; +} + +static inline void +goldfish_sync_hostcmd_reply(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t handle, + uint32_t time_arg, + uint64_t hostcmd_handle) +{ + unsigned long irq_flags; + struct goldfish_sync_hostcmd *batch_hostcmd = + sync_state->batch_hostcmd; + + DTRACE(); + + spin_lock_irqsave(&sync_state->lock, irq_flags); + + batch_hostcmd->cmd = cmd; + batch_hostcmd->handle = handle; + batch_hostcmd->time_arg = time_arg; + batch_hostcmd->hostcmd_handle = hostcmd_handle; + writel(0, sync_state->reg_base + SYNC_REG_BATCH_COMMAND); + + spin_unlock_irqrestore(&sync_state->lock, irq_flags); +} + +static inline void +goldfish_sync_send_guestcmd(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t glsync_handle, + uint64_t thread_handle, + uint64_t timeline_handle) +{ + unsigned long irq_flags; + struct goldfish_sync_guestcmd *batch_guestcmd = + sync_state->batch_guestcmd; + + DTRACE(); + + spin_lock_irqsave(&sync_state->lock, irq_flags); + + batch_guestcmd->host_command = (uint64_t)cmd; + batch_guestcmd->glsync_handle = (uint64_t)glsync_handle; + batch_guestcmd->thread_handle = (uint64_t)thread_handle; + batch_guestcmd->guest_timeline_handle = (uint64_t)timeline_handle; + writel(0, sync_state->reg_base + SYNC_REG_BATCH_GUESTCOMMAND); + + spin_unlock_irqrestore(&sync_state->lock, irq_flags); +} + +/* |goldfish_sync_interrupt| handles IRQ raises from the virtual device. + * In the context of OpenGL, this interrupt will fire whenever we need + * to signal a fence fd in the guest, with the command + * |CMD_SYNC_TIMELINE_INC|. + * However, because this function will be called in an interrupt context, + * it is necessary to do the actual work of signaling off of interrupt context. + * The shared work queue is used for this purpose. At the end when + * all pending commands are intercepted by the interrupt handler, + * we call |schedule_work|, which will later run the actual + * desired sync command in |goldfish_sync_work_item_fn|. + */ +static irqreturn_t goldfish_sync_interrupt(int irq, void *dev_id) +{ + + struct goldfish_sync_state *sync_state = dev_id; + + uint32_t nextcmd; + uint32_t command_r; + uint64_t handle_rw; + uint32_t time_r; + uint64_t hostcmd_handle_rw; + + int count = 0; + + DTRACE(); + + sync_state = dev_id; + + spin_lock(&sync_state->lock); + + for (;;) { + + readl(sync_state->reg_base + SYNC_REG_BATCH_COMMAND); + nextcmd = sync_state->batch_hostcmd->cmd; + + if (nextcmd == 0) + break; + + command_r = nextcmd; + handle_rw = sync_state->batch_hostcmd->handle; + time_r = sync_state->batch_hostcmd->time_arg; + hostcmd_handle_rw = sync_state->batch_hostcmd->hostcmd_handle; + + goldfish_sync_cmd_queue( + sync_state, + command_r, + handle_rw, + time_r, + hostcmd_handle_rw); + + count++; + } + + spin_unlock(&sync_state->lock); + + schedule_work(&sync_state->work_item); + + return (count == 0) ? IRQ_NONE : IRQ_HANDLED; +} + +/* |goldfish_sync_work_item_fn| does the actual work of servicing + * host->guest sync commands. This function is triggered whenever + * the IRQ for the goldfish sync device is raised. Once it starts + * running, it grabs the contents of the buffer containing the + * commands it needs to execute (there may be multiple, because + * our IRQ is active high and not edge triggered), and then + * runs all of them one after the other. + */ +static void goldfish_sync_work_item_fn(struct work_struct *input) +{ + + struct goldfish_sync_state *sync_state; + int sync_fence_fd; + + struct goldfish_sync_timeline_obj *timeline; + uint64_t timeline_ptr; + + uint64_t hostcmd_handle; + + uint32_t cmd; + uint64_t handle; + uint32_t time_arg; + + struct goldfish_sync_hostcmd *todo; + uint32_t todo_end; + + unsigned long irq_flags; + + struct goldfish_sync_hostcmd to_run[GOLDFISH_SYNC_MAX_CMDS]; + uint32_t i = 0; + + sync_state = container_of(input, struct goldfish_sync_state, work_item); + + mutex_lock(&sync_state->mutex_lock); + + spin_lock_irqsave(&sync_state->lock, irq_flags); { + + todo_end = sync_state->to_do_end; + + DPRINT("num sync todos: %u", sync_state->to_do_end); + + for (i = 0; i < todo_end; i++) + to_run[i] = sync_state->to_do[i]; + + /* We expect that commands will come in at a slow enough rate + * so that incoming items will not be more than + * GOLDFISH_SYNC_MAX_CMDS. + * + * This is because the way the sync device is used, + * it's only for managing buffer data transfers per frame, + * with a sequential dependency between putting things in + * to_do and taking them out. Once a set of commands is + * queued up in to_do, the user of the device waits for + * them to be processed before queuing additional commands, + * which limits the rate at which commands come in + * to the rate at which we take them out here. + * + * We also don't expect more than MAX_CMDS to be issued + * at once; there is a correspondence between + * which buffers need swapping to the (display / buffer queue) + * to particular commands, and we don't expect there to be + * enough display or buffer queues in operation at once + * to overrun GOLDFISH_SYNC_MAX_CMDS. + */ + sync_state->to_do_end = 0; + + } spin_unlock_irqrestore(&sync_state->lock, irq_flags); + + for (i = 0; i < todo_end; i++) { + DPRINT("todo index: %u", i); + + todo = &to_run[i]; + + cmd = todo->cmd; + + handle = (uint64_t)todo->handle; + time_arg = todo->time_arg; + hostcmd_handle = (uint64_t)todo->hostcmd_handle; + + DTRACE(); + + timeline = (struct goldfish_sync_timeline_obj *)(uintptr_t)handle; + + switch (cmd) { + case CMD_SYNC_READY: + break; + case CMD_CREATE_SYNC_TIMELINE: + DPRINT("exec CMD_CREATE_SYNC_TIMELINE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + timeline = goldfish_sync_timeline_create(); + timeline_ptr = (uintptr_t)timeline; + goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_TIMELINE, + timeline_ptr, + 0, + hostcmd_handle); + DPRINT("sync timeline created: %p", timeline); + break; + case CMD_CREATE_SYNC_FENCE: + DPRINT("exec CMD_CREATE_SYNC_FENCE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + sync_fence_fd = goldfish_sync_fence_create(timeline, time_arg); + goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_FENCE, + sync_fence_fd, + 0, + hostcmd_handle); + break; + case CMD_SYNC_TIMELINE_INC: + DPRINT("exec CMD_SYNC_TIMELINE_INC: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + goldfish_sync_timeline_inc(timeline, time_arg); + break; + case CMD_DESTROY_SYNC_TIMELINE: + DPRINT("exec CMD_DESTROY_SYNC_TIMELINE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + goldfish_sync_timeline_destroy(timeline); + break; + } + DPRINT("Done executing sync command"); + } + mutex_unlock(&sync_state->mutex_lock); +} + +/* Guest-side interface: file operations */ + +/* Goldfish sync context and ioctl info. + * + * When a sync context is created by open()-ing the goldfish sync device, we + * create a sync context (|goldfish_sync_context|). + * + * Currently, the only data required to track is the sync timeline itself + * along with the current time, which are all packed up in the + * |goldfish_sync_timeline_obj| field. We use a |goldfish_sync_context| + * as the filp->private_data. + * + * Next, when a sync context user requests that work be queued and a fence + * fd provided, we use the |goldfish_sync_ioctl_info| struct, which holds + * information about which host handles to touch for this particular + * queue-work operation. We need to know about the host-side sync thread + * and the particular host-side GLsync object. We also possibly write out + * a file descriptor. + */ +struct goldfish_sync_context { + struct goldfish_sync_timeline_obj *timeline; +}; + +struct goldfish_sync_ioctl_info { + uint64_t host_glsync_handle_in; + uint64_t host_syncthread_handle_in; + int fence_fd_out; +}; + +static int goldfish_sync_open(struct inode *inode, struct file *file) +{ + + struct goldfish_sync_context *sync_context; + + DTRACE(); + + mutex_lock(&global_sync_state->mutex_lock); + + sync_context = kzalloc(sizeof(struct goldfish_sync_context), GFP_KERNEL); + + if (sync_context == NULL) { + ERR("Creation of goldfish sync context failed!"); + mutex_unlock(&global_sync_state->mutex_lock); + return -ENOMEM; + } + + sync_context->timeline = NULL; + + file->private_data = sync_context; + + DPRINT("successfully create a sync context @0x%p", sync_context); + + mutex_unlock(&global_sync_state->mutex_lock); + + return 0; +} + +static int goldfish_sync_release(struct inode *inode, struct file *file) +{ + + struct goldfish_sync_context *sync_context; + + DTRACE(); + + mutex_lock(&global_sync_state->mutex_lock); + + sync_context = file->private_data; + + if (sync_context->timeline) + goldfish_sync_timeline_destroy(sync_context->timeline); + + sync_context->timeline = NULL; + + kfree(sync_context); + + mutex_unlock(&global_sync_state->mutex_lock); + + return 0; +} + +/* |goldfish_sync_ioctl| is the guest-facing interface of goldfish sync + * and is used in conjunction with eglCreateSyncKHR to queue up the + * actual work of waiting for the EGL sync command to complete, + * possibly returning a fence fd to the guest. + */ +static long goldfish_sync_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct goldfish_sync_context *sync_context_data; + struct goldfish_sync_timeline_obj *timeline; + int fd_out; + struct goldfish_sync_ioctl_info ioctl_data; + + DTRACE(); + + sync_context_data = file->private_data; + fd_out = -1; + + switch (cmd) { + case GOLDFISH_SYNC_IOC_QUEUE_WORK: + + DPRINT("exec GOLDFISH_SYNC_IOC_QUEUE_WORK"); + + mutex_lock(&global_sync_state->mutex_lock); + + if (copy_from_user(&ioctl_data, + (void __user *)arg, + sizeof(ioctl_data))) { + ERR("Failed to copy memory for ioctl_data from user."); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + if (ioctl_data.host_syncthread_handle_in == 0) { + DPRINT("Error: zero host syncthread handle!!!"); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + if (!sync_context_data->timeline) { + DPRINT("no timeline yet, create one."); + sync_context_data->timeline = goldfish_sync_timeline_create(); + DPRINT("timeline: 0x%p", &sync_context_data->timeline); + } + + timeline = sync_context_data->timeline; + fd_out = goldfish_sync_fence_create(timeline, + timeline->current_time + 1); + DPRINT("Created fence with fd %d and current time %u (timeline: 0x%p)", + fd_out, + sync_context_data->timeline->current_time + 1, + sync_context_data->timeline); + + ioctl_data.fence_fd_out = fd_out; + + if (copy_to_user((void __user *)arg, + &ioctl_data, + sizeof(ioctl_data))) { + DPRINT("Error, could not copy to user!!!"); + + sys_close(fd_out); + /* We won't be doing an increment, kref_put immediately. */ + kref_put(&timeline->kref, delete_timeline_obj); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + /* We are now about to trigger a host-side wait; + * accumulate on |pending_waits|. */ + goldfish_sync_send_guestcmd(global_sync_state, + CMD_TRIGGER_HOST_WAIT, + ioctl_data.host_glsync_handle_in, + ioctl_data.host_syncthread_handle_in, + (uint64_t)(uintptr_t)(sync_context_data->timeline)); + + mutex_unlock(&global_sync_state->mutex_lock); + return 0; + default: + return -ENOTTY; + } +} + +static const struct file_operations goldfish_sync_fops = { + .owner = THIS_MODULE, + .open = goldfish_sync_open, + .release = goldfish_sync_release, + .unlocked_ioctl = goldfish_sync_ioctl, + .compat_ioctl = goldfish_sync_ioctl, +}; + +static struct miscdevice goldfish_sync_device = { + .name = "goldfish_sync", + .fops = &goldfish_sync_fops, +}; + + +static bool setup_verify_batch_cmd_addr(struct goldfish_sync_state *sync_state, + void *batch_addr, + uint32_t addr_offset, + uint32_t addr_offset_high) +{ + uint64_t batch_addr_phys; + uint32_t batch_addr_phys_test_lo; + uint32_t batch_addr_phys_test_hi; + + if (!batch_addr) { + ERR("Could not use batch command address!"); + return false; + } + + batch_addr_phys = virt_to_phys(batch_addr); + writel((uint32_t)(batch_addr_phys), + sync_state->reg_base + addr_offset); + writel((uint32_t)(batch_addr_phys >> 32), + sync_state->reg_base + addr_offset_high); + + batch_addr_phys_test_lo = + readl(sync_state->reg_base + addr_offset); + batch_addr_phys_test_hi = + readl(sync_state->reg_base + addr_offset_high); + + if (virt_to_phys(batch_addr) != + (((uint64_t)batch_addr_phys_test_hi << 32) | + batch_addr_phys_test_lo)) { + ERR("Invalid batch command address!"); + return false; + } + + return true; +} + +int goldfish_sync_probe(struct platform_device *pdev) +{ + struct resource *ioresource; + struct goldfish_sync_state *sync_state = global_sync_state; + int status; + + DTRACE(); + + sync_state->to_do_end = 0; + + spin_lock_init(&sync_state->lock); + mutex_init(&sync_state->mutex_lock); + + platform_set_drvdata(pdev, sync_state); + + ioresource = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (ioresource == NULL) { + ERR("platform_get_resource failed"); + return -ENODEV; + } + + sync_state->reg_base = devm_ioremap(&pdev->dev, ioresource->start, PAGE_SIZE); + if (sync_state->reg_base == NULL) { + ERR("Could not ioremap"); + return -ENOMEM; + } + + sync_state->irq = platform_get_irq(pdev, 0); + if (sync_state->irq < 0) { + ERR("Could not platform_get_irq"); + return -ENODEV; + } + + status = devm_request_irq(&pdev->dev, + sync_state->irq, + goldfish_sync_interrupt, + IRQF_SHARED, + pdev->name, + sync_state); + if (status) { + ERR("request_irq failed"); + return -ENODEV; + } + + INIT_WORK(&sync_state->work_item, + goldfish_sync_work_item_fn); + + misc_register(&goldfish_sync_device); + + /* Obtain addresses for batch send/recv of commands. */ + { + struct goldfish_sync_hostcmd *batch_addr_hostcmd; + struct goldfish_sync_guestcmd *batch_addr_guestcmd; + + batch_addr_hostcmd = devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_hostcmd), + GFP_KERNEL); + batch_addr_guestcmd = devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_guestcmd), + GFP_KERNEL); + + if (!setup_verify_batch_cmd_addr(sync_state, + batch_addr_hostcmd, + SYNC_REG_BATCH_COMMAND_ADDR, + SYNC_REG_BATCH_COMMAND_ADDR_HIGH)) { + ERR("goldfish_sync: Could not setup batch command address"); + return -ENODEV; + } + + if (!setup_verify_batch_cmd_addr(sync_state, + batch_addr_guestcmd, + SYNC_REG_BATCH_GUESTCOMMAND_ADDR, + SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH)) { + ERR("goldfish_sync: Could not setup batch guest command address"); + return -ENODEV; + } + + sync_state->batch_hostcmd = batch_addr_hostcmd; + sync_state->batch_guestcmd = batch_addr_guestcmd; + } + + INFO("goldfish_sync: Initialized goldfish sync device"); + + writel(0, sync_state->reg_base + SYNC_REG_INIT); + + return 0; +} + +static int goldfish_sync_remove(struct platform_device *pdev) +{ + struct goldfish_sync_state *sync_state = global_sync_state; + + DTRACE(); + + misc_deregister(&goldfish_sync_device); + memset(sync_state, 0, sizeof(struct goldfish_sync_state)); + return 0; +} + +static const struct of_device_id goldfish_sync_of_match[] = { + { .compatible = "google,goldfish-sync", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_sync_of_match); + +static const struct acpi_device_id goldfish_sync_acpi_match[] = { + { "GFSH0006", 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(acpi, goldfish_sync_acpi_match); + +static struct platform_driver goldfish_sync = { + .probe = goldfish_sync_probe, + .remove = goldfish_sync_remove, + .driver = { + .name = "goldfish_sync", + .of_match_table = goldfish_sync_of_match, + .acpi_match_table = ACPI_PTR(goldfish_sync_acpi_match), + } +}; + +module_platform_driver(goldfish_sync); + +MODULE_AUTHOR("Google, Inc."); +MODULE_DESCRIPTION("Android QEMU Sync Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0"); + +/* This function is only to run a basic test of sync framework. + * It creates a timeline and fence object whose signal point is at 1. + * The timeline is incremented, and we use the sync framework's + * sync_fence_wait on that fence object. If everything works out, + * we should not hang in the wait and return immediately. + * There is no way to explicitly run this test yet, but it + * can be used by inserting it at the end of goldfish_sync_probe. + */ +void test_kernel_sync(void) +{ + struct goldfish_sync_timeline_obj *test_timeline; + int test_fence_fd; + + DTRACE(); + + DPRINT("test sw_sync"); + + test_timeline = goldfish_sync_timeline_create(); + DPRINT("sw_sync_timeline_create -> 0x%p", test_timeline); + + test_fence_fd = goldfish_sync_fence_create(test_timeline, 1); + DPRINT("sync_fence_create -> %d", test_fence_fd); + + DPRINT("incrementing test timeline"); + goldfish_sync_timeline_inc(test_timeline, 1); + + DPRINT("test waiting (should NOT hang)"); + sync_fence_wait( + sync_fence_fdget(test_fence_fd), -1); + + DPRINT("test waiting (afterward)"); +} From 56a56370dbc394cd36f194a108bf4e7bc1876691 Mon Sep 17 00:00:00 2001 From: Yurii Zubrytskyi Date: Wed, 4 May 2016 13:05:38 -0700 Subject: [PATCH 26/61] ANDROID: goldfish_pipe: bugfixes and performance improvements. Combine following patches from android-goldfish-3.18 branch: c0f015a [pipe] Fix the pipe driver for x64 platform + correct pages count 48e6bf5 [pipe] Use get_use_pages_fast() which is possibly faster fb20f13 [goldfish] More pages in goldfish pipe f180e6d goldfish_pipe: Return from read_write on signal and EIO 3dec3b7 [pipe] Fix a minor leak in setup_access_params_addr() Change-Id: I1041fd65d7faaec123e6cedd3dbbc5a2fbb86c4d --- drivers/platform/goldfish/goldfish_pipe.c | 123 ++++++++++++++-------- 1 file changed, 77 insertions(+), 46 deletions(-) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 3215a33cf4fe..55929ed58ea3 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -109,6 +109,16 @@ #define PIPE_WAKE_READ (1 << 1) /* pipe can now be read from */ #define PIPE_WAKE_WRITE (1 << 2) /* pipe can now be written to */ +#define MAX_PAGES_TO_GRAB 32 + +#define DEBUG 0 + +#if DEBUG +#define DPRINT(...) { printk(KERN_ERR __VA_ARGS__); } +#else +#define DPRINT(...) +#endif + struct access_params { unsigned long channel; u32 size; @@ -231,8 +241,10 @@ static int setup_access_params_addr(struct platform_device *pdev, if (valid_batchbuffer_addr(dev, aps)) { dev->aps = aps; return 0; - } else + } else { + devm_kfree(&pdev->dev, aps); return -1; + } } /* A value that will not be set by qemu emulator */ @@ -269,6 +281,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, struct goldfish_pipe *pipe = filp->private_data; struct goldfish_pipe_dev *dev = pipe->dev; unsigned long address, address_end; + struct page* pages[MAX_PAGES_TO_GRAB] = {}; int count = 0, ret = -EINVAL; /* If the emulator already closed the pipe, no need to go further */ @@ -292,45 +305,59 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, address_end = address + bufflen; while (address < address_end) { - unsigned long page_end = (address & PAGE_MASK) + PAGE_SIZE; - unsigned long next = page_end < address_end ? page_end - : address_end; - unsigned long avail = next - address; - int status, wakeBit; - - struct page *page; - - /* Either vaddr or paddr depending on the device version */ - unsigned long xaddr; + unsigned long page_end = (address & PAGE_MASK) + PAGE_SIZE; + unsigned long next, avail; + int status, wakeBit, page_i, num_contiguous_pages; + long first_page, last_page, requested_pages; + unsigned long xaddr, xaddr_prev, xaddr_i; /* - * We grab the pages on a page-by-page basis in case user - * space gives us a potentially huge buffer but the read only - * returns a small amount, then there's no need to pin that - * much memory to the process. + * Attempt to grab multiple physically contiguous pages. */ - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, address, 1, - !is_write, 0, &page, NULL); - up_read(¤t->mm->mmap_sem); - if (ret < 0) - return ret; - - if (dev->version) { - /* Device version 1 or newer (qemu-android) expects the - * physical address. */ - xaddr = page_to_phys(page) | (address & ~PAGE_MASK); - } else { - /* Device version 0 (classic emulator) expects the - * virtual address. */ - xaddr = address; + first_page = address & PAGE_MASK; + last_page = (address_end - 1) & PAGE_MASK; + requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1; + if (requested_pages > MAX_PAGES_TO_GRAB) { + requested_pages = MAX_PAGES_TO_GRAB; } + ret = get_user_pages_fast(first_page, requested_pages, + !is_write, pages); + + DPRINT("%s: requested pages: %d %d\n", __FUNCTION__, ret, requested_pages); + if (ret == 0) { + DPRINT("%s: error: (requested pages == 0) (wanted %d)\n", + __FUNCTION__, requested_pages); + return ret; + } + if (ret < 0) { + DPRINT("%s: (requested pages < 0) %d \n", + __FUNCTION__, requested_pages); + return ret; + } + + xaddr = page_to_phys(pages[0]) | (address & ~PAGE_MASK); + xaddr_prev = xaddr; + num_contiguous_pages = ret == 0 ? 0 : 1; + for (page_i = 1; page_i < ret; page_i++) { + xaddr_i = page_to_phys(pages[page_i]) | (address & ~PAGE_MASK); + if (xaddr_i == xaddr_prev + PAGE_SIZE) { + page_end += PAGE_SIZE; + xaddr_prev = xaddr_i; + num_contiguous_pages++; + } else { + DPRINT("%s: discontinuous page boundary: %d pages instead\n", + __FUNCTION__, page_i); + break; + } + } + next = page_end < address_end ? page_end : address_end; + avail = next - address; /* Now, try to transfer the bytes in the current page */ spin_lock_irqsave(&dev->lock, irq_flags); if (access_with_param(dev, - is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER, - xaddr, avail, pipe, &status)) { + is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER, + xaddr, avail, pipe, &status)) { gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL, dev->base + PIPE_REG_CHANNEL_HIGH); writel(avail, dev->base + PIPE_REG_SIZE); @@ -343,9 +370,13 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, } spin_unlock_irqrestore(&dev->lock, irq_flags); - if (status > 0 && !is_write) - set_page_dirty(page); - put_page(page); + for (page_i = 0; page_i < ret; page_i++) { + if (status > 0 && !is_write && + page_i < num_contiguous_pages) { + set_page_dirty(pages[page_i]); + } + put_page(pages[page_i]); + } if (status > 0) { /* Correct transfer */ count += status; @@ -367,7 +398,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, */ if (status != PIPE_ERROR_AGAIN) pr_info_ratelimited("goldfish_pipe: backend returned error %d on %s\n", - status, is_write ? "write" : "read"); + status, is_write ? "write" : "read"); ret = 0; break; } @@ -377,7 +408,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, * non-blocking mode, just return the error code. */ if (status != PIPE_ERROR_AGAIN || - (filp->f_flags & O_NONBLOCK) != 0) { + (filp->f_flags & O_NONBLOCK) != 0) { ret = goldfish_pipe_error_convert(status); break; } @@ -391,7 +422,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, /* Tell the emulator we're going to wait for a wake event */ goldfish_cmd(pipe, - is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ); + is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ); /* Unlock the pipe, then wait for the wake signal */ mutex_unlock(&pipe->lock); @@ -399,15 +430,11 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, while (test_bit(wakeBit, &pipe->flags)) { if (wait_event_interruptible( pipe->wake_queue, - !test_bit(wakeBit, &pipe->flags))) { - ret = -ERESTARTSYS; - break; - } + !test_bit(wakeBit, &pipe->flags))) + return -ERESTARTSYS; - if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) { - ret = -EIO; - break; - } + if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) + return -EIO; } /* Try to re-acquire the lock */ @@ -543,6 +570,8 @@ static int goldfish_pipe_open(struct inode *inode, struct file *file) pipe->dev = dev; mutex_init(&pipe->lock); + DPRINT("%s: call. pipe_dev pipe_dev=0x%lx new_pipe_addr=0x%lx file=0x%lx\n", __FUNCTION__, pipe_dev, pipe, file); + // spin lock init, write head of list, i guess init_waitqueue_head(&pipe->wake_queue); /* @@ -565,6 +594,7 @@ static int goldfish_pipe_release(struct inode *inode, struct file *filp) { struct goldfish_pipe *pipe = filp->private_data; + DPRINT("%s: call. pipe=0x%lx file=0x%lx\n", __FUNCTION__, pipe, filp); /* The guest is closing the channel, so tell the emulator right now */ goldfish_cmd(pipe, CMD_CLOSE); kfree(pipe); @@ -589,6 +619,7 @@ static struct miscdevice goldfish_pipe_device = { static int goldfish_pipe_probe(struct platform_device *pdev) { + DPRINT("%s: call. platform_device=0x%lx\n", __FUNCTION__, pdev); int err; struct resource *r; struct goldfish_pipe_dev *dev = pipe_dev; From dfd7878dd19e7ced8910f866f68212ee79bc4349 Mon Sep 17 00:00:00 2001 From: Yurii Zubrytskyi Date: Fri, 29 Jul 2016 10:51:46 -0700 Subject: [PATCH 27/61] ANDROID: goldfish_pipe: An implementation of more parallel pipe This is a driver code for a redesigned android pipe. Currently it works for x86 and x64 emulators with the following performance results: ADB push to /dev/null, Ubuntu, 400 MB file, times are for 1/10/100 parallel adb commands x86 adb push: (4.4s / 11.5s / 2m10s) -> (2.8s / 6s / 51s) x64 adb push: (7s / 15s / (too long, 6m+) -> (2.7s / 6.2s / 52s) ADB pull and push to /data/ have the same %% of speedup More importantly, I don't see any signs of slowdowns when run in parallel with Antutu benchmark, so it is definitely making much better job at multithreading. The code features dynamic host detection: old emulator gets the previous version of the pipe driver code. Combine follow patch from android-goldfish-3.10 b543285 [pipe] Increase the default pipe buffers size, make it configurable Signed-off-by: "Yurii Zubrytskyi" Change-Id: I140d506204cab6e78dd503e5a43abc8886e4ffff --- drivers/platform/goldfish/Makefile | 2 +- drivers/platform/goldfish/goldfish_pipe.c | 168 +--- drivers/platform/goldfish/goldfish_pipe.h | 91 ++ drivers/platform/goldfish/goldfish_pipe_v2.c | 888 +++++++++++++++++++ 4 files changed, 1005 insertions(+), 144 deletions(-) create mode 100644 drivers/platform/goldfish/goldfish_pipe.h create mode 100644 drivers/platform/goldfish/goldfish_pipe_v2.c diff --git a/drivers/platform/goldfish/Makefile b/drivers/platform/goldfish/Makefile index d3487125838c..e53ae2fc717b 100644 --- a/drivers/platform/goldfish/Makefile +++ b/drivers/platform/goldfish/Makefile @@ -2,4 +2,4 @@ # Makefile for Goldfish platform specific drivers # obj-$(CONFIG_GOLDFISH_BUS) += pdev_bus.o -obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe.o +obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe.o goldfish_pipe_v2.o diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 55929ed58ea3..cf7ce97e7346 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -15,51 +15,11 @@ * */ -/* This source file contains the implementation of a special device driver - * that intends to provide a *very* fast communication channel between the - * guest system and the QEMU emulator. - * - * Usage from the guest is simply the following (error handling simplified): - * - * int fd = open("/dev/qemu_pipe",O_RDWR); - * .... write() or read() through the pipe. - * - * This driver doesn't deal with the exact protocol used during the session. - * It is intended to be as simple as something like: - * - * // do this _just_ after opening the fd to connect to a specific - * // emulator service. - * const char* msg = ""; - * if (write(fd, msg, strlen(msg)+1) < 0) { - * ... could not connect to service - * close(fd); - * } - * - * // after this, simply read() and write() to communicate with the - * // service. Exact protocol details left as an exercise to the reader. - * - * This driver is very fast because it doesn't copy any data through - * intermediate buffers, since the emulator is capable of translating - * guest user addresses into host ones. - * - * Note that we must however ensure that each user page involved in the - * exchange is properly mapped during a transfer. +/* This source file contains the implementation of the legacy version of + * a goldfish pipe device driver. See goldfish_pipe_v2.c for the current + * version. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "goldfish_pipe.h" /* * IMPORTANT: The following constants must match the ones used and defined @@ -119,6 +79,14 @@ #define DPRINT(...) #endif +/* This data type models a given pipe instance */ +struct goldfish_pipe { + struct goldfish_pipe_dev *dev; + struct mutex lock; + unsigned long flags; + wait_queue_head_t wake_queue; +}; + struct access_params { unsigned long channel; u32 size; @@ -129,29 +97,6 @@ struct access_params { u32 flags; }; -/* The global driver data. Holds a reference to the i/o page used to - * communicate with the emulator, and a wake queue for blocked tasks - * waiting to be awoken. - */ -struct goldfish_pipe_dev { - spinlock_t lock; - unsigned char __iomem *base; - struct access_params *aps; - int irq; - u32 version; -}; - -static struct goldfish_pipe_dev pipe_dev[1]; - -/* This data type models a given pipe instance */ -struct goldfish_pipe { - struct goldfish_pipe_dev *dev; - struct mutex lock; - unsigned long flags; - wait_queue_head_t wake_queue; -}; - - /* Bit flags for the 'flags' field */ enum { BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */ @@ -323,7 +268,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, ret = get_user_pages_fast(first_page, requested_pages, !is_write, pages); - DPRINT("%s: requested pages: %d %d\n", __FUNCTION__, ret, requested_pages); + DPRINT("%s: requested pages: %d %d %p\n", __FUNCTION__, + ret, requested_pages, first_page); if (ret == 0) { DPRINT("%s: error: (requested pages == 0) (wanted %d)\n", __FUNCTION__, requested_pages); @@ -611,97 +557,33 @@ static const struct file_operations goldfish_pipe_fops = { .release = goldfish_pipe_release, }; -static struct miscdevice goldfish_pipe_device = { +static struct miscdevice goldfish_pipe_dev = { .minor = MISC_DYNAMIC_MINOR, .name = "goldfish_pipe", .fops = &goldfish_pipe_fops, }; -static int goldfish_pipe_probe(struct platform_device *pdev) +int goldfish_pipe_device_init_v1(struct platform_device *pdev) { - DPRINT("%s: call. platform_device=0x%lx\n", __FUNCTION__, pdev); - int err; - struct resource *r; struct goldfish_pipe_dev *dev = pipe_dev; - - /* not thread safe, but this should not happen */ - WARN_ON(dev->base != NULL); - - spin_lock_init(&dev->lock); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (r == NULL || resource_size(r) < PAGE_SIZE) { - dev_err(&pdev->dev, "can't allocate i/o page\n"); - return -EINVAL; - } - dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE); - if (dev->base == NULL) { - dev_err(&pdev->dev, "ioremap failed\n"); - return -EINVAL; - } - - r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (r == NULL) { - err = -EINVAL; - goto error; - } - dev->irq = r->start; - - err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, + int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, IRQF_SHARED, "goldfish_pipe", dev); if (err) { - dev_err(&pdev->dev, "unable to allocate IRQ\n"); - goto error; + dev_err(&pdev->dev, "unable to allocate IRQ for v1\n"); + return err; } - err = misc_register(&goldfish_pipe_device); + err = misc_register(&goldfish_pipe_dev); if (err) { - dev_err(&pdev->dev, "unable to register device\n"); - goto error; + dev_err(&pdev->dev, "unable to register v1 device\n"); + return err; } + setup_access_params_addr(pdev, dev); - - /* Although the pipe device in the classic Android emulator does not - * recognize the 'version' register, it won't treat this as an error - * either and will simply return 0, which is fine. */ - dev->version = readl(dev->base + PIPE_REG_VERSION); return 0; - -error: - dev->base = NULL; - return err; } -static int goldfish_pipe_remove(struct platform_device *pdev) +void goldfish_pipe_device_deinit_v1(struct platform_device *pdev) { - struct goldfish_pipe_dev *dev = pipe_dev; - misc_deregister(&goldfish_pipe_device); - dev->base = NULL; - return 0; + misc_deregister(&goldfish_pipe_dev); } - -static const struct acpi_device_id goldfish_pipe_acpi_match[] = { - { "GFSH0003", 0 }, - { }, -}; -MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match); - -static const struct of_device_id goldfish_pipe_of_match[] = { - { .compatible = "generic,android-pipe", }, - {}, -}; -MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match); - -static struct platform_driver goldfish_pipe = { - .probe = goldfish_pipe_probe, - .remove = goldfish_pipe_remove, - .driver = { - .name = "goldfish_pipe", - .of_match_table = goldfish_pipe_of_match, - .acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match), - } -}; - -module_platform_driver(goldfish_pipe); -MODULE_AUTHOR("David Turner "); -MODULE_LICENSE("GPL"); diff --git a/drivers/platform/goldfish/goldfish_pipe.h b/drivers/platform/goldfish/goldfish_pipe.h new file mode 100644 index 000000000000..9b75a51dba24 --- /dev/null +++ b/drivers/platform/goldfish/goldfish_pipe.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef GOLDFISH_PIPE_H +#define GOLDFISH_PIPE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Initialize the legacy version of the pipe device driver */ +int goldfish_pipe_device_init_v1(struct platform_device *pdev); + +/* Deinitialize the legacy version of the pipe device driver */ +void goldfish_pipe_device_deinit_v1(struct platform_device *pdev); + +/* Forward declarations for the device struct */ +struct goldfish_pipe; +struct goldfish_pipe_device_buffers; + +/* The global driver data. Holds a reference to the i/o page used to + * communicate with the emulator, and a wake queue for blocked tasks + * waiting to be awoken. + */ +struct goldfish_pipe_dev { + /* + * Global device spinlock. Protects the following members: + * - pipes, pipes_capacity + * - [*pipes, *pipes + pipes_capacity) - array data + * - first_signalled_pipe, + * goldfish_pipe::prev_signalled, + * goldfish_pipe::next_signalled, + * goldfish_pipe::signalled_flags - all singnalled-related fields, + * in all allocated pipes + * - open_command_params - PIPE_CMD_OPEN-related buffers + * + * It looks like a lot of different fields, but the trick is that the only + * operation that happens often is the signalled pipes array manipulation. + * That's why it's OK for now to keep the rest of the fields under the same + * lock. If we notice too much contention because of PIPE_CMD_OPEN, + * then we should add a separate lock there. + */ + spinlock_t lock; + + /* + * Array of the pipes of |pipes_capacity| elements, + * indexed by goldfish_pipe::id + */ + struct goldfish_pipe **pipes; + u32 pipes_capacity; + + /* Pointers to the buffers host uses for interaction with this driver */ + struct goldfish_pipe_dev_buffers *buffers; + + /* Head of a doubly linked list of signalled pipes */ + struct goldfish_pipe *first_signalled_pipe; + + /* Some device-specific data */ + int irq; + int version; + unsigned char __iomem *base; + + /* v1-specific access parameters */ + struct access_params *aps; +}; + +extern struct goldfish_pipe_dev pipe_dev[1]; + +#endif /* GOLDFISH_PIPE_H */ diff --git a/drivers/platform/goldfish/goldfish_pipe_v2.c b/drivers/platform/goldfish/goldfish_pipe_v2.c new file mode 100644 index 000000000000..bdb039bc7dde --- /dev/null +++ b/drivers/platform/goldfish/goldfish_pipe_v2.c @@ -0,0 +1,888 @@ +/* + * Copyright (C) 2012 Intel, Inc. + * Copyright (C) 2013 Intel, Inc. + * Copyright (C) 2014 Linaro Limited + * Copyright (C) 2011-2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* This source file contains the implementation of a special device driver + * that intends to provide a *very* fast communication channel between the + * guest system and the QEMU emulator. + * + * Usage from the guest is simply the following (error handling simplified): + * + * int fd = open("/dev/qemu_pipe",O_RDWR); + * .... write() or read() through the pipe. + * + * This driver doesn't deal with the exact protocol used during the session. + * It is intended to be as simple as something like: + * + * // do this _just_ after opening the fd to connect to a specific + * // emulator service. + * const char* msg = ""; + * if (write(fd, msg, strlen(msg)+1) < 0) { + * ... could not connect to service + * close(fd); + * } + * + * // after this, simply read() and write() to communicate with the + * // service. Exact protocol details left as an exercise to the reader. + * + * This driver is very fast because it doesn't copy any data through + * intermediate buffers, since the emulator is capable of translating + * guest user addresses into host ones. + * + * Note that we must however ensure that each user page involved in the + * exchange is properly mapped during a transfer. + */ + +#include "goldfish_pipe.h" + + +/* + * Update this when something changes in the driver's behavior so the host + * can benefit from knowing it + */ +enum { + PIPE_DRIVER_VERSION = 2, + PIPE_CURRENT_DEVICE_VERSION = 2 +}; + +/* + * IMPORTANT: The following constants must match the ones used and defined + * in external/qemu/hw/goldfish_pipe.c in the Android source tree. + */ + +/* List of bitflags returned in status of CMD_POLL command */ +enum PipePollFlags { + PIPE_POLL_IN = 1 << 0, + PIPE_POLL_OUT = 1 << 1, + PIPE_POLL_HUP = 1 << 2 +}; + +/* Possible status values used to signal errors - see goldfish_pipe_error_convert */ +enum PipeErrors { + PIPE_ERROR_INVAL = -1, + PIPE_ERROR_AGAIN = -2, + PIPE_ERROR_NOMEM = -3, + PIPE_ERROR_IO = -4 +}; + +/* Bit-flags used to signal events from the emulator */ +enum PipeWakeFlags { + PIPE_WAKE_CLOSED = 1 << 0, /* emulator closed pipe */ + PIPE_WAKE_READ = 1 << 1, /* pipe can now be read from */ + PIPE_WAKE_WRITE = 1 << 2 /* pipe can now be written to */ +}; + +/* Bit flags for the 'flags' field */ +enum PipeFlagsBits { + BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */ + BIT_WAKE_ON_WRITE = 1, /* want to be woken on writes */ + BIT_WAKE_ON_READ = 2, /* want to be woken on reads */ +}; + +enum PipeRegs { + PIPE_REG_CMD = 0, + + PIPE_REG_SIGNAL_BUFFER_HIGH = 4, + PIPE_REG_SIGNAL_BUFFER = 8, + PIPE_REG_SIGNAL_BUFFER_COUNT = 12, + + PIPE_REG_OPEN_BUFFER_HIGH = 20, + PIPE_REG_OPEN_BUFFER = 24, + + PIPE_REG_VERSION = 36, + + PIPE_REG_GET_SIGNALLED = 48, +}; + +enum PipeCmdCode { + PIPE_CMD_OPEN = 1, /* to be used by the pipe device itself */ + PIPE_CMD_CLOSE, + PIPE_CMD_POLL, + PIPE_CMD_WRITE, + PIPE_CMD_WAKE_ON_WRITE, + PIPE_CMD_READ, + PIPE_CMD_WAKE_ON_READ, + + /* + * TODO(zyy): implement a deferred read/write execution to allow parallel + * processing of pipe operations on the host. + */ + PIPE_CMD_WAKE_ON_DONE_IO, +}; + +enum { + MAX_BUFFERS_PER_COMMAND = 336, + MAX_SIGNALLED_PIPES = 64, + INITIAL_PIPES_CAPACITY = 64 +}; + +struct goldfish_pipe_dev; +struct goldfish_pipe; +struct goldfish_pipe_command; + +/* A per-pipe command structure, shared with the host */ +struct goldfish_pipe_command { + s32 cmd; /* PipeCmdCode, guest -> host */ + s32 id; /* pipe id, guest -> host */ + s32 status; /* command execution status, host -> guest */ + s32 reserved; /* to pad to 64-bit boundary */ + union { + /* Parameters for PIPE_CMD_{READ,WRITE} */ + struct { + u32 buffers_count; /* number of buffers, guest -> host */ + s32 consumed_size; /* number of consumed bytes, host -> guest */ + u64 ptrs[MAX_BUFFERS_PER_COMMAND]; /* buffer pointers, guest -> host */ + u32 sizes[MAX_BUFFERS_PER_COMMAND]; /* buffer sizes, guest -> host */ + } rw_params; + }; +}; + +/* A single signalled pipe information */ +struct signalled_pipe_buffer { + u32 id; + u32 flags; +}; + +/* Parameters for the PIPE_CMD_OPEN command */ +struct open_command_param { + u64 command_buffer_ptr; + u32 rw_params_max_count; +}; + +/* Device-level set of buffers shared with the host */ +struct goldfish_pipe_dev_buffers { + struct open_command_param open_command_params; + struct signalled_pipe_buffer signalled_pipe_buffers[MAX_SIGNALLED_PIPES]; +}; + +/* This data type models a given pipe instance */ +struct goldfish_pipe { + u32 id; /* pipe ID - index into goldfish_pipe_dev::pipes array */ + unsigned long flags; /* The wake flags pipe is waiting for + * Note: not protected with any lock, uses atomic operations + * and barriers to make it thread-safe. + */ + unsigned long signalled_flags; /* wake flags host have signalled, + * - protected by goldfish_pipe_dev::lock */ + + struct goldfish_pipe_command *command_buffer; /* A pointer to command buffer */ + + /* doubly linked list of signalled pipes, protected by goldfish_pipe_dev::lock */ + struct goldfish_pipe *prev_signalled; + struct goldfish_pipe *next_signalled; + + /* + * A pipe's own lock. Protects the following: + * - *command_buffer - makes sure a command can safely write its parameters + * to the host and read the results back. + */ + struct mutex lock; + + wait_queue_head_t wake_queue; /* A wake queue for sleeping until host signals an event */ + struct goldfish_pipe_dev *dev; /* Pointer to the parent goldfish_pipe_dev instance */ +}; + +struct goldfish_pipe_dev pipe_dev[1] = {}; + +static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd) +{ + pipe->command_buffer->cmd = cmd; + pipe->command_buffer->status = PIPE_ERROR_INVAL; /* failure by default */ + writel(pipe->id, pipe->dev->base + PIPE_REG_CMD); + return pipe->command_buffer->status; +} + +static int goldfish_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd) +{ + int status; + if (mutex_lock_interruptible(&pipe->lock)) + return PIPE_ERROR_IO; + status = goldfish_cmd_locked(pipe, cmd); + mutex_unlock(&pipe->lock); + return status; +} + +/* + * This function converts an error code returned by the emulator through + * the PIPE_REG_STATUS i/o register into a valid negative errno value. + */ +static int goldfish_pipe_error_convert(int status) +{ + switch (status) { + case PIPE_ERROR_AGAIN: + return -EAGAIN; + case PIPE_ERROR_NOMEM: + return -ENOMEM; + case PIPE_ERROR_IO: + return -EIO; + default: + return -EINVAL; + } +} + +static int pin_user_pages(unsigned long first_page, unsigned long last_page, + unsigned last_page_size, int is_write, + struct page *pages[MAX_BUFFERS_PER_COMMAND], unsigned *iter_last_page_size) +{ + int ret; + int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1; + if (requested_pages > MAX_BUFFERS_PER_COMMAND) { + requested_pages = MAX_BUFFERS_PER_COMMAND; + *iter_last_page_size = PAGE_SIZE; + } else { + *iter_last_page_size = last_page_size; + } + + ret = get_user_pages_fast( + first_page, requested_pages, !is_write, pages); + if (ret <= 0) + return -EFAULT; + if (ret < requested_pages) + *iter_last_page_size = PAGE_SIZE; + return ret; + +} + +static void release_user_pages(struct page **pages, int pages_count, + int is_write, s32 consumed_size) +{ + int i; + for (i = 0; i < pages_count; i++) { + if (!is_write && consumed_size > 0) { + set_page_dirty(pages[i]); + } + put_page(pages[i]); + } +} + +/* Populate the call parameters, merging adjacent pages together */ +static void populate_rw_params( + struct page **pages, int pages_count, + unsigned long address, unsigned long address_end, + unsigned long first_page, unsigned long last_page, + unsigned iter_last_page_size, int is_write, + struct goldfish_pipe_command *command) +{ + /* + * Process the first page separately - it's the only page that + * needs special handling for its start address. + */ + unsigned long xaddr = page_to_phys(pages[0]); + unsigned long xaddr_prev = xaddr; + int buffer_idx = 0; + int i = 1; + int size_on_page = first_page == last_page + ? (int)(address_end - address) + : (PAGE_SIZE - (address & ~PAGE_MASK)); + command->rw_params.ptrs[0] = (u64)(xaddr | (address & ~PAGE_MASK)); + command->rw_params.sizes[0] = size_on_page; + for (; i < pages_count; ++i) { + xaddr = page_to_phys(pages[i]); + size_on_page = (i == pages_count - 1) ? iter_last_page_size : PAGE_SIZE; + if (xaddr == xaddr_prev + PAGE_SIZE) { + command->rw_params.sizes[buffer_idx] += size_on_page; + } else { + ++buffer_idx; + command->rw_params.ptrs[buffer_idx] = (u64)xaddr; + command->rw_params.sizes[buffer_idx] = size_on_page; + } + xaddr_prev = xaddr; + } + command->rw_params.buffers_count = buffer_idx + 1; +} + +static int transfer_max_buffers(struct goldfish_pipe* pipe, + unsigned long address, unsigned long address_end, int is_write, + unsigned long last_page, unsigned int last_page_size, + s32* consumed_size, int* status) +{ + struct page *pages[MAX_BUFFERS_PER_COMMAND]; + unsigned long first_page = address & PAGE_MASK; + unsigned int iter_last_page_size; + int pages_count = pin_user_pages(first_page, last_page, + last_page_size, is_write, + pages, &iter_last_page_size); + if (pages_count < 0) + return pages_count; + + /* Serialize access to the pipe command buffers */ + if (mutex_lock_interruptible(&pipe->lock)) + return -ERESTARTSYS; + + populate_rw_params(pages, pages_count, address, address_end, + first_page, last_page, iter_last_page_size, is_write, + pipe->command_buffer); + + /* Transfer the data */ + *status = goldfish_cmd_locked(pipe, + is_write ? PIPE_CMD_WRITE : PIPE_CMD_READ); + + *consumed_size = pipe->command_buffer->rw_params.consumed_size; + + mutex_unlock(&pipe->lock); + + release_user_pages(pages, pages_count, is_write, *consumed_size); + + return 0; +} + +static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write) +{ + u32 wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ; + set_bit(wakeBit, &pipe->flags); + + /* Tell the emulator we're going to wait for a wake event */ + (void)goldfish_cmd(pipe, + is_write ? PIPE_CMD_WAKE_ON_WRITE : PIPE_CMD_WAKE_ON_READ); + + while (test_bit(wakeBit, &pipe->flags)) { + if (wait_event_interruptible( + pipe->wake_queue, + !test_bit(wakeBit, &pipe->flags))) + return -ERESTARTSYS; + + if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) + return -EIO; + } + + return 0; +} + +static ssize_t goldfish_pipe_read_write(struct file *filp, + char __user *buffer, size_t bufflen, int is_write) +{ + struct goldfish_pipe *pipe = filp->private_data; + int count = 0, ret = -EINVAL; + unsigned long address, address_end, last_page; + unsigned int last_page_size; + + /* If the emulator already closed the pipe, no need to go further */ + if (unlikely(test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))) + return -EIO; + /* Null reads or writes succeeds */ + if (unlikely(bufflen == 0)) + return 0; + /* Check the buffer range for access */ + if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ, + buffer, bufflen))) + return -EFAULT; + + address = (unsigned long)buffer; + address_end = address + bufflen; + last_page = (address_end - 1) & PAGE_MASK; + last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1; + + while (address < address_end) { + s32 consumed_size; + int status; + ret = transfer_max_buffers(pipe, address, address_end, is_write, + last_page, last_page_size, &consumed_size, &status); + if (ret < 0) + break; + + if (consumed_size > 0) { + /* No matter what's the status, we've transfered something */ + count += consumed_size; + address += consumed_size; + } + if (status > 0) + continue; + if (status == 0) { + /* EOF */ + ret = 0; + break; + } + if (count > 0) { + /* + * An error occured, but we already transfered + * something on one of the previous iterations. + * Just return what we already copied and log this + * err. + */ + if (status != PIPE_ERROR_AGAIN) + pr_info_ratelimited("goldfish_pipe: backend error %d on %s\n", + status, is_write ? "write" : "read"); + break; + } + + /* + * If the error is not PIPE_ERROR_AGAIN, or if we are in + * non-blocking mode, just return the error code. + */ + if (status != PIPE_ERROR_AGAIN || (filp->f_flags & O_NONBLOCK) != 0) { + ret = goldfish_pipe_error_convert(status); + break; + } + + status = wait_for_host_signal(pipe, is_write); + if (status < 0) + return status; + } + + if (count > 0) + return count; + return ret; +} + +static ssize_t goldfish_pipe_read(struct file *filp, char __user *buffer, + size_t bufflen, loff_t *ppos) +{ + return goldfish_pipe_read_write(filp, buffer, bufflen, /* is_write */ 0); +} + +static ssize_t goldfish_pipe_write(struct file *filp, + const char __user *buffer, size_t bufflen, + loff_t *ppos) +{ + return goldfish_pipe_read_write(filp, + /* cast away the const */(char __user *)buffer, bufflen, + /* is_write */ 1); +} + +static unsigned int goldfish_pipe_poll(struct file *filp, poll_table *wait) +{ + struct goldfish_pipe *pipe = filp->private_data; + unsigned int mask = 0; + int status; + + poll_wait(filp, &pipe->wake_queue, wait); + + status = goldfish_cmd(pipe, PIPE_CMD_POLL); + if (status < 0) { + return -ERESTARTSYS; + } + + if (status & PIPE_POLL_IN) + mask |= POLLIN | POLLRDNORM; + if (status & PIPE_POLL_OUT) + mask |= POLLOUT | POLLWRNORM; + if (status & PIPE_POLL_HUP) + mask |= POLLHUP; + if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) + mask |= POLLERR; + + return mask; +} + +static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev, + u32 id, u32 flags) +{ + struct goldfish_pipe *pipe; + + BUG_ON(id >= dev->pipes_capacity); + + pipe = dev->pipes[id]; + if (!pipe) + return; + pipe->signalled_flags |= flags; + + if (pipe->prev_signalled || pipe->next_signalled + || dev->first_signalled_pipe == pipe) + return; /* already in the list */ + pipe->next_signalled = dev->first_signalled_pipe; + if (dev->first_signalled_pipe) { + dev->first_signalled_pipe->prev_signalled = pipe; + } + dev->first_signalled_pipe = pipe; +} + +static void signalled_pipes_remove_locked(struct goldfish_pipe_dev *dev, + struct goldfish_pipe *pipe) { + if (pipe->prev_signalled) + pipe->prev_signalled->next_signalled = pipe->next_signalled; + if (pipe->next_signalled) + pipe->next_signalled->prev_signalled = pipe->prev_signalled; + if (pipe == dev->first_signalled_pipe) + dev->first_signalled_pipe = pipe->next_signalled; + pipe->prev_signalled = NULL; + pipe->next_signalled = NULL; +} + +static struct goldfish_pipe *signalled_pipes_pop_front(struct goldfish_pipe_dev *dev, + int *wakes) +{ + struct goldfish_pipe *pipe; + unsigned long flags; + spin_lock_irqsave(&dev->lock, flags); + + pipe = dev->first_signalled_pipe; + if (pipe) { + *wakes = pipe->signalled_flags; + pipe->signalled_flags = 0; + /* + * This is an optimized version of signalled_pipes_remove_locked() - + * we want to make it as fast as possible to wake the sleeping pipe + * operations faster + */ + dev->first_signalled_pipe = pipe->next_signalled; + if (dev->first_signalled_pipe) + dev->first_signalled_pipe->prev_signalled = NULL; + pipe->next_signalled = NULL; + } + + spin_unlock_irqrestore(&dev->lock, flags); + return pipe; +} + +static void goldfish_interrupt_task(unsigned long unused) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + /* Iterate over the signalled pipes and wake them one by one */ + struct goldfish_pipe *pipe; + int wakes; + while ((pipe = signalled_pipes_pop_front(dev, &wakes)) != NULL) { + if (wakes & PIPE_WAKE_CLOSED) { + pipe->flags = 1 << BIT_CLOSED_ON_HOST; + } else { + if (wakes & PIPE_WAKE_READ) + clear_bit(BIT_WAKE_ON_READ, &pipe->flags); + if (wakes & PIPE_WAKE_WRITE) + clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags); + } + /* + * wake_up_interruptible() implies a write barrier, so don't explicitly + * add another one here. + */ + wake_up_interruptible(&pipe->wake_queue); + } +} +DECLARE_TASKLET(goldfish_interrupt_tasklet, goldfish_interrupt_task, 0); + +/* + * The general idea of the interrupt handling: + * + * 1. device raises an interrupt if there's at least one signalled pipe + * 2. IRQ handler reads the signalled pipes and their count from the device + * 3. device writes them into a shared buffer and returns the count + * it only resets the IRQ if it has returned all signalled pipes, + * otherwise it leaves it raised, so IRQ handler will be called + * again for the next chunk + * 4. IRQ handler adds all returned pipes to the device's signalled pipes list + * 5. IRQ handler launches a tasklet to process the signalled pipes from the + * list in a separate context + */ +static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id) +{ + u32 count; + u32 i; + unsigned long flags; + struct goldfish_pipe_dev *dev = dev_id; + if (dev != pipe_dev) + return IRQ_NONE; + + /* Request the signalled pipes from the device */ + spin_lock_irqsave(&dev->lock, flags); + + count = readl(dev->base + PIPE_REG_GET_SIGNALLED); + if (count == 0) { + spin_unlock_irqrestore(&dev->lock, flags); + return IRQ_NONE; + } + if (count > MAX_SIGNALLED_PIPES) + count = MAX_SIGNALLED_PIPES; + + for (i = 0; i < count; ++i) + signalled_pipes_add_locked(dev, + dev->buffers->signalled_pipe_buffers[i].id, + dev->buffers->signalled_pipe_buffers[i].flags); + + spin_unlock_irqrestore(&dev->lock, flags); + + tasklet_schedule(&goldfish_interrupt_tasklet); + return IRQ_HANDLED; +} + +static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev) +{ + int id; + for (id = 0; id < dev->pipes_capacity; ++id) + if (!dev->pipes[id]) + return id; + + { + /* Reallocate the array */ + u32 new_capacity = 2 * dev->pipes_capacity; + struct goldfish_pipe **pipes = + kcalloc(new_capacity, sizeof(*pipes), GFP_KERNEL); + if (!pipes) + return -ENOMEM; + memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity); + kfree(dev->pipes); + dev->pipes = pipes; + id = dev->pipes_capacity; + dev->pipes_capacity = new_capacity; + } + return id; +} + +/** + * goldfish_pipe_open - open a channel to the AVD + * @inode: inode of device + * @file: file struct of opener + * + * Create a new pipe link between the emulator and the use application. + * Each new request produces a new pipe. + * + * Note: we use the pipe ID as a mux. All goldfish emulations are 32bit + * right now so this is fine. A move to 64bit will need this addressing + */ +static int goldfish_pipe_open(struct inode *inode, struct file *file) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + unsigned long flags; + int id; + int status; + + /* Allocate new pipe kernel object */ + struct goldfish_pipe *pipe = kzalloc(sizeof(*pipe), GFP_KERNEL); + if (pipe == NULL) + return -ENOMEM; + + pipe->dev = dev; + mutex_init(&pipe->lock); + init_waitqueue_head(&pipe->wake_queue); + + /* + * Command buffer needs to be allocated on its own page to make sure it is + * physically contiguous in host's address space. + */ + pipe->command_buffer = + (struct goldfish_pipe_command*)__get_free_page(GFP_KERNEL); + if (!pipe->command_buffer) { + status = -ENOMEM; + goto err_pipe; + } + + spin_lock_irqsave(&dev->lock, flags); + + id = get_free_pipe_id_locked(dev); + if (id < 0) { + status = id; + goto err_id_locked; + } + + dev->pipes[id] = pipe; + pipe->id = id; + pipe->command_buffer->id = id; + + /* Now tell the emulator we're opening a new pipe. */ + dev->buffers->open_command_params.rw_params_max_count = + MAX_BUFFERS_PER_COMMAND; + dev->buffers->open_command_params.command_buffer_ptr = + (u64)(unsigned long)__pa(pipe->command_buffer); + status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN); + spin_unlock_irqrestore(&dev->lock, flags); + if (status < 0) + goto err_cmd; + /* All is done, save the pipe into the file's private data field */ + file->private_data = pipe; + return 0; + +err_cmd: + spin_lock_irqsave(&dev->lock, flags); + dev->pipes[id] = NULL; +err_id_locked: + spin_unlock_irqrestore(&dev->lock, flags); + free_page((unsigned long)pipe->command_buffer); +err_pipe: + kfree(pipe); + return status; +} + +static int goldfish_pipe_release(struct inode *inode, struct file *filp) +{ + unsigned long flags; + struct goldfish_pipe *pipe = filp->private_data; + struct goldfish_pipe_dev *dev = pipe->dev; + + /* The guest is closing the channel, so tell the emulator right now */ + (void)goldfish_cmd(pipe, PIPE_CMD_CLOSE); + + spin_lock_irqsave(&dev->lock, flags); + dev->pipes[pipe->id] = NULL; + signalled_pipes_remove_locked(dev, pipe); + spin_unlock_irqrestore(&dev->lock, flags); + + filp->private_data = NULL; + free_page((unsigned long)pipe->command_buffer); + kfree(pipe); + return 0; +} + +static const struct file_operations goldfish_pipe_fops = { + .owner = THIS_MODULE, + .read = goldfish_pipe_read, + .write = goldfish_pipe_write, + .poll = goldfish_pipe_poll, + .open = goldfish_pipe_open, + .release = goldfish_pipe_release, +}; + +static struct miscdevice goldfish_pipe_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "goldfish_pipe", + .fops = &goldfish_pipe_fops, +}; + +static int goldfish_pipe_device_init_v2(struct platform_device *pdev) +{ + char *page; + struct goldfish_pipe_dev *dev = pipe_dev; + int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, + IRQF_SHARED, "goldfish_pipe", dev); + if (err) { + dev_err(&pdev->dev, "unable to allocate IRQ for v2\n"); + return err; + } + + err = misc_register(&goldfish_pipe_dev); + if (err) { + dev_err(&pdev->dev, "unable to register v2 device\n"); + return err; + } + + dev->first_signalled_pipe = NULL; + dev->pipes_capacity = INITIAL_PIPES_CAPACITY; + dev->pipes = kcalloc(dev->pipes_capacity, sizeof(*dev->pipes), GFP_KERNEL); + if (!dev->pipes) + return -ENOMEM; + + /* + * We're going to pass two buffers, open_command_params and + * signalled_pipe_buffers, to the host. This means each of those buffers + * needs to be contained in a single physical page. The easiest choice is + * to just allocate a page and place the buffers in it. + */ + BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE); + page = (char*)__get_free_page(GFP_KERNEL); + if (!page) { + kfree(dev->pipes); + return -ENOMEM; + } + dev->buffers = (struct goldfish_pipe_dev_buffers*)page; + + /* Send the buffer addresses to the host */ + { + u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers); + writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH); + writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_SIGNAL_BUFFER); + writel((u32)MAX_SIGNALLED_PIPES, dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT); + + paddr = __pa(&dev->buffers->open_command_params); + writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_OPEN_BUFFER_HIGH); + writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_OPEN_BUFFER); + } + return 0; +} + +static void goldfish_pipe_device_deinit_v2(struct platform_device *pdev) { + struct goldfish_pipe_dev *dev = pipe_dev; + misc_deregister(&goldfish_pipe_dev); + kfree(dev->pipes); + free_page((unsigned long)dev->buffers); +} + +static int goldfish_pipe_probe(struct platform_device *pdev) +{ + int err; + struct resource *r; + struct goldfish_pipe_dev *dev = pipe_dev; + + BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); + + /* not thread safe, but this should not happen */ + WARN_ON(dev->base != NULL); + + spin_lock_init(&dev->lock); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (r == NULL || resource_size(r) < PAGE_SIZE) { + dev_err(&pdev->dev, "can't allocate i/o page\n"); + return -EINVAL; + } + dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE); + if (dev->base == NULL) { + dev_err(&pdev->dev, "ioremap failed\n"); + return -EINVAL; + } + + r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (r == NULL) { + err = -EINVAL; + goto error; + } + dev->irq = r->start; + + /* + * Exchange the versions with the host device + * + * Note: v1 driver used to not report its version, so we write it before + * reading device version back: this allows the host implementation to + * detect the old driver (if there was no version write before read). + */ + writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION); + dev->version = readl(dev->base + PIPE_REG_VERSION); + if (dev->version < PIPE_CURRENT_DEVICE_VERSION) { + /* initialize the old device version */ + err = goldfish_pipe_device_init_v1(pdev); + } else { + /* Host device supports the new interface */ + err = goldfish_pipe_device_init_v2(pdev); + } + if (!err) + return 0; + +error: + dev->base = NULL; + return err; +} + +static int goldfish_pipe_remove(struct platform_device *pdev) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + if (dev->version < PIPE_CURRENT_DEVICE_VERSION) + goldfish_pipe_device_deinit_v1(pdev); + else + goldfish_pipe_device_deinit_v2(pdev); + dev->base = NULL; + return 0; +} + +static const struct acpi_device_id goldfish_pipe_acpi_match[] = { + { "GFSH0003", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match); + +static const struct of_device_id goldfish_pipe_of_match[] = { + { .compatible = "google,android-pipe", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match); + +static struct platform_driver goldfish_pipe_driver = { + .probe = goldfish_pipe_probe, + .remove = goldfish_pipe_remove, + .driver = { + .name = "goldfish_pipe", + .of_match_table = goldfish_pipe_of_match, + .acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match), + } +}; + +module_platform_driver(goldfish_pipe_driver); +MODULE_AUTHOR("David Turner "); +MODULE_LICENSE("GPL"); From 31b8b64ce8131d9185207fc81082e8709699b5fa Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 23 Jul 2015 10:40:57 -0700 Subject: [PATCH 28/61] ANDROID: arch: x86: disable pic for Android toolchain Android toolchains enable PIC, so explicitly disable it with -fno-pic (this is the upstream gcc default) Signed-off-by: Greg Hackmann (cherry picked from commit 892606ece2bebfa5a1ed62e9552cc973707ae9d3) Change-Id: I1e600363e5d18e459479fe4eb23d76855e16868d --- arch/x86/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4086abca0b32..53949c886341 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -97,6 +97,8 @@ else KBUILD_CFLAGS += $(call cc-option,-mno-80387) KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) + KBUILD_CFLAGS += -fno-pic + # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) From ae41a88ba7603279e01ea0cfde0867a31cf1c182 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 17 Nov 2016 17:01:43 -0800 Subject: [PATCH 29/61] arm64: rename ranchu defconfig to ranchu64 Change-Id: Ib7cd1ef722167905957623f65c3cc064e9d5c357 --- arch/arm64/configs/{ranchu_defconfig => ranchu64_defconfig} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename arch/arm64/configs/{ranchu_defconfig => ranchu64_defconfig} (100%) diff --git a/arch/arm64/configs/ranchu_defconfig b/arch/arm64/configs/ranchu64_defconfig similarity index 100% rename from arch/arm64/configs/ranchu_defconfig rename to arch/arm64/configs/ranchu64_defconfig From c3bb7a496b9a31d1db1414abd799ed64d14fc227 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 18 Nov 2016 07:26:19 +0100 Subject: [PATCH 30/61] ANDROID: goldfish_pipe: fix call_kern.cocci warnings Function get_free_pipe_id_locked called on line 671 inside lock on line 669 but uses GFP_KERNEL. Replace with GFP_ATOMIC. Generated by: scripts/coccinelle/locks/call_kern.cocci CC: Yurii Zubrytskyi Signed-off-by: Julia Lawall Signed-off-by: Fengguang Wu Signed-off-by: Guenter Roeck --- drivers/platform/goldfish/goldfish_pipe_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/goldfish/goldfish_pipe_v2.c b/drivers/platform/goldfish/goldfish_pipe_v2.c index bdb039bc7dde..ad373ed36555 100644 --- a/drivers/platform/goldfish/goldfish_pipe_v2.c +++ b/drivers/platform/goldfish/goldfish_pipe_v2.c @@ -616,7 +616,8 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev) /* Reallocate the array */ u32 new_capacity = 2 * dev->pipes_capacity; struct goldfish_pipe **pipes = - kcalloc(new_capacity, sizeof(*pipes), GFP_KERNEL); + kcalloc(new_capacity, sizeof(*pipes), + GFP_ATOMIC); if (!pipes) return -ENOMEM; memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity); From 02bb8ad0e42c8e01ec0a61aaa3a4ede68a1447e6 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Fri, 18 Nov 2016 13:16:07 +0800 Subject: [PATCH 31/61] ANDROID: video: goldfishfb: fix platform_no_drv_owner.cocci warnings drivers/video/fbdev/goldfishfb.c:318:3-8: No need to set .owner here. The core will do it. Remove .owner field if calls are used which set it automatically Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci CC: Greg Hackmann Signed-off-by: Fengguang Wu Signed-off-by: Guenter Roeck --- drivers/video/fbdev/goldfishfb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 131fee0341c3..1e56b50e4082 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -322,7 +322,6 @@ static struct platform_driver goldfish_fb_driver = { .remove = goldfish_fb_remove, .driver = { .name = "goldfish_fb", - .owner = THIS_MODULE, .of_match_table = goldfish_fb_of_match, .acpi_match_table = ACPI_PTR(goldfish_fb_acpi_match), } From 5bfd5dee568e3af818114a7ef83f8a38a42a6064 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Fri, 18 Nov 2016 11:09:02 -0800 Subject: [PATCH 32/61] ANDROID: goldfish: goldfish_pipe: fix locking errors If the get_user_pages_fast() call in goldfish_pipe_read_write() failed, it would return while still holding pipe->lock. goldfish_pipe_read_write() later releases and tries to re-acquire pipe->lock. If the re-acquire call failed, goldfish_pipe_read_write() would try unlock pipe->lock on exit anyway. This fixes the smatch messages: drivers/platform/goldfish/goldfish_pipe.c:392 goldfish_pipe_read_write() error: double unlock 'mutex:&pipe->lock' drivers/platform/goldfish/goldfish_pipe.c:397 goldfish_pipe_read_write() warn: inconsistent returns 'mutex:&pipe->lock'. Change-Id: Ifd06a76b32027ca451a001704ade0c5440ed69c4 Signed-off-by: Greg Hackmann --- drivers/platform/goldfish/goldfish_pipe.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index cf7ce97e7346..fd1452e28352 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -273,11 +273,13 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, if (ret == 0) { DPRINT("%s: error: (requested pages == 0) (wanted %d)\n", __FUNCTION__, requested_pages); + mutex_unlock(&pipe->lock); return ret; } if (ret < 0) { DPRINT("%s: (requested pages < 0) %d \n", __FUNCTION__, requested_pages); + mutex_unlock(&pipe->lock); return ret; } @@ -384,10 +386,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, } /* Try to re-acquire the lock */ - if (mutex_lock_interruptible(&pipe->lock)) { - ret = -ERESTARTSYS; - break; - } + if (mutex_lock_interruptible(&pipe->lock)) + return -ERESTARTSYS; } mutex_unlock(&pipe->lock); From 3b34722537ab8c352667ec1425a00eb54735bfa3 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 18 Nov 2016 11:40:40 -0800 Subject: [PATCH 33/61] ANDROID: goldfish_pipe: fix allmodconfig build tree: https://android.googlesource.com/kernel/common android-4.4 head: 6297c6ba0d217d5b0998738fbfaff2f04cad77e6 commit: bc43565e1ac5ba3f204886a2275726bb4c3d44e6 [18/20] ANDROID: goldfish_pipe: An implementation of more parallel pipe config: i386-randconfig-s1-201646 (attached as .config) compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901 reproduce: git checkout bc43565e1ac5ba3f204886a2275726bb4c3d44e6 # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): >> ERROR: "goldfish_pipe_device_deinit_v1" [drivers/platform/goldfish/goldfish_pipe_v2.ko] undefined! >> ERROR: "goldfish_pipe_device_init_v1" [drivers/platform/goldfish/goldfish_pipe_v2.ko] undefined! >> ERROR: "pipe_dev" [drivers/platform/goldfish/goldfish_pipe.ko] undefined! Change-Id: Ibd51441edf82e6bb6824acc05ea795570cc374e8 --- drivers/platform/goldfish/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/goldfish/Makefile b/drivers/platform/goldfish/Makefile index e53ae2fc717b..277a820ee4e1 100644 --- a/drivers/platform/goldfish/Makefile +++ b/drivers/platform/goldfish/Makefile @@ -2,4 +2,5 @@ # Makefile for Goldfish platform specific drivers # obj-$(CONFIG_GOLDFISH_BUS) += pdev_bus.o -obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe.o goldfish_pipe_v2.o +obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe_all.o +goldfish_pipe_all-objs := goldfish_pipe.o goldfish_pipe_v2.o From bcddfb47bf5d54c3b312e165374bed0317d5a767 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 28 Nov 2016 14:35:22 -0800 Subject: [PATCH 34/61] UPSTREAM: timekeeping: Add a fast and NMI safe boot clock This boot clock can be used as a tracing clock and will account for suspend time. To keep it NMI safe since we're accessing from tracing, we're not using a separate timekeeper with updates to monotonic clock and boot offset protected with seqlocks. This has the following minor side effects: (1) Its possible that a timestamp be taken after the boot offset is updated but before the timekeeper is updated. If this happens, the new boot offset is added to the old timekeeping making the clock appear to update slightly earlier: CPU 0 CPU 1 timekeeping_inject_sleeptime64() __timekeeping_inject_sleeptime(tk, delta); timestamp(); timekeeping_update(tk, TK_CLEAR_NTP...); (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be partially updated. Since the tk->offs_boot update is a rare event, this should be a rare occurrence which postprocessing should be able to handle. Bug: b/33184060 Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Richard Cochran Cc: Prarit Bhargava Reviewed-by: Thomas Gleixner Signed-off-by: Joel Fernandes Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 + kernel/time/timekeeping.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ec89d846324c..b7246d2ed7c9 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -233,6 +233,7 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); +extern u64 ktime_get_boot_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 445601c580d6..ede4bf13d3e9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -424,6 +424,35 @@ u64 ktime_get_raw_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); +/** + * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. + * + * To keep it NMI safe since we're accessing from tracing, we're not using a + * separate timekeeper with updates to monotonic clock and boot offset + * protected with seqlocks. This has the following minor side effects: + * + * (1) Its possible that a timestamp be taken after the boot offset is updated + * but before the timekeeper is updated. If this happens, the new boot offset + * is added to the old timekeeping making the clock appear to update slightly + * earlier: + * CPU 0 CPU 1 + * timekeeping_inject_sleeptime64() + * __timekeeping_inject_sleeptime(tk, delta); + * timestamp(); + * timekeeping_update(tk, TK_CLEAR_NTP...); + * + * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be + * partially updated. Since the tk->offs_boot update is a rare event, this + * should be a rare occurrence which postprocessing should be able to handle. + */ +u64 notrace ktime_get_boot_fast_ns(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); +} +EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); + /* Suspend-time cycles value for halted fast timekeeper. */ static cycle_t cycles_at_suspend; From 789790d859146cc223b101efb14d51929051ff89 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 28 Nov 2016 14:35:23 -0800 Subject: [PATCH 35/61] UPSTREAM: trace: Add an option for boot clock as trace clock Unlike monotonic clock, boot clock as a trace clock will account for time spent in suspend useful for tracing suspend/resume. This uses earlier introduced infrastructure for using the fast boot clock. Bug: b/33184060 Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Richard Cochran Cc: Prarit Bhargava Reviewed-by: Thomas Gleixner Signed-off-by: Joel Fernandes Signed-off-by: John Stultz Acked-by: Steven Rostedt --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bff7d4c69274..293af3346c8c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -890,6 +890,7 @@ static struct { { trace_clock, "perf", 1 }, { ktime_get_mono_fast_ns, "mono", 1 }, { ktime_get_raw_fast_ns, "mono_raw", 1 }, + { ktime_get_boot_fast_ns, "boot", 1 }, ARCH_TRACE_CLOCKS }; From 0e27629f94c8188e5d5d1b5d4cf52c58236d1e78 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 28 Nov 2016 14:35:24 -0800 Subject: [PATCH 36/61] UPSTREAM: trace: Update documentation for mono, mono_raw and boot clock Documentation was missing for mono and mono_raw, add them and also for the boot clock introduced in this series. Bug: b/33184060 Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Richard Cochran Cc: Prarit Bhargava Reviewed-by: Thomas Gleixner Signed-off-by: Joel Fernandes Signed-off-by: John Stultz Acked-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index cf2f9e9dfe4d..fa16fb2302a5 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -357,6 +357,26 @@ of ftrace. Here is a list of some of the key files: to correlate events across hypervisor/guest if tb_offset is known. + mono: This uses the fast monotonic clock (CLOCK_MONOTONIC) + which is monotonic and is subject to NTP rate adjustments. + + mono_raw: + This is the raw monotonic clock (CLOCK_MONOTONIC_RAW) + which is montonic but is not subject to any rate adjustments + and ticks at the same rate as the hardware clocksource. + + boot: This is the boot clock (CLOCK_BOOTTIME) and is based on the + fast monotonic clock, but also accounts for time spent in + suspend. Since the clock access is designed for use in + tracing in the suspend path, some side effects are possible + if clock is accessed after the suspend time is accounted before + the fast mono clock is updated. In this case, the clock update + appears to happen slightly sooner than it normally would have. + Also on 32-bit systems, its possible that the 64-bit boot offset + sees a partial update. These effects are rare and post + processing should be able to handle them. See comments on + ktime_get_boot_fast_ns function for more information. + To set a clock, simply echo the clock name into this file. echo global > trace_clock From a8575ee86e3ec89dde405193d61a6c0a958308cf Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Fri, 25 Nov 2016 13:38:45 +0800 Subject: [PATCH 37/61] sched: tune: Fix lacking spinlock initialization The spinlock used by boost_groups in sched tune must be initialized. This commit fixes this lack and the following errors: [ 0.384739] c2 BUG: spinlock bad magic on CPU#2, swapper/2/0 [ 0.390313] c2 lock: 0xffffffc15fe1fc80, .magic:00000000, .owner: /-1, .owner_cpu: 0 [ 0.398739] c2 CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.4.6+ #4 [ 0.404816] c2 Hardware name: Spreadtrum SP9860gBoard (DT) [ 0.410462] c2 Call trace: [ 0.413159] c2 [] dump_backtrace+0x0/0x210 [ 0.418803] c2 [] show_stack+0x20/0x28 [ 0.424100] c2 [] dump_stack+0xa8/0xe0 [ 0.429398] c2 [] spin_dump+0x78/0x9c [ 0.434608] c2 [] spin_bug+0x30/0x3c [ 0.439644] c2 [] do_raw_spin_lock+0xac/0x1b4 [ 0.445639] c2 [] _raw_spin_lock_irqsave+0x58/0x68 [ 0.451977] c2 [] schedtune_enqueue_task+0x84/0x3bc [ 0.458320] c2 [] enqueue_task_fair+0x438/0x208c [ 0.464487] c2 [] activate_task+0x70/0xd0 [ 0.470130] c2 [] ttwu_do_activate.constprop.131+0x4c/0x98 [ 0.477079] c2 [] try_to_wake_up+0x254/0x54c [ 0.482899] c2 [] default_wake_function+0x30/0x3c [ 0.489154] c2 [] autoremove_wake_function+0x3c/0x6c [ 0.495754] c2 [] __wake_up_common+0x64/0xa4 [ 0.501574] c2 [] __wake_up+0x48/0x60 [ 0.506788] c2 [] rcu_gp_kthread_wake+0x50/0x5c [ 0.512866] c2 [] note_gp_changes+0xac/0xd4 [ 0.518597] c2 [] rcu_process_callbacks+0xe8/0x93c [ 0.524940] c2 [] __do_softirq+0x24c/0x5b8 [ 0.530584] c2 [] irq_exit+0xc0/0xec [ 0.535623] c2 [] __handle_domain_irq+0x94/0xf8 [ 0.541789] c2 [] gic_handle_irq+0x64/0xc0 Signed-off-by: Ke Wang --- kernel/sched/tune.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 68a24a044b0a..079b18802f17 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -725,6 +725,7 @@ schedtune_init_cgroups(void) for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); memset(bg, 0, sizeof(struct boost_groups)); + raw_spin_lock_init(&bg->lock); } pr_info("schedtune: configured to support %d boost groups\n", From d88a1bd00cfa676163853ed6017922abd0cb3f39 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Sep 2016 10:49:11 +0800 Subject: [PATCH 38/61] iommu/vt-d: Fix PASID table allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 910170442944e1f8674fd5ddbeeb8ccd1877ea98 upstream. Somehow I ended up with an off-by-three error in calculating the size of the PASID and PASID State tables, which triggers allocations failures as those tables unfortunately have to be physically contiguous. In fact, even the *correct* maximum size of 8MiB is problematic and is wont to lead to allocation failures. Since I have extracted a promise that this *will* be fixed in hardware, I'm happy to limit it on the current hardware to a maximum of 0x20000 PASIDs, which gives us 1MiB tables — still not ideal, but better than before. Reported by Mika Kuoppala and also by Xunlei Pang who submitted a simpler patch to fix only the allocation (and not the free) to the "correct" limit... which was still problematic. Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-svm.c | 26 ++++++++++++++++---------- include/linux/intel-iommu.h | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index d9939fa9b588..f929879ecae6 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -39,10 +39,18 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) struct page *pages; int order; - order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; - if (order < 0) - order = 0; + /* Start at 2 because it's defined as 2^(1+PSS) */ + iommu->pasid_max = 2 << ecap_pss(iommu->ecap); + /* Eventually I'm promised we will get a multi-level PASID table + * and it won't have to be physically contiguous. Until then, + * limit the size because 8MiB contiguous allocations can be hard + * to come by. The limit of 0x20000, which is 1MiB for each of + * the PASID and PASID-state tables, is somewhat arbitrary. */ + if (iommu->pasid_max > 0x20000) + iommu->pasid_max = 0x20000; + + order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate PASID table\n", @@ -53,6 +61,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order); if (ecap_dis(iommu->ecap)) { + /* Just making it explicit... */ + BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry)); pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (pages) iommu->pasid_state_table = page_address(pages); @@ -68,11 +78,7 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) int intel_svm_free_pasid_tables(struct intel_iommu *iommu) { - int order; - - order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; - if (order < 0) - order = 0; + int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); if (iommu->pasid_table) { free_pages((unsigned long)iommu->pasid_table, order); @@ -371,8 +377,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } svm->iommu = iommu; - if (pasid_max > 2 << ecap_pss(iommu->ecap)) - pasid_max = 2 << ecap_pss(iommu->ecap); + if (pasid_max > iommu->pasid_max) + pasid_max = iommu->pasid_max; /* Do not use PASID 0 in caching mode (virtualised IOMMU) */ ret = idr_alloc(&iommu->pasid_idr, svm, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2d9b650047a5..d49e26c6cdc7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -429,6 +429,7 @@ struct intel_iommu { struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; + u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ From c091bbddbc5e237c3927bf8aead8ad6484b51183 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Fri, 21 Oct 2016 15:32:05 -0700 Subject: [PATCH 39/61] iommu/vt-d: Fix IOMMU lookup for SR-IOV Virtual Functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1c387188c60f53b338c20eee32db055dfe022a9b upstream. The VT-d specification (§8.3.3) says: ‘Virtual Functions’ of a ‘Physical Function’ are under the scope of the same remapping unit as the ‘Physical Function’. The BIOS is not required to list all the possible VFs in the scope tables, and arguably *shouldn't* make any attempt to do so, since there could be a huge number of them. This has been broken basically for ever — the VF is never going to match against a specific unit's scope, so it ends up being assigned to the INCLUDE_ALL IOMMU. Which was always actually correct by coincidence, but now we're looking at Root-Complex integrated devices with SR-IOV support it's going to start being wrong. Fix it to simply use pci_physfn() before doing the lookup for PCI devices. Signed-off-by: Sainath Grandhi Signed-off-by: Ashok Raj Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 4 +++- drivers/iommu/intel-iommu.c | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 565bb2c140ed..e913a930ac80 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -326,7 +326,9 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, struct pci_dev *pdev = to_pci_dev(data); struct dmar_pci_notify_info *info; - /* Only care about add/remove events for physical functions */ + /* Only care about add/remove events for physical functions. + * For VFs we actually do the lookup based on the corresponding + * PF in device_to_iommu() anyway. */ if (pdev->is_virtfn) return NOTIFY_DONE; if (action != BUS_NOTIFY_ADD_DEVICE && diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5baa830ce49f..59e9abd3345e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -885,7 +885,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf return NULL; if (dev_is_pci(dev)) { + struct pci_dev *pf_pdev; + pdev = to_pci_dev(dev); + /* VFs aren't listed in scope tables; we need to look up + * the PF instead to find the IOMMU. */ + pf_pdev = pci_physfn(pdev); + dev = &pf_pdev->dev; segment = pci_domain_nr(pdev->bus); } else if (has_acpi_companion(dev)) dev = &ACPI_COMPANION(dev)->dev; @@ -898,6 +904,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, tmp) { if (tmp == dev) { + /* For a VF use its original BDF# not that of the PF + * which we used for the IOMMU lookup. Strictly speaking + * we could do this for all PCI devices; we only need to + * get the BDF# from the scope table for ACPI matches. */ + if (pdev->is_virtfn) + goto got_pdev; + *bus = drhd->devices[i].bus; *devfn = drhd->devices[i].devfn; goto out; From b7f9404d1b488b6773c7a2e6da92aa6cb5bd125e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 23 Nov 2016 21:15:00 +0100 Subject: [PATCH 40/61] KVM: x86: drop error recovery in em_jmp_far and em_ret_far MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2117d5398c81554fbf803f5fd1dc55eb78216c0c upstream. em_jmp_far and em_ret_far assumed that setting IP can only fail in 64 bit mode, but syzkaller proved otherwise (and SDM agrees). Code segment was restored upon failure, but it was left uninitialized outside of long mode, which could lead to a leak of host kernel stack. We could have fixed that by always saving and restoring the CS, but we take a simpler approach and just break any guest that manages to fail as the error recovery is error-prone and modern CPUs don't need emulator for this. Found by syzkaller: WARNING: CPU: 2 PID: 3668 at arch/x86/kvm/emulate.c:2217 em_ret_far+0x428/0x480 Kernel panic - not syncing: panic_on_warn set ... CPU: 2 PID: 3668 Comm: syz-executor Not tainted 4.9.0-rc4+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [...] Call Trace: [...] __dump_stack lib/dump_stack.c:15 [...] dump_stack+0xb3/0x118 lib/dump_stack.c:51 [...] panic+0x1b7/0x3a3 kernel/panic.c:179 [...] __warn+0x1c4/0x1e0 kernel/panic.c:542 [...] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 [...] em_ret_far+0x428/0x480 arch/x86/kvm/emulate.c:2217 [...] em_ret_far_imm+0x17/0x70 arch/x86/kvm/emulate.c:2227 [...] x86_emulate_insn+0x87a/0x3730 arch/x86/kvm/emulate.c:5294 [...] x86_emulate_instruction+0x520/0x1ba0 arch/x86/kvm/x86.c:5545 [...] emulate_instruction arch/x86/include/asm/kvm_host.h:1116 [...] complete_emulated_io arch/x86/kvm/x86.c:6870 [...] complete_emulated_mmio+0x4e9/0x710 arch/x86/kvm/x86.c:6934 [...] kvm_arch_vcpu_ioctl_run+0x3b7a/0x5a90 arch/x86/kvm/x86.c:6978 [...] kvm_vcpu_ioctl+0x61e/0xdd0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2557 [...] vfs_ioctl fs/ioctl.c:43 [...] do_vfs_ioctl+0x18c/0x1040 fs/ioctl.c:679 [...] SYSC_ioctl fs/ioctl.c:694 [...] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685 [...] entry_SYSCALL_64_fastpath+0x1f/0xc2 Reported-by: Dmitry Vyukov Fixes: d1442d85cc30 ("KVM: x86: Handle errors when RIP is set during far jumps") Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5fa652c16a50..f49e98062ea5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2093,16 +2093,10 @@ static int em_iret(struct x86_emulate_ctxt *ctxt) static int em_jmp_far(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned short sel, old_sel; - struct desc_struct old_desc, new_desc; - const struct x86_emulate_ops *ops = ctxt->ops; + unsigned short sel; + struct desc_struct new_desc; u8 cpl = ctxt->ops->cpl(ctxt); - /* Assignment of RIP may only fail in 64-bit mode */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - ops->get_segment(ctxt, &old_sel, &old_desc, NULL, - VCPU_SREG_CS); - memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, @@ -2112,12 +2106,10 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return rc; rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); - if (rc != X86EMUL_CONTINUE) { - WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); - /* assigning eip failed; restore the old cs */ - ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); - return rc; - } + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + return rc; } @@ -2177,14 +2169,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; unsigned long eip, cs; - u16 old_cs; int cpl = ctxt->ops->cpl(ctxt); - struct desc_struct old_desc, new_desc; - const struct x86_emulate_ops *ops = ctxt->ops; - - if (ctxt->mode == X86EMUL_MODE_PROT64) - ops->get_segment(ctxt, &old_cs, &old_desc, NULL, - VCPU_SREG_CS); + struct desc_struct new_desc; rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2201,10 +2187,10 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; rc = assign_eip_far(ctxt, eip, &new_desc); - if (rc != X86EMUL_CONTINUE) { - WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); - ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); - } + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + return rc; } From 341f9730c29ba2673a76734166ed9ba9121add0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 23 Nov 2016 21:25:48 +0100 Subject: [PATCH 41/61] KVM: x86: check for pic and ioapic presence before use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit df492896e6dfb44fd1154f5402428d8e52705081 upstream. Split irqchip allows pic and ioapic routes to be used without them being created, which results in NULL access. Check for NULL and avoid it. (The setup is too racy for a nicer solutions.) Found by syzkaller: general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 3 PID: 11923 Comm: kworker/3:2 Not tainted 4.9.0-rc5+ #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: events irqfd_inject task: ffff88006a06c7c0 task.stack: ffff880068638000 RIP: 0010:[...] [...] __lock_acquire+0xb35/0x3380 kernel/locking/lockdep.c:3221 RSP: 0000:ffff88006863ea20 EFLAGS: 00010006 RAX: dffffc0000000000 RBX: dffffc0000000000 RCX: 0000000000000000 RDX: 0000000000000039 RSI: 0000000000000000 RDI: 1ffff1000d0c7d9e RBP: ffff88006863ef58 R08: 0000000000000001 R09: 0000000000000000 R10: 00000000000001c8 R11: 0000000000000000 R12: ffff88006a06c7c0 R13: 0000000000000001 R14: ffffffff8baab1a0 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88006d100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004abdd0 CR3: 000000003e2f2000 CR4: 00000000000026e0 Stack: ffffffff894d0098 1ffff1000d0c7d56 ffff88006863ecd0 dffffc0000000000 ffff88006a06c7c0 0000000000000000 ffff88006863ecf8 0000000000000082 0000000000000000 ffffffff815dd7c1 ffffffff00000000 ffffffff00000000 Call Trace: [...] lock_acquire+0x2a2/0x790 kernel/locking/lockdep.c:3746 [...] __raw_spin_lock include/linux/spinlock_api_smp.h:144 [...] _raw_spin_lock+0x38/0x50 kernel/locking/spinlock.c:151 [...] spin_lock include/linux/spinlock.h:302 [...] kvm_ioapic_set_irq+0x4c/0x100 arch/x86/kvm/ioapic.c:379 [...] kvm_set_ioapic_irq+0x8f/0xc0 arch/x86/kvm/irq_comm.c:52 [...] kvm_set_irq+0x239/0x640 arch/x86/kvm/../../../virt/kvm/irqchip.c:101 [...] irqfd_inject+0xb4/0x150 arch/x86/kvm/../../../virt/kvm/eventfd.c:60 [...] process_one_work+0xb40/0x1ba0 kernel/workqueue.c:2096 [...] worker_thread+0x214/0x18a0 kernel/workqueue.c:2230 [...] kthread+0x328/0x3e0 kernel/kthread.c:209 [...] ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:433 Reported-by: Dmitry Vyukov Fixes: 49df6397edfc ("KVM: x86: Split the APIC from the rest of IRQCHIP.") Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/irq_comm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 84b96d319909..d09544e826f6 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -38,6 +38,15 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, bool line_status) { struct kvm_pic *pic = pic_irqchip(kvm); + + /* + * XXX: rejecting pic routes when pic isn't in use would be better, + * but the default routing table is installed while kvm->arch.vpic is + * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE. + */ + if (!pic) + return -1; + return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); } @@ -46,6 +55,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, bool line_status) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + if (!ioapic) + return -1; + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, line_status); } From 55d061bf9d23bb8c5ada8c4f00b083894a146f2c Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 15 Nov 2016 18:05:33 +0800 Subject: [PATCH 42/61] usb: chipidea: move the lock initialization to core file commit a5d906bb261cde5f881a949d3b0fbaa285dcc574 upstream. This can fix below dump when the lock is accessed at host mode due to it is not initialized. [ 46.119638] INFO: trying to register non-static key. [ 46.124643] the code is fine but needs lockdep annotation. [ 46.130144] turning off the locking correctness validator. [ 46.135659] CPU: 0 PID: 690 Comm: cat Not tainted 4.9.0-rc3-00079-g4b75f1d #1210 [ 46.143075] Hardware name: Freescale i.MX6 SoloX (Device Tree) [ 46.148923] Backtrace: [ 46.151448] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 46.159038] r7:edf52000 [ 46.161412] r6:60000193 [ 46.163967] r5:00000000 [ 46.165035] r4:c0e25c2c [ 46.169109] [] (show_stack) from [] (dump_stack+0xb4/0xe8) [ 46.176362] [] (dump_stack) from [] (register_lock_class+0x4fc/0x56c) [ 46.184554] r10:c0e25d24 [ 46.187014] r9:edf53e70 [ 46.189569] r8:c1642444 [ 46.190637] r7:ee9da024 [ 46.193191] r6:00000000 [ 46.194258] r5:00000000 [ 46.196812] r4:00000000 [ 46.199185] r3:00000001 [ 46.203259] [] (register_lock_class) from [] (__lock_acquire+0x80/0x10f0) [ 46.211797] r10:c0e25d24 [ 46.214257] r9:edf53e70 [ 46.216813] r8:ee9da024 [ 46.217880] r7:c1642444 [ 46.220435] r6:edcd1800 [ 46.221502] r5:60000193 [ 46.224057] r4:00000000 [ 46.227953] [] (__lock_acquire) from [] (lock_acquire+0x74/0x94) [ 46.235710] r10:00000001 [ 46.238169] r9:edf53e70 [ 46.240723] r8:edf53f80 [ 46.241790] r7:00000001 [ 46.244344] r6:00000001 [ 46.245412] r5:60000193 [ 46.247966] r4:00000000 [ 46.251866] [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x40/0x54) [ 46.260319] r7:ee1c6a00 [ 46.262691] r6:c062a570 [ 46.265247] r5:20000113 [ 46.266314] r4:ee9da014 [ 46.270393] [] (_raw_spin_lock_irqsave) from [] (ci_port_test_show+0x2c/0x70) [ 46.279280] r6:eebd2000 [ 46.281652] r5:ee9da010 [ 46.284207] r4:ee9da014 [ 46.286810] [] (ci_port_test_show) from [] (seq_read+0x1ac/0x4f8) [ 46.294655] r9:edf53e70 [ 46.297028] r8:edf53f80 [ 46.299583] r7:ee1c6a00 [ 46.300650] r6:00000001 [ 46.303205] r5:00000000 [ 46.304273] r4:eebd2000 [ 46.306850] [] (seq_read) from [] (full_proxy_read+0x54/0x6c) [ 46.314348] r10:00000000 [ 46.316808] r9:c0a6ad30 [ 46.319363] r8:edf53f80 [ 46.320430] r7:00020000 [ 46.322986] r6:b6de3000 [ 46.324053] r5:ee1c6a00 [ 46.326607] r4:c0248b58 [ 46.330505] [] (full_proxy_read) from [] (__vfs_read+0x34/0x118) [ 46.338262] r9:edf52000 [ 46.340635] r8:c0107fc4 [ 46.343190] r7:00020000 [ 46.344257] r6:edf53f80 [ 46.346812] r5:c039e810 [ 46.347879] r4:ee1c6a00 [ 46.350447] [] (__vfs_read) from [] (vfs_read+0x8c/0x11c) [ 46.357597] r9:edf52000 [ 46.359969] r8:c0107fc4 [ 46.362524] r7:edf53f80 [ 46.363592] r6:b6de3000 [ 46.366147] r5:ee1c6a00 [ 46.367214] r4:00020000 [ 46.369782] [] (vfs_read) from [] (SyS_read+0x4c/0xa8) [ 46.376672] r8:c0107fc4 [ 46.379045] r7:00020000 [ 46.381600] r6:b6de3000 [ 46.382667] r5:ee1c6a00 [ 46.385222] r4:ee1c6a00 [ 46.387817] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x1c) [ 46.395314] r7:00000003 [ 46.397687] r6:b6de3000 [ 46.400243] r5:00020000 [ 46.401310] r4:00020000 Fixes: 26c696c678c4 ("USB: Chipidea: rename struct ci13xxx variables from udc to ci") Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 1 + drivers/usb/chipidea/udc.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 965d0e240dcb..ba4a2a1eb3ff 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -926,6 +926,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) if (!ci) return -ENOMEM; + spin_lock_init(&ci->lock); ci->dev = dev; ci->platdata = dev_get_platdata(dev); ci->imx28_write_fix = !!(ci->platdata->flags & diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 68fc5fce4cc5..d8a045fc1fdb 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1884,8 +1884,6 @@ static int udc_start(struct ci_hdrc *ci) struct usb_otg_caps *otg_caps = &ci->platdata->ci_otg_caps; int retval = 0; - spin_lock_init(&ci->lock); - ci->gadget.ops = &usb_gadget_ops; ci->gadget.speed = USB_SPEED_UNKNOWN; ci->gadget.max_speed = USB_SPEED_HIGH; From 1f36db0b397fd490d296b6e526d593faea2960aa Mon Sep 17 00:00:00 2001 From: Paul Jakma Date: Wed, 16 Nov 2016 10:13:49 +0000 Subject: [PATCH 43/61] USB: serial: cp210x: add ID for the Zone DPMX commit 2ab13292d7a314fa45de0acc808e41aaad31989c upstream. The BRIM Brothers Zone DPMX is a bicycle powermeter. This ID is for the USB serial interface in its charging dock for the control pods, via which some settings for the pods can be modified. Signed-off-by: Paul Jakma Cc: Barry Redmond Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 976195e748a3..fe7452f0f38a 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -130,6 +130,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ + { USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */ { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ From f3f95f177269f0bafb125769cc6299cec4622574 Mon Sep 17 00:00:00 2001 From: Doug Brown Date: Fri, 4 Nov 2016 21:18:20 -0700 Subject: [PATCH 44/61] USB: serial: ftdi_sio: add support for TI CC3200 LaunchPad commit 9bfef729a3d11f04d12788d749a3ce6b47645734 upstream. This patch adds support for the TI CC3200 LaunchPad board, which uses a custom USB vendor ID and product ID. Channel A is used for JTAG, and channel B is used for a UART. Signed-off-by: Doug Brown Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 494167fe6a2c..d3d6ec455151 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1012,6 +1012,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) }, { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, + { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 21011c0a4c64..48ee04c94a75 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -595,6 +595,12 @@ #define ATMEL_VID 0x03eb /* Vendor ID */ #define STK541_PID 0x2109 /* Zigbee Controller */ +/* + * Texas Instruments + */ +#define TI_VID 0x0451 +#define TI_CC3200_LAUNCHPAD_PID 0xC32A /* SimpleLink Wi-Fi CC3200 LaunchPad */ + /* * Blackfin gnICE JTAG * http://docs.blackfin.uclinux.org/doku.php?id=hw:jtag:gnice From ab0867dd8bc8b33ac662467b2d6cf2aff2516abc Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Thu, 10 Nov 2016 13:57:14 -0800 Subject: [PATCH 45/61] Fix USB CB/CBI storage devices with CONFIG_VMAP_STACK=y commit 2ce9d2272b98743b911196c49e7af5841381c206 upstream. Some code (all error handling) submits CDBs that are allocated on the stack. This breaks with CB/CBI code that tries to create URB directly from SCSI command buffer - which happens to be in vmalloced memory with vmalloced kernel stacks. Let's make copy of the command in usb_stor_CB_transport. Signed-off-by: Petr Vandrovec Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/transport.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 5e67f63b2e46..02f86dd1a340 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -919,10 +919,15 @@ int usb_stor_CB_transport(struct scsi_cmnd *srb, struct us_data *us) /* COMMAND STAGE */ /* let's send the command via the control pipe */ + /* + * Command is sometime (f.e. after scsi_eh_prep_cmnd) on the stack. + * Stack may be vmallocated. So no DMA for us. Make a copy. + */ + memcpy(us->iobuf, srb->cmnd, srb->cmd_len); result = usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, US_CBI_ADSC, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, - us->ifnum, srb->cmnd, srb->cmd_len); + us->ifnum, us->iobuf, srb->cmd_len); /* check the return code for the command */ usb_stor_dbg(us, "Call to usb_stor_ctrl_transfer() returned %d\n", From ffffc1ed47e76d13cafbc645792ca184331b3123 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 10 Nov 2016 09:35:27 -0500 Subject: [PATCH 46/61] scsi: mpt3sas: Fix secure erase premature termination commit 18f6084a989ba1b38702f9af37a2e4049a924be6 upstream. This is a work around for a bug with LSI Fusion MPT SAS2 when perfoming secure erase. Due to the very long time the operation takes, commands issued during the erase will time out and will trigger execution of the abort hook. Even though the abort hook is called for the specific command which timed out, this leads to entire device halt (scsi_state terminated) and premature termination of the secure erase. Set device state to busy while ATA passthrough commands are in progress. [mkp: hand applied to 4.9/scsi-fixes, tweaked patch description] Signed-off-by: Andrey Grodzovsky Acked-by: Sreekanth Reddy Cc: Cc: Sathya Prakash Cc: Chaitra P B Cc: Suganath Prabu Subramani Cc: Sreekanth Reddy Cc: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 2d867c5bfd9f..ac688cf752dc 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3831,7 +3831,10 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status) SAM_STAT_CHECK_CONDITION; } - +static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd) +{ + return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16); +} /** * scsih_qcmd - main scsi request entry point @@ -3859,6 +3862,13 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (ioc->logging_level & MPT_DEBUG_SCSI) scsi_print_command(scmd); + /* + * Lock the device for any subsequent command until command is + * done. + */ + if (ata_12_16_cmd(scmd)) + scsi_internal_device_block(scmd->device); + sas_device_priv_data = scmd->device->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { scmd->result = DID_NO_CONNECT << 16; @@ -4431,6 +4441,9 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) if (scmd == NULL) return 1; + if (ata_12_16_cmd(scmd)) + scsi_internal_device_unblock(scmd->device, SDEV_RUNNING); + mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); if (mpi_reply == NULL) { From 4df31626fc0804822cc967807fb8105d09e8ab38 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 16 Nov 2016 11:18:05 -0500 Subject: [PATCH 47/61] tile: avoid using clocksource_cyc2ns with absolute cycle count commit e658a6f14d7c0243205f035979d0ecf6c12a036f upstream. For large values of "mult" and long uptimes, the intermediate result of "cycles * mult" can overflow 64 bits. For example, the tile platform calls clocksource_cyc2ns with a 1.2 GHz clock; we have mult = 853, and after 208.5 days, we overflow 64 bits. Since clocksource_cyc2ns() is intended to be used for relative cycle counts, not absolute cycle counts, performance is more importance than accepting a wider range of cycle values. So, just use mult_frac() directly in tile's sched_clock(). Commit 4cecf6d401a0 ("sched, x86: Avoid unnecessary overflow in sched_clock") by Salman Qazi results in essentially the same generated code for x86 as this change does for tile. In fact, a follow-on change by Salman introduced mult_frac() and switched to using it, so the C code was largely identical at that point too. Peter Zijlstra then added mul_u64_u32_shr() and switched x86 to use it. This is, in principle, better; by optimizing the 64x64->64 multiplies to be 32x32->64 multiplies we can potentially save some time. However, the compiler piplines the 64x64->64 multiplies pretty well, and the conditional branch in the generic mul_u64_u32_shr() causes some bubbles in execution, with the result that it's pretty much a wash. If tilegx provided its own implementation of mul_u64_u32_shr() without the conditional branch, we could potentially save 3 cycles, but that seems like small gain for a fair amount of additional build scaffolding; no other platform currently provides a mul_u64_u32_shr() override, and tile doesn't currently have an header to put the override in. Additionally, gcc currently has an optimization bug that prevents it from recognizing the opportunity to use a 32x32->64 multiply, and so the result would be no better than the existing mult_frac() until such time as the compiler is fixed. For now, just using mult_frac() seems like the right answer. Signed-off-by: Chris Metcalf Signed-off-by: Greg Kroah-Hartman --- arch/tile/kernel/time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 178989e6d3e3..ea960d660917 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -218,8 +218,8 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) */ unsigned long long sched_clock(void) { - return clocksource_cyc2ns(get_cycles(), - sched_clock_mult, SCHED_CLOCK_SHIFT); + return mult_frac(get_cycles(), + sched_clock_mult, 1ULL << SCHED_CLOCK_SHIFT); } int setup_profiling_timer(unsigned int multiplier) From 8316338a201b3cd57605fc6c4ea0d9ce8a0d35c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Nov 2016 12:05:11 +0100 Subject: [PATCH 48/61] cfg80211: limit scan results cache size commit 9853a55ef1bb66d7411136046060bbfb69c714fa upstream. It's possible to make scanning consume almost arbitrary amounts of memory, e.g. by sending beacon frames with random BSSIDs at high rates while somebody is scanning. Limit the number of BSS table entries we're willing to cache to 1000, limiting maximum memory usage to maybe 4-5MB, but lower in practice - that would be the case for having both full-sized beacon and probe response frames for each entry; this seems not possible in practice, so a limit of 1000 entries will likely be closer to 0.5 MB. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.h | 1 + net/wireless/scan.c | 69 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/net/wireless/core.h b/net/wireless/core.h index a618b4b86fa4..47a967fed8ff 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -72,6 +72,7 @@ struct cfg80211_registered_device { struct list_head bss_list; struct rb_root bss_tree; u32 bss_generation; + u32 bss_entries; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; struct cfg80211_sched_scan_request __rcu *sched_scan_req; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 14d5369eb778..8dde12a11725 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -56,6 +56,19 @@ * also linked into the probe response struct. */ +/* + * Limit the number of BSS entries stored in mac80211. Each one is + * a bit over 4k at most, so this limits to roughly 4-5M of memory. + * If somebody wants to really attack this though, they'd likely + * use small beacons, and only one type of frame, limiting each of + * the entries to a much smaller size (in order to generate more + * entries in total, so overhead is bigger.) + */ +static int bss_entries_limit = 1000; +module_param(bss_entries_limit, int, 0644); +MODULE_PARM_DESC(bss_entries_limit, + "limit to number of scan BSS entries (per wiphy, default 1000)"); + #define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) static void bss_free(struct cfg80211_internal_bss *bss) @@ -136,6 +149,10 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev, list_del_init(&bss->list); rb_erase(&bss->rbn, &rdev->bss_tree); + rdev->bss_entries--; + WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list), + "rdev bss entries[%d]/list[empty:%d] corruption\n", + rdev->bss_entries, list_empty(&rdev->bss_list)); bss_ref_put(rdev, bss); return true; } @@ -162,6 +179,40 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev, rdev->bss_generation++; } +static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_internal_bss *bss, *oldest = NULL; + bool ret; + + lockdep_assert_held(&rdev->bss_lock); + + list_for_each_entry(bss, &rdev->bss_list, list) { + if (atomic_read(&bss->hold)) + continue; + + if (!list_empty(&bss->hidden_list) && + !bss->pub.hidden_beacon_bss) + continue; + + if (oldest && time_before(oldest->ts, bss->ts)) + continue; + oldest = bss; + } + + if (WARN_ON(!oldest)) + return false; + + /* + * The callers make sure to increase rdev->bss_generation if anything + * gets removed (and a new entry added), so there's no need to also do + * it here. + */ + + ret = __cfg80211_unlink_bss(rdev, oldest); + WARN_ON(!ret); + return ret; +} + void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message) { @@ -687,6 +738,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, const u8 *ie; int i, ssidlen; u8 fold = 0; + u32 n_entries = 0; ies = rcu_access_pointer(new->pub.beacon_ies); if (WARN_ON(!ies)) @@ -710,6 +762,12 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, /* This is the bad part ... */ list_for_each_entry(bss, &rdev->bss_list, list) { + /* + * we're iterating all the entries anyway, so take the + * opportunity to validate the list length accounting + */ + n_entries++; + if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) continue; if (bss->pub.channel != new->pub.channel) @@ -738,6 +796,10 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, new->pub.beacon_ies); } + WARN_ONCE(n_entries != rdev->bss_entries, + "rdev bss entries[%d]/list[len:%d] corruption\n", + rdev->bss_entries, n_entries); + return true; } @@ -890,7 +952,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, } } + if (rdev->bss_entries >= bss_entries_limit && + !cfg80211_bss_expire_oldest(rdev)) { + kfree(new); + goto drop; + } + list_add_tail(&new->list, &rdev->bss_list); + rdev->bss_entries++; rb_insert_bss(rdev, new); found = new; } From be79d7fa43f91c22e0e140c06aad57e9e5f417b2 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 31 Aug 2016 21:10:06 -0700 Subject: [PATCH 49/61] apparmor: fix change_hat not finding hat after policy replacement commit 3d40658c977769ce2138f286cf131537bf68bdfe upstream. After a policy replacement, the task cred may be out of date and need to be updated. However change_hat is using the stale profiles from the out of date cred resulting in either: a stale profile being applied or, incorrect failure when searching for a hat profile as it has been migrated to the new parent profile. Fixes: 01e2b670aa898a39259bc85c78e3d74820f4d3b6 (failure to find hat) Fixes: 898127c34ec03291c86f4ff3856d79e9e18952bc (stale policy being applied) Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1000287 Signed-off-by: John Johansen Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/apparmor/domain.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index dc0027b28b04..53426a6ee6dc 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -623,8 +623,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) /* released below */ cred = get_current_cred(); cxt = cred_cxt(cred); - profile = aa_cred_profile(cred); - previous_profile = cxt->previous; + profile = aa_get_newest_profile(aa_cred_profile(cred)); + previous_profile = aa_get_newest_profile(cxt->previous); if (unconfined(profile)) { info = "unconfined"; @@ -720,6 +720,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) out: aa_put_profile(hat); kfree(name); + aa_put_profile(profile); + aa_put_profile(previous_profile); put_cred(cred); return error; From 0c0ddbf7efec0aea124e4f7688e2b310faef7a91 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 22 Nov 2016 21:50:52 +0100 Subject: [PATCH 50/61] NFSv4.x: hide array-bounds warning commit d55b352b01bc78fbc3d1bb650140668b87e58bf9 upstream. A correct bugfix introduced a harmless warning that shows up with gcc-7: fs/nfs/callback.c: In function 'nfs_callback_up': fs/nfs/callback.c:214:14: error: array subscript is outside array bounds [-Werror=array-bounds] What happens here is that the 'minorversion == 0' check tells the compiler that we assume minorversion can be something other than 0, but when CONFIG_NFS_V4_1 is disabled that would be invalid and result in an out-of-bounds access. The added check for IS_ENABLED(CONFIG_NFS_V4_1) tells gcc that this really can't happen, which makes the code slightly smaller and also avoids the warning. The bugfix that introduced the warning is marked for stable backports, we want this one backported to the same releases. Fixes: 98b0f80c2396 ("NFSv4.x: Fix a refcount leak in nfs_callback_up_net") Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/callback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 52a28311e2a4..48efe62e1302 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -261,7 +261,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, } ret = -EPROTONOSUPPORT; - if (minorversion == 0) + if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0) ret = nfs4_callback_up_net(serv, net); else if (xprt->ops->bc_up) ret = xprt->ops->bc_up(serv, net); From e541fd815db94038c9615c4f645a0639c0e474be Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 20 Nov 2016 21:12:36 -0500 Subject: [PATCH 51/61] parisc: Fix races in parisc_setup_cache_timing() commit 741dc7bf1c7c7d93b853bb55efe77baa27e1b0a9 upstream. Helge reported to me the following startup crash: [ 0.000000] Linux version 4.8.0-1-parisc64-smp (debian-kernel@lists.debian.org) (gcc version 5.4.1 20161019 (GCC) ) #1 SMP Debian 4.8.7-1 (2016-11-13) [ 0.000000] The 64-bit Kernel has started... [ 0.000000] Kernel default page size is 4 KB. Huge pages enabled with 1 MB physical and 2 MB virtual size. [ 0.000000] Determining PDC firmware type: System Map. [ 0.000000] model 9000/785/J5000 [ 0.000000] Total Memory: 2048 MB [ 0.000000] Memory: 2018528K/2097152K available (9272K kernel code, 3053K rwdata, 1319K rodata, 1024K init, 840K bss, 78624K reserved, 0K cma-reserved) [ 0.000000] virtual kernel memory layout: [ 0.000000] vmalloc : 0x0000000000008000 - 0x000000003f000000 (1007 MB) [ 0.000000] memory : 0x0000000040000000 - 0x00000000c0000000 (2048 MB) [ 0.000000] .init : 0x0000000040100000 - 0x0000000040200000 (1024 kB) [ 0.000000] .data : 0x0000000040b0e000 - 0x0000000040f533e0 (4372 kB) [ 0.000000] .text : 0x0000000040200000 - 0x0000000040b0e000 (9272 kB) [ 0.768910] Brought up 1 CPUs [ 0.992465] NET: Registered protocol family 16 [ 2.429981] Releasing cpu 1 now, hpa=fffffffffffa2000 [ 2.635751] CPU(s): 2 out of 2 PA8500 (PCX-W) at 440.000000 MHz online [ 2.726692] Setting cache flush threshold to 1024 kB [ 2.729932] Not-handled unaligned insn 0x43ffff80 [ 2.798114] Setting TLB flush threshold to 140 kB [ 2.928039] Unaligned handler failed, ret = -1 [ 3.000419] _______________________________ [ 3.000419] < Your System ate a SPARC! Gah! > [ 3.000419] ------------------------------- [ 3.000419] \ ^__^ [ 3.000419] (__)\ )\/\ [ 3.000419] U ||----w | [ 3.000419] || || [ 9.340055] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-1-parisc64-smp #1 Debian 4.8.7-1 [ 9.448082] task: 00000000bfd48060 task.stack: 00000000bfd50000 [ 9.528040] [ 10.760029] IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004025d154 000000004025d158 [ 10.868052] IIR: 43ffff80 ISR: 0000000000340000 IOR: 000001ff54150960 [ 10.960029] CPU: 1 CR30: 00000000bfd50000 CR31: 0000000011111111 [ 11.052057] ORIG_R28: 000000004021e3b4 [ 11.100045] IAOQ[0]: irq_exit+0x94/0x120 [ 11.152062] IAOQ[1]: irq_exit+0x98/0x120 [ 11.208031] RP(r2): irq_exit+0xb8/0x120 [ 11.256074] Backtrace: [ 11.288067] [<00000000402cd944>] cpu_startup_entry+0x1e4/0x598 [ 11.368058] [<0000000040109528>] smp_callin+0x2c0/0x2f0 [ 11.436308] [<00000000402b53fc>] update_curr+0x18c/0x2d0 [ 11.508055] [<00000000402b73b8>] dequeue_entity+0x2c0/0x1030 [ 11.584040] [<00000000402b3cc0>] set_next_entity+0x80/0xd30 [ 11.660069] [<00000000402c1594>] pick_next_task_fair+0x614/0x720 [ 11.740085] [<000000004020dd34>] __schedule+0x394/0xa60 [ 11.808054] [<000000004020e488>] schedule+0x88/0x118 [ 11.876039] [<0000000040283d3c>] rescuer_thread+0x4d4/0x5b0 [ 11.948090] [<000000004028fc4c>] kthread+0x1ec/0x248 [ 12.016053] [<0000000040205020>] end_fault_vector+0x20/0xc0 [ 12.092239] [<00000000402050c0>] _switch_to_ret+0x0/0xf40 [ 12.164044] [ 12.184036] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-1-parisc64-smp #1 Debian 4.8.7-1 [ 12.244040] Backtrace: [ 12.244040] [<000000004021c480>] show_stack+0x68/0x80 [ 12.244040] [<00000000406f332c>] dump_stack+0xec/0x168 [ 12.244040] [<000000004021c74c>] die_if_kernel+0x25c/0x430 [ 12.244040] [<000000004022d320>] handle_unaligned+0xb48/0xb50 [ 12.244040] [ 12.632066] ---[ end trace 9ca05a7215c7bbb2 ]--- [ 12.692036] Kernel panic - not syncing: Attempted to kill the idle task! We have the insn 0x43ffff80 in IIR but from IAOQ we should have: 4025d150: 0f f3 20 df ldd,s r19(r31),r31 4025d154: 0f 9f 00 9c ldw r31(ret0),ret0 4025d158: bf 80 20 58 cmpb,*<> r0,ret0,4025d18c Cpu0 has just completed running parisc_setup_cache_timing: [ 2.429981] Releasing cpu 1 now, hpa=fffffffffffa2000 [ 2.635751] CPU(s): 2 out of 2 PA8500 (PCX-W) at 440.000000 MHz online [ 2.726692] Setting cache flush threshold to 1024 kB [ 2.729932] Not-handled unaligned insn 0x43ffff80 [ 2.798114] Setting TLB flush threshold to 140 kB [ 2.928039] Unaligned handler failed, ret = -1 From the backtrace, cpu1 is in smp_callin: void __init smp_callin(void) { int slave_id = cpu_now_booting; smp_cpu_init(slave_id); preempt_disable(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); local_irq_enable(); /* Interrupts have been off until now */ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); So, it has just flushed its caches and the TLB. It would seem either the flushes in parisc_setup_cache_timing or smp_callin have corrupted kernel memory. The attached patch reworks parisc_setup_cache_timing to remove the races in setting the cache and TLB flush thresholds. It also corrects the number of bytes flushed in the TLB calculation. The patch flushes the cache and TLB on cpu0 before starting the secondary processors so that they are started from a known state. Tested with a few reboots on c8000. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/cache.c | 31 ++++++++++++------------------- arch/parisc/kernel/setup.c | 4 ++++ 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index cda6dbbe9842..fd5979f28ada 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -351,6 +351,7 @@ void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; unsigned long size, start; + unsigned long threshold; alltime = mfctl(16); flush_data_cache(); @@ -364,17 +365,12 @@ void __init parisc_setup_cache_timing(void) printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n", alltime, size, rangetime); - /* Racy, but if we see an intermediate value, it's ok too... */ - parisc_cache_flush_threshold = size * alltime / rangetime; - - parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold); - if (!parisc_cache_flush_threshold) - parisc_cache_flush_threshold = FLUSH_THRESHOLD; - - if (parisc_cache_flush_threshold > cache_info.dc_size) - parisc_cache_flush_threshold = cache_info.dc_size; - - printk(KERN_INFO "Setting cache flush threshold to %lu kB\n", + threshold = L1_CACHE_ALIGN(size * alltime / rangetime); + if (threshold > cache_info.dc_size) + threshold = cache_info.dc_size; + if (threshold) + parisc_cache_flush_threshold = threshold; + printk(KERN_INFO "Cache flush threshold set to %lu KiB\n", parisc_cache_flush_threshold/1024); /* calculate TLB flush threshold */ @@ -383,7 +379,7 @@ void __init parisc_setup_cache_timing(void) flush_tlb_all(); alltime = mfctl(16) - alltime; - size = PAGE_SIZE; + size = 0; start = (unsigned long) _text; rangetime = mfctl(16); while (start < (unsigned long) _end) { @@ -396,13 +392,10 @@ void __init parisc_setup_cache_timing(void) printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", alltime, size, rangetime); - parisc_tlb_flush_threshold = size * alltime / rangetime; - parisc_tlb_flush_threshold *= num_online_cpus(); - parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold); - if (!parisc_tlb_flush_threshold) - parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; - - printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n", + threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime); + if (threshold) + parisc_tlb_flush_threshold = threshold; + printk(KERN_INFO "TLB flush threshold set to %lu KiB\n", parisc_tlb_flush_threshold/1024); } diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 81d6f6391944..2e66a887788e 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -334,6 +334,10 @@ static int __init parisc_init(void) /* tell PDC we're Linux. Nevermind failure. */ pdc_stable_write(0x40, &osid, sizeof(osid)); + /* start with known state */ + flush_cache_all_local(); + flush_tlb_all_local(NULL); + processor_init(); #ifdef CONFIG_SMP pr_info("CPU(s): %d out of %d %s at %d.%06d MHz online\n", From 7a1ab6a2bf3a69282f57a8b710e4e9e52f381678 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Thu, 24 Nov 2016 20:06:32 -0500 Subject: [PATCH 52/61] parisc: Fix race in pci-dma.c commit c0452fb9fb8f49c7d68ab9fa0ad092016be7b45f upstream. We are still troubled by occasional random segmentation faults and memory memory corruption on SMP machines. The causes quite a few package builds to fail on the Debian buildd machines for parisc. When gcc-6 failed to build three times in a row, I looked again at the TLB related code. I found a couple of issues. This is the first. In general, we need to ensure page table updates and corresponding TLB purges are atomic. The attached patch fixes an instance in pci-dma.c where the page table update was not guarded by the TLB lock. Tested on rp3440 and c8000. So far, no further random segmentation faults have been observed. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index b9402c9b3454..af0d7fae7aa7 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -95,8 +95,8 @@ static inline int map_pte_uncached(pte_t * pte, if (!pte_none(*pte)) printk(KERN_ERR "map_pte_uncached: page already exists\n"); - set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC)); purge_tlb_start(flags); + set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC)); pdtlb_kernel(orig_vaddr); purge_tlb_end(flags); vaddr += PAGE_SIZE; From cd4235a794c0bdc29995ae8b805aac1d235ab499 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Thu, 24 Nov 2016 20:18:14 -0500 Subject: [PATCH 53/61] parisc: Also flush data TLB in flush_icache_page_asm commit 5035b230e7b67ac12691ed3b5495bbb617027b68 upstream. This is the second issue I noticed in reviewing the parisc TLB code. The fic instruction may use either the instruction or data TLB in flushing the instruction cache. Thus, on machines with a split TLB, we should also flush the data TLB after setting up the temporary alias registers. Although this has no functional impact, I changed the pdtlb and pitlb instructions to consistently use the index register %r0. These instructions do not support integer displacements. Tested on rp3440 and c8000. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/pacache.S | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index b743a80eaba0..675521919229 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -96,7 +96,7 @@ fitmanyloop: /* Loop if LOOP >= 2 */ fitmanymiddle: /* Loop if LOOP >= 2 */ addib,COND(>) -1, %r31, fitmanymiddle /* Adjusted inner loop decr */ - pitlbe 0(%sr1, %r28) + pitlbe %r0(%sr1, %r28) pitlbe,m %arg1(%sr1, %r28) /* Last pitlbe and addr adjust */ addib,COND(>) -1, %r29, fitmanymiddle /* Middle loop decr */ copy %arg3, %r31 /* Re-init inner loop count */ @@ -139,7 +139,7 @@ fdtmanyloop: /* Loop if LOOP >= 2 */ fdtmanymiddle: /* Loop if LOOP >= 2 */ addib,COND(>) -1, %r31, fdtmanymiddle /* Adjusted inner loop decr */ - pdtlbe 0(%sr1, %r28) + pdtlbe %r0(%sr1, %r28) pdtlbe,m %arg1(%sr1, %r28) /* Last pdtlbe and addr adjust */ addib,COND(>) -1, %r29, fdtmanymiddle /* Middle loop decr */ copy %arg3, %r31 /* Re-init inner loop count */ @@ -620,12 +620,12 @@ ENTRY(copy_user_page_asm) /* Purge any old translations */ #ifdef CONFIG_PA20 - pdtlb,l 0(%r28) - pdtlb,l 0(%r29) + pdtlb,l %r0(%r28) + pdtlb,l %r0(%r29) #else tlb_lock %r20,%r21,%r22 - pdtlb 0(%r28) - pdtlb 0(%r29) + pdtlb %r0(%r28) + pdtlb %r0(%r29) tlb_unlock %r20,%r21,%r22 #endif @@ -768,10 +768,10 @@ ENTRY(clear_user_page_asm) /* Purge any old translation */ #ifdef CONFIG_PA20 - pdtlb,l 0(%r28) + pdtlb,l %r0(%r28) #else tlb_lock %r20,%r21,%r22 - pdtlb 0(%r28) + pdtlb %r0(%r28) tlb_unlock %r20,%r21,%r22 #endif @@ -852,10 +852,10 @@ ENTRY(flush_dcache_page_asm) /* Purge any old translation */ #ifdef CONFIG_PA20 - pdtlb,l 0(%r28) + pdtlb,l %r0(%r28) #else tlb_lock %r20,%r21,%r22 - pdtlb 0(%r28) + pdtlb %r0(%r28) tlb_unlock %r20,%r21,%r22 #endif @@ -892,10 +892,10 @@ ENTRY(flush_dcache_page_asm) sync #ifdef CONFIG_PA20 - pdtlb,l 0(%r25) + pdtlb,l %r0(%r25) #else tlb_lock %r20,%r21,%r22 - pdtlb 0(%r25) + pdtlb %r0(%r25) tlb_unlock %r20,%r21,%r22 #endif @@ -925,13 +925,18 @@ ENTRY(flush_icache_page_asm) depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ #endif - /* Purge any old translation */ + /* Purge any old translation. Note that the FIC instruction + * may use either the instruction or data TLB. Given that we + * have a flat address space, it's not clear which TLB will be + * used. So, we purge both entries. */ #ifdef CONFIG_PA20 + pdtlb,l %r0(%r28) pitlb,l %r0(%sr4,%r28) #else tlb_lock %r20,%r21,%r22 - pitlb (%sr4,%r28) + pdtlb %r0(%r28) + pitlb %r0(%sr4,%r28) tlb_unlock %r20,%r21,%r22 #endif @@ -970,10 +975,12 @@ ENTRY(flush_icache_page_asm) sync #ifdef CONFIG_PA20 + pdtlb,l %r0(%r28) pitlb,l %r0(%sr4,%r25) #else tlb_lock %r20,%r21,%r22 - pitlb (%sr4,%r25) + pdtlb %r0(%r28) + pitlb %r0(%sr4,%r25) tlb_unlock %r20,%r21,%r22 #endif From 249090830942565fb0ce7c1e018d927a14282ead Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 24 Nov 2016 13:23:10 +0000 Subject: [PATCH 54/61] mpi: Fix NULL ptr dereference in mpi_powm() [ver #3] commit f5527fffff3f002b0a6b376163613b82f69de073 upstream. This fixes CVE-2016-8650. If mpi_powm() is given a zero exponent, it wants to immediately return either 1 or 0, depending on the modulus. However, if the result was initalised with zero limb space, no limbs space is allocated and a NULL-pointer exception ensues. Fix this by allocating a minimal amount of limb space for the result when the 0-exponent case when the result is 1 and not touching the limb space when the result is 0. This affects the use of RSA keys and X.509 certificates that carry them. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] mpi_powm+0x32/0x7e6 PGD 0 Oops: 0002 [#1] SMP Modules linked in: CPU: 3 PID: 3014 Comm: keyctl Not tainted 4.9.0-rc6-fscache+ #278 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff8804011944c0 task.stack: ffff880401294000 RIP: 0010:[] [] mpi_powm+0x32/0x7e6 RSP: 0018:ffff880401297ad8 EFLAGS: 00010212 RAX: 0000000000000000 RBX: ffff88040868bec0 RCX: ffff88040868bba0 RDX: ffff88040868b260 RSI: ffff88040868bec0 RDI: ffff88040868bee0 RBP: ffff880401297ba8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000047 R11: ffffffff8183b210 R12: 0000000000000000 R13: ffff8804087c7600 R14: 000000000000001f R15: ffff880401297c50 FS: 00007f7a7918c700(0000) GS:ffff88041fb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000401250000 CR4: 00000000001406e0 Stack: ffff88040868bec0 0000000000000020 ffff880401297b00 ffffffff81376cd4 0000000000000100 ffff880401297b10 ffffffff81376d12 ffff880401297b30 ffffffff81376f37 0000000000000100 0000000000000000 ffff880401297ba8 Call Trace: [] ? __sg_page_iter_next+0x43/0x66 [] ? sg_miter_get_next_page+0x1b/0x5d [] ? sg_miter_next+0x17/0xbd [] ? mpi_read_raw_from_sgl+0xf2/0x146 [] rsa_verify+0x9d/0xee [] ? pkcs1pad_sg_set_buf+0x2e/0xbb [] pkcs1pad_verify+0xc0/0xe1 [] public_key_verify_signature+0x1b0/0x228 [] x509_check_for_self_signed+0xa1/0xc4 [] x509_cert_parse+0x167/0x1a1 [] x509_key_preparse+0x21/0x1a1 [] asymmetric_key_preparse+0x34/0x61 [] key_create_or_update+0x145/0x399 [] SyS_add_key+0x154/0x19e [] do_syscall_64+0x80/0x191 [] entry_SYSCALL64_slow_path+0x25/0x25 Code: 56 41 55 41 54 53 48 81 ec a8 00 00 00 44 8b 71 04 8b 42 04 4c 8b 67 18 45 85 f6 89 45 80 0f 84 b4 06 00 00 85 c0 75 2f 41 ff ce <49> c7 04 24 01 00 00 00 b0 01 75 0b 48 8b 41 18 48 83 38 01 0f RIP [] mpi_powm+0x32/0x7e6 RSP CR2: 0000000000000000 ---[ end trace d82015255d4a5d8d ]--- Basically, this is a backport of a libgcrypt patch: http://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=patch;h=6e1adb05d290aeeb1c230c763970695f4a538526 Fixes: cdec9cb5167a ("crypto: GnuPG based MPI lib - source files (part 1)") Signed-off-by: Andrey Ryabinin Signed-off-by: David Howells cc: Dmitry Kasatkin cc: linux-ima-devel@lists.sourceforge.net Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- lib/mpi/mpi-pow.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c index 5464c8744ea9..e24388a863a7 100644 --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c @@ -64,8 +64,13 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) if (!esize) { /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 * depending on if MOD equals 1. */ - rp[0] = 1; res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; + if (res->nlimbs) { + if (mpi_resize(res, 1) < 0) + goto enomem; + rp = res->d; + rp[0] = 1; + } res->sign = 0; goto leave; } From e1049372d7a7196f036664e7b909fa2ec22ed5b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 29 Nov 2016 18:40:20 +0900 Subject: [PATCH 55/61] drm/radeon: Ensure vblank interrupt is enabled on DPMS transition to on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NOTE: This patch only applies to 4.5.y or older kernels. With newer kernels, this problem cannot happen because the driver now uses drm_crtc_vblank_on/off instead of drm_vblank_pre/post_modeset[0]. I consider this patch safer for older kernels than backporting the API change, because drm_crtc_vblank_on/off had various issues in older kernels, and I'm not sure all fixes for those have been backported to all stable branches where this patch could be applied. --------------------- Fixes the vblank interrupt being disabled when it should be on, which can cause at least the following symptoms: * Hangs when running 'xset dpms force off' in a GNOME session with gnome-shell using DRI2. * RandR 1.4 slave outputs freezing with garbage displayed using xf86-video-ati 7.8.0 or newer. [0] See upstream commit: commit 777e3cbc791f131806d9bf24b3325637c7fc228d Author: Daniel Vetter Date: Thu Jan 21 11:08:57 2016 +0100 drm/radeon: Switch to drm_vblank_on/off Reported-and-Tested-by: Max Staudt Reviewed-by: Daniel Vetter Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer --- drivers/gpu/drm/radeon/atombios_crtc.c | 2 ++ drivers/gpu/drm/radeon/radeon_legacy_crtc.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 79bab6fd76bb..6755d4768f59 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -275,6 +275,8 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) atombios_enable_crtc_memreq(crtc, ATOM_ENABLE); atombios_blank_crtc(crtc, ATOM_DISABLE); drm_vblank_post_modeset(dev, radeon_crtc->crtc_id); + /* Make sure vblank interrupt is still enabled if needed */ + radeon_irq_set(rdev); radeon_crtc_load_lut(crtc); break; case DRM_MODE_DPMS_STANDBY: diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 678b4386540d..89f22bdde298 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -331,6 +331,8 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode) WREG32_P(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl, ~(mask | crtc_ext_cntl)); } drm_vblank_post_modeset(dev, radeon_crtc->crtc_id); + /* Make sure vblank interrupt is still enabled if needed */ + radeon_irq_set(rdev); radeon_crtc_load_lut(crtc); break; case DRM_MODE_DPMS_STANDBY: From 4f13967ecd20e32ab9564231e872d57f79fb61ef Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 20 Jul 2016 10:24:02 +0300 Subject: [PATCH 56/61] mei: me: disable driver on SPT SPS firmware commit 8c57cac1457f3125a5d13dc03635c0708c61bff0 upstream. Sunrise Point PCH with SPS Firmware doesn't expose working MEI interface, we need to quirk it out. The SPS Firmware is identifiable only on the first PCI function of the device. Tested-by: Sujith Pandel Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 10 ++++++++-- drivers/misc/mei/pci-me.c | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 25b1997a62cb..36333750c512 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1258,8 +1258,14 @@ static bool mei_me_fw_type_nm(struct pci_dev *pdev) static bool mei_me_fw_type_sps(struct pci_dev *pdev) { u32 reg; - /* Read ME FW Status check for SPS Firmware */ - pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + unsigned int devfn; + + /* + * Read ME FW Status register to check for SPS Firmware + * The SPS FW is only signaled in pci function 0 + */ + devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, ®); /* if bits [19:16] = 15, running SPS Firmware */ return (reg & 0xf0000) == 0xf0000; } diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 0af3d7d30419..01e20384ac44 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -84,8 +84,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, mei_me_pch8_cfg)}, From bab2f72f70eae417c871d9b24b3fa5dd0dea254b Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 29 Nov 2016 14:44:45 +0200 Subject: [PATCH 57/61] mei: me: fix place for kaby point device ids. This is fix of the backported patch only, it places KBL DIDs on correct place to easy on backporting of further DIDs. Fixes: 5c99f32c461c ('mei: me: add kaby point device ids') Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 4e8069866c85..a2661381ddfc 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -66,9 +66,6 @@ #ifndef _MEI_HW_MEI_REGS_H_ #define _MEI_HW_MEI_REGS_H_ -#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ -#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ - /* * MEI device IDs */ @@ -124,6 +121,10 @@ #define MEI_DEV_ID_SPT_2 0x9D3B /* Sunrise Point 2 */ #define MEI_DEV_ID_SPT_H 0xA13A /* Sunrise Point H */ #define MEI_DEV_ID_SPT_H_2 0xA13B /* Sunrise Point H 2 */ + +#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ +#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ + /* * MEI HW Section */ From 0b7860d6e88c35deb46e65d3f7d2ebfaa6b38219 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 23 Jun 2016 00:25:31 +0300 Subject: [PATCH 58/61] mei: fix return value on disconnection commit 2d4d5481e2d6f93b25fcfb13a9f20bbfbf54266a upstream. Correct errno on client disconnection is -ENODEV not -EBUSY Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 2 +- drivers/misc/mei/main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index a77643954523..e59838231703 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -144,7 +144,7 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) mutex_lock(&bus->device_lock); if (!mei_cl_is_connected(cl)) { - rets = -EBUSY; + rets = -ENODEV; goto out; } } diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 80f9afcb1382..4ef189a7a2fb 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -207,7 +207,7 @@ static ssize_t mei_read(struct file *file, char __user *ubuf, mutex_lock(&dev->device_lock); if (!mei_cl_is_connected(cl)) { - rets = -EBUSY; + rets = -ENODEV; goto out; } } From c178e4809df724ba9603b3263c2ac6375ad9c147 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Nov 2016 11:17:30 -0800 Subject: [PATCH 59/61] flow_dissect: call init_default_flow_dissectors() earlier commit c9b8af1330198ae241cd545e1f040019010d44d9 upstream. Andre Noll reported panics after my recent fix (commit 34fad54c2537 "net: __skb_flow_dissect() must cap its return value") After some more headaches, Alexander root caused the problem to init_default_flow_dissectors() being called too late, in case a network driver like IGB is not a module and receives DHCP message very early. Fix is to call init_default_flow_dissectors() much earlier, as it is a core infrastructure and does not depend on another kernel service. Fixes: 06635a35d13d4 ("flow_dissect: use programable dissector in skb_flow_dissect and friends") Signed-off-by: Eric Dumazet Reported-by: Andre Noll Diagnosed-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9aba9e93c0a2..ee9082792530 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -949,4 +949,4 @@ static int __init init_default_flow_dissectors(void) return 0; } -late_initcall_sync(init_default_flow_dissectors); +core_initcall(init_default_flow_dissectors); From 6eddf5c993dd9bf4efcf2509e4ca633b9441a66a Mon Sep 17 00:00:00 2001 From: Suganath Prabu S Date: Thu, 17 Nov 2016 16:15:58 +0530 Subject: [PATCH 60/61] scsi: mpt3sas: Unblock device after controller reset commit 7ff723ad0f87feba43dda45fdae71206063dd7d4 upstream. While issuing any ATA passthrough command to firmware the driver will block the device. But it will unblock the device only if the I/O completes through the ISR path. If a controller reset occurs before command completion the device will remain in blocked state. Make sure we unblock the device following a controller reset if an ATA passthrough command was queued. [mkp: clarified patch description] Fixes: ac6c2a93bd07 ("mpt3sas: Fix for SATA drive in blocked state, after diag reset") Signed-off-by: Suganath Prabu S Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index ac688cf752dc..8cead04f26d6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3706,6 +3706,11 @@ _scsih_temp_threshold_events(struct MPT3SAS_ADAPTER *ioc, } } +static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd) +{ + return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16); +} + /** * _scsih_flush_running_cmds - completing outstanding commands. * @ioc: per adapter object @@ -3727,6 +3732,9 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc) if (!scmd) continue; count++; + if (ata_12_16_cmd(scmd)) + scsi_internal_device_unblock(scmd->device, + SDEV_RUNNING); mpt3sas_base_free_smid(ioc, smid); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery) @@ -3831,11 +3839,6 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status) SAM_STAT_CHECK_CONDITION; } -static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd) -{ - return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16); -} - /** * scsih_qcmd - main scsi request entry point * @scmd: pointer to scsi command object From 87c6c6ef5b17bfed9226f1307435910626817ebb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 2 Dec 2016 09:09:18 +0100 Subject: [PATCH 61/61] Linux 4.4.36 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f88830af1533..705eb9e38fce 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 35 +SUBLEVEL = 36 EXTRAVERSION = NAME = Blurry Fish Butt