diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroup-legacy/00-INDEX
similarity index 100%
rename from Documentation/cgroups/00-INDEX
rename to Documentation/cgroup-legacy/00-INDEX
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroup-legacy/blkio-controller.txt
similarity index 81%
rename from Documentation/cgroups/blkio-controller.txt
rename to Documentation/cgroup-legacy/blkio-controller.txt
index 52fa9f353342..4ecc954a3063 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroup-legacy/blkio-controller.txt
@@ -374,82 +374,3 @@ One can experience an overall throughput drop if you have created multiple
 groups and put applications in that group which are not driving enough
 IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle
 on individual groups and throughput should improve.
-
-Writeback
-=========
-
-Page cache is dirtied through buffered writes and shared mmaps and
-written asynchronously to the backing filesystem by the writeback
-mechanism.  Writeback sits between the memory and IO domains and
-regulates the proportion of dirty memory by balancing dirtying and
-write IOs.
-
-On traditional cgroup hierarchies, relationships between different
-controllers cannot be established making it impossible for writeback
-to operate accounting for cgroup resource restrictions and all
-writeback IOs are attributed to the root cgroup.
-
-If both the blkio and memory controllers are used on the v2 hierarchy
-and the filesystem supports cgroup writeback, writeback operations
-correctly follow the resource restrictions imposed by both memory and
-blkio controllers.
-
-Writeback examines both system-wide and per-cgroup dirty memory status
-and enforces the more restrictive of the two.  Also, writeback control
-parameters which are absolute values - vm.dirty_bytes and
-vm.dirty_background_bytes - are distributed across cgroups according
-to their current writeback bandwidth.
-
-There's a peculiarity stemming from the discrepancy in ownership
-granularity between memory controller and writeback.  While memory
-controller tracks ownership per page, writeback operates on inode
-basis.  cgroup writeback bridges the gap by tracking ownership by
-inode but migrating ownership if too many foreign pages, pages which
-don't match the current inode ownership, have been encountered while
-writing back the inode.
-
-This is a conscious design choice as writeback operations are
-inherently tied to inodes making strictly following page ownership
-complicated and inefficient.  The only use case which suffers from
-this compromise is multiple cgroups concurrently dirtying disjoint
-regions of the same inode, which is an unlikely use case and decided
-to be unsupported.  Note that as memory controller assigns page
-ownership on the first use and doesn't update it until the page is
-released, even if cgroup writeback strictly follows page ownership,
-multiple cgroups dirtying overlapping areas wouldn't work as expected.
-In general, write-sharing an inode across multiple cgroups is not well
-supported.
-
-Filesystem support for cgroup writeback
----------------------------------------
-
-A filesystem can make writeback IOs cgroup-aware by updating
-address_space_operations->writepage[s]() to annotate bio's using the
-following two functions.
-
-* wbc_init_bio(@wbc, @bio)
-
-  Should be called for each bio carrying writeback data and associates
-  the bio with the inode's owner cgroup.  Can be called anytime
-  between bio allocation and submission.
-
-* wbc_account_io(@wbc, @page, @bytes)
-
-  Should be called for each data segment being written out.  While
-  this function doesn't care exactly when it's called during the
-  writeback session, it's the easiest and most natural to call it as
-  data segments are added to a bio.
-
-With writeback bio's annotated, cgroup support can be enabled per
-super_block by setting MS_CGROUPWB in ->s_flags.  This allows for
-selective disabling of cgroup writeback support which is helpful when
-certain filesystem features, e.g. journaled data mode, are
-incompatible.
-
-wbc_init_bio() binds the specified bio to its cgroup.  Depending on
-the configuration, the bio may be executed at a lower priority and if
-the writeback session is holding shared resources, e.g. a journal
-entry, may lead to priority inversion.  There is no one easy solution
-for the problem.  Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_blkcg()
-directly.
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroup-legacy/cgroups.txt
similarity index 100%
rename from Documentation/cgroups/cgroups.txt
rename to Documentation/cgroup-legacy/cgroups.txt
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroup-legacy/cpuacct.txt
similarity index 100%
rename from Documentation/cgroups/cpuacct.txt
rename to Documentation/cgroup-legacy/cpuacct.txt
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroup-legacy/cpusets.txt
similarity index 100%
rename from Documentation/cgroups/cpusets.txt
rename to Documentation/cgroup-legacy/cpusets.txt
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroup-legacy/devices.txt
similarity index 100%
rename from Documentation/cgroups/devices.txt
rename to Documentation/cgroup-legacy/devices.txt
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroup-legacy/freezer-subsystem.txt
similarity index 100%
rename from Documentation/cgroups/freezer-subsystem.txt
rename to Documentation/cgroup-legacy/freezer-subsystem.txt
diff --git a/Documentation/cgroups/hugetlb.txt b/Documentation/cgroup-legacy/hugetlb.txt
similarity index 100%
rename from Documentation/cgroups/hugetlb.txt
rename to Documentation/cgroup-legacy/hugetlb.txt
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroup-legacy/memcg_test.txt
similarity index 100%
rename from Documentation/cgroups/memcg_test.txt
rename to Documentation/cgroup-legacy/memcg_test.txt
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroup-legacy/memory.txt
similarity index 100%
rename from Documentation/cgroups/memory.txt
rename to Documentation/cgroup-legacy/memory.txt
diff --git a/Documentation/cgroups/net_cls.txt b/Documentation/cgroup-legacy/net_cls.txt
similarity index 100%
rename from Documentation/cgroups/net_cls.txt
rename to Documentation/cgroup-legacy/net_cls.txt
diff --git a/Documentation/cgroups/net_prio.txt b/Documentation/cgroup-legacy/net_prio.txt
similarity index 100%
rename from Documentation/cgroups/net_prio.txt
rename to Documentation/cgroup-legacy/net_prio.txt
diff --git a/Documentation/cgroups/pids.txt b/Documentation/cgroup-legacy/pids.txt
similarity index 100%
rename from Documentation/cgroups/pids.txt
rename to Documentation/cgroup-legacy/pids.txt
diff --git a/Documentation/cgroup.txt b/Documentation/cgroup.txt
new file mode 100644
index 000000000000..31d1f7bf12a1
--- /dev/null
+++ b/Documentation/cgroup.txt
@@ -0,0 +1,1293 @@
+
+Control Group v2
+
+October, 2015		Tejun Heo <tj@kernel.org>
+
+This is the authoritative documentation on the design, interface and
+conventions of cgroup v2.  It describes all userland-visible aspects
+of cgroup including core and specific controller behaviors.  All
+future changes must be reflected in this document.  Documentation for
+v1 is available under Documentation/cgroup-legacy/.
+
+CONTENTS
+
+1. Introduction
+  1-1. Terminology
+  1-2. What is cgroup?
+2. Basic Operations
+  2-1. Mounting
+  2-2. Organizing Processes
+  2-3. [Un]populated Notification
+  2-4. Controlling Controllers
+    2-4-1. Enabling and Disabling
+    2-4-2. Top-down Constraint
+    2-4-3. No Internal Process Constraint
+  2-5. Delegation
+    2-5-1. Model of Delegation
+    2-5-2. Delegation Containment
+  2-6. Guidelines
+    2-6-1. Organize Once and Control
+    2-6-2. Avoid Name Collisions
+3. Resource Distribution Models
+  3-1. Weights
+  3-2. Limits
+  3-3. Protections
+  3-4. Allocations
+4. Interface Files
+  4-1. Format
+  4-2. Conventions
+  4-3. Core Interface Files
+5. Controllers
+  5-1. CPU
+    5-1-1. CPU Interface Files
+  5-2. Memory
+    5-2-1. Memory Interface Files
+    5-2-2. Usage Guidelines
+    5-2-3. Memory Ownership
+  5-3. IO
+    5-3-1. IO Interface Files
+    5-3-2. Writeback
+P. Information on Kernel Programming
+  P-1. Filesystem Support for Writeback
+D. Deprecated v1 Core Features
+R. Issues with v1 and Rationales for v2
+  R-1. Multiple Hierarchies
+  R-2. Thread Granularity
+  R-3. Competition Between Inner Nodes and Threads
+  R-4. Other Interface Issues
+  R-5. Controller Issues and Remedies
+    R-5-1. Memory
+
+
+1. Introduction
+
+1-1. Terminology
+
+"cgroup" stands for "control group" and is never capitalized.  The
+singular form is used to designate the whole feature and also as a
+qualifier as in "cgroup controllers".  When explicitly referring to
+multiple individual control groups, the plural form "cgroups" is used.
+
+
+1-2. What is cgroup?
+
+cgroup is a mechanism to organize processes hierarchically and
+distribute system resources along the hierarchy in a controlled and
+configurable manner.
+
+cgroup is largely composed of two parts - the core and controllers.
+cgroup core is primarily responsible for hierarchically organizing
+processes.  A cgroup controller is usually responsible for
+distributing a specific type of system resource along the hierarchy
+although there are utility controllers which serve purposes other than
+resource distribution.
+
+cgroups form a tree structure and every process in the system belongs
+to one and only one cgroup.  All threads of a process belong to the
+same cgroup.  On creation, all processes are put in the cgroup that
+the parent process belongs to at the time.  A process can be migrated
+to another cgroup.  Migration of a process doesn't affect already
+existing descendant processes.
+
+Following certain structural constraints, controllers may be enabled or
+disabled selectively on a cgroup.  All controller behaviors are
+hierarchical - if a controller is enabled on a cgroup, it affects all
+processes which belong to the cgroups consisting the inclusive
+sub-hierarchy of the cgroup.  When a controller is enabled on a nested
+cgroup, it always restricts the resource distribution further.  The
+restrictions set closer to the root in the hierarchy can not be
+overridden from further away.
+
+
+2. Basic Operations
+
+2-1. Mounting
+
+Unlike v1, cgroup v2 has only single hierarchy.  The cgroup v2
+hierarchy can be mounted with the following mount command.
+
+  # mount -t cgroup2 none $MOUNT_POINT
+
+cgroup2 filesystem has the magic number 0x63677270 ("cgrp").  All
+controllers which support v2 and are not bound to a v1 hierarchy are
+automatically bound to the v2 hierarchy and show up at the root.
+Controllers which are not in active use in the v2 hierarchy can be
+bound to other hierarchies.  This allows mixing v2 hierarchy with the
+legacy v1 multiple hierarchies in a fully backward compatible way.
+
+A controller can be moved across hierarchies only after the controller
+is no longer referenced in its current hierarchy.  Because per-cgroup
+controller states are destroyed asynchronously and controllers may
+have lingering references, a controller may not show up immediately on
+the v2 hierarchy after the final umount of the previous hierarchy.
+Similarly, a controller should be fully disabled to be moved out of
+the unified hierarchy and it may take some time for the disabled
+controller to become available for other hierarchies; furthermore, due
+to inter-controller dependencies, other controllers may need to be
+disabled too.
+
+While useful for development and manual configurations, moving
+controllers dynamically between the v2 and other hierarchies is
+strongly discouraged for production use.  It is recommended to decide
+the hierarchies and controller associations before starting using the
+controllers after system boot.
+
+
+2-2. Organizing Processes
+
+Initially, only the root cgroup exists to which all processes belong.
+A child cgroup can be created by creating a sub-directory.
+
+  # mkdir $CGROUP_NAME
+
+A given cgroup may have multiple child cgroups forming a tree
+structure.  Each cgroup has a read-writable interface file
+"cgroup.procs".  When read, it lists the PIDs of all processes which
+belong to the cgroup one-per-line.  The PIDs are not ordered and the
+same PID may show up more than once if the process got moved to
+another cgroup and then back or the PID got recycled while reading.
+
+A process can be migrated into a cgroup by writing its PID to the
+target cgroup's "cgroup.procs" file.  Only one process can be migrated
+on a single write(2) call.  If a process is composed of multiple
+threads, writing the PID of any thread migrates all threads of the
+process.
+
+When a process forks a child process, the new process is born into the
+cgroup that the forking process belongs to at the time of the
+operation.  After exit, a process stays associated with the cgroup
+that it belonged to at the time of exit until it's reaped; however, a
+zombie process does not appear in "cgroup.procs" and thus can't be
+moved to another cgroup.
+
+A cgroup which doesn't have any children or live processes can be
+destroyed by removing the directory.  Note that a cgroup which doesn't
+have any children and is associated only with zombie processes is
+considered empty and can be removed.
+
+  # rmdir $CGROUP_NAME
+
+"/proc/$PID/cgroup" lists a process's cgroup membership.  If legacy
+cgroup is in use in the system, this file may contain multiple lines,
+one for each hierarchy.  The entry for cgroup v2 is always in the
+format "0::$PATH".
+
+  # cat /proc/842/cgroup
+  ...
+  0::/test-cgroup/test-cgroup-nested
+
+If the process becomes a zombie and the cgroup it was associated with
+is removed subsequently, " (deleted)" is appended to the path.
+
+  # cat /proc/842/cgroup
+  ...
+  0::/test-cgroup/test-cgroup-nested (deleted)
+
+
+2-3. [Un]populated Notification
+
+Each non-root cgroup has a "cgroup.events" file which contains
+"populated" field indicating whether the cgroup's sub-hierarchy has
+live processes in it.  Its value is 0 if there is no live process in
+the cgroup and its descendants; otherwise, 1.  poll and [id]notify
+events are triggered when the value changes.  This can be used, for
+example, to start a clean-up operation after all processes of a given
+sub-hierarchy have exited.  The populated state updates and
+notifications are recursive.  Consider the following sub-hierarchy
+where the numbers in the parentheses represent the numbers of processes
+in each cgroup.
+
+  A(4) - B(0) - C(1)
+              \ D(0)
+
+A, B and C's "populated" fields would be 1 while D's 0.  After the one
+process in C exits, B and C's "populated" fields would flip to "0" and
+file modified events will be generated on the "cgroup.events" files of
+both cgroups.
+
+
+2-4. Controlling Controllers
+
+2-4-1. Enabling and Disabling
+
+Each cgroup has a "cgroup.controllers" file which lists all
+controllers available for the cgroup to enable.
+
+  # cat cgroup.controllers
+  cpu io memory
+
+No controller is enabled by default.  Controllers can be enabled and
+disabled by writing to the "cgroup.subtree_control" file.
+
+  # echo "+cpu +memory -io" > cgroup.subtree_control
+
+Only controllers which are listed in "cgroup.controllers" can be
+enabled.  When multiple operations are specified as above, either they
+all succeed or fail.  If multiple operations on the same controller
+are specified, the last one is effective.
+
+Enabling a controller in a cgroup indicates that the distribution of
+the target resource across its immediate children will be controlled.
+Consider the following sub-hierarchy.  The enabled controllers are
+listed in parentheses.
+
+  A(cpu,memory) - B(memory) - C()
+                            \ D()
+
+As A has "cpu" and "memory" enabled, A will control the distribution
+of CPU cycles and memory to its children, in this case, B.  As B has
+"memory" enabled but not "CPU", C and D will compete freely on CPU
+cycles but their division of memory available to B will be controlled.
+
+As a controller regulates the distribution of the target resource to
+the cgroup's children, enabling it creates the controller's interface
+files in the child cgroups.  In the above example, enabling "cpu" on B
+would create the "cpu." prefixed controller interface files in C and
+D.  Likewise, disabling "memory" from B would remove the "memory."
+prefixed controller interface files from C and D.  This means that the
+controller interface files - anything which doesn't start with
+"cgroup." are owned by the parent rather than the cgroup itself.
+
+
+2-4-2. Top-down Constraint
+
+Resources are distributed top-down and a cgroup can further distribute
+a resource only if the resource has been distributed to it from the
+parent.  This means that all non-root "cgroup.subtree_control" files
+can only contain controllers which are enabled in the parent's
+"cgroup.subtree_control" file.  A controller can be enabled only if
+the parent has the controller enabled and a controller can't be
+disabled if one or more children have it enabled.
+
+
+2-4-3. No Internal Process Constraint
+
+Non-root cgroups can only distribute resources to their children when
+they don't have any processes of their own.  In other words, only
+cgroups which don't contain any processes can have controllers enabled
+in their "cgroup.subtree_control" files.
+
+This guarantees that, when a controller is looking at the part of the
+hierarchy which has it enabled, processes are always only on the
+leaves.  This rules out situations where child cgroups compete against
+internal processes of the parent.
+
+The root cgroup is exempt from this restriction.  Root contains
+processes and anonymous resource consumption which can't be associated
+with any other cgroups and requires special treatment from most
+controllers.  How resource consumption in the root cgroup is governed
+is up to each controller.
+
+Note that the restriction doesn't get in the way if there is no
+enabled controller in the cgroup's "cgroup.subtree_control".  This is
+important as otherwise it wouldn't be possible to create children of a
+populated cgroup.  To control resource distribution of a cgroup, the
+cgroup must create children and transfer all its processes to the
+children before enabling controllers in its "cgroup.subtree_control"
+file.
+
+
+2-5. Delegation
+
+2-5-1. Model of Delegation
+
+A cgroup can be delegated to a less privileged user by granting write
+access of the directory and its "cgroup.procs" file to the user.  Note
+that resource control interface files in a given directory control the
+distribution of the parent's resources and thus must not be delegated
+along with the directory.
+
+Once delegated, the user can build sub-hierarchy under the directory,
+organize processes as it sees fit and further distribute the resources
+it received from the parent.  The limits and other settings of all
+resource controllers are hierarchical and regardless of what happens
+in the delegated sub-hierarchy, nothing can escape the resource
+restrictions imposed by the parent.
+
+Currently, cgroup doesn't impose any restrictions on the number of
+cgroups in or nesting depth of a delegated sub-hierarchy; however,
+this may be limited explicitly in the future.
+
+
+2-5-2. Delegation Containment
+
+A delegated sub-hierarchy is contained in the sense that processes
+can't be moved into or out of the sub-hierarchy by the delegatee.  For
+a process with a non-root euid to migrate a target process into a
+cgroup by writing its PID to the "cgroup.procs" file, the following
+conditions must be met.
+
+- The writer's euid must match either uid or suid of the target process.
+
+- The writer must have write access to the "cgroup.procs" file.
+
+- The writer must have write access to the "cgroup.procs" file of the
+  common ancestor of the source and destination cgroups.
+
+The above three constraints ensure that while a delegatee may migrate
+processes around freely in the delegated sub-hierarchy it can't pull
+in from or push out to outside the sub-hierarchy.
+
+For an example, let's assume cgroups C0 and C1 have been delegated to
+user U0 who created C00, C01 under C0 and C10 under C1 as follows and
+all processes under C0 and C1 belong to U0.
+
+  ~~~~~~~~~~~~~ - C0 - C00
+  ~ cgroup    ~      \ C01
+  ~ hierarchy ~
+  ~~~~~~~~~~~~~ - C1 - C10
+
+Let's also say U0 wants to write the PID of a process which is
+currently in C10 into "C00/cgroup.procs".  U0 has write access to the
+file and uid match on the process; however, the common ancestor of the
+source cgroup C10 and the destination cgroup C00 is above the points
+of delegation and U0 would not have write access to its "cgroup.procs"
+files and thus the write will be denied with -EACCES.
+
+
+2-6. Guidelines
+
+2-6-1. Organize Once and Control
+
+Migrating a process across cgroups is a relatively expensive operation
+and stateful resources such as memory are not moved together with the
+process.  This is an explicit design decision as there often exist
+inherent trade-offs between migration and various hot paths in terms
+of synchronization cost.
+
+As such, migrating processes across cgroups frequently as a means to
+apply different resource restrictions is discouraged.  A workload
+should be assigned to a cgroup according to the system's logical and
+resource structure once on start-up.  Dynamic adjustments to resource
+distribution can be made by changing controller configuration through
+the interface files.
+
+
+2-6-2. Avoid Name Collisions
+
+Interface files for a cgroup and its children cgroups occupy the same
+directory and it is possible to create children cgroups which collide
+with interface files.
+
+All cgroup core interface files are prefixed with "cgroup." and each
+controller's interface files are prefixed with the controller name and
+a dot.  A controller's name is composed of lower case alphabets and
+'_'s but never begins with an '_' so it can be used as the prefix
+character for collision avoidance.  Also, interface file names won't
+start or end with terms which are often used in categorizing workloads
+such as job, service, slice, unit or workload.
+
+cgroup doesn't do anything to prevent name collisions and it's the
+user's responsibility to avoid them.
+
+
+3. Resource Distribution Models
+
+cgroup controllers implement several resource distribution schemes
+depending on the resource type and expected use cases.  This section
+describes major schemes in use along with their expected behaviors.
+
+
+3-1. Weights
+
+A parent's resource is distributed by adding up the weights of all
+active children and giving each the fraction matching the ratio of its
+weight against the sum.  As only children which can make use of the
+resource at the moment participate in the distribution, this is
+work-conserving.  Due to the dynamic nature, this model is usually
+used for stateless resources.
+
+All weights are in the range [1, 10000] with the default at 100.  This
+allows symmetric multiplicative biases in both directions at fine
+enough granularity while staying in the intuitive range.
+
+As long as the weight is in range, all configuration combinations are
+valid and there is no reason to reject configuration changes or
+process migrations.
+
+"cpu.weight" proportionally distributes CPU cycles to active children
+and is an example of this type.
+
+
+3-2. Limits
+
+A child can only consume upto the configured amount of the resource.
+Limits can be over-committed - the sum of the limits of children can
+exceed the amount of resource available to the parent.
+
+Limits are in the range [0, max] and defaults to "max", which is noop.
+
+As limits can be over-committed, all configuration combinations are
+valid and there is no reason to reject configuration changes or
+process migrations.
+
+"io.max" limits the maximum BPS and/or IOPS that a cgroup can consume
+on an IO device and is an example of this type.
+
+
+3-3. Protections
+
+A cgroup is protected to be allocated upto the configured amount of
+the resource if the usages of all its ancestors are under their
+protected levels.  Protections can be hard guarantees or best effort
+soft boundaries.  Protections can also be over-committed in which case
+only upto the amount available to the parent is protected among
+children.
+
+Protections are in the range [0, max] and defaults to 0, which is
+noop.
+
+As protections can be over-committed, all configuration combinations
+are valid and there is no reason to reject configuration changes or
+process migrations.
+
+"memory.low" implements best-effort memory protection and is an
+example of this type.
+
+
+3-4. Allocations
+
+A cgroup is exclusively allocated a certain amount of a finite
+resource.  Allocations can't be over-committed - the sum of the
+allocations of children can not exceed the amount of resource
+available to the parent.
+
+Allocations are in the range [0, max] and defaults to 0, which is no
+resource.
+
+As allocations can't be over-committed, some configuration
+combinations are invalid and should be rejected.  Also, if the
+resource is mandatory for execution of processes, process migrations
+may be rejected.
+
+"cpu.rt.max" hard-allocates realtime slices and is an example of this
+type.
+
+
+4. Interface Files
+
+4-1. Format
+
+All interface files should be in one of the following formats whenever
+possible.
+
+  New-line separated values
+  (when only one value can be written at once)
+
+	VAL0\n
+	VAL1\n
+	...
+
+  Space separated values
+  (when read-only or multiple values can be written at once)
+
+	VAL0 VAL1 ...\n
+
+  Flat keyed
+
+	KEY0 VAL0\n
+	KEY1 VAL1\n
+	...
+
+  Nested keyed
+
+	KEY0 SUB_KEY0=VAL00 SUB_KEY1=VAL01...
+	KEY1 SUB_KEY0=VAL10 SUB_KEY1=VAL11...
+	...
+
+For a writable file, the format for writing should generally match
+reading; however, controllers may allow omitting later fields or
+implement restricted shortcuts for most common use cases.
+
+For both flat and nested keyed files, only the values for a single key
+can be written at a time.  For nested keyed files, the sub key pairs
+may be specified in any order and not all pairs have to be specified.
+
+
+4-2. Conventions
+
+- Settings for a single feature should be contained in a single file.
+
+- The root cgroup should be exempt from resource control and thus
+  shouldn't have resource control interface files.  Also,
+  informational files on the root cgroup which end up showing global
+  information available elsewhere shouldn't exist.
+
+- If a controller implements weight based resource distribution, its
+  interface file should be named "weight" and have the range [1,
+  10000] with 100 as the default.  The values are chosen to allow
+  enough and symmetric bias in both directions while keeping it
+  intuitive (the default is 100%).
+
+- If a controller implements an absolute resource guarantee and/or
+  limit, the interface files should be named "min" and "max"
+  respectively.  If a controller implements best effort resource
+  guarantee and/or limit, the interface files should be named "low"
+  and "high" respectively.
+
+  In the above four control files, the special token "max" should be
+  used to represent upward infinity for both reading and writing.
+
+- If a setting has a configurable default value and keyed specific
+  overrides, the default entry should be keyed with "default" and
+  appear as the first entry in the file.
+
+  The default value can be updated by writing either "default $VAL" or
+  "$VAL".
+
+  When writing to update a specific override, "default" can be used as
+  the value to indicate removal of the override.  Override entries
+  with "default" as the value must not appear when read.
+
+  For example, a setting which is keyed by major:minor device numbers
+  with integer values may look like the following.
+
+    # cat cgroup-example-interface-file
+    default 150
+    8:0 300
+
+  The default value can be updated by
+
+    # echo 125 > cgroup-example-interface-file
+
+  or
+
+    # echo "default 125" > cgroup-example-interface-file
+
+  An override can be set by
+
+    # echo "8:16 170" > cgroup-example-interface-file
+
+  and cleared by
+
+    # echo "8:0 default" > cgroup-example-interface-file
+    # cat cgroup-example-interface-file
+    default 125
+    8:16 170
+
+- For events which are not very high frequency, an interface file
+  "events" should be created which lists event key value pairs.
+  Whenever a notifiable event happens, file modified event should be
+  generated on the file.
+
+
+4-3. Core Interface Files
+
+All cgroup core files are prefixed with "cgroup."
+
+  cgroup.procs
+
+	A read-write new-line separated values file which exists on
+	all cgroups.
+
+	When read, it lists the PIDs of all processes which belong to
+	the cgroup one-per-line.  The PIDs are not ordered and the
+	same PID may show up more than once if the process got moved
+	to another cgroup and then back or the PID got recycled while
+	reading.
+
+	A PID can be written to migrate the process associated with
+	the PID to the cgroup.  The writer should match all of the
+	following conditions.
+
+	- Its euid is either root or must match either uid or suid of
+          the target process.
+
+	- It must have write access to the "cgroup.procs" file.
+
+	- It must have write access to the "cgroup.procs" file of the
+	  common ancestor of the source and destination cgroups.
+
+	When delegating a sub-hierarchy, write access to this file
+	should be granted along with the containing directory.
+
+  cgroup.controllers
+
+	A read-only space separated values file which exists on all
+	cgroups.
+
+	It shows space separated list of all controllers available to
+	the cgroup.  The controllers are not ordered.
+
+  cgroup.subtree_control
+
+	A read-write space separated values file which exists on all
+	cgroups.  Starts out empty.
+
+	When read, it shows space separated list of the controllers
+	which are enabled to control resource distribution from the
+	cgroup to its children.
+
+	Space separated list of controllers prefixed with '+' or '-'
+	can be written to enable or disable controllers.  A controller
+	name prefixed with '+' enables the controller and '-'
+	disables.  If a controller appears more than once on the list,
+	the last one is effective.  When multiple enable and disable
+	operations are specified, either all succeed or all fail.
+
+  cgroup.events
+
+	A read-only flat-keyed file which exists on non-root cgroups.
+	The following entries are defined.  Unless specified
+	otherwise, a value change in this file generates a file
+	modified event.
+
+	  populated
+
+		1 if the cgroup or its descendants contains any live
+		processes; otherwise, 0.
+
+
+5. Controllers
+
+5-1. CPU
+
+[NOTE: The interface for the cpu controller hasn't been merged yet]
+
+The "cpu" controllers regulates distribution of CPU cycles.  This
+controller implements weight and absolute bandwidth limit models for
+normal scheduling policy and absolute bandwidth allocation model for
+realtime scheduling policy.
+
+
+5-1-1. CPU Interface Files
+
+All time durations are in microseconds.
+
+  cpu.stat
+
+	A read-only flat-keyed file which exists on non-root cgroups.
+
+	It reports the following six stats.
+
+	  usage_usec
+	  user_usec
+	  system_usec
+	  nr_periods
+	  nr_throttled
+	  throttled_usec
+
+  cpu.weight
+
+	A read-write single value file which exists on non-root
+	cgroups.  The default is "100".
+
+	The weight in the range [1, 10000].
+
+  cpu.max
+
+	A read-write two value file which exists on non-root cgroups.
+	The default is "max 100000".
+
+	The maximum bandwidth limit.  It's in the following format.
+
+	  $MAX $PERIOD
+
+	which indicates that the group may consume upto $MAX in each
+	$PERIOD duration.  "max" for $MAX indicates no limit.  If only
+	one number is written, $MAX is updated.
+
+  cpu.rt.max
+
+  [NOTE: The semantics of this file is still under discussion and the
+   interface hasn't been merged yet]
+
+	A read-write two value file which exists on all cgroups.
+	The default is "0 100000".
+
+	The maximum realtime runtime allocation.  Over-committing
+	configurations are disallowed and process migrations are
+	rejected if not enough bandwidth is available.  It's in the
+	following format.
+
+	  $MAX $PERIOD
+
+	which indicates that the group may consume upto $MAX in each
+	$PERIOD duration.  If only one number is written, $MAX is
+	updated.
+
+
+5-2. Memory
+
+The "memory" controller regulates distribution of memory.  Memory is
+stateful and implements both limit and protection models.  Due to the
+intertwining between memory usage and reclaim pressure and the
+stateful nature of memory, the distribution model is relatively
+complex.
+
+While not completely water-tight, all major memory usages by a given
+cgroup are tracked so that the total memory consumption can be
+accounted and controlled to a reasonable extent.  Currently, the
+following types of memory usages are tracked.
+
+- Userland memory - page cache and anonymous memory.
+
+- Kernel data structures such as dentries and inodes.
+
+- TCP socket buffers.
+
+The above list may expand in the future for better coverage.
+
+
+5-2-1. Memory Interface Files
+
+All memory amounts are in bytes.  If a value which is not aligned to
+PAGE_SIZE is written, the value may be rounded up to the closest
+PAGE_SIZE multiple when read back.
+
+  memory.current
+
+	A read-only single value file which exists on non-root
+	cgroups.
+
+	The total amount of memory currently being used by the cgroup
+	and its descendants.
+
+  memory.low
+
+	A read-write single value file which exists on non-root
+	cgroups.  The default is "0".
+
+	Best-effort memory protection.  If the memory usages of a
+	cgroup and all its ancestors are below their low boundaries,
+	the cgroup's memory won't be reclaimed unless memory can be
+	reclaimed from unprotected cgroups.
+
+	Putting more memory than generally available under this
+	protection is discouraged.
+
+  memory.high
+
+	A read-write single value file which exists on non-root
+	cgroups.  The default is "max".
+
+	Memory usage throttle limit.  This is the main mechanism to
+	control memory usage of a cgroup.  If a cgroup's usage goes
+	over the high boundary, the processes of the cgroup are
+	throttled and put under heavy reclaim pressure.
+
+	Going over the high limit never invokes the OOM killer and
+	under extreme conditions the limit may be breached.
+
+  memory.max
+
+	A read-write single value file which exists on non-root
+	cgroups.  The default is "max".
+
+	Memory usage hard limit.  This is the final protection
+	mechanism.  If a cgroup's memory usage reaches this limit and
+	can't be reduced, the OOM killer is invoked in the cgroup.
+	Under certain circumstances, the usage may go over the limit
+	temporarily.
+
+	This is the ultimate protection mechanism.  As long as the
+	high limit is used and monitored properly, this limit's
+	utility is limited to providing the final safety net.
+
+  memory.events
+
+	A read-only flat-keyed file which exists on non-root cgroups.
+	The following entries are defined.  Unless specified
+	otherwise, a value change in this file generates a file
+	modified event.
+
+	  low
+
+		The number of times the cgroup is reclaimed due to
+		high memory pressure even though its usage is under
+		the low boundary.  This usually indicates that the low
+		boundary is over-committed.
+
+	  high
+
+		The number of times processes of the cgroup are
+		throttled and routed to perform direct memory reclaim
+		because the high memory boundary was exceeded.  For a
+		cgroup whose memory usage is capped by the high limit
+		rather than global memory pressure, this event's
+		occurrences are expected.
+
+	  max
+
+		The number of times the cgroup's memory usage was
+		about to go over the max boundary.  If direct reclaim
+		fails to bring it down, the OOM killer is invoked.
+
+	  oom
+
+		The number of times the OOM killer has been invoked in
+		the cgroup.  This may not exactly match the number of
+		processes killed but should generally be close.
+
+
+5-2-2. General Usage
+
+"memory.high" is the main mechanism to control memory usage.
+Over-committing on high limit (sum of high limits > available memory)
+and letting global memory pressure to distribute memory according to
+usage is a viable strategy.
+
+Because breach of the high limit doesn't trigger the OOM killer but
+throttles the offending cgroup, a management agent has ample
+opportunities to monitor and take appropriate actions such as granting
+more memory or terminating the workload.
+
+Determining whether a cgroup has enough memory is not trivial as
+memory usage doesn't indicate whether the workload can benefit from
+more memory.  For example, a workload which writes data received from
+network to a file can use all available memory but can also operate as
+performant with a small amount of memory.  A measure of memory
+pressure - how much the workload is being impacted due to lack of
+memory - is necessary to determine whether a workload needs more
+memory; unfortunately, memory pressure monitoring mechanism isn't
+implemented yet.
+
+
+5-2-3. Memory Ownership
+
+A memory area is charged to the cgroup which instantiated it and stays
+charged to the cgroup until the area is released.  Migrating a process
+to a different cgroup doesn't move the memory usages that it
+instantiated while in the previous cgroup to the new cgroup.
+
+A memory area may be used by processes belonging to different cgroups.
+To which cgroup the area will be charged is in-deterministic; however,
+over time, the memory area is likely to end up in a cgroup which has
+enough memory allowance to avoid high reclaim pressure.
+
+If a cgroup sweeps a considerable amount of memory which is expected
+to be accessed repeatedly by other cgroups, it may make sense to use
+POSIX_FADV_DONTNEED to relinquish the ownership of memory areas
+belonging to the affected files to ensure correct memory ownership.
+
+
+5-3. IO
+
+The "io" controller regulates the distribution of IO resources.  This
+controller implements both weight based and absolute bandwidth or IOPS
+limit distribution; however, weight based distribution is available
+only if cfq-iosched is in use and neither scheme is available for
+blk-mq devices.
+
+
+5-3-1. IO Interface Files
+
+  io.stat
+
+	A read-only nested-keyed file which exists on non-root
+	cgroups.
+
+	Lines are keyed by $MAJ:$MIN device numbers and not ordered.
+	The following nested keys are defined.
+
+	  rbytes	Bytes read
+	  wbytes	Bytes written
+	  rios		Number of read IOs
+	  wios		Number of write IOs
+
+	An example read output follows.
+
+	  8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353
+	  8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252
+
+  io.weight
+
+	A read-write flat-keyed file which exists on non-root cgroups.
+	The default is "default 100".
+
+	The first line is the default weight applied to devices
+	without specific override.  The rest are overrides keyed by
+	$MAJ:$MIN device numbers and not ordered.  The weights are in
+	the range [1, 10000] and specifies the relative amount IO time
+	the cgroup can use in relation to its siblings.
+
+	The default weight can be updated by writing either "default
+	$WEIGHT" or simply "$WEIGHT".  Overrides can be set by writing
+	"$MAJ:$MIN $WEIGHT" and unset by writing "$MAJ:$MIN default".
+
+	An example read output follows.
+
+	  default 100
+	  8:16 200
+	  8:0 50
+
+  io.max
+
+	A read-write nested-keyed file which exists on non-root
+	cgroups.
+
+	BPS and IOPS based IO limit.  Lines are keyed by $MAJ:$MIN
+	device numbers and not ordered.  The following nested keys are
+	defined.
+
+	  rbps		Max read bytes per second
+	  wbps		Max write bytes per second
+	  riops		Max read IO operations per second
+	  wiops		Max write IO operations per second
+
+	When writing, any number of nested key-value pairs can be
+	specified in any order.  "max" can be specified as the value
+	to remove a specific limit.  If the same key is specified
+	multiple times, the outcome is undefined.
+
+	BPS and IOPS are measured in each IO direction and IOs are
+	delayed if limit is reached.  Temporary bursts are allowed.
+
+	Setting read limit at 2M BPS and write at 120 IOPS for 8:16.
+
+	  echo "8:16 rbps=2097152 wiops=120" > io.max
+
+	Reading returns the following.
+
+	  8:16 rbps=2097152 wbps=max riops=max wiops=120
+
+	Write IOPS limit can be removed by writing the following.
+
+	  echo "8:16 wiops=max" > io.max
+
+	Reading now returns the following.
+
+	  8:16 rbps=2097152 wbps=max riops=max wiops=max
+
+
+5-3-2. Writeback
+
+Page cache is dirtied through buffered writes and shared mmaps and
+written asynchronously to the backing filesystem by the writeback
+mechanism.  Writeback sits between the memory and IO domains and
+regulates the proportion of dirty memory by balancing dirtying and
+write IOs.
+
+The io controller, in conjunction with the memory controller,
+implements control of page cache writeback IOs.  The memory controller
+defines the memory domain that dirty memory ratio is calculated and
+maintained for and the io controller defines the io domain which
+writes out dirty pages for the memory domain.  Both system-wide and
+per-cgroup dirty memory states are examined and the more restrictive
+of the two is enforced.
+
+cgroup writeback requires explicit support from the underlying
+filesystem.  Currently, cgroup writeback is implemented on ext2, ext4
+and btrfs.  On other filesystems, all writeback IOs are attributed to
+the root cgroup.
+
+There are inherent differences in memory and writeback management
+which affects how cgroup ownership is tracked.  Memory is tracked per
+page while writeback per inode.  For the purpose of writeback, an
+inode is assigned to a cgroup and all IO requests to write dirty pages
+from the inode are attributed to that cgroup.
+
+As cgroup ownership for memory is tracked per page, there can be pages
+which are associated with different cgroups than the one the inode is
+associated with.  These are called foreign pages.  The writeback
+constantly keeps track of foreign pages and, if a particular foreign
+cgroup becomes the majority over a certain period of time, switches
+the ownership of the inode to that cgroup.
+
+While this model is enough for most use cases where a given inode is
+mostly dirtied by a single cgroup even when the main writing cgroup
+changes over time, use cases where multiple cgroups write to a single
+inode simultaneously are not supported well.  In such circumstances, a
+significant portion of IOs are likely to be attributed incorrectly.
+As memory controller assigns page ownership on the first use and
+doesn't update it until the page is released, even if writeback
+strictly follows page ownership, multiple cgroups dirtying overlapping
+areas wouldn't work as expected.  It's recommended to avoid such usage
+patterns.
+
+The sysctl knobs which affect writeback behavior are applied to cgroup
+writeback as follows.
+
+  vm.dirty_background_ratio
+  vm.dirty_ratio
+
+	These ratios apply the same to cgroup writeback with the
+	amount of available memory capped by limits imposed by the
+	memory controller and system-wide clean memory.
+
+  vm.dirty_background_bytes
+  vm.dirty_bytes
+
+	For cgroup writeback, this is calculated into ratio against
+	total available memory and applied the same way as
+	vm.dirty[_background]_ratio.
+
+
+P. Information on Kernel Programming
+
+This section contains kernel programming information in the areas
+where interacting with cgroup is necessary.  cgroup core and
+controllers are not covered.
+
+
+P-1. Filesystem Support for Writeback
+
+A filesystem can support cgroup writeback by updating
+address_space_operations->writepage[s]() to annotate bio's using the
+following two functions.
+
+  wbc_init_bio(@wbc, @bio)
+
+	Should be called for each bio carrying writeback data and
+	associates the bio with the inode's owner cgroup.  Can be
+	called anytime between bio allocation and submission.
+
+  wbc_account_io(@wbc, @page, @bytes)
+
+	Should be called for each data segment being written out.
+	While this function doesn't care exactly when it's called
+	during the writeback session, it's the easiest and most
+	natural to call it as data segments are added to a bio.
+
+With writeback bio's annotated, cgroup support can be enabled per
+super_block by setting SB_I_CGROUPWB in ->s_iflags.  This allows for
+selective disabling of cgroup writeback support which is helpful when
+certain filesystem features, e.g. journaled data mode, are
+incompatible.
+
+wbc_init_bio() binds the specified bio to its cgroup.  Depending on
+the configuration, the bio may be executed at a lower priority and if
+the writeback session is holding shared resources, e.g. a journal
+entry, may lead to priority inversion.  There is no one easy solution
+for the problem.  Filesystems can try to work around specific problem
+cases by skipping wbc_init_bio() or using bio_associate_blkcg()
+directly.
+
+
+D. Deprecated v1 Core Features
+
+- Multiple hierarchies including named ones are not supported.
+
+- All mount options and remounting are not supported.
+
+- The "tasks" file is removed and "cgroup.procs" is not sorted.
+
+- "cgroup.clone_children" is removed.
+
+- /proc/cgroups is meaningless for v2.  Use "cgroup.controllers" file
+  at the root instead.
+
+
+R. Issues with v1 and Rationales for v2
+
+R-1. Multiple Hierarchies
+
+cgroup v1 allowed an arbitrary number of hierarchies and each
+hierarchy could host any number of controllers.  While this seemed to
+provide a high level of flexibility, it wasn't useful in practice.
+
+For example, as there is only one instance of each controller, utility
+type controllers such as freezer which can be useful in all
+hierarchies could only be used in one.  The issue is exacerbated by
+the fact that controllers couldn't be moved to another hierarchy once
+hierarchies were populated.  Another issue was that all controllers
+bound to a hierarchy were forced to have exactly the same view of the
+hierarchy.  It wasn't possible to vary the granularity depending on
+the specific controller.
+
+In practice, these issues heavily limited which controllers could be
+put on the same hierarchy and most configurations resorted to putting
+each controller on its own hierarchy.  Only closely related ones, such
+as the cpu and cpuacct controllers, made sense to be put on the same
+hierarchy.  This often meant that userland ended up managing multiple
+similar hierarchies repeating the same steps on each hierarchy
+whenever a hierarchy management operation was necessary.
+
+Furthermore, support for multiple hierarchies came at a steep cost.
+It greatly complicated cgroup core implementation but more importantly
+the support for multiple hierarchies restricted how cgroup could be
+used in general and what controllers was able to do.
+
+There was no limit on how many hierarchies there might be, which meant
+that a thread's cgroup membership couldn't be described in finite
+length.  The key might contain any number of entries and was unlimited
+in length, which made it highly awkward to manipulate and led to
+addition of controllers which existed only to identify membership,
+which in turn exacerbated the original problem of proliferating number
+of hierarchies.
+
+Also, as a controller couldn't have any expectation regarding the
+topologies of hierarchies other controllers might be on, each
+controller had to assume that all other controllers were attached to
+completely orthogonal hierarchies.  This made it impossible, or at
+least very cumbersome, for controllers to cooperate with each other.
+
+In most use cases, putting controllers on hierarchies which are
+completely orthogonal to each other isn't necessary.  What usually is
+called for is the ability to have differing levels of granularity
+depending on the specific controller.  In other words, hierarchy may
+be collapsed from leaf towards root when viewed from specific
+controllers.  For example, a given configuration might not care about
+how memory is distributed beyond a certain level while still wanting
+to control how CPU cycles are distributed.
+
+
+R-2. Thread Granularity
+
+cgroup v1 allowed threads of a process to belong to different cgroups.
+This didn't make sense for some controllers and those controllers
+ended up implementing different ways to ignore such situations but
+much more importantly it blurred the line between API exposed to
+individual applications and system management interface.
+
+Generally, in-process knowledge is available only to the process
+itself; thus, unlike service-level organization of processes,
+categorizing threads of a process requires active participation from
+the application which owns the target process.
+
+cgroup v1 had an ambiguously defined delegation model which got abused
+in combination with thread granularity.  cgroups were delegated to
+individual applications so that they can create and manage their own
+sub-hierarchies and control resource distributions along them.  This
+effectively raised cgroup to the status of a syscall-like API exposed
+to lay programs.
+
+First of all, cgroup has a fundamentally inadequate interface to be
+exposed this way.  For a process to access its own knobs, it has to
+extract the path on the target hierarchy from /proc/self/cgroup,
+construct the path by appending the name of the knob to the path, open
+and then read and/or write to it.  This is not only extremely clunky
+and unusual but also inherently racy.  There is no conventional way to
+define transaction across the required steps and nothing can guarantee
+that the process would actually be operating on its own sub-hierarchy.
+
+cgroup controllers implemented a number of knobs which would never be
+accepted as public APIs because they were just adding control knobs to
+system-management pseudo filesystem.  cgroup ended up with interface
+knobs which were not properly abstracted or refined and directly
+revealed kernel internal details.  These knobs got exposed to
+individual applications through the ill-defined delegation mechanism
+effectively abusing cgroup as a shortcut to implementing public APIs
+without going through the required scrutiny.
+
+This was painful for both userland and kernel.  Userland ended up with
+misbehaving and poorly abstracted interfaces and kernel exposing and
+locked into constructs inadvertently.
+
+
+R-3. Competition Between Inner Nodes and Threads
+
+cgroup v1 allowed threads to be in any cgroups which created an
+interesting problem where threads belonging to a parent cgroup and its
+children cgroups competed for resources.  This was nasty as two
+different types of entities competed and there was no obvious way to
+settle it.  Different controllers did different things.
+
+The cpu controller considered threads and cgroups as equivalents and
+mapped nice levels to cgroup weights.  This worked for some cases but
+fell flat when children wanted to be allocated specific ratios of CPU
+cycles and the number of internal threads fluctuated - the ratios
+constantly changed as the number of competing entities fluctuated.
+There also were other issues.  The mapping from nice level to weight
+wasn't obvious or universal, and there were various other knobs which
+simply weren't available for threads.
+
+The io controller implicitly created a hidden leaf node for each
+cgroup to host the threads.  The hidden leaf had its own copies of all
+the knobs with "leaf_" prefixed.  While this allowed equivalent
+control over internal threads, it was with serious drawbacks.  It
+always added an extra layer of nesting which wouldn't be necessary
+otherwise, made the interface messy and significantly complicated the
+implementation.
+
+The memory controller didn't have a way to control what happened
+between internal tasks and child cgroups and the behavior was not
+clearly defined.  There were attempts to add ad-hoc behaviors and
+knobs to tailor the behavior to specific workloads which would have
+led to problems extremely difficult to resolve in the long term.
+
+Multiple controllers struggled with internal tasks and came up with
+different ways to deal with it; unfortunately, all the approaches were
+severely flawed and, furthermore, the widely different behaviors
+made cgroup as a whole highly inconsistent.
+
+This clearly is a problem which needs to be addressed from cgroup core
+in a uniform way.
+
+
+R-4. Other Interface Issues
+
+cgroup v1 grew without oversight and developed a large number of
+idiosyncrasies and inconsistencies.  One issue on the cgroup core side
+was how an empty cgroup was notified - a userland helper binary was
+forked and executed for each event.  The event delivery wasn't
+recursive or delegatable.  The limitations of the mechanism also led
+to in-kernel event delivery filtering mechanism further complicating
+the interface.
+
+Controller interfaces were problematic too.  An extreme example is
+controllers completely ignoring hierarchical organization and treating
+all cgroups as if they were all located directly under the root
+cgroup.  Some controllers exposed a large amount of inconsistent
+implementation details to userland.
+
+There also was no consistency across controllers.  When a new cgroup
+was created, some controllers defaulted to not imposing extra
+restrictions while others disallowed any resource usage until
+explicitly configured.  Configuration knobs for the same type of
+control used widely differing naming schemes and formats.  Statistics
+and information knobs were named arbitrarily and used different
+formats and units even in the same controller.
+
+cgroup v2 establishes common conventions where appropriate and updates
+controllers so that they expose minimal and consistent interfaces.
+
+
+R-5. Controller Issues and Remedies
+
+R-5-1. Memory
+
+The original lower boundary, the soft limit, is defined as a limit
+that is per default unset.  As a result, the set of cgroups that
+global reclaim prefers is opt-in, rather than opt-out.  The costs for
+optimizing these mostly negative lookups are so high that the
+implementation, despite its enormous size, does not even provide the
+basic desirable behavior.  First off, the soft limit has no
+hierarchical meaning.  All configured groups are organized in a global
+rbtree and treated like equal peers, regardless where they are located
+in the hierarchy.  This makes subtree delegation impossible.  Second,
+the soft limit reclaim pass is so aggressive that it not just
+introduces high allocation latencies into the system, but also impacts
+system performance due to overreclaim, to the point where the feature
+becomes self-defeating.
+
+The memory.low boundary on the other hand is a top-down allocated
+reserve.  A cgroup enjoys reclaim protection when it and all its
+ancestors are below their low boundaries, which makes delegation of
+subtrees possible.  Secondly, new cgroups have no reserve per default
+and in the common case most cgroups are eligible for the preferred
+reclaim pass.  This allows the new low boundary to be efficiently
+implemented with just a minor addition to the generic reclaim code,
+without the need for out-of-band data structures and reclaim passes.
+Because the generic reclaim code considers all cgroups except for the
+ones running low in the preferred first reclaim pass, overreclaim of
+individual groups is eliminated as well, resulting in much better
+overall workload performance.
+
+The original high boundary, the hard limit, is defined as a strict
+limit that can not budge, even if the OOM killer has to be called.
+But this generally goes against the goal of making the most out of the
+available memory.  The memory consumption of workloads varies during
+runtime, and that requires users to overcommit.  But doing that with a
+strict upper limit requires either a fairly accurate prediction of the
+working set size or adding slack to the limit.  Since working set size
+estimation is hard and error prone, and getting it wrong results in
+OOM kills, most users tend to err on the side of a looser limit and
+end up wasting precious resources.
+
+The memory.high boundary on the other hand can be set much more
+conservatively.  When hit, it throttles allocations by forcing them
+into direct reclaim to work off the excess, but it never invokes the
+OOM killer.  As a result, a high boundary that is chosen too
+aggressively will not terminate the processes, but instead it will
+lead to gradual performance degradation.  The user can monitor this
+and make corrections until the minimal memory footprint that still
+gives acceptable performance is found.
+
+In extreme cases, with many concurrent allocations and a complete
+breakdown of reclaim progress within the group, the high boundary can
+be exceeded.  But even then it's mostly better to satisfy the
+allocation from the slack available in other groups or the rest of the
+system than killing the group.  Otherwise, memory.max is there to
+limit this type of spillover and ultimately contain buggy or even
+malicious applications.
diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt
deleted file mode 100644
index 781b1d475bcf..000000000000
--- a/Documentation/cgroups/unified-hierarchy.txt
+++ /dev/null
@@ -1,647 +0,0 @@
-
-Cgroup unified hierarchy
-
-April, 2014		Tejun Heo <tj@kernel.org>
-
-This document describes the changes made by unified hierarchy and
-their rationales.  It will eventually be merged into the main cgroup
-documentation.
-
-CONTENTS
-
-1. Background
-2. Basic Operation
-  2-1. Mounting
-  2-2. cgroup.subtree_control
-  2-3. cgroup.controllers
-3. Structural Constraints
-  3-1. Top-down
-  3-2. No internal tasks
-4. Delegation
-  4-1. Model of delegation
-  4-2. Common ancestor rule
-5. Other Changes
-  5-1. [Un]populated Notification
-  5-2. Other Core Changes
-  5-3. Controller File Conventions
-    5-3-1. Format
-    5-3-2. Control Knobs
-  5-4. Per-Controller Changes
-    5-4-1. io
-    5-4-2. cpuset
-    5-4-3. memory
-6. Planned Changes
-  6-1. CAP for resource control
-
-
-1. Background
-
-cgroup allows an arbitrary number of hierarchies and each hierarchy
-can host any number of controllers.  While this seems to provide a
-high level of flexibility, it isn't quite useful in practice.
-
-For example, as there is only one instance of each controller, utility
-type controllers such as freezer which can be useful in all
-hierarchies can only be used in one.  The issue is exacerbated by the
-fact that controllers can't be moved around once hierarchies are
-populated.  Another issue is that all controllers bound to a hierarchy
-are forced to have exactly the same view of the hierarchy.  It isn't
-possible to vary the granularity depending on the specific controller.
-
-In practice, these issues heavily limit which controllers can be put
-on the same hierarchy and most configurations resort to putting each
-controller on its own hierarchy.  Only closely related ones, such as
-the cpu and cpuacct controllers, make sense to put on the same
-hierarchy.  This often means that userland ends up managing multiple
-similar hierarchies repeating the same steps on each hierarchy
-whenever a hierarchy management operation is necessary.
-
-Unfortunately, support for multiple hierarchies comes at a steep cost.
-Internal implementation in cgroup core proper is dazzlingly
-complicated but more importantly the support for multiple hierarchies
-restricts how cgroup is used in general and what controllers can do.
-
-There's no limit on how many hierarchies there may be, which means
-that a task's cgroup membership can't be described in finite length.
-The key may contain any varying number of entries and is unlimited in
-length, which makes it highly awkward to handle and leads to addition
-of controllers which exist only to identify membership, which in turn
-exacerbates the original problem.
-
-Also, as a controller can't have any expectation regarding what shape
-of hierarchies other controllers would be on, each controller has to
-assume that all other controllers are operating on completely
-orthogonal hierarchies.  This makes it impossible, or at least very
-cumbersome, for controllers to cooperate with each other.
-
-In most use cases, putting controllers on hierarchies which are
-completely orthogonal to each other isn't necessary.  What usually is
-called for is the ability to have differing levels of granularity
-depending on the specific controller.  In other words, hierarchy may
-be collapsed from leaf towards root when viewed from specific
-controllers.  For example, a given configuration might not care about
-how memory is distributed beyond a certain level while still wanting
-to control how CPU cycles are distributed.
-
-Unified hierarchy is the next version of cgroup interface.  It aims to
-address the aforementioned issues by having more structure while
-retaining enough flexibility for most use cases.  Various other
-general and controller-specific interface issues are also addressed in
-the process.
-
-
-2. Basic Operation
-
-2-1. Mounting
-
-Currently, unified hierarchy can be mounted with the following mount
-command.  Note that this is still under development and scheduled to
-change soon.
-
- mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT
-
-All controllers which support the unified hierarchy and are not bound
-to other hierarchies are automatically bound to unified hierarchy and
-show up at the root of it.  Controllers which are enabled only in the
-root of unified hierarchy can be bound to other hierarchies.  This
-allows mixing unified hierarchy with the traditional multiple
-hierarchies in a fully backward compatible way.
-
-A controller can be moved across hierarchies only after the controller
-is no longer referenced in its current hierarchy.  Because per-cgroup
-controller states are destroyed asynchronously and controllers may
-have lingering references, a controller may not show up immediately on
-the unified hierarchy after the final umount of the previous
-hierarchy.  Similarly, a controller should be fully disabled to be
-moved out of the unified hierarchy and it may take some time for the
-disabled controller to become available for other hierarchies;
-furthermore, due to dependencies among controllers, other controllers
-may need to be disabled too.
-
-While useful for development and manual configurations, dynamically
-moving controllers between the unified and other hierarchies is
-strongly discouraged for production use.  It is recommended to decide
-the hierarchies and controller associations before starting using the
-controllers.
-
-
-2-2. cgroup.subtree_control
-
-All cgroups on unified hierarchy have a "cgroup.subtree_control" file
-which governs which controllers are enabled on the children of the
-cgroup.  Let's assume a hierarchy like the following.
-
-  root - A - B - C
-               \ D
-
-root's "cgroup.subtree_control" file determines which controllers are
-enabled on A.  A's on B.  B's on C and D.  This coincides with the
-fact that controllers on the immediate sub-level are used to
-distribute the resources of the parent.  In fact, it's natural to
-assume that resource control knobs of a child belong to its parent.
-Enabling a controller in a "cgroup.subtree_control" file declares that
-distribution of the respective resources of the cgroup will be
-controlled.  Note that this means that controller enable states are
-shared among siblings.
-
-When read, the file contains a space-separated list of currently
-enabled controllers.  A write to the file should contain a
-space-separated list of controllers with '+' or '-' prefixed (without
-the quotes).  Controllers prefixed with '+' are enabled and '-'
-disabled.  If a controller is listed multiple times, the last entry
-wins.  The specific operations are executed atomically - either all
-succeed or fail.
-
-
-2-3. cgroup.controllers
-
-Read-only "cgroup.controllers" file contains a space-separated list of
-controllers which can be enabled in the cgroup's
-"cgroup.subtree_control" file.
-
-In the root cgroup, this lists controllers which are not bound to
-other hierarchies and the content changes as controllers are bound to
-and unbound from other hierarchies.
-
-In non-root cgroups, the content of this file equals that of the
-parent's "cgroup.subtree_control" file as only controllers enabled
-from the parent can be used in its children.
-
-
-3. Structural Constraints
-
-3-1. Top-down
-
-As it doesn't make sense to nest control of an uncontrolled resource,
-all non-root "cgroup.subtree_control" files can only contain
-controllers which are enabled in the parent's "cgroup.subtree_control"
-file.  A controller can be enabled only if the parent has the
-controller enabled and a controller can't be disabled if one or more
-children have it enabled.
-
-
-3-2. No internal tasks
-
-One long-standing issue that cgroup faces is the competition between
-tasks belonging to the parent cgroup and its children cgroups.  This
-is inherently nasty as two different types of entities compete and
-there is no agreed-upon obvious way to handle it.  Different
-controllers are doing different things.
-
-The cpu controller considers tasks and cgroups as equivalents and maps
-nice levels to cgroup weights.  This works for some cases but falls
-flat when children should be allocated specific ratios of CPU cycles
-and the number of internal tasks fluctuates - the ratios constantly
-change as the number of competing entities fluctuates.  There also are
-other issues.  The mapping from nice level to weight isn't obvious or
-universal, and there are various other knobs which simply aren't
-available for tasks.
-
-The io controller implicitly creates a hidden leaf node for each
-cgroup to host the tasks.  The hidden leaf has its own copies of all
-the knobs with "leaf_" prefixed.  While this allows equivalent control
-over internal tasks, it's with serious drawbacks.  It always adds an
-extra layer of nesting which may not be necessary, makes the interface
-messy and significantly complicates the implementation.
-
-The memory controller currently doesn't have a way to control what
-happens between internal tasks and child cgroups and the behavior is
-not clearly defined.  There have been attempts to add ad-hoc behaviors
-and knobs to tailor the behavior to specific workloads.  Continuing
-this direction will lead to problems which will be extremely difficult
-to resolve in the long term.
-
-Multiple controllers struggle with internal tasks and came up with
-different ways to deal with it; unfortunately, all the approaches in
-use now are severely flawed and, furthermore, the widely different
-behaviors make cgroup as whole highly inconsistent.
-
-It is clear that this is something which needs to be addressed from
-cgroup core proper in a uniform way so that controllers don't need to
-worry about it and cgroup as a whole shows a consistent and logical
-behavior.  To achieve that, unified hierarchy enforces the following
-structural constraint:
-
- Except for the root, only cgroups which don't contain any task may
- have controllers enabled in their "cgroup.subtree_control" files.
-
-Combined with other properties, this guarantees that, when a
-controller is looking at the part of the hierarchy which has it
-enabled, tasks are always only on the leaves.  This rules out
-situations where child cgroups compete against internal tasks of the
-parent.
-
-There are two things to note.  Firstly, the root cgroup is exempt from
-the restriction.  Root contains tasks and anonymous resource
-consumption which can't be associated with any other cgroup and
-requires special treatment from most controllers.  How resource
-consumption in the root cgroup is governed is up to each controller.
-
-Secondly, the restriction doesn't take effect if there is no enabled
-controller in the cgroup's "cgroup.subtree_control" file.  This is
-important as otherwise it wouldn't be possible to create children of a
-populated cgroup.  To control resource distribution of a cgroup, the
-cgroup must create children and transfer all its tasks to the children
-before enabling controllers in its "cgroup.subtree_control" file.
-
-
-4. Delegation
-
-4-1. Model of delegation
-
-A cgroup can be delegated to a less privileged user by granting write
-access of the directory and its "cgroup.procs" file to the user.  Note
-that the resource control knobs in a given directory concern the
-resources of the parent and thus must not be delegated along with the
-directory.
-
-Once delegated, the user can build sub-hierarchy under the directory,
-organize processes as it sees fit and further distribute the resources
-it got from the parent.  The limits and other settings of all resource
-controllers are hierarchical and regardless of what happens in the
-delegated sub-hierarchy, nothing can escape the resource restrictions
-imposed by the parent.
-
-Currently, cgroup doesn't impose any restrictions on the number of
-cgroups in or nesting depth of a delegated sub-hierarchy; however,
-this may in the future be limited explicitly.
-
-
-4-2. Common ancestor rule
-
-On the unified hierarchy, to write to a "cgroup.procs" file, in
-addition to the usual write permission to the file and uid match, the
-writer must also have write access to the "cgroup.procs" file of the
-common ancestor of the source and destination cgroups.  This prevents
-delegatees from smuggling processes across disjoint sub-hierarchies.
-
-Let's say cgroups C0 and C1 have been delegated to user U0 who created
-C00, C01 under C0 and C10 under C1 as follows.
-
- ~~~~~~~~~~~~~ - C0 - C00
- ~ cgroup    ~      \ C01
- ~ hierarchy ~
- ~~~~~~~~~~~~~ - C1 - C10
-
-C0 and C1 are separate entities in terms of resource distribution
-regardless of their relative positions in the hierarchy.  The
-resources the processes under C0 are entitled to are controlled by
-C0's ancestors and may be completely different from C1.  It's clear
-that the intention of delegating C0 to U0 is allowing U0 to organize
-the processes under C0 and further control the distribution of C0's
-resources.
-
-On traditional hierarchies, if a task has write access to "tasks" or
-"cgroup.procs" file of a cgroup and its uid agrees with the target, it
-can move the target to the cgroup.  In the above example, U0 will not
-only be able to move processes in each sub-hierarchy but also across
-the two sub-hierarchies, effectively allowing it to violate the
-organizational and resource restrictions implied by the hierarchical
-structure above C0 and C1.
-
-On the unified hierarchy, let's say U0 wants to write the pid of a
-process which has a matching uid and is currently in C10 into
-"C00/cgroup.procs".  U0 obviously has write access to the file and
-migration permission on the process; however, the common ancestor of
-the source cgroup C10 and the destination cgroup C00 is above the
-points of delegation and U0 would not have write access to its
-"cgroup.procs" and thus be denied with -EACCES.
-
-
-5. Other Changes
-
-5-1. [Un]populated Notification
-
-cgroup users often need a way to determine when a cgroup's
-subhierarchy becomes empty so that it can be cleaned up.  cgroup
-currently provides release_agent for it; unfortunately, this mechanism
-is riddled with issues.
-
-- It delivers events by forking and execing a userland binary
-  specified as the release_agent.  This is a long deprecated method of
-  notification delivery.  It's extremely heavy, slow and cumbersome to
-  integrate with larger infrastructure.
-
-- There is single monitoring point at the root.  There's no way to
-  delegate management of a subtree.
-
-- The event isn't recursive.  It triggers when a cgroup doesn't have
-  any tasks or child cgroups.  Events for internal nodes trigger only
-  after all children are removed.  This again makes it impossible to
-  delegate management of a subtree.
-
-- Events are filtered from the kernel side.  A "notify_on_release"
-  file is used to subscribe to or suppress release events.  This is
-  unnecessarily complicated and probably done this way because event
-  delivery itself was expensive.
-
-Unified hierarchy implements "populated" field in "cgroup.events"
-interface file which can be used to monitor whether the cgroup's
-subhierarchy has tasks in it or not.  Its value is 0 if there is no
-task in the cgroup and its descendants; otherwise, 1.  poll and
-[id]notify events are triggered when the value changes.
-
-This is significantly lighter and simpler and trivially allows
-delegating management of subhierarchy - subhierarchy monitoring can
-block further propagation simply by putting itself or another process
-in the subhierarchy and monitor events that it's interested in from
-there without interfering with monitoring higher in the tree.
-
-In unified hierarchy, the release_agent mechanism is no longer
-supported and the interface files "release_agent" and
-"notify_on_release" do not exist.
-
-
-5-2. Other Core Changes
-
-- None of the mount options is allowed.
-
-- remount is disallowed.
-
-- rename(2) is disallowed.
-
-- The "tasks" file is removed.  Everything should at process
-  granularity.  Use the "cgroup.procs" file instead.
-
-- The "cgroup.procs" file is not sorted.  pids will be unique unless
-  they got recycled in-between reads.
-
-- The "cgroup.clone_children" file is removed.
-
-- /proc/PID/cgroup keeps reporting the cgroup that a zombie belonged
-  to before exiting.  If the cgroup is removed before the zombie is
-  reaped, " (deleted)" is appeneded to the path.
-
-
-5-3. Controller File Conventions
-
-5-3-1. Format
-
-In general, all controller files should be in one of the following
-formats whenever possible.
-
-- Values only files
-
-  VAL0 VAL1...\n
-
-- Flat keyed files
-
-  KEY0 VAL0\n
-  KEY1 VAL1\n
-  ...
-
-- Nested keyed files
-
-  KEY0 SUB_KEY0=VAL00 SUB_KEY1=VAL01...
-  KEY1 SUB_KEY0=VAL10 SUB_KEY1=VAL11...
-  ...
-
-For a writeable file, the format for writing should generally match
-reading; however, controllers may allow omitting later fields or
-implement restricted shortcuts for most common use cases.
-
-For both flat and nested keyed files, only the values for a single key
-can be written at a time.  For nested keyed files, the sub key pairs
-may be specified in any order and not all pairs have to be specified.
-
-
-5-3-2. Control Knobs
-
-- Settings for a single feature should generally be implemented in a
-  single file.
-
-- In general, the root cgroup should be exempt from resource control
-  and thus shouldn't have resource control knobs.
-
-- If a controller implements ratio based resource distribution, the
-  control knob should be named "weight" and have the range [1, 10000]
-  and 100 should be the default value.  The values are chosen to allow
-  enough and symmetric bias in both directions while keeping it
-  intuitive (the default is 100%).
-
-- If a controller implements an absolute resource guarantee and/or
-  limit, the control knobs should be named "min" and "max"
-  respectively.  If a controller implements best effort resource
-  gurantee and/or limit, the control knobs should be named "low" and
-  "high" respectively.
-
-  In the above four control files, the special token "max" should be
-  used to represent upward infinity for both reading and writing.
-
-- If a setting has configurable default value and specific overrides,
-  the default settings should be keyed with "default" and appear as
-  the first entry in the file.  Specific entries can use "default" as
-  its value to indicate inheritance of the default value.
-
-- For events which are not very high frequency, an interface file
-  "events" should be created which lists event key value pairs.
-  Whenever a notifiable event happens, file modified event should be
-  generated on the file.
-
-
-5-4. Per-Controller Changes
-
-5-4-1. io
-
-- blkio is renamed to io.  The interface is overhauled anyway.  The
-  new name is more in line with the other two major controllers, cpu
-  and memory, and better suited given that it may be used for cgroup
-  writeback without involving block layer.
-
-- Everything including stat is always hierarchical making separate
-  recursive stat files pointless and, as no internal node can have
-  tasks, leaf weights are meaningless.  The operation model is
-  simplified and the interface is overhauled accordingly.
-
-  io.stat
-
-	The stat file.  The reported stats are from the point where
-	bio's are issued to request_queue.  The stats are counted
-	independent of which policies are enabled.  Each line in the
-	file follows the following format.  More fields may later be
-	added at the end.
-
-	  $MAJ:$MIN rbytes=$RBYTES wbytes=$WBYTES rios=$RIOS wrios=$WIOS
-
-  io.weight
-
-	The weight setting, currently only available and effective if
-	cfq-iosched is in use for the target device.  The weight is
-	between 1 and 10000 and defaults to 100.  The first line
-	always contains the default weight in the following format to
-	use when per-device setting is missing.
-
-	  default $WEIGHT
-
-	Subsequent lines list per-device weights of the following
-	format.
-
-	  $MAJ:$MIN $WEIGHT
-
-	Writing "$WEIGHT" or "default $WEIGHT" changes the default
-	setting.  Writing "$MAJ:$MIN $WEIGHT" sets per-device weight
-	while "$MAJ:$MIN default" clears it.
-
-	This file is available only on non-root cgroups.
-
-  io.max
-
-	The maximum bandwidth and/or iops setting, only available if
-	blk-throttle is enabled.  The file is of the following format.
-
-	  $MAJ:$MIN rbps=$RBPS wbps=$WBPS riops=$RIOPS wiops=$WIOPS
-
-	${R|W}BPS are read/write bytes per second and ${R|W}IOPS are
-	read/write IOs per second.  "max" indicates no limit.  Writing
-	to the file follows the same format but the individual
-	settings may be omitted or specified in any order.
-
-	This file is available only on non-root cgroups.
-
-
-5-4-2. cpuset
-
-- Tasks are kept in empty cpusets after hotplug and take on the masks
-  of the nearest non-empty ancestor, instead of being moved to it.
-
-- A task can be moved into an empty cpuset, and again it takes on the
-  masks of the nearest non-empty ancestor.
-
-
-5-4-3. memory
-
-- use_hierarchy is on by default and the cgroup file for the flag is
-  not created.
-
-- The original lower boundary, the soft limit, is defined as a limit
-  that is per default unset.  As a result, the set of cgroups that
-  global reclaim prefers is opt-in, rather than opt-out.  The costs
-  for optimizing these mostly negative lookups are so high that the
-  implementation, despite its enormous size, does not even provide the
-  basic desirable behavior.  First off, the soft limit has no
-  hierarchical meaning.  All configured groups are organized in a
-  global rbtree and treated like equal peers, regardless where they
-  are located in the hierarchy.  This makes subtree delegation
-  impossible.  Second, the soft limit reclaim pass is so aggressive
-  that it not just introduces high allocation latencies into the
-  system, but also impacts system performance due to overreclaim, to
-  the point where the feature becomes self-defeating.
-
-  The memory.low boundary on the other hand is a top-down allocated
-  reserve.  A cgroup enjoys reclaim protection when it and all its
-  ancestors are below their low boundaries, which makes delegation of
-  subtrees possible.  Secondly, new cgroups have no reserve per
-  default and in the common case most cgroups are eligible for the
-  preferred reclaim pass.  This allows the new low boundary to be
-  efficiently implemented with just a minor addition to the generic
-  reclaim code, without the need for out-of-band data structures and
-  reclaim passes.  Because the generic reclaim code considers all
-  cgroups except for the ones running low in the preferred first
-  reclaim pass, overreclaim of individual groups is eliminated as
-  well, resulting in much better overall workload performance.
-
-- The original high boundary, the hard limit, is defined as a strict
-  limit that can not budge, even if the OOM killer has to be called.
-  But this generally goes against the goal of making the most out of
-  the available memory.  The memory consumption of workloads varies
-  during runtime, and that requires users to overcommit.  But doing
-  that with a strict upper limit requires either a fairly accurate
-  prediction of the working set size or adding slack to the limit.
-  Since working set size estimation is hard and error prone, and
-  getting it wrong results in OOM kills, most users tend to err on the
-  side of a looser limit and end up wasting precious resources.
-
-  The memory.high boundary on the other hand can be set much more
-  conservatively.  When hit, it throttles allocations by forcing them
-  into direct reclaim to work off the excess, but it never invokes the
-  OOM killer.  As a result, a high boundary that is chosen too
-  aggressively will not terminate the processes, but instead it will
-  lead to gradual performance degradation.  The user can monitor this
-  and make corrections until the minimal memory footprint that still
-  gives acceptable performance is found.
-
-  In extreme cases, with many concurrent allocations and a complete
-  breakdown of reclaim progress within the group, the high boundary
-  can be exceeded.  But even then it's mostly better to satisfy the
-  allocation from the slack available in other groups or the rest of
-  the system than killing the group.  Otherwise, memory.max is there
-  to limit this type of spillover and ultimately contain buggy or even
-  malicious applications.
-
-- The original control file names are unwieldy and inconsistent in
-  many different ways.  For example, the upper boundary hit count is
-  exported in the memory.failcnt file, but an OOM event count has to
-  be manually counted by listening to memory.oom_control events, and
-  lower boundary / soft limit events have to be counted by first
-  setting a threshold for that value and then counting those events.
-  Also, usage and limit files encode their units in the filename.
-  That makes the filenames very long, even though this is not
-  information that a user needs to be reminded of every time they type
-  out those names.
-
-  To address these naming issues, as well as to signal clearly that
-  the new interface carries a new configuration model, the naming
-  conventions in it necessarily differ from the old interface.
-
-- The original limit files indicate the state of an unset limit with a
-  Very High Number, and a configured limit can be unset by echoing -1
-  into those files.  But that very high number is implementation and
-  architecture dependent and not very descriptive.  And while -1 can
-  be understood as an underflow into the highest possible value, -2 or
-  -10M etc. do not work, so it's not consistent.
-
-  memory.low, memory.high, and memory.max will use the string "max" to
-  indicate and set the highest possible value.
-
-6. Planned Changes
-
-6-1. CAP for resource control
-
-Unified hierarchy will require one of the capabilities(7), which is
-yet to be decided, for all resource control related knobs.  Process
-organization operations - creation of sub-cgroups and migration of
-processes in sub-hierarchies may be delegated by changing the
-ownership and/or permissions on the cgroup directory and
-"cgroup.procs" interface file; however, all operations which affect
-resource control - writes to a "cgroup.subtree_control" file or any
-controller-specific knobs - will require an explicit CAP privilege.
-
-This, in part, is to prevent the cgroup interface from being
-inadvertently promoted to programmable API used by non-privileged
-binaries.  cgroup exposes various aspects of the system in ways which
-aren't properly abstracted for direct consumption by regular programs.
-This is an administration interface much closer to sysctl knobs than
-system calls.  Even the basic access model, being filesystem path
-based, isn't suitable for direct consumption.  There's no way to
-access "my cgroup" in a race-free way or make multiple operations
-atomic against migration to another cgroup.
-
-Another aspect is that, for better or for worse, the cgroup interface
-goes through far less scrutiny than regular interfaces for
-unprivileged userland.  The upside is that cgroup is able to expose
-useful features which may not be suitable for general consumption in a
-reasonable time frame.  It provides a relatively short path between
-internal details and userland-visible interface.  Of course, this
-shortcut comes with high risk.  We go through what we go through for
-general kernel APIs for good reasons.  It may end up leaking internal
-details in a way which can exert significant pain by locking the
-kernel into a contract that can't be maintained in a reasonable
-manner.
-
-Also, due to the specific nature, cgroup and its controllers don't
-tend to attract attention from a wide scope of developers.  cgroup's
-short history is already fraught with severely mis-designed
-interfaces, unnecessary commitments to and exposing of internal
-details, broken and dangerous implementations of various features.
-
-Keeping cgroup as an administration interface is both advantageous for
-its role and imperative given its nature.  Some of the cgroup features
-may make sense for unprivileged access.  If deemed justified, those
-must be further abstracted and implemented as a different interface,
-be it a system call or process-private filesystem, and survive through
-the scrutiny that any interface for general consumption is required to
-go through.
-
-Requiring CAP is not a complete solution but should serve as a
-significant deterrent against spraying cgroup usages in non-privileged
-programs.
diff --git a/Documentation/devicetree/bindings/goldfish/audio.txt b/Documentation/devicetree/bindings/goldfish/audio.txt
new file mode 100644
index 000000000000..d043fda433ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/goldfish/audio.txt
@@ -0,0 +1,17 @@
+Android Goldfish Audio
+
+Android goldfish audio device generated by android emulator.
+
+Required properties:
+
+- compatible : should contain "google,goldfish-audio" to match emulator
+- reg        : <registers mapping>
+- interrupts : <interrupt mapping>
+
+Example:
+
+	goldfish_audio@9030000 {
+		compatible = "google,goldfish-audio";
+		reg = <0x9030000 0x100>;
+		interrupts = <0x4>;
+	};
diff --git a/Documentation/devicetree/bindings/goldfish/battery.txt b/Documentation/devicetree/bindings/goldfish/battery.txt
new file mode 100644
index 000000000000..4fb613933214
--- /dev/null
+++ b/Documentation/devicetree/bindings/goldfish/battery.txt
@@ -0,0 +1,17 @@
+Android Goldfish Battery
+
+Android goldfish battery device generated by android emulator.
+
+Required properties:
+
+- compatible : should contain "google,goldfish-battery" to match emulator
+- reg        : <registers mapping>
+- interrupts : <interrupt mapping>
+
+Example:
+
+	goldfish_battery@9020000 {
+		compatible = "google,goldfish-battery";
+		reg = <0x9020000 0x1000>;
+		interrupts = <0x3>;
+	};
diff --git a/Documentation/devicetree/bindings/goldfish/events.txt b/Documentation/devicetree/bindings/goldfish/events.txt
new file mode 100644
index 000000000000..5babf46317a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/goldfish/events.txt
@@ -0,0 +1,17 @@
+Android Goldfish Events Keypad
+
+Android goldfish events keypad device generated by android emulator.
+
+Required properties:
+
+- compatible : should contain "google,goldfish-events-keypad" to match emulator
+- reg        : <registers mapping>
+- interrupts : <interrupt mapping>
+
+Example:
+
+	goldfish-events@9040000 {
+		compatible = "google,goldfish-events-keypad";
+		reg = <0x9040000 0x1000>;
+		interrupts = <0x5>;
+	};
diff --git a/Documentation/devicetree/bindings/goldfish/tty.txt b/Documentation/devicetree/bindings/goldfish/tty.txt
new file mode 100644
index 000000000000..82648278da77
--- /dev/null
+++ b/Documentation/devicetree/bindings/goldfish/tty.txt
@@ -0,0 +1,17 @@
+Android Goldfish TTY
+
+Android goldfish tty device generated by android emulator.
+
+Required properties:
+
+- compatible : should contain "google,goldfish-tty" to match emulator
+- reg        : <registers mapping>
+- interrupts : <interrupt mapping>
+
+Example:
+
+	goldfish_tty@1f004000 {
+		compatible = "google,goldfish-tty";
+		reg = <0x1f004000 0x1000>;
+		interrupts = <0xc>;
+	};
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index cf2f9e9dfe4d..fa16fb2302a5 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -357,6 +357,26 @@ of ftrace. Here is a list of some of the key files:
 		  to correlate events across hypervisor/guest if
 		  tb_offset is known.
 
+	  mono: This uses the fast monotonic clock (CLOCK_MONOTONIC)
+		which is monotonic and is subject to NTP rate adjustments.
+
+	  mono_raw:
+		This is the raw monotonic clock (CLOCK_MONOTONIC_RAW)
+		which is montonic but is not subject to any rate adjustments
+		and ticks at the same rate as the hardware clocksource.
+
+	  boot: This is the boot clock (CLOCK_BOOTTIME) and is based on the
+		fast monotonic clock, but also accounts for time spent in
+		suspend. Since the clock access is designed for use in
+		tracing in the suspend path, some side effects are possible
+		if clock is accessed after the suspend time is accounted before
+		the fast mono clock is updated. In this case, the clock update
+		appears to happen slightly sooner than it normally would have.
+		Also on 32-bit systems, its possible that the 64-bit boot offset
+		sees a partial update. These effects are rare and post
+		processing should be able to handle them. See comments on
+		ktime_get_boot_fast_ns function for more information.
+
 	To set a clock, simply echo the clock name into this file.
 
 	  echo global > trace_clock
diff --git a/Makefile b/Makefile
index 8e289d566429..39720eeb3d00 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 35
+SUBLEVEL = 36
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 
diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig
new file mode 100644
index 000000000000..35a90af941a4
--- /dev/null
+++ b/arch/arm/configs/ranchu_defconfig
@@ -0,0 +1,315 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_ARCH_MMAP_RND_BITS=16
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARCH_VIRT=y
+CONFIG_ARM_KERNMEM_PERMS=y
+CONFIG_SMP=y
+CONFIG_PREEMPT=y
+CONFIG_AEABI=y
+CONFIG_HIGHMEM=y
+CONFIG_KSM=y
+CONFIG_SECCOMP=y
+CONFIG_CMDLINE="console=ttyAMA0"
+CONFIG_VFP=y
+CONFIG_NEON=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_BRIDGE=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_SMSC911X=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_USB_USBNET=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_AMBAKMI=y
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_FB=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_FB_SIMPLE=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_SW_SYNC_USER=y
+CONFIG_ION=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_GOLDFISH=y
+CONFIG_GOLDFISH_PIPE=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_DEBUG_INFO=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_TIMEOUT=5
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_VIRTUALIZATION=y
diff --git a/arch/arm64/configs/ranchu64_defconfig b/arch/arm64/configs/ranchu64_defconfig
new file mode 100644
index 000000000000..00eb346e0928
--- /dev/null
+++ b/arch/arm64/configs/ranchu64_defconfig
@@ -0,0 +1,311 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_ARCH_MMAP_RND_BITS=24
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_ARCH_VEXPRESS=y
+CONFIG_NR_CPUS=4
+CONFIG_PREEMPT=y
+CONFIG_KSM=y
+CONFIG_SECCOMP=y
+CONFIG_ARMV8_DEPRECATED=y
+CONFIG_SWP_EMULATION=y
+CONFIG_CP15_BARRIER_EMULATION=y
+CONFIG_SETEND_EMULATION=y
+CONFIG_CMDLINE="console=ttyAMA0"
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_COMPAT=y
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_RPFILTER=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_TARGET_ECN=y
+CONFIG_IP_NF_TARGET_TTL=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MATCH_AH=y
+CONFIG_IP6_NF_MATCH_EUI64=y
+CONFIG_IP6_NF_MATCH_FRAG=y
+CONFIG_IP6_NF_MATCH_OPTS=y
+CONFIG_IP6_NF_MATCH_HL=y
+CONFIG_IP6_NF_MATCH_IPV6HEADER=y
+CONFIG_IP6_NF_MATCH_MH=y
+CONFIG_IP6_NF_MATCH_RT=y
+CONFIG_IP6_NF_TARGET_HL=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_BRIDGE=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI=y
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_SMC91X=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_INPUT_TABLET=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_BATTERY_GOLDFISH=y
+# CONFIG_HWMON is not set
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_FB=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_FB_SIMPLE=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_TIMED_GPIO=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_SW_SYNC_USER=y
+CONFIG_ION=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_GOLDFISH=y
+CONFIG_GOLDFISH_PIPE=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_PANIC_TIMEOUT=5
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+# CONFIG_FTRACE is not set
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_DEBUG_RODATA=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index cda6dbbe9842..fd5979f28ada 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -351,6 +351,7 @@ void __init parisc_setup_cache_timing(void)
 {
 	unsigned long rangetime, alltime;
 	unsigned long size, start;
+	unsigned long threshold;
 
 	alltime = mfctl(16);
 	flush_data_cache();
@@ -364,17 +365,12 @@ void __init parisc_setup_cache_timing(void)
 	printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
 		alltime, size, rangetime);
 
-	/* Racy, but if we see an intermediate value, it's ok too... */
-	parisc_cache_flush_threshold = size * alltime / rangetime;
-
-	parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
-	if (!parisc_cache_flush_threshold)
-		parisc_cache_flush_threshold = FLUSH_THRESHOLD;
-
-	if (parisc_cache_flush_threshold > cache_info.dc_size)
-		parisc_cache_flush_threshold = cache_info.dc_size;
-
-	printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
+	threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
+	if (threshold > cache_info.dc_size)
+		threshold = cache_info.dc_size;
+	if (threshold)
+		parisc_cache_flush_threshold = threshold;
+	printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
 		parisc_cache_flush_threshold/1024);
 
 	/* calculate TLB flush threshold */
@@ -383,7 +379,7 @@ void __init parisc_setup_cache_timing(void)
 	flush_tlb_all();
 	alltime = mfctl(16) - alltime;
 
-	size = PAGE_SIZE;
+	size = 0;
 	start = (unsigned long) _text;
 	rangetime = mfctl(16);
 	while (start < (unsigned long) _end) {
@@ -396,13 +392,10 @@ void __init parisc_setup_cache_timing(void)
 	printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
 		alltime, size, rangetime);
 
-	parisc_tlb_flush_threshold = size * alltime / rangetime;
-	parisc_tlb_flush_threshold *= num_online_cpus();
-	parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
-	if (!parisc_tlb_flush_threshold)
-		parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
-
-	printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+	threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime);
+	if (threshold)
+		parisc_tlb_flush_threshold = threshold;
+	printk(KERN_INFO "TLB flush threshold set to %lu KiB\n",
 		parisc_tlb_flush_threshold/1024);
 }
 
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index b743a80eaba0..675521919229 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -96,7 +96,7 @@ fitmanyloop:					/* Loop if LOOP >= 2 */
 
 fitmanymiddle:					/* Loop if LOOP >= 2 */
 	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
-	pitlbe		0(%sr1, %r28)
+	pitlbe		%r0(%sr1, %r28)
 	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
 	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
 	copy		%arg3, %r31		/* Re-init inner loop count */
@@ -139,7 +139,7 @@ fdtmanyloop:					/* Loop if LOOP >= 2 */
 
 fdtmanymiddle:					/* Loop if LOOP >= 2 */
 	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
-	pdtlbe		0(%sr1, %r28)
+	pdtlbe		%r0(%sr1, %r28)
 	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
 	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
 	copy		%arg3, %r31		/* Re-init inner loop count */
@@ -620,12 +620,12 @@ ENTRY(copy_user_page_asm)
 	/* Purge any old translations */
 
 #ifdef CONFIG_PA20
-	pdtlb,l		0(%r28)
-	pdtlb,l		0(%r29)
+	pdtlb,l		%r0(%r28)
+	pdtlb,l		%r0(%r29)
 #else
 	tlb_lock	%r20,%r21,%r22
-	pdtlb		0(%r28)
-	pdtlb		0(%r29)
+	pdtlb		%r0(%r28)
+	pdtlb		%r0(%r29)
 	tlb_unlock	%r20,%r21,%r22
 #endif
 
@@ -768,10 +768,10 @@ ENTRY(clear_user_page_asm)
 	/* Purge any old translation */
 
 #ifdef CONFIG_PA20
-	pdtlb,l		0(%r28)
+	pdtlb,l		%r0(%r28)
 #else
 	tlb_lock	%r20,%r21,%r22
-	pdtlb		0(%r28)
+	pdtlb		%r0(%r28)
 	tlb_unlock	%r20,%r21,%r22
 #endif
 
@@ -852,10 +852,10 @@ ENTRY(flush_dcache_page_asm)
 	/* Purge any old translation */
 
 #ifdef CONFIG_PA20
-	pdtlb,l		0(%r28)
+	pdtlb,l		%r0(%r28)
 #else
 	tlb_lock	%r20,%r21,%r22
-	pdtlb		0(%r28)
+	pdtlb		%r0(%r28)
 	tlb_unlock	%r20,%r21,%r22
 #endif
 
@@ -892,10 +892,10 @@ ENTRY(flush_dcache_page_asm)
 	sync
 
 #ifdef CONFIG_PA20
-	pdtlb,l		0(%r25)
+	pdtlb,l		%r0(%r25)
 #else
 	tlb_lock	%r20,%r21,%r22
-	pdtlb		0(%r25)
+	pdtlb		%r0(%r25)
 	tlb_unlock	%r20,%r21,%r22
 #endif
 
@@ -925,13 +925,18 @@ ENTRY(flush_icache_page_asm)
 	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
 #endif
 
-	/* Purge any old translation */
+	/* Purge any old translation.  Note that the FIC instruction
+	 * may use either the instruction or data TLB.  Given that we
+	 * have a flat address space, it's not clear which TLB will be
+	 * used.  So, we purge both entries.  */
 
 #ifdef CONFIG_PA20
+	pdtlb,l		%r0(%r28)
 	pitlb,l         %r0(%sr4,%r28)
 #else
 	tlb_lock        %r20,%r21,%r22
-	pitlb           (%sr4,%r28)
+	pdtlb		%r0(%r28)
+	pitlb           %r0(%sr4,%r28)
 	tlb_unlock      %r20,%r21,%r22
 #endif
 
@@ -970,10 +975,12 @@ ENTRY(flush_icache_page_asm)
 	sync
 
 #ifdef CONFIG_PA20
+	pdtlb,l		%r0(%r28)
 	pitlb,l         %r0(%sr4,%r25)
 #else
 	tlb_lock        %r20,%r21,%r22
-	pitlb           (%sr4,%r25)
+	pdtlb		%r0(%r28)
+	pitlb           %r0(%sr4,%r25)
 	tlb_unlock      %r20,%r21,%r22
 #endif
 
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index b9402c9b3454..af0d7fae7aa7 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -95,8 +95,8 @@ static inline int map_pte_uncached(pte_t * pte,
 
 		if (!pte_none(*pte))
 			printk(KERN_ERR "map_pte_uncached: page already exists\n");
-		set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC));
 		purge_tlb_start(flags);
+		set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC));
 		pdtlb_kernel(orig_vaddr);
 		purge_tlb_end(flags);
 		vaddr += PAGE_SIZE;
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 81d6f6391944..2e66a887788e 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -334,6 +334,10 @@ static int __init parisc_init(void)
 	/* tell PDC we're Linux. Nevermind failure. */
 	pdc_stable_write(0x40, &osid, sizeof(osid));
 	
+	/* start with known state */
+	flush_cache_all_local();
+	flush_tlb_all_local(NULL);
+
 	processor_init();
 #ifdef CONFIG_SMP
 	pr_info("CPU(s): %d out of %d %s at %d.%06d MHz online\n",
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index 178989e6d3e3..ea960d660917 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -218,8 +218,8 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num)
  */
 unsigned long long sched_clock(void)
 {
-	return clocksource_cyc2ns(get_cycles(),
-				  sched_clock_mult, SCHED_CLOCK_SHIFT);
+	return mult_frac(get_cycles(),
+			 sched_clock_mult, 1ULL << SCHED_CLOCK_SHIFT);
 }
 
 int setup_profiling_timer(unsigned int multiplier)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4086abca0b32..53949c886341 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -97,6 +97,8 @@ else
         KBUILD_CFLAGS += $(call cc-option,-mno-80387)
         KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
 
+        KBUILD_CFLAGS += -fno-pic
+
 	# Use -mpreferred-stack-boundary=3 if supported.
 	KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
 
diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig
new file mode 100644
index 000000000000..0206eb8cfb61
--- /dev/null
+++ b/arch/x86/configs/i386_ranchu_defconfig
@@ -0,0 +1,423 @@
+# CONFIG_64BIT is not set
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_ARCH_MMAP_RND_BITS=16
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_SMP=y
+CONFIG_X86_BIGSMP=y
+CONFIG_MCORE2=y
+CONFIG_X86_GENERIC=y
+CONFIG_HPET_TIMER=y
+CONFIG_NR_CPUS=512
+CONFIG_PREEMPT=y
+# CONFIG_X86_MCE is not set
+CONFIG_X86_REBOOTFIXUPS=y
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
+CONFIG_KSM=y
+CONFIG_CMA=y
+# CONFIG_MTRR_SANITIZER is not set
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_HZ_100=y
+CONFIG_PHYSICAL_START=0x100000
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_CPU_FREQ=y
+# CONFIG_CPU_FREQ_STAT is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCCARD=y
+CONFIG_YENTA=y
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_MAC80211_LEDS=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=16
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_ISCSI_ATTRS=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_ATA_PIIX=y
+CONFIG_PATA_AMD=y
+CONFIG_PATA_OLDPIIX=y
+CONFIG_PATA_SCH=y
+CONFIG_PATA_MPIIX=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_DEBUG=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_BNX2=y
+CONFIG_TIGON3=y
+CONFIG_NET_TULIP=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_SKY2=y
+CONFIG_NE2K_PCI=y
+CONFIG_FORCEDETH=y
+CONFIG_8139TOO=y
+# CONFIG_8139TOO_PIO is not set
+CONFIG_R8169=y
+CONFIG_FDDI=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_USB_USBNET=y
+CONFIG_INPUT_POLLDEV=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_NVRAM=y
+CONFIG_I2C_I801=y
+CONFIG_BATTERY_GOLDFISH=y
+CONFIG_WATCHDOG=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
+CONFIG_DRM=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_EFI=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_PRINTER=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_EDAC=y
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_HCTOSYS is not set
+CONFIG_DMADEVICES=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_ION=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_GOLDFISH_SYNC=y
+CONFIG_SND_HDA_INTEL=y
+CONFIG_GOLDFISH=y
+CONFIG_GOLDFISH_PIPE=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ISCSI_IBFT_FIND=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_FUSE_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=2048
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+CONFIG_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_CRYPTO_AES_586=y
+CONFIG_CRYPTO_TWOFISH=y
+CONFIG_ASYMMETRIC_KEY_TYPE=y
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
+CONFIG_X509_CERTIFICATE_PARSER=y
+CONFIG_PKCS7_MESSAGE_PARSER=y
+CONFIG_PKCS7_TEST_KEY=y
+# CONFIG_VIRTUALIZATION is not set
+CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig
new file mode 100644
index 000000000000..dd389774bacb
--- /dev/null
+++ b/arch/x86/configs/x86_64_ranchu_defconfig
@@ -0,0 +1,418 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_ARCH_MMAP_RND_BITS=32
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_SMP=y
+CONFIG_MCORE2=y
+CONFIG_MAXSMP=y
+CONFIG_PREEMPT=y
+# CONFIG_X86_MCE is not set
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
+CONFIG_KSM=y
+CONFIG_CMA=y
+# CONFIG_MTRR_SANITIZER is not set
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_HZ_100=y
+CONFIG_PHYSICAL_START=0x100000
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_CPU_FREQ=y
+# CONFIG_CPU_FREQ_STAT is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_PCI_MMCONFIG=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCCARD=y
+CONFIG_YENTA=y
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_IA32_EMULATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_MAC80211_LEDS=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DMA_CMA=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_ISCSI_ATTRS=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_ATA_PIIX=y
+CONFIG_PATA_AMD=y
+CONFIG_PATA_OLDPIIX=y
+CONFIG_PATA_SCH=y
+CONFIG_PATA_MPIIX=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_DEBUG=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_BNX2=y
+CONFIG_TIGON3=y
+CONFIG_NET_TULIP=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_SKY2=y
+CONFIG_NE2K_PCI=y
+CONFIG_FORCEDETH=y
+CONFIG_8139TOO=y
+# CONFIG_8139TOO_PIO is not set
+CONFIG_R8169=y
+CONFIG_FDDI=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_USB_USBNET=y
+CONFIG_INPUT_POLLDEV=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_NVRAM=y
+CONFIG_I2C_I801=y
+CONFIG_BATTERY_GOLDFISH=y
+CONFIG_WATCHDOG=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
+CONFIG_DRM=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_EFI=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_PRINTER=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_EDAC=y
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_HCTOSYS is not set
+CONFIG_DMADEVICES=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_ION=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_GOLDFISH_SYNC=y
+CONFIG_SND_HDA_INTEL=y
+CONFIG_GOLDFISH=y
+CONFIG_GOLDFISH_PIPE=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ISCSI_IBFT_FIND=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_FUSE_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+CONFIG_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_CRYPTO_TWOFISH=y
+CONFIG_ASYMMETRIC_KEY_TYPE=y
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
+CONFIG_X509_CERTIFICATE_PARSER=y
+CONFIG_PKCS7_MESSAGE_PARSER=y
+CONFIG_PKCS7_TEST_KEY=y
+# CONFIG_VIRTUALIZATION is not set
+CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5fa652c16a50..f49e98062ea5 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2093,16 +2093,10 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel, old_sel;
-	struct desc_struct old_desc, new_desc;
-	const struct x86_emulate_ops *ops = ctxt->ops;
+	unsigned short sel;
+	struct desc_struct new_desc;
 	u8 cpl = ctxt->ops->cpl(ctxt);
 
-	/* Assignment of RIP may only fail in 64-bit mode */
-	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
-				 VCPU_SREG_CS);
-
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
 	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
@@ -2112,12 +2106,10 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 		return rc;
 
 	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
-	if (rc != X86EMUL_CONTINUE) {
-		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
-		/* assigning eip failed; restore the old cs */
-		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
-		return rc;
-	}
+	/* Error handling is not implemented. */
+	if (rc != X86EMUL_CONTINUE)
+		return X86EMUL_UNHANDLEABLE;
+
 	return rc;
 }
 
@@ -2177,14 +2169,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 	unsigned long eip, cs;
-	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
-	struct desc_struct old_desc, new_desc;
-	const struct x86_emulate_ops *ops = ctxt->ops;
-
-	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
-				 VCPU_SREG_CS);
+	struct desc_struct new_desc;
 
 	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
@@ -2201,10 +2187,10 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rc = assign_eip_far(ctxt, eip, &new_desc);
-	if (rc != X86EMUL_CONTINUE) {
-		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
-		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
-	}
+	/* Error handling is not implemented. */
+	if (rc != X86EMUL_CONTINUE)
+		return X86EMUL_UNHANDLEABLE;
+
 	return rc;
 }
 
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d319909..d09544e826f6 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -38,6 +38,15 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
 			   bool line_status)
 {
 	struct kvm_pic *pic = pic_irqchip(kvm);
+
+	/*
+	 * XXX: rejecting pic routes when pic isn't in use would be better,
+	 * but the default routing table is installed while kvm->arch.vpic is
+	 * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE.
+	 */
+	if (!pic)
+		return -1;
+
 	return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
 }
 
@@ -46,6 +55,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 			      bool line_status)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+	if (!ioapic)
+		return -1;
+
 	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
 				line_status);
 }
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 9d359e05fad7..8161090a1970 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -788,6 +788,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 {
 	struct gendisk *disk;
 	struct blkcg_gq *blkg;
+	struct module *owner;
 	unsigned int major, minor;
 	int key_len, part, ret;
 	char *body;
@@ -804,7 +805,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	if (!disk)
 		return -ENODEV;
 	if (part) {
+		owner = disk->fops->owner;
 		put_disk(disk);
+		module_put(owner);
 		return -ENODEV;
 	}
 
@@ -820,7 +823,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		ret = PTR_ERR(blkg);
 		rcu_read_unlock();
 		spin_unlock_irq(disk->queue->queue_lock);
+		owner = disk->fops->owner;
 		put_disk(disk);
+		module_put(owner);
 		/*
 		 * If queue was bypassing, we should retry.  Do so after a
 		 * short msleep().  It isn't strictly necessary but queue
@@ -851,9 +856,13 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
 	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
 {
+	struct module *owner;
+
 	spin_unlock_irq(ctx->disk->queue->queue_lock);
 	rcu_read_unlock();
+	owner = ctx->disk->fops->owner;
 	put_disk(ctx->disk);
+	module_put(owner);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
 
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 79bab6fd76bb..6755d4768f59 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -275,6 +275,8 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode)
 			atombios_enable_crtc_memreq(crtc, ATOM_ENABLE);
 		atombios_blank_crtc(crtc, ATOM_DISABLE);
 		drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
+		/* Make sure vblank interrupt is still enabled if needed */
+		radeon_irq_set(rdev);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 678b4386540d..89f22bdde298 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -331,6 +331,8 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode)
 			WREG32_P(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl, ~(mask | crtc_ext_cntl));
 		}
 		drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
+		/* Make sure vblank interrupt is still enabled if needed */
+		radeon_irq_set(rdev);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c
index 907e4e278fce..c877e56a9bd5 100644
--- a/drivers/input/keyboard/goldfish_events.c
+++ b/drivers/input/keyboard/goldfish_events.c
@@ -17,11 +17,15 @@
 #include <linux/interrupt.h>
 #include <linux/types.h>
 #include <linux/input.h>
+#include <linux/input/mt.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/acpi.h>
+
+#define GOLDFISH_MAX_FINGERS 5
 
 enum {
 	REG_READ        = 0x00,
@@ -51,7 +55,21 @@ static irqreturn_t events_interrupt(int irq, void *dev_id)
 	value = __raw_readl(edev->addr + REG_READ);
 
 	input_event(edev->input, type, code, value);
-	input_sync(edev->input);
+	// Send an extra (EV_SYN, SYN_REPORT, 0x0) event
+	// if a key was pressed. Some keyboard device
+        // drivers may only send the EV_KEY event and
+        // not EV_SYN.
+        // Note that sending an extra SYN_REPORT is not
+        // necessary nor correct protocol with other
+        // devices such as touchscreens, which will send
+        // their own SYN_REPORT's when sufficient event
+        // information has been collected (e.g., for
+        // touchscreens, when pressure and X/Y coordinates
+	// have been received). Hence, we will only send
+	// this extra SYN_REPORT if type == EV_KEY.
+	if (type == EV_KEY) {
+		input_sync(edev->input);
+	}
 	return IRQ_HANDLED;
 }
 
@@ -153,6 +171,15 @@ static int events_probe(struct platform_device *pdev)
 
 	input_dev->name = edev->name;
 	input_dev->id.bustype = BUS_HOST;
+	// Set the Goldfish Device to be multi-touch.
+	// In the Ranchu kernel, there is multi-touch-specific
+	// code for handling ABS_MT_SLOT events.
+	// See drivers/input/input.c:input_handle_abs_event.
+	// If we do not issue input_mt_init_slots,
+        // the kernel will filter out needed ABS_MT_SLOT
+        // events when we touch the screen in more than one place,
+        // preventing multi-touch with more than one finger from working.
+	input_mt_init_slots(input_dev, GOLDFISH_MAX_FINGERS, 0);
 
 	events_import_bits(edev, input_dev->evbit, EV_SYN, EV_MAX);
 	events_import_bits(edev, input_dev->keybit, EV_KEY, KEY_MAX);
@@ -178,10 +205,26 @@ static int events_probe(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id goldfish_events_of_match[] = {
+	{ .compatible = "google,goldfish-events-keypad", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_events_of_match);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id goldfish_events_acpi_match[] = {
+	{ "GFSH0002", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, goldfish_events_acpi_match);
+#endif
+
 static struct platform_driver events_driver = {
 	.probe	= events_probe,
 	.driver	= {
 		.name	= "goldfish_events",
+		.of_match_table = goldfish_events_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_events_acpi_match),
 	},
 };
 
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 565bb2c140ed..e913a930ac80 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -326,7 +326,9 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
 	struct pci_dev *pdev = to_pci_dev(data);
 	struct dmar_pci_notify_info *info;
 
-	/* Only care about add/remove events for physical functions */
+	/* Only care about add/remove events for physical functions.
+	 * For VFs we actually do the lookup based on the corresponding
+	 * PF in device_to_iommu() anyway. */
 	if (pdev->is_virtfn)
 		return NOTIFY_DONE;
 	if (action != BUS_NOTIFY_ADD_DEVICE &&
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 5baa830ce49f..59e9abd3345e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -885,7 +885,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
 		return NULL;
 
 	if (dev_is_pci(dev)) {
+		struct pci_dev *pf_pdev;
+
 		pdev = to_pci_dev(dev);
+		/* VFs aren't listed in scope tables; we need to look up
+		 * the PF instead to find the IOMMU. */
+		pf_pdev = pci_physfn(pdev);
+		dev = &pf_pdev->dev;
 		segment = pci_domain_nr(pdev->bus);
 	} else if (has_acpi_companion(dev))
 		dev = &ACPI_COMPANION(dev)->dev;
@@ -898,6 +904,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
 		for_each_active_dev_scope(drhd->devices,
 					  drhd->devices_cnt, i, tmp) {
 			if (tmp == dev) {
+				/* For a VF use its original BDF# not that of the PF
+				 * which we used for the IOMMU lookup. Strictly speaking
+				 * we could do this for all PCI devices; we only need to
+				 * get the BDF# from the scope table for ACPI matches. */
+				if (pdev->is_virtfn)
+					goto got_pdev;
+
 				*bus = drhd->devices[i].bus;
 				*devfn = drhd->devices[i].devfn;
 				goto out;
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index d9939fa9b588..f929879ecae6 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -39,10 +39,18 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 	struct page *pages;
 	int order;
 
-	order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
-	if (order < 0)
-		order = 0;
+	/* Start at 2 because it's defined as 2^(1+PSS) */
+	iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
 
+	/* Eventually I'm promised we will get a multi-level PASID table
+	 * and it won't have to be physically contiguous. Until then,
+	 * limit the size because 8MiB contiguous allocations can be hard
+	 * to come by. The limit of 0x20000, which is 1MiB for each of
+	 * the PASID and PASID-state tables, is somewhat arbitrary. */
+	if (iommu->pasid_max > 0x20000)
+		iommu->pasid_max = 0x20000;
+
+	order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
 	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
 	if (!pages) {
 		pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
@@ -53,6 +61,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 	pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
 
 	if (ecap_dis(iommu->ecap)) {
+		/* Just making it explicit... */
+		BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
 		pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
 		if (pages)
 			iommu->pasid_state_table = page_address(pages);
@@ -68,11 +78,7 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 
 int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
 {
-	int order;
-
-	order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
-	if (order < 0)
-		order = 0;
+	int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
 
 	if (iommu->pasid_table) {
 		free_pages((unsigned long)iommu->pasid_table, order);
@@ -371,8 +377,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		}
 		svm->iommu = iommu;
 
-		if (pasid_max > 2 << ecap_pss(iommu->ecap))
-			pasid_max = 2 << ecap_pss(iommu->ecap);
+		if (pasid_max > iommu->pasid_max)
+			pasid_max = iommu->pasid_max;
 
 		/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
 		ret = idr_alloc(&iommu->pasid_idr, svm,
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index a77643954523..e59838231703 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -144,7 +144,7 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
 		mutex_lock(&bus->device_lock);
 
 		if (!mei_cl_is_connected(cl)) {
-			rets = -EBUSY;
+			rets = -ENODEV;
 			goto out;
 		}
 	}
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 4e8069866c85..a2661381ddfc 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -66,9 +66,6 @@
 #ifndef _MEI_HW_MEI_REGS_H_
 #define _MEI_HW_MEI_REGS_H_
 
-#define MEI_DEV_ID_KBP        0xA2BA  /* Kaby Point */
-#define MEI_DEV_ID_KBP_2      0xA2BB  /* Kaby Point 2 */
-
 /*
  * MEI device IDs
  */
@@ -124,6 +121,10 @@
 #define MEI_DEV_ID_SPT_2      0x9D3B  /* Sunrise Point 2 */
 #define MEI_DEV_ID_SPT_H      0xA13A  /* Sunrise Point H */
 #define MEI_DEV_ID_SPT_H_2    0xA13B  /* Sunrise Point H 2 */
+
+#define MEI_DEV_ID_KBP        0xA2BA  /* Kaby Point */
+#define MEI_DEV_ID_KBP_2      0xA2BB  /* Kaby Point 2 */
+
 /*
  * MEI HW Section
  */
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index 25b1997a62cb..36333750c512 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -1258,8 +1258,14 @@ static bool mei_me_fw_type_nm(struct pci_dev *pdev)
 static bool mei_me_fw_type_sps(struct pci_dev *pdev)
 {
 	u32 reg;
-	/* Read ME FW Status check for SPS Firmware */
-	pci_read_config_dword(pdev, PCI_CFG_HFS_1, &reg);
+	unsigned int devfn;
+
+	/*
+	 * Read ME FW Status register to check for SPS Firmware
+	 * The SPS FW is only signaled in pci function 0
+	 */
+	devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+	pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, &reg);
 	/* if bits [19:16] = 15, running SPS Firmware */
 	return (reg & 0xf0000) == 0xf0000;
 }
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 80f9afcb1382..4ef189a7a2fb 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -207,7 +207,7 @@ static ssize_t mei_read(struct file *file, char __user *ubuf,
 
 		mutex_lock(&dev->device_lock);
 		if (!mei_cl_is_connected(cl)) {
-			rets = -EBUSY;
+			rets = -ENODEV;
 			goto out;
 		}
 	}
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 0af3d7d30419..01e20384ac44 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -84,8 +84,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
 
 	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)},
 	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)},
-	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)},
-	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)},
 
 	{MEI_PCI_DEVICE(MEI_DEV_ID_KBP, mei_me_pch8_cfg)},
 	{MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, mei_me_pch8_cfg)},
diff --git a/drivers/platform/goldfish/Makefile b/drivers/platform/goldfish/Makefile
index d3487125838c..277a820ee4e1 100644
--- a/drivers/platform/goldfish/Makefile
+++ b/drivers/platform/goldfish/Makefile
@@ -2,4 +2,5 @@
 # Makefile for Goldfish platform specific drivers
 #
 obj-$(CONFIG_GOLDFISH_BUS)	+= pdev_bus.o
-obj-$(CONFIG_GOLDFISH_PIPE)	+= goldfish_pipe.o
+obj-$(CONFIG_GOLDFISH_PIPE)	+= goldfish_pipe_all.o
+goldfish_pipe_all-objs := goldfish_pipe.o goldfish_pipe_v2.o
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 3215a33cf4fe..fd1452e28352 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -15,51 +15,11 @@
  *
  */
 
-/* This source file contains the implementation of a special device driver
- * that intends to provide a *very* fast communication channel between the
- * guest system and the QEMU emulator.
- *
- * Usage from the guest is simply the following (error handling simplified):
- *
- *    int  fd = open("/dev/qemu_pipe",O_RDWR);
- *    .... write() or read() through the pipe.
- *
- * This driver doesn't deal with the exact protocol used during the session.
- * It is intended to be as simple as something like:
- *
- *    // do this _just_ after opening the fd to connect to a specific
- *    // emulator service.
- *    const char*  msg = "<pipename>";
- *    if (write(fd, msg, strlen(msg)+1) < 0) {
- *       ... could not connect to <pipename> service
- *       close(fd);
- *    }
- *
- *    // after this, simply read() and write() to communicate with the
- *    // service. Exact protocol details left as an exercise to the reader.
- *
- * This driver is very fast because it doesn't copy any data through
- * intermediate buffers, since the emulator is capable of translating
- * guest user addresses into host ones.
- *
- * Note that we must however ensure that each user page involved in the
- * exchange is properly mapped during a transfer.
+/* This source file contains the implementation of the legacy version of
+ * a goldfish pipe device driver. See goldfish_pipe_v2.c for the current
+ * version.
  */
-
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/miscdevice.h>
-#include <linux/platform_device.h>
-#include <linux/poll.h>
-#include <linux/sched.h>
-#include <linux/bitops.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/goldfish.h>
-#include <linux/mm.h>
-#include <linux/acpi.h>
+#include "goldfish_pipe.h"
 
 /*
  * IMPORTANT: The following constants must match the ones used and defined
@@ -109,29 +69,15 @@
 #define PIPE_WAKE_READ         (1 << 1)  /* pipe can now be read from */
 #define PIPE_WAKE_WRITE        (1 << 2)  /* pipe can now be written to */
 
-struct access_params {
-	unsigned long channel;
-	u32 size;
-	unsigned long address;
-	u32 cmd;
-	u32 result;
-	/* reserved for future extension */
-	u32 flags;
-};
+#define MAX_PAGES_TO_GRAB 32
 
-/* The global driver data. Holds a reference to the i/o page used to
- * communicate with the emulator, and a wake queue for blocked tasks
- * waiting to be awoken.
- */
-struct goldfish_pipe_dev {
-	spinlock_t lock;
-	unsigned char __iomem *base;
-	struct access_params *aps;
-	int irq;
-	u32 version;
-};
+#define DEBUG 0
 
-static struct goldfish_pipe_dev   pipe_dev[1];
+#if DEBUG
+#define DPRINT(...) { printk(KERN_ERR __VA_ARGS__); }
+#else
+#define DPRINT(...)
+#endif
 
 /* This data type models a given pipe instance */
 struct goldfish_pipe {
@@ -141,6 +87,15 @@ struct goldfish_pipe {
 	wait_queue_head_t wake_queue;
 };
 
+struct access_params {
+	unsigned long channel;
+	u32 size;
+	unsigned long address;
+	u32 cmd;
+	u32 result;
+	/* reserved for future extension */
+	u32 flags;
+};
 
 /* Bit flags for the 'flags' field */
 enum {
@@ -231,8 +186,10 @@ static int setup_access_params_addr(struct platform_device *pdev,
 	if (valid_batchbuffer_addr(dev, aps)) {
 		dev->aps = aps;
 		return 0;
-	} else
+	} else {
+		devm_kfree(&pdev->dev, aps);
 		return -1;
+	}
 }
 
 /* A value that will not be set by qemu emulator */
@@ -269,6 +226,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
 	struct goldfish_pipe *pipe = filp->private_data;
 	struct goldfish_pipe_dev *dev = pipe->dev;
 	unsigned long address, address_end;
+	struct page* pages[MAX_PAGES_TO_GRAB] = {};
 	int count = 0, ret = -EINVAL;
 
 	/* If the emulator already closed the pipe, no need to go further */
@@ -292,45 +250,62 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
 	address_end = address + bufflen;
 
 	while (address < address_end) {
-		unsigned long  page_end = (address & PAGE_MASK) + PAGE_SIZE;
-		unsigned long  next     = page_end < address_end ? page_end
-								 : address_end;
-		unsigned long  avail    = next - address;
-		int status, wakeBit;
-
-		struct page *page;
-
-		/* Either vaddr or paddr depending on the device version */
-		unsigned long xaddr;
+		unsigned long page_end = (address & PAGE_MASK) + PAGE_SIZE;
+		unsigned long next, avail;
+		int status, wakeBit, page_i, num_contiguous_pages;
+		long first_page, last_page, requested_pages;
+		unsigned long xaddr, xaddr_prev, xaddr_i;
 
 		/*
-		 * We grab the pages on a page-by-page basis in case user
-		 * space gives us a potentially huge buffer but the read only
-		 * returns a small amount, then there's no need to pin that
-		 * much memory to the process.
+		 * Attempt to grab multiple physically contiguous pages.
 		 */
-		down_read(&current->mm->mmap_sem);
-		ret = get_user_pages(current, current->mm, address, 1,
-				     !is_write, 0, &page, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (ret < 0)
-			return ret;
-
-		if (dev->version) {
-			/* Device version 1 or newer (qemu-android) expects the
-			 * physical address. */
-			xaddr = page_to_phys(page) | (address & ~PAGE_MASK);
-		} else {
-			/* Device version 0 (classic emulator) expects the
-			 * virtual address. */
-			xaddr = address;
+		first_page = address & PAGE_MASK;
+		last_page = (address_end - 1) & PAGE_MASK;
+		requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1;
+		if (requested_pages > MAX_PAGES_TO_GRAB) {
+			requested_pages = MAX_PAGES_TO_GRAB;
 		}
+		ret = get_user_pages_fast(first_page, requested_pages,
+				!is_write, pages);
+
+		DPRINT("%s: requested pages: %d %d %p\n", __FUNCTION__,
+			ret, requested_pages, first_page);
+		if (ret == 0) {
+			DPRINT("%s: error: (requested pages == 0) (wanted %d)\n",
+					__FUNCTION__, requested_pages);
+			mutex_unlock(&pipe->lock);
+			return ret;
+		}
+		if (ret < 0) {
+			DPRINT("%s: (requested pages < 0) %d \n",
+				 	__FUNCTION__, requested_pages);
+			mutex_unlock(&pipe->lock);
+			return ret;
+		}
+
+		xaddr = page_to_phys(pages[0]) | (address & ~PAGE_MASK);
+		xaddr_prev = xaddr;
+		num_contiguous_pages = ret == 0 ? 0 : 1;
+		for (page_i = 1; page_i < ret; page_i++) {
+			xaddr_i = page_to_phys(pages[page_i]) | (address & ~PAGE_MASK);
+			if (xaddr_i == xaddr_prev + PAGE_SIZE) {
+				page_end += PAGE_SIZE;
+				xaddr_prev = xaddr_i;
+				num_contiguous_pages++;
+			} else {
+				DPRINT("%s: discontinuous page boundary: %d pages instead\n",
+						__FUNCTION__, page_i);
+				break;
+			}
+		}
+		next = page_end < address_end ? page_end : address_end;
+		avail = next - address;
 
 		/* Now, try to transfer the bytes in the current page */
 		spin_lock_irqsave(&dev->lock, irq_flags);
 		if (access_with_param(dev,
-				      is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER,
-				      xaddr, avail, pipe, &status)) {
+					is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER,
+					xaddr, avail, pipe, &status)) {
 			gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL,
 				     dev->base + PIPE_REG_CHANNEL_HIGH);
 			writel(avail, dev->base + PIPE_REG_SIZE);
@@ -343,9 +318,13 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
 		}
 		spin_unlock_irqrestore(&dev->lock, irq_flags);
 
-		if (status > 0 && !is_write)
-			set_page_dirty(page);
-		put_page(page);
+		for (page_i = 0; page_i < ret; page_i++) {
+			if (status > 0 && !is_write &&
+				page_i < num_contiguous_pages) {
+				set_page_dirty(pages[page_i]);
+			}
+			put_page(pages[page_i]);
+		}
 
 		if (status > 0) { /* Correct transfer */
 			count += status;
@@ -367,7 +346,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
 			 */
 			if (status != PIPE_ERROR_AGAIN)
 				pr_info_ratelimited("goldfish_pipe: backend returned error %d on %s\n",
-					    status, is_write ? "write" : "read");
+						status, is_write ? "write" : "read");
 			ret = 0;
 			break;
 		}
@@ -377,7 +356,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
 		 * non-blocking mode, just return the error code.
 		 */
 		if (status != PIPE_ERROR_AGAIN ||
-			(filp->f_flags & O_NONBLOCK) != 0) {
+				(filp->f_flags & O_NONBLOCK) != 0) {
 			ret = goldfish_pipe_error_convert(status);
 			break;
 		}
@@ -391,7 +370,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
 
 		/* Tell the emulator we're going to wait for a wake event */
 		goldfish_cmd(pipe,
-			     is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ);
+				is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ);
 
 		/* Unlock the pipe, then wait for the wake signal */
 		mutex_unlock(&pipe->lock);
@@ -399,22 +378,16 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
 		while (test_bit(wakeBit, &pipe->flags)) {
 			if (wait_event_interruptible(
 					pipe->wake_queue,
-					!test_bit(wakeBit, &pipe->flags))) {
-				ret = -ERESTARTSYS;
-				break;
-			}
+					!test_bit(wakeBit, &pipe->flags)))
+				return -ERESTARTSYS;
 
-			if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) {
-				ret = -EIO;
-				break;
-			}
+			if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
+				return -EIO;
 		}
 
 		/* Try to re-acquire the lock */
-		if (mutex_lock_interruptible(&pipe->lock)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
+		if (mutex_lock_interruptible(&pipe->lock))
+			return -ERESTARTSYS;
 	}
 	mutex_unlock(&pipe->lock);
 
@@ -543,6 +516,8 @@ static int goldfish_pipe_open(struct inode *inode, struct file *file)
 
 	pipe->dev = dev;
 	mutex_init(&pipe->lock);
+	DPRINT("%s: call. pipe_dev pipe_dev=0x%lx new_pipe_addr=0x%lx file=0x%lx\n", __FUNCTION__, pipe_dev, pipe, file);
+	// spin lock init, write head of list, i guess
 	init_waitqueue_head(&pipe->wake_queue);
 
 	/*
@@ -565,6 +540,7 @@ static int goldfish_pipe_release(struct inode *inode, struct file *filp)
 {
 	struct goldfish_pipe *pipe = filp->private_data;
 
+	DPRINT("%s: call. pipe=0x%lx file=0x%lx\n", __FUNCTION__, pipe, filp);
 	/* The guest is closing the channel, so tell the emulator right now */
 	goldfish_cmd(pipe, CMD_CLOSE);
 	kfree(pipe);
@@ -581,96 +557,33 @@ static const struct file_operations goldfish_pipe_fops = {
 	.release = goldfish_pipe_release,
 };
 
-static struct miscdevice goldfish_pipe_device = {
+static struct miscdevice goldfish_pipe_dev = {
 	.minor = MISC_DYNAMIC_MINOR,
 	.name = "goldfish_pipe",
 	.fops = &goldfish_pipe_fops,
 };
 
-static int goldfish_pipe_probe(struct platform_device *pdev)
+int goldfish_pipe_device_init_v1(struct platform_device *pdev)
 {
-	int err;
-	struct resource *r;
 	struct goldfish_pipe_dev *dev = pipe_dev;
-
-	/* not thread safe, but this should not happen */
-	WARN_ON(dev->base != NULL);
-
-	spin_lock_init(&dev->lock);
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (r == NULL || resource_size(r) < PAGE_SIZE) {
-		dev_err(&pdev->dev, "can't allocate i/o page\n");
-		return -EINVAL;
-	}
-	dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE);
-	if (dev->base == NULL) {
-		dev_err(&pdev->dev, "ioremap failed\n");
-		return -EINVAL;
-	}
-
-	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (r == NULL) {
-		err = -EINVAL;
-		goto error;
-	}
-	dev->irq = r->start;
-
-	err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt,
+	int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt,
 				IRQF_SHARED, "goldfish_pipe", dev);
 	if (err) {
-		dev_err(&pdev->dev, "unable to allocate IRQ\n");
-		goto error;
+		dev_err(&pdev->dev, "unable to allocate IRQ for v1\n");
+		return err;
 	}
 
-	err = misc_register(&goldfish_pipe_device);
+	err = misc_register(&goldfish_pipe_dev);
 	if (err) {
-		dev_err(&pdev->dev, "unable to register device\n");
-		goto error;
+		dev_err(&pdev->dev, "unable to register v1 device\n");
+		return err;
 	}
+
 	setup_access_params_addr(pdev, dev);
-
-	/* Although the pipe device in the classic Android emulator does not
-	 * recognize the 'version' register, it won't treat this as an error
-	 * either and will simply return 0, which is fine. */
-	dev->version = readl(dev->base + PIPE_REG_VERSION);
 	return 0;
-
-error:
-	dev->base = NULL;
-	return err;
 }
 
-static int goldfish_pipe_remove(struct platform_device *pdev)
+void goldfish_pipe_device_deinit_v1(struct platform_device *pdev)
 {
-	struct goldfish_pipe_dev *dev = pipe_dev;
-	misc_deregister(&goldfish_pipe_device);
-	dev->base = NULL;
-	return 0;
+    misc_deregister(&goldfish_pipe_dev);
 }
-
-static const struct acpi_device_id goldfish_pipe_acpi_match[] = {
-	{ "GFSH0003", 0 },
-	{ },
-};
-MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match);
-
-static const struct of_device_id goldfish_pipe_of_match[] = {
-	{ .compatible = "generic,android-pipe", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match);
-
-static struct platform_driver goldfish_pipe = {
-	.probe = goldfish_pipe_probe,
-	.remove = goldfish_pipe_remove,
-	.driver = {
-		.name = "goldfish_pipe",
-		.of_match_table = goldfish_pipe_of_match,
-		.acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match),
-	}
-};
-
-module_platform_driver(goldfish_pipe);
-MODULE_AUTHOR("David Turner <digit@google.com>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/platform/goldfish/goldfish_pipe.h b/drivers/platform/goldfish/goldfish_pipe.h
new file mode 100644
index 000000000000..9b75a51dba24
--- /dev/null
+++ b/drivers/platform/goldfish/goldfish_pipe.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef GOLDFISH_PIPE_H
+#define GOLDFISH_PIPE_H
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/goldfish.h>
+#include <linux/mm.h>
+#include <linux/acpi.h>
+
+
+/* Initialize the legacy version of the pipe device driver */
+int goldfish_pipe_device_init_v1(struct platform_device *pdev);
+
+/* Deinitialize the legacy version of the pipe device driver */
+void goldfish_pipe_device_deinit_v1(struct platform_device *pdev);
+
+/* Forward declarations for the device struct */
+struct goldfish_pipe;
+struct goldfish_pipe_device_buffers;
+
+/* The global driver data. Holds a reference to the i/o page used to
+ * communicate with the emulator, and a wake queue for blocked tasks
+ * waiting to be awoken.
+ */
+struct goldfish_pipe_dev {
+	/*
+	 * Global device spinlock. Protects the following members:
+	 *  - pipes, pipes_capacity
+	 *  - [*pipes, *pipes + pipes_capacity) - array data
+	 *  - first_signalled_pipe,
+	 *      goldfish_pipe::prev_signalled,
+	 *      goldfish_pipe::next_signalled,
+	 *      goldfish_pipe::signalled_flags - all singnalled-related fields,
+	 *                                       in all allocated pipes
+	 *  - open_command_params - PIPE_CMD_OPEN-related buffers
+	 *
+	 * It looks like a lot of different fields, but the trick is that the only
+	 * operation that happens often is the signalled pipes array manipulation.
+	 * That's why it's OK for now to keep the rest of the fields under the same
+	 * lock. If we notice too much contention because of PIPE_CMD_OPEN,
+	 * then we should add a separate lock there.
+	 */
+	spinlock_t lock;
+
+	/*
+	 * Array of the pipes of |pipes_capacity| elements,
+	 * indexed by goldfish_pipe::id
+	 */
+	struct goldfish_pipe **pipes;
+	u32 pipes_capacity;
+
+	/* Pointers to the buffers host uses for interaction with this driver */
+	struct goldfish_pipe_dev_buffers *buffers;
+
+	/* Head of a doubly linked list of signalled pipes */
+	struct goldfish_pipe *first_signalled_pipe;
+
+	/* Some device-specific data */
+	int irq;
+	int version;
+	unsigned char __iomem *base;
+
+	/* v1-specific access parameters */
+	struct access_params *aps;
+};
+
+extern struct goldfish_pipe_dev pipe_dev[1];
+
+#endif /* GOLDFISH_PIPE_H */
diff --git a/drivers/platform/goldfish/goldfish_pipe_v2.c b/drivers/platform/goldfish/goldfish_pipe_v2.c
new file mode 100644
index 000000000000..ad373ed36555
--- /dev/null
+++ b/drivers/platform/goldfish/goldfish_pipe_v2.c
@@ -0,0 +1,889 @@
+/*
+ * Copyright (C) 2012 Intel, Inc.
+ * Copyright (C) 2013 Intel, Inc.
+ * Copyright (C) 2014 Linaro Limited
+ * Copyright (C) 2011-2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* This source file contains the implementation of a special device driver
+ * that intends to provide a *very* fast communication channel between the
+ * guest system and the QEMU emulator.
+ *
+ * Usage from the guest is simply the following (error handling simplified):
+ *
+ *    int  fd = open("/dev/qemu_pipe",O_RDWR);
+ *    .... write() or read() through the pipe.
+ *
+ * This driver doesn't deal with the exact protocol used during the session.
+ * It is intended to be as simple as something like:
+ *
+ *    // do this _just_ after opening the fd to connect to a specific
+ *    // emulator service.
+ *    const char*  msg = "<pipename>";
+ *    if (write(fd, msg, strlen(msg)+1) < 0) {
+ *       ... could not connect to <pipename> service
+ *       close(fd);
+ *    }
+ *
+ *    // after this, simply read() and write() to communicate with the
+ *    // service. Exact protocol details left as an exercise to the reader.
+ *
+ * This driver is very fast because it doesn't copy any data through
+ * intermediate buffers, since the emulator is capable of translating
+ * guest user addresses into host ones.
+ *
+ * Note that we must however ensure that each user page involved in the
+ * exchange is properly mapped during a transfer.
+ */
+
+#include "goldfish_pipe.h"
+
+
+/*
+ * Update this when something changes in the driver's behavior so the host
+ * can benefit from knowing it
+ */
+enum {
+	PIPE_DRIVER_VERSION = 2,
+	PIPE_CURRENT_DEVICE_VERSION = 2
+};
+
+/*
+ * IMPORTANT: The following constants must match the ones used and defined
+ * in external/qemu/hw/goldfish_pipe.c in the Android source tree.
+ */
+
+/* List of bitflags returned in status of CMD_POLL command */
+enum PipePollFlags {
+	PIPE_POLL_IN	= 1 << 0,
+	PIPE_POLL_OUT	= 1 << 1,
+	PIPE_POLL_HUP	= 1 << 2
+};
+
+/* Possible status values used to signal errors - see goldfish_pipe_error_convert */
+enum PipeErrors {
+	PIPE_ERROR_INVAL  = -1,
+	PIPE_ERROR_AGAIN  = -2,
+	PIPE_ERROR_NOMEM  = -3,
+	PIPE_ERROR_IO     = -4
+};
+
+/* Bit-flags used to signal events from the emulator */
+enum PipeWakeFlags {
+	PIPE_WAKE_CLOSED = 1 << 0,  /* emulator closed pipe */
+	PIPE_WAKE_READ   = 1 << 1,  /* pipe can now be read from */
+	PIPE_WAKE_WRITE  = 1 << 2  /* pipe can now be written to */
+};
+
+/* Bit flags for the 'flags' field */
+enum PipeFlagsBits {
+	BIT_CLOSED_ON_HOST = 0,  /* pipe closed by host */
+	BIT_WAKE_ON_WRITE  = 1,  /* want to be woken on writes */
+	BIT_WAKE_ON_READ   = 2,  /* want to be woken on reads */
+};
+
+enum PipeRegs {
+	PIPE_REG_CMD = 0,
+
+	PIPE_REG_SIGNAL_BUFFER_HIGH = 4,
+	PIPE_REG_SIGNAL_BUFFER = 8,
+	PIPE_REG_SIGNAL_BUFFER_COUNT = 12,
+
+	PIPE_REG_OPEN_BUFFER_HIGH = 20,
+	PIPE_REG_OPEN_BUFFER = 24,
+
+	PIPE_REG_VERSION = 36,
+
+	PIPE_REG_GET_SIGNALLED = 48,
+};
+
+enum PipeCmdCode {
+	PIPE_CMD_OPEN = 1,	/* to be used by the pipe device itself */
+	PIPE_CMD_CLOSE,
+	PIPE_CMD_POLL,
+	PIPE_CMD_WRITE,
+	PIPE_CMD_WAKE_ON_WRITE,
+	PIPE_CMD_READ,
+	PIPE_CMD_WAKE_ON_READ,
+
+	/*
+	 * TODO(zyy): implement a deferred read/write execution to allow parallel
+	 *  processing of pipe operations on the host.
+	*/
+	PIPE_CMD_WAKE_ON_DONE_IO,
+};
+
+enum {
+	MAX_BUFFERS_PER_COMMAND = 336,
+	MAX_SIGNALLED_PIPES = 64,
+	INITIAL_PIPES_CAPACITY = 64
+};
+
+struct goldfish_pipe_dev;
+struct goldfish_pipe;
+struct goldfish_pipe_command;
+
+/* A per-pipe command structure, shared with the host */
+struct goldfish_pipe_command {
+	s32 cmd;		/* PipeCmdCode, guest -> host */
+	s32 id;			/* pipe id, guest -> host */
+	s32 status;		/* command execution status, host -> guest */
+	s32 reserved;	/* to pad to 64-bit boundary */
+	union {
+		/* Parameters for PIPE_CMD_{READ,WRITE} */
+		struct {
+			u32 buffers_count;					/* number of buffers, guest -> host */
+			s32 consumed_size;					/* number of consumed bytes, host -> guest */
+			u64 ptrs[MAX_BUFFERS_PER_COMMAND]; 	/* buffer pointers, guest -> host */
+			u32 sizes[MAX_BUFFERS_PER_COMMAND];	/* buffer sizes, guest -> host */
+		} rw_params;
+	};
+};
+
+/* A single signalled pipe information */
+struct signalled_pipe_buffer {
+	u32 id;
+	u32 flags;
+};
+
+/* Parameters for the PIPE_CMD_OPEN command */
+struct open_command_param {
+	u64 command_buffer_ptr;
+	u32 rw_params_max_count;
+};
+
+/* Device-level set of buffers shared with the host */
+struct goldfish_pipe_dev_buffers {
+	struct open_command_param open_command_params;
+	struct signalled_pipe_buffer signalled_pipe_buffers[MAX_SIGNALLED_PIPES];
+};
+
+/* This data type models a given pipe instance */
+struct goldfish_pipe {
+	u32 id;							/* pipe ID - index into goldfish_pipe_dev::pipes array */
+	unsigned long flags;			/* The wake flags pipe is waiting for
+									 * Note: not protected with any lock, uses atomic operations
+									 *  and barriers to make it thread-safe.
+									 */
+	unsigned long signalled_flags;	/* wake flags host have signalled,
+									 *  - protected by goldfish_pipe_dev::lock */
+
+	struct goldfish_pipe_command *command_buffer;	/* A pointer to command buffer */
+
+	/* doubly linked list of signalled pipes, protected by goldfish_pipe_dev::lock */
+	struct goldfish_pipe *prev_signalled;
+	struct goldfish_pipe *next_signalled;
+
+	/*
+	 * A pipe's own lock. Protects the following:
+	 *  - *command_buffer - makes sure a command can safely write its parameters
+	 *    to the host and read the results back.
+	 */
+	struct mutex lock;
+
+	wait_queue_head_t wake_queue;	/* A wake queue for sleeping until host signals an event */
+	struct goldfish_pipe_dev *dev;	/* Pointer to the parent goldfish_pipe_dev instance */
+};
+
+struct goldfish_pipe_dev pipe_dev[1] = {};
+
+static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
+{
+	pipe->command_buffer->cmd = cmd;
+	pipe->command_buffer->status = PIPE_ERROR_INVAL;	/* failure by default */
+	writel(pipe->id, pipe->dev->base + PIPE_REG_CMD);
+	return pipe->command_buffer->status;
+}
+
+static int goldfish_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
+{
+	int status;
+	if (mutex_lock_interruptible(&pipe->lock))
+		return PIPE_ERROR_IO;
+	status = goldfish_cmd_locked(pipe, cmd);
+	mutex_unlock(&pipe->lock);
+	return status;
+}
+
+/*
+ * This function converts an error code returned by the emulator through
+ * the PIPE_REG_STATUS i/o register into a valid negative errno value.
+ */
+static int goldfish_pipe_error_convert(int status)
+{
+	switch (status) {
+	case PIPE_ERROR_AGAIN:
+		return -EAGAIN;
+	case PIPE_ERROR_NOMEM:
+		return -ENOMEM;
+	case PIPE_ERROR_IO:
+		return -EIO;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int pin_user_pages(unsigned long first_page, unsigned long last_page,
+	unsigned last_page_size, int is_write,
+	struct page *pages[MAX_BUFFERS_PER_COMMAND], unsigned *iter_last_page_size)
+{
+	int ret;
+	int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1;
+	if (requested_pages > MAX_BUFFERS_PER_COMMAND) {
+		requested_pages = MAX_BUFFERS_PER_COMMAND;
+		*iter_last_page_size = PAGE_SIZE;
+	} else {
+		*iter_last_page_size = last_page_size;
+	}
+
+	ret = get_user_pages_fast(
+			first_page, requested_pages, !is_write, pages);
+	if (ret <= 0)
+		return -EFAULT;
+	if (ret < requested_pages)
+		*iter_last_page_size = PAGE_SIZE;
+	return ret;
+
+}
+
+static void release_user_pages(struct page **pages, int pages_count,
+	int is_write, s32 consumed_size)
+{
+	int i;
+	for (i = 0; i < pages_count; i++) {
+		if (!is_write && consumed_size > 0) {
+			set_page_dirty(pages[i]);
+		}
+		put_page(pages[i]);
+	}
+}
+
+/* Populate the call parameters, merging adjacent pages together */
+static void populate_rw_params(
+	struct page **pages, int pages_count,
+	unsigned long address, unsigned long address_end,
+	unsigned long first_page, unsigned long last_page,
+	unsigned iter_last_page_size, int is_write,
+	struct goldfish_pipe_command *command)
+{
+	/*
+	 * Process the first page separately - it's the only page that
+	 * needs special handling for its start address.
+	 */
+	unsigned long xaddr = page_to_phys(pages[0]);
+	unsigned long xaddr_prev = xaddr;
+	int buffer_idx = 0;
+	int i = 1;
+	int size_on_page = first_page == last_page
+			? (int)(address_end - address)
+			: (PAGE_SIZE - (address & ~PAGE_MASK));
+	command->rw_params.ptrs[0] = (u64)(xaddr | (address & ~PAGE_MASK));
+	command->rw_params.sizes[0] = size_on_page;
+	for (; i < pages_count; ++i) {
+		xaddr = page_to_phys(pages[i]);
+		size_on_page = (i == pages_count - 1) ? iter_last_page_size : PAGE_SIZE;
+		if (xaddr == xaddr_prev + PAGE_SIZE) {
+			command->rw_params.sizes[buffer_idx] += size_on_page;
+		} else {
+			++buffer_idx;
+			command->rw_params.ptrs[buffer_idx] = (u64)xaddr;
+			command->rw_params.sizes[buffer_idx] = size_on_page;
+		}
+		xaddr_prev = xaddr;
+	}
+	command->rw_params.buffers_count = buffer_idx + 1;
+}
+
+static int transfer_max_buffers(struct goldfish_pipe* pipe,
+	unsigned long address, unsigned long address_end, int is_write,
+	unsigned long last_page, unsigned int last_page_size,
+	s32* consumed_size, int* status)
+{
+	struct page *pages[MAX_BUFFERS_PER_COMMAND];
+	unsigned long first_page = address & PAGE_MASK;
+	unsigned int iter_last_page_size;
+	int pages_count = pin_user_pages(first_page, last_page,
+			last_page_size, is_write,
+			pages, &iter_last_page_size);
+	if (pages_count < 0)
+		return pages_count;
+
+	/* Serialize access to the pipe command buffers */
+	if (mutex_lock_interruptible(&pipe->lock))
+		return -ERESTARTSYS;
+
+	populate_rw_params(pages, pages_count, address, address_end,
+		first_page, last_page, iter_last_page_size, is_write,
+		pipe->command_buffer);
+
+	/* Transfer the data */
+	*status = goldfish_cmd_locked(pipe,
+						is_write ? PIPE_CMD_WRITE : PIPE_CMD_READ);
+
+	*consumed_size = pipe->command_buffer->rw_params.consumed_size;
+
+	mutex_unlock(&pipe->lock);
+
+	release_user_pages(pages, pages_count, is_write, *consumed_size);
+
+	return 0;
+}
+
+static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write)
+{
+	u32 wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
+	set_bit(wakeBit, &pipe->flags);
+
+	/* Tell the emulator we're going to wait for a wake event */
+	(void)goldfish_cmd(pipe,
+			is_write ? PIPE_CMD_WAKE_ON_WRITE : PIPE_CMD_WAKE_ON_READ);
+
+	while (test_bit(wakeBit, &pipe->flags)) {
+		if (wait_event_interruptible(
+				pipe->wake_queue,
+				!test_bit(wakeBit, &pipe->flags)))
+			return -ERESTARTSYS;
+
+		if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+static ssize_t goldfish_pipe_read_write(struct file *filp,
+	char __user *buffer, size_t bufflen, int is_write)
+{
+	struct goldfish_pipe *pipe = filp->private_data;
+	int count = 0, ret = -EINVAL;
+	unsigned long address, address_end, last_page;
+	unsigned int last_page_size;
+
+	/* If the emulator already closed the pipe, no need to go further */
+	if (unlikely(test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)))
+		return -EIO;
+	/* Null reads or writes succeeds */
+	if (unlikely(bufflen == 0))
+		return 0;
+	/* Check the buffer range for access */
+	if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
+			buffer, bufflen)))
+		return -EFAULT;
+
+	address = (unsigned long)buffer;
+	address_end = address + bufflen;
+	last_page = (address_end - 1) & PAGE_MASK;
+	last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;
+
+	while (address < address_end) {
+		s32 consumed_size;
+		int status;
+		ret = transfer_max_buffers(pipe, address, address_end, is_write,
+				last_page, last_page_size, &consumed_size, &status);
+		if (ret < 0)
+			break;
+
+		if (consumed_size > 0) {
+			/* No matter what's the status, we've transfered something */
+			count += consumed_size;
+			address += consumed_size;
+		}
+		if (status > 0)
+			continue;
+		if (status == 0) {
+			/* EOF */
+			ret = 0;
+			break;
+		}
+		if (count > 0) {
+			/*
+			 * An error occured, but we already transfered
+			 * something on one of the previous iterations.
+			 * Just return what we already copied and log this
+			 * err.
+			 */
+			if (status != PIPE_ERROR_AGAIN)
+				pr_info_ratelimited("goldfish_pipe: backend error %d on %s\n",
+									status, is_write ? "write" : "read");
+			break;
+		}
+
+		/*
+		 * If the error is not PIPE_ERROR_AGAIN, or if we are in
+		 * non-blocking mode, just return the error code.
+		 */
+		if (status != PIPE_ERROR_AGAIN || (filp->f_flags & O_NONBLOCK) != 0) {
+			ret = goldfish_pipe_error_convert(status);
+			break;
+		}
+
+		status = wait_for_host_signal(pipe, is_write);
+		if (status < 0)
+			return status;
+	}
+
+	if (count > 0)
+		return count;
+	return ret;
+}
+
+static ssize_t goldfish_pipe_read(struct file *filp, char __user *buffer,
+				size_t bufflen, loff_t *ppos)
+{
+	return goldfish_pipe_read_write(filp, buffer, bufflen, /* is_write */ 0);
+}
+
+static ssize_t goldfish_pipe_write(struct file *filp,
+				const char __user *buffer, size_t bufflen,
+				loff_t *ppos)
+{
+	return goldfish_pipe_read_write(filp,
+			/* cast away the const */(char __user *)buffer, bufflen,
+			/* is_write */ 1);
+}
+
+static unsigned int goldfish_pipe_poll(struct file *filp, poll_table *wait)
+{
+	struct goldfish_pipe *pipe = filp->private_data;
+	unsigned int mask = 0;
+	int status;
+
+	poll_wait(filp, &pipe->wake_queue, wait);
+
+	status = goldfish_cmd(pipe, PIPE_CMD_POLL);
+	if (status < 0) {
+		return -ERESTARTSYS;
+	}
+
+	if (status & PIPE_POLL_IN)
+		mask |= POLLIN | POLLRDNORM;
+	if (status & PIPE_POLL_OUT)
+		mask |= POLLOUT | POLLWRNORM;
+	if (status & PIPE_POLL_HUP)
+		mask |= POLLHUP;
+	if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
+		mask |= POLLERR;
+
+	return mask;
+}
+
+static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev,
+	u32 id, u32 flags)
+{
+	struct goldfish_pipe *pipe;
+
+	BUG_ON(id >= dev->pipes_capacity);
+
+	pipe = dev->pipes[id];
+	if (!pipe)
+		return;
+	pipe->signalled_flags |= flags;
+
+	if (pipe->prev_signalled || pipe->next_signalled
+		|| dev->first_signalled_pipe == pipe)
+		return;	/* already in the list */
+	pipe->next_signalled = dev->first_signalled_pipe;
+	if (dev->first_signalled_pipe) {
+		dev->first_signalled_pipe->prev_signalled = pipe;
+	}
+	dev->first_signalled_pipe = pipe;
+}
+
+static void signalled_pipes_remove_locked(struct goldfish_pipe_dev *dev,
+	struct goldfish_pipe *pipe) {
+	if (pipe->prev_signalled)
+		pipe->prev_signalled->next_signalled = pipe->next_signalled;
+	if (pipe->next_signalled)
+		pipe->next_signalled->prev_signalled = pipe->prev_signalled;
+	if (pipe == dev->first_signalled_pipe)
+		dev->first_signalled_pipe = pipe->next_signalled;
+	pipe->prev_signalled = NULL;
+	pipe->next_signalled = NULL;
+}
+
+static struct goldfish_pipe *signalled_pipes_pop_front(struct goldfish_pipe_dev *dev,
+		int *wakes)
+{
+	struct goldfish_pipe *pipe;
+	unsigned long flags;
+	spin_lock_irqsave(&dev->lock, flags);
+
+	pipe = dev->first_signalled_pipe;
+	if (pipe) {
+		*wakes = pipe->signalled_flags;
+		pipe->signalled_flags = 0;
+		/*
+		 * This is an optimized version of signalled_pipes_remove_locked() -
+		 * we want to make it as fast as possible to wake the sleeping pipe
+		 * operations faster
+		 */
+		dev->first_signalled_pipe = pipe->next_signalled;
+		if (dev->first_signalled_pipe)
+			dev->first_signalled_pipe->prev_signalled = NULL;
+		pipe->next_signalled = NULL;
+	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return pipe;
+}
+
+static void goldfish_interrupt_task(unsigned long unused)
+{
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	/* Iterate over the signalled pipes and wake them one by one */
+	struct goldfish_pipe *pipe;
+	int wakes;
+	while ((pipe = signalled_pipes_pop_front(dev, &wakes)) != NULL) {
+		if (wakes & PIPE_WAKE_CLOSED) {
+			pipe->flags = 1 << BIT_CLOSED_ON_HOST;
+		} else {
+			if (wakes & PIPE_WAKE_READ)
+				clear_bit(BIT_WAKE_ON_READ, &pipe->flags);
+			if (wakes & PIPE_WAKE_WRITE)
+				clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags);
+		}
+		/*
+		 * wake_up_interruptible() implies a write barrier, so don't explicitly
+		 * add another one here.
+		 */
+		wake_up_interruptible(&pipe->wake_queue);
+	}
+}
+DECLARE_TASKLET(goldfish_interrupt_tasklet, goldfish_interrupt_task, 0);
+
+/*
+ * The general idea of the interrupt handling:
+ *
+ *  1. device raises an interrupt if there's at least one signalled pipe
+ *  2. IRQ handler reads the signalled pipes and their count from the device
+ *  3. device writes them into a shared buffer and returns the count
+ *      it only resets the IRQ if it has returned all signalled pipes,
+ *      otherwise it leaves it raised, so IRQ handler will be called
+ *      again for the next chunk
+ *  4. IRQ handler adds all returned pipes to the device's signalled pipes list
+ *  5. IRQ handler launches a tasklet to process the signalled pipes from the
+ *      list in a separate context
+ */
+static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
+{
+	u32 count;
+	u32 i;
+	unsigned long flags;
+	struct goldfish_pipe_dev *dev = dev_id;
+	if (dev != pipe_dev)
+		return IRQ_NONE;
+
+	/* Request the signalled pipes from the device */
+	spin_lock_irqsave(&dev->lock, flags);
+
+	count = readl(dev->base + PIPE_REG_GET_SIGNALLED);
+	if (count == 0) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return IRQ_NONE;
+	}
+	if (count > MAX_SIGNALLED_PIPES)
+		count = MAX_SIGNALLED_PIPES;
+
+	for (i = 0; i < count; ++i)
+		signalled_pipes_add_locked(dev,
+			dev->buffers->signalled_pipe_buffers[i].id,
+			dev->buffers->signalled_pipe_buffers[i].flags);
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	tasklet_schedule(&goldfish_interrupt_tasklet);
+	return IRQ_HANDLED;
+}
+
+static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
+{
+	int id;
+	for (id = 0; id < dev->pipes_capacity; ++id)
+		if (!dev->pipes[id])
+			return id;
+
+	{
+		/* Reallocate the array */
+		u32 new_capacity = 2 * dev->pipes_capacity;
+		struct goldfish_pipe **pipes =
+				kcalloc(new_capacity, sizeof(*pipes),
+					GFP_ATOMIC);
+		if (!pipes)
+			return -ENOMEM;
+		memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity);
+		kfree(dev->pipes);
+		dev->pipes = pipes;
+		id = dev->pipes_capacity;
+		dev->pipes_capacity = new_capacity;
+	}
+	return id;
+}
+
+/**
+ *	goldfish_pipe_open - open a channel to the AVD
+ *	@inode: inode of device
+ *	@file: file struct of opener
+ *
+ *	Create a new pipe link between the emulator and the use application.
+ *	Each new request produces a new pipe.
+ *
+ *	Note: we use the pipe ID as a mux. All goldfish emulations are 32bit
+ *	right now so this is fine. A move to 64bit will need this addressing
+ */
+static int goldfish_pipe_open(struct inode *inode, struct file *file)
+{
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	unsigned long flags;
+	int id;
+	int status;
+
+	/* Allocate new pipe kernel object */
+	struct goldfish_pipe *pipe = kzalloc(sizeof(*pipe), GFP_KERNEL);
+	if (pipe == NULL)
+		return -ENOMEM;
+
+	pipe->dev = dev;
+	mutex_init(&pipe->lock);
+	init_waitqueue_head(&pipe->wake_queue);
+
+	/*
+	 * Command buffer needs to be allocated on its own page to make sure it is
+	 * physically contiguous in host's address space.
+	 */
+	pipe->command_buffer =
+			(struct goldfish_pipe_command*)__get_free_page(GFP_KERNEL);
+	if (!pipe->command_buffer) {
+		status = -ENOMEM;
+		goto err_pipe;
+	}
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	id = get_free_pipe_id_locked(dev);
+	if (id < 0) {
+		status = id;
+		goto err_id_locked;
+	}
+
+	dev->pipes[id] = pipe;
+	pipe->id = id;
+	pipe->command_buffer->id = id;
+
+	/* Now tell the emulator we're opening a new pipe. */
+	dev->buffers->open_command_params.rw_params_max_count =
+			MAX_BUFFERS_PER_COMMAND;
+	dev->buffers->open_command_params.command_buffer_ptr =
+			(u64)(unsigned long)__pa(pipe->command_buffer);
+	status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN);
+	spin_unlock_irqrestore(&dev->lock, flags);
+	if (status < 0)
+		goto err_cmd;
+	/* All is done, save the pipe into the file's private data field */
+	file->private_data = pipe;
+	return 0;
+
+err_cmd:
+	spin_lock_irqsave(&dev->lock, flags);
+	dev->pipes[id] = NULL;
+err_id_locked:
+	spin_unlock_irqrestore(&dev->lock, flags);
+	free_page((unsigned long)pipe->command_buffer);
+err_pipe:
+	kfree(pipe);
+	return status;
+}
+
+static int goldfish_pipe_release(struct inode *inode, struct file *filp)
+{
+	unsigned long flags;
+	struct goldfish_pipe *pipe = filp->private_data;
+	struct goldfish_pipe_dev *dev = pipe->dev;
+
+	/* The guest is closing the channel, so tell the emulator right now */
+	(void)goldfish_cmd(pipe, PIPE_CMD_CLOSE);
+
+	spin_lock_irqsave(&dev->lock, flags);
+	dev->pipes[pipe->id] = NULL;
+	signalled_pipes_remove_locked(dev, pipe);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	filp->private_data = NULL;
+	free_page((unsigned long)pipe->command_buffer);
+	kfree(pipe);
+	return 0;
+}
+
+static const struct file_operations goldfish_pipe_fops = {
+	.owner = THIS_MODULE,
+	.read = goldfish_pipe_read,
+	.write = goldfish_pipe_write,
+	.poll = goldfish_pipe_poll,
+	.open = goldfish_pipe_open,
+	.release = goldfish_pipe_release,
+};
+
+static struct miscdevice goldfish_pipe_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "goldfish_pipe",
+	.fops = &goldfish_pipe_fops,
+};
+
+static int goldfish_pipe_device_init_v2(struct platform_device *pdev)
+{
+	char *page;
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt,
+				IRQF_SHARED, "goldfish_pipe", dev);
+	if (err) {
+		dev_err(&pdev->dev, "unable to allocate IRQ for v2\n");
+		return err;
+	}
+
+	err = misc_register(&goldfish_pipe_dev);
+	if (err) {
+		dev_err(&pdev->dev, "unable to register v2 device\n");
+		return err;
+	}
+
+	dev->first_signalled_pipe = NULL;
+	dev->pipes_capacity = INITIAL_PIPES_CAPACITY;
+	dev->pipes = kcalloc(dev->pipes_capacity, sizeof(*dev->pipes), GFP_KERNEL);
+	if (!dev->pipes)
+		return -ENOMEM;
+
+	/*
+	 * We're going to pass two buffers, open_command_params and
+	 * signalled_pipe_buffers, to the host. This means each of those buffers
+	 * needs to be contained in a single physical page. The easiest choice is
+	 * to just allocate a page and place the buffers in it.
+	 */
+	BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE);
+	page = (char*)__get_free_page(GFP_KERNEL);
+	if (!page) {
+		kfree(dev->pipes);
+		return -ENOMEM;
+	}
+	dev->buffers = (struct goldfish_pipe_dev_buffers*)page;
+
+	/* Send the buffer addresses to the host */
+	{
+		u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers);
+		writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH);
+		writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_SIGNAL_BUFFER);
+		writel((u32)MAX_SIGNALLED_PIPES, dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT);
+
+		paddr = __pa(&dev->buffers->open_command_params);
+		writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_OPEN_BUFFER_HIGH);
+		writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_OPEN_BUFFER);
+	}
+	return 0;
+}
+
+static void goldfish_pipe_device_deinit_v2(struct platform_device *pdev) {
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	misc_deregister(&goldfish_pipe_dev);
+	kfree(dev->pipes);
+	free_page((unsigned long)dev->buffers);
+}
+
+static int goldfish_pipe_probe(struct platform_device *pdev)
+{
+	int err;
+	struct resource *r;
+	struct goldfish_pipe_dev *dev = pipe_dev;
+
+	BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
+
+	/* not thread safe, but this should not happen */
+	WARN_ON(dev->base != NULL);
+
+	spin_lock_init(&dev->lock);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (r == NULL || resource_size(r) < PAGE_SIZE) {
+		dev_err(&pdev->dev, "can't allocate i/o page\n");
+		return -EINVAL;
+	}
+	dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE);
+	if (dev->base == NULL) {
+		dev_err(&pdev->dev, "ioremap failed\n");
+		return -EINVAL;
+	}
+
+	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (r == NULL) {
+		err = -EINVAL;
+		goto error;
+	}
+	dev->irq = r->start;
+
+	/*
+	 * Exchange the versions with the host device
+	 *
+	 * Note: v1 driver used to not report its version, so we write it before
+	 *  reading device version back: this allows the host implementation to
+	 *  detect the old driver (if there was no version write before read).
+	 */
+	writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION);
+	dev->version = readl(dev->base + PIPE_REG_VERSION);
+	if (dev->version < PIPE_CURRENT_DEVICE_VERSION) {
+		/* initialize the old device version */
+		err = goldfish_pipe_device_init_v1(pdev);
+	} else {
+		/* Host device supports the new interface */
+		err = goldfish_pipe_device_init_v2(pdev);
+	}
+	if (!err)
+		return 0;
+
+error:
+	dev->base = NULL;
+	return err;
+}
+
+static int goldfish_pipe_remove(struct platform_device *pdev)
+{
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	if (dev->version < PIPE_CURRENT_DEVICE_VERSION)
+		goldfish_pipe_device_deinit_v1(pdev);
+	else
+		goldfish_pipe_device_deinit_v2(pdev);
+	dev->base = NULL;
+	return 0;
+}
+
+static const struct acpi_device_id goldfish_pipe_acpi_match[] = {
+	{ "GFSH0003", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match);
+
+static const struct of_device_id goldfish_pipe_of_match[] = {
+	{ .compatible = "google,android-pipe", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match);
+
+static struct platform_driver goldfish_pipe_driver = {
+	.probe = goldfish_pipe_probe,
+	.remove = goldfish_pipe_remove,
+	.driver = {
+		.name = "goldfish_pipe",
+		.of_match_table = goldfish_pipe_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match),
+	}
+};
+
+module_platform_driver(goldfish_pipe_driver);
+MODULE_AUTHOR("David Turner <digit@google.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/goldfish_battery.c b/drivers/power/goldfish_battery.c
index a50bb988c69a..f5c525e4482a 100644
--- a/drivers/power/goldfish_battery.c
+++ b/drivers/power/goldfish_battery.c
@@ -24,6 +24,7 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/acpi.h>
 
 struct goldfish_battery_data {
 	void __iomem *reg_base;
@@ -227,11 +228,25 @@ static int goldfish_battery_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id goldfish_battery_of_match[] = {
+	{ .compatible = "google,goldfish-battery", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_battery_of_match);
+
+static const struct acpi_device_id goldfish_battery_acpi_match[] = {
+	{ "GFSH0001", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, goldfish_battery_acpi_match);
+
 static struct platform_driver goldfish_battery_device = {
 	.probe		= goldfish_battery_probe,
 	.remove		= goldfish_battery_remove,
 	.driver = {
-		.name = "goldfish-battery"
+		.name = "goldfish-battery",
+		.of_match_table = goldfish_battery_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_battery_acpi_match),
 	}
 };
 module_platform_driver(goldfish_battery_device);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 2d867c5bfd9f..8cead04f26d6 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3706,6 +3706,11 @@ _scsih_temp_threshold_events(struct MPT3SAS_ADAPTER *ioc,
 	}
 }
 
+static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
+{
+	return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
+}
+
 /**
  * _scsih_flush_running_cmds - completing outstanding commands.
  * @ioc: per adapter object
@@ -3727,6 +3732,9 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc)
 		if (!scmd)
 			continue;
 		count++;
+		if (ata_12_16_cmd(scmd))
+			scsi_internal_device_unblock(scmd->device,
+							SDEV_RUNNING);
 		mpt3sas_base_free_smid(ioc, smid);
 		scsi_dma_unmap(scmd);
 		if (ioc->pci_error_recovery)
@@ -3831,8 +3839,6 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
 	    SAM_STAT_CHECK_CONDITION;
 }
 
-
-
 /**
  * scsih_qcmd - main scsi request entry point
  * @scmd: pointer to scsi command object
@@ -3859,6 +3865,13 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
 	if (ioc->logging_level & MPT_DEBUG_SCSI)
 		scsi_print_command(scmd);
 
+	/*
+	 * Lock the device for any subsequent command until command is
+	 * done.
+	 */
+	if (ata_12_16_cmd(scmd))
+		scsi_internal_device_block(scmd->device);
+
 	sas_device_priv_data = scmd->device->hostdata;
 	if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
 		scmd->result = DID_NO_CONNECT << 16;
@@ -4431,6 +4444,9 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 	if (scmd == NULL)
 		return 1;
 
+	if (ata_12_16_cmd(scmd))
+		scsi_internal_device_unblock(scmd->device, SDEV_RUNNING);
+
 	mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
 
 	if (mpi_reply == NULL) {
diff --git a/drivers/staging/goldfish/Kconfig b/drivers/staging/goldfish/Kconfig
index 4e094602437c..c579141a7bed 100644
--- a/drivers/staging/goldfish/Kconfig
+++ b/drivers/staging/goldfish/Kconfig
@@ -4,6 +4,12 @@ config GOLDFISH_AUDIO
 	---help---
 	  Emulated audio channel for the Goldfish Android Virtual Device
 
+config GOLDFISH_SYNC
+    tristate "Goldfish AVD Sync Driver"
+    depends on GOLDFISH
+	---help---
+	  Emulated sync fences for the Goldfish Android Virtual Device
+
 config MTD_GOLDFISH_NAND
 	tristate "Goldfish NAND device"
 	depends on GOLDFISH
diff --git a/drivers/staging/goldfish/Makefile b/drivers/staging/goldfish/Makefile
index dec34ad58162..0cf525588210 100644
--- a/drivers/staging/goldfish/Makefile
+++ b/drivers/staging/goldfish/Makefile
@@ -4,3 +4,8 @@
 
 obj-$(CONFIG_GOLDFISH_AUDIO) += goldfish_audio.o
 obj-$(CONFIG_MTD_GOLDFISH_NAND)	+= goldfish_nand.o
+
+# and sync
+
+ccflags-y := -Idrivers/staging/android
+obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync.o
diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c
index b0927e49d0a8..63b79c09b41b 100644
--- a/drivers/staging/goldfish/goldfish_audio.c
+++ b/drivers/staging/goldfish/goldfish_audio.c
@@ -26,7 +26,9 @@
 #include <linux/sched.h>
 #include <linux/dma-mapping.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
 #include <linux/goldfish.h>
+#include <linux/acpi.h>
 
 MODULE_AUTHOR("Google, Inc.");
 MODULE_DESCRIPTION("Android QEMU Audio Driver");
@@ -115,6 +117,7 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf,
 				   size_t count, loff_t *pos)
 {
 	struct goldfish_audio *data = fp->private_data;
+	unsigned long irq_flags;
 	int length;
 	int result = 0;
 
@@ -128,6 +131,10 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf,
 		wait_event_interruptible(data->wait, data->buffer_status &
 					 AUDIO_INT_READ_BUFFER_FULL);
 
+		spin_lock_irqsave(&data->lock, irq_flags);
+		data->buffer_status &= ~AUDIO_INT_READ_BUFFER_FULL;
+		spin_unlock_irqrestore(&data->lock, irq_flags);
+
 		length = AUDIO_READ(data, AUDIO_READ_BUFFER_AVAILABLE);
 
 		/* copy data to user space */
@@ -344,11 +351,25 @@ static int goldfish_audio_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id goldfish_audio_of_match[] = {
+	{ .compatible = "google,goldfish-audio", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_audio_of_match);
+
+static const struct acpi_device_id goldfish_audio_acpi_match[] = {
+	{ "GFSH0005", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, goldfish_audio_acpi_match);
+
 static struct platform_driver goldfish_audio_driver = {
 	.probe		= goldfish_audio_probe,
 	.remove		= goldfish_audio_remove,
 	.driver = {
-		.name = "goldfish_audio"
+		.name = "goldfish_audio",
+		.of_match_table = goldfish_audio_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_audio_acpi_match),
 	}
 };
 
diff --git a/drivers/staging/goldfish/goldfish_sync.c b/drivers/staging/goldfish/goldfish_sync.c
new file mode 100644
index 000000000000..ba8def29901e
--- /dev/null
+++ b/drivers/staging/goldfish/goldfish_sync.c
@@ -0,0 +1,987 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/acpi.h>
+
+#include <linux/string.h>
+#include <linux/syscalls.h>
+
+#include "sw_sync.h"
+#include "sync.h"
+
+#define ERR(...) printk(KERN_ERR __VA_ARGS__);
+
+#define INFO(...) printk(KERN_INFO __VA_ARGS__);
+
+#define DPRINT(...) pr_debug(__VA_ARGS__);
+
+#define DTRACE() DPRINT("%s: enter", __func__)
+
+/* The Goldfish sync driver is designed to provide a interface
+ * between the underlying host's sync device and the kernel's
+ * sw_sync.
+ * The purpose of the device/driver is to enable lightweight
+ * creation and signaling of timelines and fences
+ * in order to synchronize the guest with host-side graphics events.
+ *
+ * Each time the interrupt trips, the driver
+ * may perform a sw_sync operation.
+ */
+
+/* The operations are: */
+
+/* Ready signal - used to mark when irq should lower */
+#define CMD_SYNC_READY            0
+
+/* Create a new timeline. writes timeline handle */
+#define CMD_CREATE_SYNC_TIMELINE  1
+
+/* Create a fence object. reads timeline handle and time argument.
+ * Writes fence fd to the SYNC_REG_HANDLE register. */
+#define CMD_CREATE_SYNC_FENCE     2
+
+/* Increments timeline. reads timeline handle and time argument */
+#define CMD_SYNC_TIMELINE_INC     3
+
+/* Destroys a timeline. reads timeline handle */
+#define CMD_DESTROY_SYNC_TIMELINE 4
+
+/* Starts a wait on the host with
+ * the given glsync object and sync thread handle. */
+#define CMD_TRIGGER_HOST_WAIT     5
+
+/* The register layout is: */
+
+#define SYNC_REG_BATCH_COMMAND                0x00 /* host->guest batch commands */
+#define SYNC_REG_BATCH_GUESTCOMMAND           0x04 /* guest->host batch commands */
+#define SYNC_REG_BATCH_COMMAND_ADDR           0x08 /* communicate physical address of host->guest batch commands */
+#define SYNC_REG_BATCH_COMMAND_ADDR_HIGH      0x0c /* 64-bit part */
+#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR      0x10 /* communicate physical address of guest->host commands */
+#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH 0x14 /* 64-bit part */
+#define SYNC_REG_INIT                         0x18 /* signals that the device has been probed */
+
+/* There is an ioctl associated with goldfish sync driver.
+ * Make it conflict with ioctls that are not likely to be used
+ * in the emulator.
+ *
+ * '@'	00-0F	linux/radeonfb.h	conflict!
+ * '@'	00-0F	drivers/video/aty/aty128fb.c	conflict!
+ */
+#define GOLDFISH_SYNC_IOC_MAGIC	'@'
+
+#define GOLDFISH_SYNC_IOC_QUEUE_WORK	_IOWR(GOLDFISH_SYNC_IOC_MAGIC, 0, struct goldfish_sync_ioctl_info)
+
+/* The above definitions (command codes, register layout, ioctl definitions)
+ * need to be in sync with the following files:
+ *
+ * Host-side (emulator):
+ * external/qemu/android/emulation/goldfish_sync.h
+ * external/qemu-android/hw/misc/goldfish_sync.c
+ *
+ * Guest-side (system image):
+ * device/generic/goldfish-opengl/system/egl/goldfish_sync.h
+ * device/generic/goldfish/ueventd.ranchu.rc
+ * platform/build/target/board/generic/sepolicy/file_contexts
+ */
+struct goldfish_sync_hostcmd {
+	/* sorted for alignment */
+	uint64_t handle;
+	uint64_t hostcmd_handle;
+	uint32_t cmd;
+	uint32_t time_arg;
+};
+
+struct goldfish_sync_guestcmd {
+	uint64_t host_command; /* uint64_t for alignment */
+	uint64_t glsync_handle;
+	uint64_t thread_handle;
+	uint64_t guest_timeline_handle;
+};
+
+#define GOLDFISH_SYNC_MAX_CMDS 64
+
+struct goldfish_sync_state {
+	char __iomem *reg_base;
+	int irq;
+
+	/* Spinlock protects |to_do| / |to_do_end|. */
+	spinlock_t lock;
+	/* |mutex_lock| protects all concurrent access
+	 * to timelines for both kernel and user space. */
+	struct mutex mutex_lock;
+
+	/* Buffer holding commands issued from host. */
+	struct goldfish_sync_hostcmd to_do[GOLDFISH_SYNC_MAX_CMDS];
+	uint32_t to_do_end;
+
+	/* Addresses for the reading or writing
+	 * of individual commands. The host can directly write
+	 * to |batch_hostcmd| (and then this driver immediately
+	 * copies contents to |to_do|). This driver either replies
+	 * through |batch_hostcmd| or simply issues a
+	 * guest->host command through |batch_guestcmd|.
+	 */
+	struct goldfish_sync_hostcmd *batch_hostcmd;
+	struct goldfish_sync_guestcmd *batch_guestcmd;
+
+	/* Used to give this struct itself to a work queue
+	 * function for executing actual sync commands. */
+	struct work_struct work_item;
+};
+
+static struct goldfish_sync_state global_sync_state[1];
+
+struct goldfish_sync_timeline_obj {
+	struct sw_sync_timeline *sw_sync_tl;
+	uint32_t current_time;
+	/* We need to be careful about when we deallocate
+	 * this |goldfish_sync_timeline_obj| struct.
+	 * In order to ensure proper cleanup, we need to
+	 * consider the triggered host-side wait that may
+	 * still be in flight when the guest close()'s a
+	 * goldfish_sync device's sync context fd (and
+	 * destroys the |sw_sync_tl| field above).
+	 * The host-side wait may raise IRQ
+	 * and tell the kernel to increment the timeline _after_
+	 * the |sw_sync_tl| has already been set to null.
+	 *
+	 * From observations on OpenGL apps and CTS tests, this
+	 * happens at some very low probability upon context
+	 * destruction or process close, but it does happen
+	 * and it needs to be handled properly. Otherwise,
+	 * if we clean up the surrounding |goldfish_sync_timeline_obj|
+	 * too early, any |handle| field of any host->guest command
+	 * might not even point to a null |sw_sync_tl| field,
+	 * but to garbage memory or even a reclaimed |sw_sync_tl|.
+	 * If we do not count such "pending waits" and kfree the object
+	 * immediately upon |goldfish_sync_timeline_destroy|,
+	 * we might get mysterous RCU stalls after running a long
+	 * time because the garbage memory that is being read
+	 * happens to be interpretable as a |spinlock_t| struct
+	 * that is currently in the locked state.
+	 *
+	 * To track when to free the |goldfish_sync_timeline_obj|
+	 * itself, we maintain a kref.
+	 * The kref essentially counts the timeline itself plus
+	 * the number of waits in flight. kref_init/kref_put
+	 * are issued on
+	 * |goldfish_sync_timeline_create|/|goldfish_sync_timeline_destroy|
+	 * and kref_get/kref_put are issued on
+	 * |goldfish_sync_fence_create|/|goldfish_sync_timeline_inc|.
+	 *
+	 * The timeline is destroyed after reference count
+	 * reaches zero, which would happen after
+	 * |goldfish_sync_timeline_destroy| and all pending
+	 * |goldfish_sync_timeline_inc|'s are fulfilled.
+	 *
+	 * NOTE (1): We assume that |fence_create| and
+	 * |timeline_inc| calls are 1:1, otherwise the kref scheme
+	 * will not work. This is a valid assumption as long
+	 * as the host-side virtual device implementation
+	 * does not insert any timeline increments
+	 * that we did not trigger from here.
+	 *
+	 * NOTE (2): The use of kref by itself requires no locks,
+	 * but this does not mean everything works without locks.
+	 * Related timeline operations do require a lock of some sort,
+	 * or at least are not proven to work without it.
+	 * In particualr, we assume that all the operations
+	 * done on the |kref| field above are done in contexts where
+	 * |global_sync_state->mutex_lock| is held. Do not
+	 * remove that lock until everything is proven to work
+	 * without it!!! */
+	struct kref kref;
+};
+
+/* We will call |delete_timeline_obj| when the last reference count
+ * of the kref is decremented. This deletes the sw_sync
+ * timeline object along with the wrapper itself. */
+static void delete_timeline_obj(struct kref* kref) {
+	struct goldfish_sync_timeline_obj* obj =
+		container_of(kref, struct goldfish_sync_timeline_obj, kref);
+
+	sync_timeline_destroy(&obj->sw_sync_tl->obj);
+	obj->sw_sync_tl = NULL;
+	kfree(obj);
+}
+
+static uint64_t gensym_ctr;
+static void gensym(char *dst)
+{
+	sprintf(dst, "goldfish_sync:gensym:%llu", gensym_ctr);
+	gensym_ctr++;
+}
+
+/* |goldfish_sync_timeline_create| assumes that |global_sync_state->mutex_lock|
+ * is held. */
+static struct goldfish_sync_timeline_obj*
+goldfish_sync_timeline_create(void)
+{
+
+	char timeline_name[256];
+	struct sw_sync_timeline *res_sync_tl = NULL;
+	struct goldfish_sync_timeline_obj *res;
+
+	DTRACE();
+
+	gensym(timeline_name);
+
+	res_sync_tl = sw_sync_timeline_create(timeline_name);
+	if (!res_sync_tl) {
+		ERR("Failed to create sw_sync timeline.");
+		return NULL;
+	}
+
+	res = kzalloc(sizeof(struct goldfish_sync_timeline_obj), GFP_KERNEL);
+	res->sw_sync_tl = res_sync_tl;
+	res->current_time = 0;
+	kref_init(&res->kref);
+
+	DPRINT("new timeline_obj=0x%p", res);
+	return res;
+}
+
+/* |goldfish_sync_fence_create| assumes that |global_sync_state->mutex_lock|
+ * is held. */
+static int
+goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj,
+							uint32_t val)
+{
+
+	int fd;
+	char fence_name[256];
+	struct sync_pt *syncpt = NULL;
+	struct sync_fence *sync_obj = NULL;
+	struct sw_sync_timeline *tl;
+
+	DTRACE();
+
+	if (!obj) return -1;
+
+	tl = obj->sw_sync_tl;
+
+	syncpt = sw_sync_pt_create(tl, val);
+	if (!syncpt) {
+		ERR("could not create sync point! "
+			"sync_timeline=0x%p val=%d",
+			   tl, val);
+		return -1;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		ERR("could not get unused fd for sync fence. "
+			"errno=%d", fd);
+		goto err_cleanup_pt;
+	}
+
+	gensym(fence_name);
+
+	sync_obj = sync_fence_create(fence_name, syncpt);
+	if (!sync_obj) {
+		ERR("could not create sync fence! "
+			"sync_timeline=0x%p val=%d sync_pt=0x%p",
+			   tl, val, syncpt);
+		goto err_cleanup_fd_pt;
+	}
+
+	DPRINT("installing sync fence into fd %d sync_obj=0x%p", fd, sync_obj);
+	sync_fence_install(sync_obj, fd);
+	kref_get(&obj->kref);
+
+	return fd;
+
+err_cleanup_fd_pt:
+	put_unused_fd(fd);
+err_cleanup_pt:
+	sync_pt_free(syncpt);
+	return -1;
+}
+
+/* |goldfish_sync_timeline_inc| assumes that |global_sync_state->mutex_lock|
+ * is held. */
+static void
+goldfish_sync_timeline_inc(struct goldfish_sync_timeline_obj *obj, uint32_t inc)
+{
+	DTRACE();
+	/* Just give up if someone else nuked the timeline.
+	 * Whoever it was won't care that it doesn't get signaled. */
+	if (!obj) return;
+
+	DPRINT("timeline_obj=0x%p", obj);
+	sw_sync_timeline_inc(obj->sw_sync_tl, inc);
+	DPRINT("incremented timeline. increment max_time");
+	obj->current_time += inc;
+
+	/* Here, we will end up deleting the timeline object if it
+	 * turns out that this call was a pending increment after
+	 * |goldfish_sync_timeline_destroy| was called. */
+	kref_put(&obj->kref, delete_timeline_obj);
+	DPRINT("done");
+}
+
+/* |goldfish_sync_timeline_destroy| assumes
+ * that |global_sync_state->mutex_lock| is held. */
+static void
+goldfish_sync_timeline_destroy(struct goldfish_sync_timeline_obj *obj)
+{
+	DTRACE();
+	/* See description of |goldfish_sync_timeline_obj| for why we
+	 * should not immediately destroy |obj| */
+	kref_put(&obj->kref, delete_timeline_obj);
+}
+
+static inline void
+goldfish_sync_cmd_queue(struct goldfish_sync_state *sync_state,
+						uint32_t cmd,
+						uint64_t handle,
+						uint32_t time_arg,
+						uint64_t hostcmd_handle)
+{
+	struct goldfish_sync_hostcmd *to_add;
+
+	DTRACE();
+
+	BUG_ON(sync_state->to_do_end == GOLDFISH_SYNC_MAX_CMDS);
+
+	to_add = &sync_state->to_do[sync_state->to_do_end];
+
+	to_add->cmd = cmd;
+	to_add->handle = handle;
+	to_add->time_arg = time_arg;
+	to_add->hostcmd_handle = hostcmd_handle;
+
+	sync_state->to_do_end += 1;
+}
+
+static inline void
+goldfish_sync_hostcmd_reply(struct goldfish_sync_state *sync_state,
+							uint32_t cmd,
+							uint64_t handle,
+							uint32_t time_arg,
+							uint64_t hostcmd_handle)
+{
+	unsigned long irq_flags;
+	struct goldfish_sync_hostcmd *batch_hostcmd =
+		sync_state->batch_hostcmd;
+
+	DTRACE();
+
+	spin_lock_irqsave(&sync_state->lock, irq_flags);
+
+	batch_hostcmd->cmd = cmd;
+	batch_hostcmd->handle = handle;
+	batch_hostcmd->time_arg = time_arg;
+	batch_hostcmd->hostcmd_handle = hostcmd_handle;
+	writel(0, sync_state->reg_base + SYNC_REG_BATCH_COMMAND);
+
+	spin_unlock_irqrestore(&sync_state->lock, irq_flags);
+}
+
+static inline void
+goldfish_sync_send_guestcmd(struct goldfish_sync_state *sync_state,
+							uint32_t cmd,
+							uint64_t glsync_handle,
+							uint64_t thread_handle,
+							uint64_t timeline_handle)
+{
+	unsigned long irq_flags;
+	struct goldfish_sync_guestcmd *batch_guestcmd =
+		sync_state->batch_guestcmd;
+
+	DTRACE();
+
+	spin_lock_irqsave(&sync_state->lock, irq_flags);
+
+	batch_guestcmd->host_command = (uint64_t)cmd;
+	batch_guestcmd->glsync_handle = (uint64_t)glsync_handle;
+	batch_guestcmd->thread_handle = (uint64_t)thread_handle;
+	batch_guestcmd->guest_timeline_handle = (uint64_t)timeline_handle;
+	writel(0, sync_state->reg_base + SYNC_REG_BATCH_GUESTCOMMAND);
+
+	spin_unlock_irqrestore(&sync_state->lock, irq_flags);
+}
+
+/* |goldfish_sync_interrupt| handles IRQ raises from the virtual device.
+ * In the context of OpenGL, this interrupt will fire whenever we need
+ * to signal a fence fd in the guest, with the command
+ * |CMD_SYNC_TIMELINE_INC|.
+ * However, because this function will be called in an interrupt context,
+ * it is necessary to do the actual work of signaling off of interrupt context.
+ * The shared work queue is used for this purpose. At the end when
+ * all pending commands are intercepted by the interrupt handler,
+ * we call |schedule_work|, which will later run the actual
+ * desired sync command in |goldfish_sync_work_item_fn|.
+ */
+static irqreturn_t goldfish_sync_interrupt(int irq, void *dev_id)
+{
+
+	struct goldfish_sync_state *sync_state = dev_id;
+
+	uint32_t nextcmd;
+	uint32_t command_r;
+	uint64_t handle_rw;
+	uint32_t time_r;
+	uint64_t hostcmd_handle_rw;
+
+	int count = 0;
+
+	DTRACE();
+
+	sync_state = dev_id;
+
+	spin_lock(&sync_state->lock);
+
+	for (;;) {
+
+		readl(sync_state->reg_base + SYNC_REG_BATCH_COMMAND);
+		nextcmd = sync_state->batch_hostcmd->cmd;
+
+		if (nextcmd == 0)
+			break;
+
+		command_r = nextcmd;
+		handle_rw = sync_state->batch_hostcmd->handle;
+		time_r = sync_state->batch_hostcmd->time_arg;
+		hostcmd_handle_rw = sync_state->batch_hostcmd->hostcmd_handle;
+
+		goldfish_sync_cmd_queue(
+				sync_state,
+				command_r,
+				handle_rw,
+				time_r,
+				hostcmd_handle_rw);
+
+		count++;
+	}
+
+	spin_unlock(&sync_state->lock);
+
+	schedule_work(&sync_state->work_item);
+
+	return (count == 0) ? IRQ_NONE : IRQ_HANDLED;
+}
+
+/* |goldfish_sync_work_item_fn| does the actual work of servicing
+ * host->guest sync commands. This function is triggered whenever
+ * the IRQ for the goldfish sync device is raised. Once it starts
+ * running, it grabs the contents of the buffer containing the
+ * commands it needs to execute (there may be multiple, because
+ * our IRQ is active high and not edge triggered), and then
+ * runs all of them one after the other.
+ */
+static void goldfish_sync_work_item_fn(struct work_struct *input)
+{
+
+	struct goldfish_sync_state *sync_state;
+	int sync_fence_fd;
+
+	struct goldfish_sync_timeline_obj *timeline;
+	uint64_t timeline_ptr;
+
+	uint64_t hostcmd_handle;
+
+	uint32_t cmd;
+	uint64_t handle;
+	uint32_t time_arg;
+
+	struct goldfish_sync_hostcmd *todo;
+	uint32_t todo_end;
+
+	unsigned long irq_flags;
+
+	struct goldfish_sync_hostcmd to_run[GOLDFISH_SYNC_MAX_CMDS];
+	uint32_t i = 0;
+
+	sync_state = container_of(input, struct goldfish_sync_state, work_item);
+
+	mutex_lock(&sync_state->mutex_lock);
+
+	spin_lock_irqsave(&sync_state->lock, irq_flags); {
+
+		todo_end = sync_state->to_do_end;
+
+		DPRINT("num sync todos: %u", sync_state->to_do_end);
+
+		for (i = 0; i < todo_end; i++)
+			to_run[i] = sync_state->to_do[i];
+
+		/* We expect that commands will come in at a slow enough rate
+		 * so that incoming items will not be more than
+		 * GOLDFISH_SYNC_MAX_CMDS.
+		 *
+		 * This is because the way the sync device is used,
+		 * it's only for managing buffer data transfers per frame,
+		 * with a sequential dependency between putting things in
+		 * to_do and taking them out. Once a set of commands is
+		 * queued up in to_do, the user of the device waits for
+		 * them to be processed before queuing additional commands,
+		 * which limits the rate at which commands come in
+		 * to the rate at which we take them out here.
+		 *
+		 * We also don't expect more than MAX_CMDS to be issued
+		 * at once; there is a correspondence between
+		 * which buffers need swapping to the (display / buffer queue)
+		 * to particular commands, and we don't expect there to be
+		 * enough display or buffer queues in operation at once
+		 * to overrun GOLDFISH_SYNC_MAX_CMDS.
+		 */
+		sync_state->to_do_end = 0;
+
+	} spin_unlock_irqrestore(&sync_state->lock, irq_flags);
+
+	for (i = 0; i < todo_end; i++) {
+		DPRINT("todo index: %u", i);
+
+		todo = &to_run[i];
+
+		cmd = todo->cmd;
+
+		handle = (uint64_t)todo->handle;
+		time_arg = todo->time_arg;
+		hostcmd_handle = (uint64_t)todo->hostcmd_handle;
+
+		DTRACE();
+
+		timeline = (struct goldfish_sync_timeline_obj *)(uintptr_t)handle;
+
+		switch (cmd) {
+		case CMD_SYNC_READY:
+			break;
+		case CMD_CREATE_SYNC_TIMELINE:
+			DPRINT("exec CMD_CREATE_SYNC_TIMELINE: "
+					"handle=0x%llx time_arg=%d",
+					handle, time_arg);
+			timeline = goldfish_sync_timeline_create();
+			timeline_ptr = (uintptr_t)timeline;
+			goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_TIMELINE,
+										timeline_ptr,
+										0,
+										hostcmd_handle);
+			DPRINT("sync timeline created: %p", timeline);
+			break;
+		case CMD_CREATE_SYNC_FENCE:
+			DPRINT("exec CMD_CREATE_SYNC_FENCE: "
+					"handle=0x%llx time_arg=%d",
+					handle, time_arg);
+			sync_fence_fd = goldfish_sync_fence_create(timeline, time_arg);
+			goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_FENCE,
+										sync_fence_fd,
+										0,
+										hostcmd_handle);
+			break;
+		case CMD_SYNC_TIMELINE_INC:
+			DPRINT("exec CMD_SYNC_TIMELINE_INC: "
+					"handle=0x%llx time_arg=%d",
+					handle, time_arg);
+			goldfish_sync_timeline_inc(timeline, time_arg);
+			break;
+		case CMD_DESTROY_SYNC_TIMELINE:
+			DPRINT("exec CMD_DESTROY_SYNC_TIMELINE: "
+					"handle=0x%llx time_arg=%d",
+					handle, time_arg);
+			goldfish_sync_timeline_destroy(timeline);
+			break;
+		}
+		DPRINT("Done executing sync command");
+	}
+	mutex_unlock(&sync_state->mutex_lock);
+}
+
+/* Guest-side interface: file operations */
+
+/* Goldfish sync context and ioctl info.
+ *
+ * When a sync context is created by open()-ing the goldfish sync device, we
+ * create a sync context (|goldfish_sync_context|).
+ *
+ * Currently, the only data required to track is the sync timeline itself
+ * along with the current time, which are all packed up in the
+ * |goldfish_sync_timeline_obj| field. We use a |goldfish_sync_context|
+ * as the filp->private_data.
+ *
+ * Next, when a sync context user requests that work be queued and a fence
+ * fd provided, we use the |goldfish_sync_ioctl_info| struct, which holds
+ * information about which host handles to touch for this particular
+ * queue-work operation. We need to know about the host-side sync thread
+ * and the particular host-side GLsync object. We also possibly write out
+ * a file descriptor.
+ */
+struct goldfish_sync_context {
+	struct goldfish_sync_timeline_obj *timeline;
+};
+
+struct goldfish_sync_ioctl_info {
+	uint64_t host_glsync_handle_in;
+	uint64_t host_syncthread_handle_in;
+	int fence_fd_out;
+};
+
+static int goldfish_sync_open(struct inode *inode, struct file *file)
+{
+
+	struct goldfish_sync_context *sync_context;
+
+	DTRACE();
+
+	mutex_lock(&global_sync_state->mutex_lock);
+
+	sync_context = kzalloc(sizeof(struct goldfish_sync_context), GFP_KERNEL);
+
+	if (sync_context == NULL) {
+		ERR("Creation of goldfish sync context failed!");
+		mutex_unlock(&global_sync_state->mutex_lock);
+		return -ENOMEM;
+	}
+
+	sync_context->timeline = NULL;
+
+	file->private_data = sync_context;
+
+	DPRINT("successfully create a sync context @0x%p", sync_context);
+
+	mutex_unlock(&global_sync_state->mutex_lock);
+
+	return 0;
+}
+
+static int goldfish_sync_release(struct inode *inode, struct file *file)
+{
+
+	struct goldfish_sync_context *sync_context;
+
+	DTRACE();
+
+	mutex_lock(&global_sync_state->mutex_lock);
+
+	sync_context = file->private_data;
+
+	if (sync_context->timeline)
+		goldfish_sync_timeline_destroy(sync_context->timeline);
+
+	sync_context->timeline = NULL;
+
+	kfree(sync_context);
+
+	mutex_unlock(&global_sync_state->mutex_lock);
+
+	return 0;
+}
+
+/* |goldfish_sync_ioctl| is the guest-facing interface of goldfish sync
+ * and is used in conjunction with eglCreateSyncKHR to queue up the
+ * actual work of waiting for the EGL sync command to complete,
+ * possibly returning a fence fd to the guest.
+ */
+static long goldfish_sync_ioctl(struct file *file,
+								unsigned int cmd,
+								unsigned long arg)
+{
+	struct goldfish_sync_context *sync_context_data;
+	struct goldfish_sync_timeline_obj *timeline;
+	int fd_out;
+	struct goldfish_sync_ioctl_info ioctl_data;
+
+	DTRACE();
+
+	sync_context_data = file->private_data;
+	fd_out = -1;
+
+	switch (cmd) {
+	case GOLDFISH_SYNC_IOC_QUEUE_WORK:
+
+		DPRINT("exec GOLDFISH_SYNC_IOC_QUEUE_WORK");
+
+		mutex_lock(&global_sync_state->mutex_lock);
+
+		if (copy_from_user(&ioctl_data,
+						(void __user *)arg,
+						sizeof(ioctl_data))) {
+			ERR("Failed to copy memory for ioctl_data from user.");
+			mutex_unlock(&global_sync_state->mutex_lock);
+			return -EFAULT;
+		}
+
+		if (ioctl_data.host_syncthread_handle_in == 0) {
+			DPRINT("Error: zero host syncthread handle!!!");
+			mutex_unlock(&global_sync_state->mutex_lock);
+			return -EFAULT;
+		}
+
+		if (!sync_context_data->timeline) {
+			DPRINT("no timeline yet, create one.");
+			sync_context_data->timeline = goldfish_sync_timeline_create();
+			DPRINT("timeline: 0x%p", &sync_context_data->timeline);
+		}
+
+		timeline = sync_context_data->timeline;
+		fd_out = goldfish_sync_fence_create(timeline,
+											timeline->current_time + 1);
+		DPRINT("Created fence with fd %d and current time %u (timeline: 0x%p)",
+			   fd_out,
+			   sync_context_data->timeline->current_time + 1,
+			   sync_context_data->timeline);
+
+		ioctl_data.fence_fd_out = fd_out;
+
+		if (copy_to_user((void __user *)arg,
+						&ioctl_data,
+						sizeof(ioctl_data))) {
+			DPRINT("Error, could not copy to user!!!");
+
+			sys_close(fd_out);
+			/* We won't be doing an increment, kref_put immediately. */
+			kref_put(&timeline->kref, delete_timeline_obj);
+			mutex_unlock(&global_sync_state->mutex_lock);
+			return -EFAULT;
+		}
+
+		/* We are now about to trigger a host-side wait;
+		 * accumulate on |pending_waits|. */
+		goldfish_sync_send_guestcmd(global_sync_state,
+				CMD_TRIGGER_HOST_WAIT,
+				ioctl_data.host_glsync_handle_in,
+				ioctl_data.host_syncthread_handle_in,
+				(uint64_t)(uintptr_t)(sync_context_data->timeline));
+
+		mutex_unlock(&global_sync_state->mutex_lock);
+		return 0;
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations goldfish_sync_fops = {
+	.owner = THIS_MODULE,
+	.open = goldfish_sync_open,
+	.release = goldfish_sync_release,
+	.unlocked_ioctl = goldfish_sync_ioctl,
+	.compat_ioctl = goldfish_sync_ioctl,
+};
+
+static struct miscdevice goldfish_sync_device = {
+	.name = "goldfish_sync",
+	.fops = &goldfish_sync_fops,
+};
+
+
+static bool setup_verify_batch_cmd_addr(struct goldfish_sync_state *sync_state,
+										void *batch_addr,
+										uint32_t addr_offset,
+										uint32_t addr_offset_high)
+{
+	uint64_t batch_addr_phys;
+	uint32_t batch_addr_phys_test_lo;
+	uint32_t batch_addr_phys_test_hi;
+
+	if (!batch_addr) {
+		ERR("Could not use batch command address!");
+		return false;
+	}
+
+	batch_addr_phys = virt_to_phys(batch_addr);
+	writel((uint32_t)(batch_addr_phys),
+			sync_state->reg_base + addr_offset);
+	writel((uint32_t)(batch_addr_phys >> 32),
+			sync_state->reg_base + addr_offset_high);
+
+	batch_addr_phys_test_lo =
+		readl(sync_state->reg_base + addr_offset);
+	batch_addr_phys_test_hi =
+		readl(sync_state->reg_base + addr_offset_high);
+
+	if (virt_to_phys(batch_addr) !=
+			(((uint64_t)batch_addr_phys_test_hi << 32) |
+			 batch_addr_phys_test_lo)) {
+		ERR("Invalid batch command address!");
+		return false;
+	}
+
+	return true;
+}
+
+int goldfish_sync_probe(struct platform_device *pdev)
+{
+	struct resource *ioresource;
+	struct goldfish_sync_state *sync_state = global_sync_state;
+	int status;
+
+	DTRACE();
+
+	sync_state->to_do_end = 0;
+
+	spin_lock_init(&sync_state->lock);
+	mutex_init(&sync_state->mutex_lock);
+
+	platform_set_drvdata(pdev, sync_state);
+
+	ioresource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (ioresource == NULL) {
+		ERR("platform_get_resource failed");
+		return -ENODEV;
+	}
+
+	sync_state->reg_base = devm_ioremap(&pdev->dev, ioresource->start, PAGE_SIZE);
+	if (sync_state->reg_base == NULL) {
+		ERR("Could not ioremap");
+		return -ENOMEM;
+	}
+
+	sync_state->irq = platform_get_irq(pdev, 0);
+	if (sync_state->irq < 0) {
+		ERR("Could not platform_get_irq");
+		return -ENODEV;
+	}
+
+	status = devm_request_irq(&pdev->dev,
+							sync_state->irq,
+							goldfish_sync_interrupt,
+							IRQF_SHARED,
+							pdev->name,
+							sync_state);
+	if (status) {
+		ERR("request_irq failed");
+		return -ENODEV;
+	}
+
+	INIT_WORK(&sync_state->work_item,
+			  goldfish_sync_work_item_fn);
+
+	misc_register(&goldfish_sync_device);
+
+	/* Obtain addresses for batch send/recv of commands. */
+	{
+		struct goldfish_sync_hostcmd *batch_addr_hostcmd;
+		struct goldfish_sync_guestcmd *batch_addr_guestcmd;
+
+		batch_addr_hostcmd = devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_hostcmd),
+				GFP_KERNEL);
+		batch_addr_guestcmd = devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_guestcmd),
+				GFP_KERNEL);
+
+		if (!setup_verify_batch_cmd_addr(sync_state,
+					batch_addr_hostcmd,
+					SYNC_REG_BATCH_COMMAND_ADDR,
+					SYNC_REG_BATCH_COMMAND_ADDR_HIGH)) {
+			ERR("goldfish_sync: Could not setup batch command address");
+			return -ENODEV;
+		}
+
+		if (!setup_verify_batch_cmd_addr(sync_state,
+					batch_addr_guestcmd,
+					SYNC_REG_BATCH_GUESTCOMMAND_ADDR,
+					SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH)) {
+			ERR("goldfish_sync: Could not setup batch guest command address");
+			return -ENODEV;
+		}
+
+		sync_state->batch_hostcmd = batch_addr_hostcmd;
+		sync_state->batch_guestcmd = batch_addr_guestcmd;
+	}
+
+	INFO("goldfish_sync: Initialized goldfish sync device");
+
+	writel(0, sync_state->reg_base + SYNC_REG_INIT);
+
+	return 0;
+}
+
+static int goldfish_sync_remove(struct platform_device *pdev)
+{
+	struct goldfish_sync_state *sync_state = global_sync_state;
+
+	DTRACE();
+
+	misc_deregister(&goldfish_sync_device);
+	memset(sync_state, 0, sizeof(struct goldfish_sync_state));
+	return 0;
+}
+
+static const struct of_device_id goldfish_sync_of_match[] = {
+	{ .compatible = "google,goldfish-sync", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_sync_of_match);
+
+static const struct acpi_device_id goldfish_sync_acpi_match[] = {
+	{ "GFSH0006", 0 },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(acpi, goldfish_sync_acpi_match);
+
+static struct platform_driver goldfish_sync = {
+	.probe = goldfish_sync_probe,
+	.remove = goldfish_sync_remove,
+	.driver = {
+		.name = "goldfish_sync",
+		.of_match_table = goldfish_sync_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_sync_acpi_match),
+	}
+};
+
+module_platform_driver(goldfish_sync);
+
+MODULE_AUTHOR("Google, Inc.");
+MODULE_DESCRIPTION("Android QEMU Sync Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+
+/* This function is only to run a basic test of sync framework.
+ * It creates a timeline and fence object whose signal point is at 1.
+ * The timeline is incremented, and we use the sync framework's
+ * sync_fence_wait on that fence object. If everything works out,
+ * we should not hang in the wait and return immediately.
+ * There is no way to explicitly run this test yet, but it
+ * can be used by inserting it at the end of goldfish_sync_probe.
+ */
+void test_kernel_sync(void)
+{
+	struct goldfish_sync_timeline_obj *test_timeline;
+	int test_fence_fd;
+
+	DTRACE();
+
+	DPRINT("test sw_sync");
+
+	test_timeline = goldfish_sync_timeline_create();
+	DPRINT("sw_sync_timeline_create -> 0x%p", test_timeline);
+
+	test_fence_fd = goldfish_sync_fence_create(test_timeline, 1);
+	DPRINT("sync_fence_create -> %d", test_fence_fd);
+
+	DPRINT("incrementing test timeline");
+	goldfish_sync_timeline_inc(test_timeline, 1);
+
+	DPRINT("test waiting (should NOT hang)");
+	sync_fence_wait(
+			sync_fence_fdget(test_fence_fd), -1);
+
+	DPRINT("test waiting (afterward)");
+}
diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c
index 0f82c0b146f6..1e332855b933 100644
--- a/drivers/tty/goldfish.c
+++ b/drivers/tty/goldfish.c
@@ -68,8 +68,7 @@ static void goldfish_tty_do_write(int line, const char *buf, unsigned count)
 
 static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id)
 {
-	struct platform_device *pdev = dev_id;
-	struct goldfish_tty *qtty = &goldfish_ttys[pdev->id];
+	struct goldfish_tty *qtty = dev_id;
 	void __iomem *base = qtty->base;
 	unsigned long irq_flags;
 	unsigned char *buf;
@@ -233,6 +232,7 @@ static int goldfish_tty_probe(struct platform_device *pdev)
 	struct device *ttydev;
 	void __iomem *base;
 	u32 irq;
+	unsigned int line;
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (r == NULL)
@@ -248,10 +248,16 @@ static int goldfish_tty_probe(struct platform_device *pdev)
 
 	irq = r->start;
 
-	if (pdev->id >= goldfish_tty_line_count)
-		goto err_unmap;
-
 	mutex_lock(&goldfish_tty_lock);
+
+	if (pdev->id == PLATFORM_DEVID_NONE)
+		line = goldfish_tty_current_line_count;
+	else
+		line = pdev->id;
+
+	if (line >= goldfish_tty_line_count)
+		goto err_create_driver_failed;
+
 	if (goldfish_tty_current_line_count == 0) {
 		ret = goldfish_tty_create_driver();
 		if (ret)
@@ -259,7 +265,7 @@ static int goldfish_tty_probe(struct platform_device *pdev)
 	}
 	goldfish_tty_current_line_count++;
 
-	qtty = &goldfish_ttys[pdev->id];
+	qtty = &goldfish_ttys[line];
 	spin_lock_init(&qtty->lock);
 	tty_port_init(&qtty->port);
 	qtty->port.ops = &goldfish_port_ops;
@@ -269,13 +275,13 @@ static int goldfish_tty_probe(struct platform_device *pdev)
 	writel(GOLDFISH_TTY_CMD_INT_DISABLE, base + GOLDFISH_TTY_CMD);
 
 	ret = request_irq(irq, goldfish_tty_interrupt, IRQF_SHARED,
-						"goldfish_tty", pdev);
+						"goldfish_tty", qtty);
 	if (ret)
 		goto err_request_irq_failed;
 
 
 	ttydev = tty_port_register_device(&qtty->port, goldfish_tty_driver,
-							pdev->id, &pdev->dev);
+							line, &pdev->dev);
 	if (IS_ERR(ttydev)) {
 		ret = PTR_ERR(ttydev);
 		goto err_tty_register_device_failed;
@@ -286,8 +292,9 @@ static int goldfish_tty_probe(struct platform_device *pdev)
 	qtty->console.device = goldfish_tty_console_device;
 	qtty->console.setup = goldfish_tty_console_setup;
 	qtty->console.flags = CON_PRINTBUFFER;
-	qtty->console.index = pdev->id;
+	qtty->console.index = line;
 	register_console(&qtty->console);
+	platform_set_drvdata(pdev, qtty);
 
 	mutex_unlock(&goldfish_tty_lock);
 	return 0;
@@ -307,13 +314,12 @@ static int goldfish_tty_probe(struct platform_device *pdev)
 
 static int goldfish_tty_remove(struct platform_device *pdev)
 {
-	struct goldfish_tty *qtty;
+	struct goldfish_tty *qtty = platform_get_drvdata(pdev);
 
 	mutex_lock(&goldfish_tty_lock);
 
-	qtty = &goldfish_ttys[pdev->id];
 	unregister_console(&qtty->console);
-	tty_unregister_device(goldfish_tty_driver, pdev->id);
+	tty_unregister_device(goldfish_tty_driver, qtty->console.index);
 	iounmap(qtty->base);
 	qtty->base = NULL;
 	free_irq(qtty->irq, pdev);
@@ -324,11 +330,19 @@ static int goldfish_tty_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id goldfish_tty_of_match[] = {
+	{ .compatible = "google,goldfish-tty", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, goldfish_tty_of_match);
+
 static struct platform_driver goldfish_tty_platform_driver = {
 	.probe = goldfish_tty_probe,
 	.remove = goldfish_tty_remove,
 	.driver = {
-		.name = "goldfish_tty"
+		.name = "goldfish_tty",
+		.of_match_table = goldfish_tty_of_match,
 	}
 };
 
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 965d0e240dcb..ba4a2a1eb3ff 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -926,6 +926,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
 	if (!ci)
 		return -ENOMEM;
 
+	spin_lock_init(&ci->lock);
 	ci->dev = dev;
 	ci->platdata = dev_get_platdata(dev);
 	ci->imx28_write_fix = !!(ci->platdata->flags &
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 68fc5fce4cc5..d8a045fc1fdb 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1884,8 +1884,6 @@ static int udc_start(struct ci_hdrc *ci)
 	struct usb_otg_caps *otg_caps = &ci->platdata->ci_otg_caps;
 	int retval = 0;
 
-	spin_lock_init(&ci->lock);
-
 	ci->gadget.ops          = &usb_gadget_ops;
 	ci->gadget.speed        = USB_SPEED_UNKNOWN;
 	ci->gadget.max_speed    = USB_SPEED_HIGH;
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 8df96cb3bb58..54849fe9cb01 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -425,11 +425,6 @@ static int config_usb_cfg_link(
 	}
 
 	f = usb_get_function(fi);
-	if (f == NULL) {
-		/* Are we trying to symlink PTP without MTP function? */
-		ret = -EINVAL; /* Invalid Configuration */
-		goto out;
-	}
 	if (IS_ERR(f)) {
 		ret = PTR_ERR(f);
 		goto out;
diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c
index 4d27af468a07..49dbef82a862 100644
--- a/drivers/usb/gadget/function/f_accessory.c
+++ b/drivers/usb/gadget/function/f_accessory.c
@@ -211,6 +211,7 @@ static inline struct acc_dev *func_to_dev(struct usb_function *f)
 static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size)
 {
 	struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+
 	if (!req)
 		return NULL;
 
@@ -1024,6 +1025,7 @@ acc_function_unbind(struct usb_configuration *c, struct usb_function *f)
 static void acc_start_work(struct work_struct *data)
 {
 	char *envp[2] = { "ACCESSORY=START", NULL };
+
 	kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp);
 }
 
diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c
index 2489a5fa2685..db7903d19c43 100644
--- a/drivers/usb/gadget/function/f_audio_source.c
+++ b/drivers/usb/gadget/function/f_audio_source.c
@@ -310,6 +310,7 @@ static struct device_attribute *audio_source_function_attributes[] = {
 static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size)
 {
 	struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+
 	if (!req)
 		return NULL;
 
diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c
index 23f866f39107..e37ece7b6e3e 100644
--- a/drivers/usb/gadget/function/f_mtp.c
+++ b/drivers/usb/gadget/function/f_mtp.c
@@ -361,6 +361,7 @@ static inline struct mtp_dev *func_to_mtp(struct usb_function *f)
 static struct usb_request *mtp_request_new(struct usb_ep *ep, int buffer_size)
 {
 	struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+
 	if (!req)
 		return NULL;
 
@@ -1143,6 +1144,7 @@ static int mtp_ctrlrequest(struct usb_composite_dev *cdev,
 		} else if (ctrl->bRequest == MTP_REQ_GET_DEVICE_STATUS
 				&& w_index == 0 && w_value == 0) {
 			struct mtp_device_status *status = cdev->req->buf;
+
 			status->wLength =
 				__constant_cpu_to_le16(sizeof(*status));
 
@@ -1165,6 +1167,7 @@ static int mtp_ctrlrequest(struct usb_composite_dev *cdev,
 	/* respond with data transfer or status phase? */
 	if (value >= 0) {
 		int rc;
+
 		cdev->req->zero = value < w_length;
 		cdev->req->length = value;
 		rc = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC);
@@ -1400,6 +1403,7 @@ static struct mtp_instance *to_mtp_instance(struct config_item *item)
 static void mtp_attr_release(struct config_item *item)
 {
 	struct mtp_instance *fi_mtp = to_mtp_instance(item);
+
 	usb_put_function_instance(&fi_mtp->func_inst);
 }
 
@@ -1520,7 +1524,7 @@ struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi,
 		pr_err("\t2: Create MTP function\n");
 		pr_err("\t3: Create and symlink PTP function"
 				" with a gadget configuration\n");
-		return NULL;
+		return ERR_PTR(-EINVAL); /* Invalid Configuration */
 	}
 
 	dev = fi_mtp->dev;
diff --git a/drivers/usb/gadget/functions.c b/drivers/usb/gadget/functions.c
index 389c1f3d0fee..b13f839e7368 100644
--- a/drivers/usb/gadget/functions.c
+++ b/drivers/usb/gadget/functions.c
@@ -58,7 +58,7 @@ struct usb_function *usb_get_function(struct usb_function_instance *fi)
 	struct usb_function *f;
 
 	f = fi->fd->alloc_func(fi);
-	if ((f == NULL) || IS_ERR(f))
+	if (IS_ERR(f))
 		return f;
 	f->fi = fi;
 	return f;
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 976195e748a3..fe7452f0f38a 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -130,6 +130,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
 	{ USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
 	{ USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */
+	{ USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */
 	{ USB_DEVICE(0x10C4, 0x8977) },	/* CEL MeshWorks DevKit Device */
 	{ USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
 	{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 494167fe6a2c..d3d6ec455151 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1012,6 +1012,8 @@ static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
 	{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) },
 	{ USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) },
+	{ USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID),
+		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ }					/* Terminating entry */
 };
 
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 21011c0a4c64..48ee04c94a75 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -595,6 +595,12 @@
 #define ATMEL_VID		0x03eb /* Vendor ID */
 #define STK541_PID		0x2109 /* Zigbee Controller */
 
+/*
+ * Texas Instruments
+ */
+#define TI_VID			0x0451
+#define TI_CC3200_LAUNCHPAD_PID	0xC32A /* SimpleLink Wi-Fi CC3200 LaunchPad */
+
 /*
  * Blackfin gnICE JTAG
  * http://docs.blackfin.uclinux.org/doku.php?id=hw:jtag:gnice
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index 5e67f63b2e46..02f86dd1a340 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -919,10 +919,15 @@ int usb_stor_CB_transport(struct scsi_cmnd *srb, struct us_data *us)
 
 	/* COMMAND STAGE */
 	/* let's send the command via the control pipe */
+	/*
+	 * Command is sometime (f.e. after scsi_eh_prep_cmnd) on the stack.
+	 * Stack may be vmallocated.  So no DMA for us.  Make a copy.
+	 */
+	memcpy(us->iobuf, srb->cmnd, srb->cmd_len);
 	result = usb_stor_ctrl_transfer(us, us->send_ctrl_pipe,
 				      US_CBI_ADSC, 
 				      USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 
-				      us->ifnum, srb->cmnd, srb->cmd_len);
+				      us->ifnum, us->iobuf, srb->cmd_len);
 
 	/* check the return code for the command */
 	usb_stor_dbg(us, "Call to usb_stor_ctrl_transfer() returned %d\n",
diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c
index 7f6c9e6cfc6c..1e56b50e4082 100644
--- a/drivers/video/fbdev/goldfishfb.c
+++ b/drivers/video/fbdev/goldfishfb.c
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
+#include <linux/acpi.h>
 
 enum {
 	FB_GET_WIDTH        = 0x00,
@@ -234,7 +235,7 @@ static int goldfish_fb_probe(struct platform_device *pdev)
 	fb->fb.var.activate	= FB_ACTIVATE_NOW;
 	fb->fb.var.height	= readl(fb->reg_base + FB_GET_PHYS_HEIGHT);
 	fb->fb.var.width	= readl(fb->reg_base + FB_GET_PHYS_WIDTH);
-	fb->fb.var.pixclock	= 10000;
+	fb->fb.var.pixclock	= 0;
 
 	fb->fb.var.red.offset = 11;
 	fb->fb.var.red.length = 5;
@@ -304,12 +305,25 @@ static int goldfish_fb_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id goldfish_fb_of_match[] = {
+	{ .compatible = "google,goldfish-fb", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_fb_of_match);
+
+static const struct acpi_device_id goldfish_fb_acpi_match[] = {
+	{ "GFSH0004", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, goldfish_fb_acpi_match);
 
 static struct platform_driver goldfish_fb_driver = {
 	.probe		= goldfish_fb_probe,
 	.remove		= goldfish_fb_remove,
 	.driver = {
-		.name = "goldfish_fb"
+		.name = "goldfish_fb",
+		.of_match_table = goldfish_fb_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_fb_acpi_match),
 	}
 };
 
diff --git a/fs/inode.c b/fs/inode.c
index b0edef500590..2c16b758831d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_rdev = 0;
 	inode->dirtied_when = 0;
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+	inode->i_wb_frn_winner = 0;
+	inode->i_wb_frn_avg_time = 0;
+	inode->i_wb_frn_history = 0;
+#endif
+
 	if (security_inode_alloc(inode))
 		goto out;
 	spin_lock_init(&inode->i_lock);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 52a28311e2a4..48efe62e1302 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -261,7 +261,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
 	}
 
 	ret = -EPROTONOSUPPORT;
-	if (minorversion == 0)
+	if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0)
 		ret = nfs4_callback_up_net(serv, net);
 	else if (xprt->ops->bc_up)
 		ret = xprt->ops->bc_up(serv, net);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8da263299754..4cd5c95d1ca0 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -66,7 +66,6 @@ enum {
 
 /* cgroup_root->flags */
 enum {
-	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0), /* __DEVEL__sane_behavior specified */
 	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
 	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
 };
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 2d9b650047a5..d49e26c6cdc7 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -429,6 +429,7 @@ struct intel_iommu {
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
 	struct idr pasid_idr;
+	u32 pasid_max;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
 	u32 *iommu_state; /* Store iommu states between suspend and resume.*/
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bab4053fb795..71b61b0b9b45 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -363,10 +363,10 @@ struct zone {
 	struct per_cpu_pageset __percpu *pageset;
 
 	/*
-	 * This is a per-zone reserve of pages that should not be
-	 * considered dirtyable memory.
+	 * This is a per-zone reserve of pages that are not available
+	 * to userspace allocations.
 	 */
-	unsigned long		dirty_balance_reserve;
+	unsigned long		totalreserve_pages;
 
 #ifndef CONFIG_SPARSEMEM
 	/*
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d8ca2eaa3a8b..f1a52c11de0e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -289,7 +289,6 @@ static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
-extern unsigned long dirty_balance_reserve;
 extern unsigned long nr_free_buffer_pages(void);
 extern unsigned long nr_free_pagecache_pages(void);
 
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index ec89d846324c..b7246d2ed7c9 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -233,6 +233,7 @@ static inline u64 ktime_get_raw_ns(void)
 
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
+extern u64 ktime_get_boot_fast_ns(void);
 
 /*
  * Timespec interfaces utilizing the ktime based ones
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index cfb5c406f344..d588107c9dce 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -56,6 +56,7 @@
 
 #define SMB_SUPER_MAGIC		0x517B
 #define CGROUP_SUPER_MAGIC	0x27e0eb
+#define CGROUP2_SUPER_MAGIC	0x63677270
 
 
 #define STACK_END_MAGIC		0x57AC6E9D
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f48196a7414c..03a1b3f754d6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -211,6 +211,7 @@ static unsigned long have_free_callback __read_mostly;
 /* Ditto for the can_fork callback. */
 static unsigned long have_canfork_callback __read_mostly;
 
+static struct file_system_type cgroup2_fs_type;
 static struct cftype cgroup_dfl_base_files[];
 static struct cftype cgroup_legacy_base_files[];
 
@@ -1650,10 +1651,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 			all_ss = true;
 			continue;
 		}
-		if (!strcmp(token, "__DEVEL__sane_behavior")) {
-			opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
-			continue;
-		}
 		if (!strcmp(token, "noprefix")) {
 			opts->flags |= CGRP_ROOT_NOPREFIX;
 			continue;
@@ -1720,15 +1717,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 			return -ENOENT;
 	}
 
-	if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
-		pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
-		if (nr_opts != 1) {
-			pr_err("sane_behavior: no other mount options allowed\n");
-			return -EINVAL;
-		}
-		return 0;
-	}
-
 	/*
 	 * If the 'all' option was specified select all the subsystems,
 	 * otherwise if 'none', 'name=' and a subsystem name options were
@@ -2007,6 +1995,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 			 int flags, const char *unused_dev_name,
 			 void *data)
 {
+	bool is_v2 = fs_type == &cgroup2_fs_type;
 	struct super_block *pinned_sb = NULL;
 	struct cgroup_subsys *ss;
 	struct cgroup_root *root;
@@ -2023,6 +2012,17 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	if (!use_task_css_set_links)
 		cgroup_enable_task_cg_lists();
 
+	if (is_v2) {
+		if (data) {
+			pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
+			return ERR_PTR(-EINVAL);
+		}
+		cgrp_dfl_root_visible = true;
+		root = &cgrp_dfl_root;
+		cgroup_get(&root->cgrp);
+		goto out_mount;
+	}
+
 	mutex_lock(&cgroup_mutex);
 
 	/* First find the desired set of subsystems */
@@ -2030,15 +2030,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	if (ret)
 		goto out_unlock;
 
-	/* look for a matching existing root */
-	if (opts.flags & CGRP_ROOT_SANE_BEHAVIOR) {
-		cgrp_dfl_root_visible = true;
-		root = &cgrp_dfl_root;
-		cgroup_get(&root->cgrp);
-		ret = 0;
-		goto out_unlock;
-	}
-
 	/*
 	 * Destruction of cgroup root is asynchronous, so subsystems may
 	 * still be dying after the previous unmount.  Let's drain the
@@ -2149,9 +2140,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 
 	if (ret)
 		return ERR_PTR(ret);
-
+out_mount:
 	dentry = kernfs_mount(fs_type, flags, root->kf_root,
-				CGROUP_SUPER_MAGIC, &new_sb);
+			      is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
+			      &new_sb);
 	if (IS_ERR(dentry) || !new_sb)
 		cgroup_put(&root->cgrp);
 
@@ -2194,6 +2186,12 @@ static struct file_system_type cgroup_fs_type = {
 	.kill_sb = cgroup_kill_sb,
 };
 
+static struct file_system_type cgroup2_fs_type = {
+	.name = "cgroup2",
+	.mount = cgroup_mount,
+	.kill_sb = cgroup_kill_sb,
+};
+
 /**
  * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
  * @task: target task
@@ -5390,6 +5388,7 @@ int __init cgroup_init(void)
 
 	WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
 	WARN_ON(register_filesystem(&cgroup_fs_type));
+	WARN_ON(register_filesystem(&cgroup2_fs_type));
 	WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
 
 	return 0;
diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
index f6f9b9b3a4a8..d751bc2d0d6e 100644
--- a/kernel/sched/cpufreq_sched.c
+++ b/kernel/sched/cpufreq_sched.c
@@ -289,7 +289,7 @@ static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
 	pr_debug("%s: throttle threshold = %u [ns]\n",
 		  __func__, gd->up_throttle_nsec);
 
-	rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr());
+	rc = sysfs_create_group(&policy->kobj, get_sysfs_attr());
 	if (rc) {
 		pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc);
 		goto err;
@@ -332,7 +332,7 @@ static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
 		put_task_struct(gd->task);
 	}
 
-	sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr());
+	sysfs_remove_group(&policy->kobj, get_sysfs_attr());
 
 	policy->governor_data = NULL;
 
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index 68a24a044b0a..079b18802f17 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -725,6 +725,7 @@ schedtune_init_cgroups(void)
 	for_each_possible_cpu(cpu) {
 		bg = &per_cpu(cpu_boost_groups, cpu);
 		memset(bg, 0, sizeof(struct boost_groups));
+		raw_spin_lock_init(&bg->lock);
 	}
 
 	pr_info("schedtune: configured to support %d boost groups\n",
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 445601c580d6..ede4bf13d3e9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -424,6 +424,35 @@ u64 ktime_get_raw_fast_ns(void)
 }
 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
 
+/**
+ * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
+ *
+ * To keep it NMI safe since we're accessing from tracing, we're not using a
+ * separate timekeeper with updates to monotonic clock and boot offset
+ * protected with seqlocks. This has the following minor side effects:
+ *
+ * (1) Its possible that a timestamp be taken after the boot offset is updated
+ * but before the timekeeper is updated. If this happens, the new boot offset
+ * is added to the old timekeeping making the clock appear to update slightly
+ * earlier:
+ *    CPU 0                                        CPU 1
+ *    timekeeping_inject_sleeptime64()
+ *    __timekeeping_inject_sleeptime(tk, delta);
+ *                                                 timestamp();
+ *    timekeeping_update(tk, TK_CLEAR_NTP...);
+ *
+ * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
+ * partially updated.  Since the tk->offs_boot update is a rare event, this
+ * should be a rare occurrence which postprocessing should be able to handle.
+ */
+u64 notrace ktime_get_boot_fast_ns(void)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+
+	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+}
+EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
+
 /* Suspend-time cycles value for halted fast timekeeper. */
 static cycle_t cycles_at_suspend;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bff7d4c69274..293af3346c8c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -890,6 +890,7 @@ static struct {
 	{ trace_clock,			"perf",		1 },
 	{ ktime_get_mono_fast_ns,	"mono",		1 },
 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
+	{ ktime_get_boot_fast_ns,	"boot",		1 },
 	ARCH_TRACE_CLOCKS
 };
 
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
index 5464c8744ea9..e24388a863a7 100644
--- a/lib/mpi/mpi-pow.c
+++ b/lib/mpi/mpi-pow.c
@@ -64,8 +64,13 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
 	if (!esize) {
 		/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
 		 * depending on if MOD equals 1.  */
-		rp[0] = 1;
 		res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+		if (res->nlimbs) {
+			if (mpi_resize(res, 1) < 0)
+				goto enomem;
+			rp = res->d;
+			rp[0] = 1;
+		}
 		res->sign = 0;
 		goto leave;
 	}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index fd51ebfc423f..1e6769449ac2 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -278,7 +278,12 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
 	unsigned long nr_pages;
 
 	nr_pages = zone_page_state(zone, NR_FREE_PAGES);
-	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
+	/*
+	 * Pages reserved for the kernel should not be considered
+	 * dirtyable, to prevent a situation where reclaim has to
+	 * clean pages in order to balance the zones.
+	 */
+	nr_pages -= min(nr_pages, zone->totalreserve_pages);
 
 	nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
 	nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
@@ -332,7 +337,12 @@ static unsigned long global_dirtyable_memory(void)
 	unsigned long x;
 
 	x = global_page_state(NR_FREE_PAGES);
-	x -= min(x, dirty_balance_reserve);
+	/*
+	 * Pages reserved for the kernel should not be considered
+	 * dirtyable, to prevent a situation where reclaim has to
+	 * clean pages in order to balance the zones.
+	 */
+	x -= min(x, totalreserve_pages);
 
 	x += global_page_state(NR_INACTIVE_FILE);
 	x += global_page_state(NR_ACTIVE_FILE);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1874f2902237..cc99ff2d85c5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -114,13 +114,6 @@ static DEFINE_SPINLOCK(managed_page_count_lock);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 unsigned long totalcma_pages __read_mostly;
-/*
- * When calculating the number of globally allowed dirty pages, there
- * is a certain number of per-zone reserves that should not be
- * considered dirtyable memory.  This is the sum of those reserves
- * over all existing zones that contribute dirtyable memory.
- */
-unsigned long dirty_balance_reserve __read_mostly;
 
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
@@ -5990,20 +5983,12 @@ static void calculate_totalreserve_pages(void)
 
 			if (max > zone->managed_pages)
 				max = zone->managed_pages;
+
+			zone->totalreserve_pages = max;
+
 			reserve_pages += max;
-			/*
-			 * Lowmem reserves are not available to
-			 * GFP_HIGHUSER page cache allocations and
-			 * kswapd tries to balance zones to their high
-			 * watermark.  As a result, neither should be
-			 * regarded as dirtyable memory, to prevent a
-			 * situation where reclaim has to clean pages
-			 * in order to balance the zones.
-			 */
-			zone->dirty_balance_reserve = max;
 		}
 	}
-	dirty_balance_reserve = reserve_pages;
 	totalreserve_pages = reserve_pages;
 }
 
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 9aba9e93c0a2..ee9082792530 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -949,4 +949,4 @@ static int __init init_default_flow_dissectors(void)
 	return 0;
 }
 
-late_initcall_sync(init_default_flow_dissectors);
+core_initcall(init_default_flow_dissectors);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a618b4b86fa4..47a967fed8ff 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -72,6 +72,7 @@ struct cfg80211_registered_device {
 	struct list_head bss_list;
 	struct rb_root bss_tree;
 	u32 bss_generation;
+	u32 bss_entries;
 	struct cfg80211_scan_request *scan_req; /* protected by RTNL */
 	struct sk_buff *scan_msg;
 	struct cfg80211_sched_scan_request __rcu *sched_scan_req;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 30f967665e84..6e7b86ca2abd 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -56,6 +56,19 @@
  * also linked into the probe response struct.
  */
 
+/*
+ * Limit the number of BSS entries stored in mac80211. Each one is
+ * a bit over 4k at most, so this limits to roughly 4-5M of memory.
+ * If somebody wants to really attack this though, they'd likely
+ * use small beacons, and only one type of frame, limiting each of
+ * the entries to a much smaller size (in order to generate more
+ * entries in total, so overhead is bigger.)
+ */
+static int bss_entries_limit = 1000;
+module_param(bss_entries_limit, int, 0644);
+MODULE_PARM_DESC(bss_entries_limit,
+                 "limit to number of scan BSS entries (per wiphy, default 1000)");
+
 #define IEEE80211_SCAN_RESULT_EXPIRE	(7 * HZ)
 
 static void bss_free(struct cfg80211_internal_bss *bss)
@@ -136,6 +149,10 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev,
 
 	list_del_init(&bss->list);
 	rb_erase(&bss->rbn, &rdev->bss_tree);
+	rdev->bss_entries--;
+	WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list),
+		  "rdev bss entries[%d]/list[empty:%d] corruption\n",
+		  rdev->bss_entries, list_empty(&rdev->bss_list));
 	bss_ref_put(rdev, bss);
 	return true;
 }
@@ -162,6 +179,40 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev,
 		rdev->bss_generation++;
 }
 
+static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev)
+{
+	struct cfg80211_internal_bss *bss, *oldest = NULL;
+	bool ret;
+
+	lockdep_assert_held(&rdev->bss_lock);
+
+	list_for_each_entry(bss, &rdev->bss_list, list) {
+		if (atomic_read(&bss->hold))
+			continue;
+
+		if (!list_empty(&bss->hidden_list) &&
+		    !bss->pub.hidden_beacon_bss)
+			continue;
+
+		if (oldest && time_before(oldest->ts, bss->ts))
+			continue;
+		oldest = bss;
+	}
+
+	if (WARN_ON(!oldest))
+		return false;
+
+	/*
+	 * The callers make sure to increase rdev->bss_generation if anything
+	 * gets removed (and a new entry added), so there's no need to also do
+	 * it here.
+	 */
+
+	ret = __cfg80211_unlink_bss(rdev, oldest);
+	WARN_ON(!ret);
+	return ret;
+}
+
 void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
 			   bool send_message)
 {
@@ -687,6 +738,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
 	const u8 *ie;
 	int i, ssidlen;
 	u8 fold = 0;
+	u32 n_entries = 0;
 
 	ies = rcu_access_pointer(new->pub.beacon_ies);
 	if (WARN_ON(!ies))
@@ -710,6 +762,12 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
 	/* This is the bad part ... */
 
 	list_for_each_entry(bss, &rdev->bss_list, list) {
+		/*
+		 * we're iterating all the entries anyway, so take the
+		 * opportunity to validate the list length accounting
+		 */
+		n_entries++;
+
 		if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid))
 			continue;
 		if (bss->pub.channel != new->pub.channel)
@@ -738,6 +796,10 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
 				   new->pub.beacon_ies);
 	}
 
+	WARN_ONCE(n_entries != rdev->bss_entries,
+		  "rdev bss entries[%d]/list[len:%d] corruption\n",
+		  rdev->bss_entries, n_entries);
+
 	return true;
 }
 
@@ -890,7 +952,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 			}
 		}
 
+		if (rdev->bss_entries >= bss_entries_limit &&
+		    !cfg80211_bss_expire_oldest(rdev)) {
+			kfree(new);
+			goto drop;
+		}
+
 		list_add_tail(&new->list, &rdev->bss_list);
+		rdev->bss_entries++;
 		rb_insert_bss(rdev, new);
 		found = new;
 	}
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index dc0027b28b04..53426a6ee6dc 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -623,8 +623,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
 	/* released below */
 	cred = get_current_cred();
 	cxt = cred_cxt(cred);
-	profile = aa_cred_profile(cred);
-	previous_profile = cxt->previous;
+	profile = aa_get_newest_profile(aa_cred_profile(cred));
+	previous_profile = aa_get_newest_profile(cxt->previous);
 
 	if (unconfined(profile)) {
 		info = "unconfined";
@@ -720,6 +720,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
 out:
 	aa_put_profile(hat);
 	kfree(name);
+	aa_put_profile(profile);
+	aa_put_profile(previous_profile);
 	put_cred(cred);
 
 	return error;