Merge 7b7028edf9 ("Merge tag 'memblock-v5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock") into android-mainline

Steps on the way to 5.12-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I306316c78fa6a301b3695e23f03e59a7e413cb60
This commit is contained in:
Greg Kroah-Hartman 2021-03-05 10:30:21 +01:00
commit aa21ac2d00
327 changed files with 7799 additions and 4841 deletions

View File

@ -683,7 +683,7 @@ Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni"
,month="October"
,year="2001"
,note="Available:
\url{http://lkml.org/lkml/2001/10/13/105}
\url{https://lore.kernel.org/r/Pine.LNX.4.33.0110131015410.8707-100000@penguin.transmeta.com}
[Viewed August 21, 2004]"
,annotation={
}
@ -826,7 +826,7 @@ Symposium on Distributed Computing}
,month="October"
,year="2002"
,note="Available:
\url{https://lkml.org/lkml/2002/10/24/262}
\url{https://lore.kernel.org/r/3DB86B05.447E7410@us.ibm.com}
[Viewed February 15, 2014]"
,annotation={
Mingming Cao's patch to introduce RCU to SysV IPC.
@ -839,7 +839,7 @@ Symposium on Distributed Computing}
,month="March"
,year="2003"
,note="Available:
\url{http://lkml.org/lkml/2003/3/9/205}
\url{https://lore.kernel.org/r/Pine.LNX.4.44.0303091831560.2129-100000@home.transmeta.com}
[Viewed March 13, 2006]"
,annotation={
Linus suggests replacing brlock with RCU and/or seqlocks:
@ -1036,15 +1036,15 @@ Add per-cpu batch counter"
,annotation={
RCU runs reasonably on a 512-CPU SGI using Manfred Spraul's patches,
which may be found at:
https://lkml.org/lkml/2004/5/20/49 (split vars into cachelines)
https://lkml.org/lkml/2004/5/22/114 (cpu_quiet() patch)
https://lkml.org/lkml/2004/5/25/24 (0/5)
https://lkml.org/lkml/2004/5/25/23 (1/5)
https://lkml.org/lkml/2004/5/25/265 (works for Jack)
https://lkml.org/lkml/2004/5/25/20 (2/5)
https://lkml.org/lkml/2004/5/25/22 (3/5)
https://lkml.org/lkml/2004/5/25/19 (4/5)
https://lkml.org/lkml/2004/5/25/21 (5/5)
https://lore.kernel.org/r/40AC9823.6020709@colorfullife.com (split vars into cachelines)
https://lore.kernel.org/r/Pine.LNX.4.44.0405222141260.11106-100000@dbl.q-ag.de (cpu_quiet() patch)
https://lore.kernel.org/r/200405250535.i4P5ZJo8017583@dbl.q-ag.de (0/5)
https://lore.kernel.org/r/200405250535.i4P5ZKAQ017591@dbl.q-ag.de (1/5)
https://lore.kernel.org/r/20040525203215.GB5127@sgi.com (works for Jack)
https://lore.kernel.org/r/200405250535.i4P5ZLiR017599@dbl.q-ag.de (2/5)
https://lore.kernel.org/r/200405250535.i4P5ZMFt017607@dbl.q-ag.de (3/5)
https://lore.kernel.org/r/200405250535.i4P5ZN6g017615@dbl.q-ag.de (4/5)
https://lore.kernel.org/r/200405250535.i4P5ZO7I017623@dbl.q-ag.de (5/5)
}
}
@ -1106,7 +1106,7 @@ Oregon Health and Sciences University"
,month="August"
,year="2004"
,note="Available:
\url{http://lkml.org/lkml/2004/8/6/237}
\url{https://lore.kernel.org/r/20040807192424.GF3936@in.ibm.com}
[Viewed June 8, 2010]"
,annotation={
Introduce rcu_dereference().
@ -1119,7 +1119,7 @@ Oregon Health and Sciences University"
,month="August"
,year="2004"
,note="Available:
\url{http://lkml.org/lkml/2004/8/30/87}
\url{https://lore.kernel.org/r/1093873222.984.12.camel@new.localdomain}
[Viewed February 17, 2005]"
,annotation={
Uses active code in rcu_read_lock() and rcu_read_unlock() to
@ -1186,7 +1186,7 @@ Oregon Health and Sciences University"
,month="October"
,year="2004"
,note="Available:
\url{http://lkml.org/lkml/2004/10/23/241}
\url{https://lore.kernel.org/r/20041023202723.GA1930@us.ibm.com}
[Viewed June 8, 2010]"
,annotation={
Introduce rcu_assign_pointer().
@ -1203,7 +1203,7 @@ Oregon Health and Sciences University"
,annotation={
James Morris posts Kaigai Kohei's patch to LKML.
[Viewed December 10, 2004]
Kaigai's patch is at https://lkml.org/lkml/2004/9/27/52
Kaigai's patch is at https://lore.kernel.org/r/200409271057.i8RAvcA1007873@mailsv.bs1.fc.nec.co.jp
}
}
@ -1241,7 +1241,7 @@ Oregon Health and Sciences University"
,year="2005"
,day="17"
,note="Available:
\url{http://lkml.org/lkml/2005/3/17/199}
\url{https://lore.kernel.org/r/20050318002026.GA2693@us.ibm.com}
[Viewed September 5, 2005]"
,annotation={
First posting showing how RCU can be safely adapted for
@ -1256,7 +1256,7 @@ Oregon Health and Sciences University"
,year="2005"
,day="18"
,note="Available:
\url{http://lkml.org/lkml/2005/3/18/122}
\url{https://lore.kernel.org/r/Pine.OSF.4.05.10503181336310.2466-100000@da410.phys.au.dk}
[Viewed March 30, 2006]"
,annotation={
Esben Nielsen suggests read-side suppression of grace-period
@ -1302,7 +1302,7 @@ Data Structures"
,month="May"
,year="2005"
,note="Available:
\url{http://lkml.org/lkml/2005/5/9/185}
\url{https://lore.kernel.org/r/20050510012444.GA3011@us.ibm.com}
[Viewed May 13, 2005]"
,annotation={
First publication of working lock-based deferred free patches
@ -1385,7 +1385,7 @@ Data Structures"
,day="1"
,year="2005"
,note="Available:
\url{http://lkml.org/lkml/2005/8/1/155}
\url{https://lore.kernel.org/r/20050801171137.GA1754@us.ibm.com}
[Viewed March 14, 2006]"
,annotation={
First operating counter-based realtime RCU patch posted to LKML.
@ -1399,7 +1399,7 @@ Data Structures"
,day="8"
,year="2005"
,note="Available:
\url{http://lkml.org/lkml/2005/8/8/108}
\url{https://lore.kernel.org/r/20050808144216.GA1307@us.ibm.com}
[Viewed March 14, 2006]"
,annotation={
First operating counter-based realtime RCU patch posted to LKML,
@ -1415,7 +1415,7 @@ Data Structures"
,day="1"
,year="2005"
,note="Available:
\url{http://lkml.org/lkml/2005/10/1/70}
\url{https://lore.kernel.org/r/20051001182056.GA1613@us.ibm.com}
[Viewed March 14, 2006]"
,annotation={
First rcutorture patch.
@ -1429,7 +1429,7 @@ Data Structures"
,day="6"
,year="2006"
,note="Available:
\url{https://lkml.org/lkml/2006/1/7/22}
\url{https://lore.kernel.org/r/20060106.231054.43576567.davem@davemloft.net}
[Viewed February 29, 2012]"
,annotation={
David Miller's view on hashed arrays of locks: used to really
@ -1464,7 +1464,7 @@ Distributed Processing Symposium"
,day="20"
,year="2006"
,note="Available:
\url{http://lkml.org/lkml/2006/6/20/238}
\url{https://lore.kernel.org/r/20060408134707.22479.33814.sendpatchset@linux.site}
[Viewed March 25, 2008]"
,annotation={
RCU-protected radix tree.
@ -1554,7 +1554,7 @@ Revised:
,day="28"
,year="2006"
,note="Available:
\url{http://lkml.org/lkml/2006/9/28/160}
\url{https://lore.kernel.org/r/20060928142616.GA20185@infradead.org}
[Viewed March 27, 2008]"
}
@ -1593,7 +1593,7 @@ Revised:
,year="2006"
,day=26
,note="Available:
\url{http://lkml.org/lkml/2006/10/26/73}
\url{https://lore.kernel.org/r/20061026105731.GE11803@in.ibm.com}
[Viewed January 26, 2009]"
,annotation={
RCU-based reader-writer lock that allows readers to proceed with
@ -1612,12 +1612,12 @@ Revised:
,year="2006"
,day=17
,note="Available:
\url{http://lkml.org/lkml/2006/11/17/56}
\url{https://lore.kernel.org/r/20061117092925.GT7164@kernel.dk}
[Viewed May 28, 2007]"
,annotation={
SRCU's grace periods are too slow for Jens, even after a
factor-of-three speedup.
Sped-up version of SRCU at http://lkml.org/lkml/2006/11/17/359.
Sped-up version of SRCU at https://lore.kernel.org/r/20061118002845.GF2632@us.ibm.com.
}
}
@ -1629,7 +1629,7 @@ Revised:
,year="2006"
,day=19
,note="Available:
\url{http://lkml.org/lkml/2006/11/19/69}
\url{https://lore.kernel.org/r/20061119190027.GA3676@oleg}
[Viewed May 28, 2007]"
,annotation={
First cut of QRCU. Expanded/corrected versions followed.
@ -1644,7 +1644,7 @@ Revised:
,year="2006"
,day=30
,note="Available:
\url{http://lkml.org/lkml/2006/11/29/330}
\url{https://lore.kernel.org/r/20061130015714.GC1350@oleg}
[Viewed November 26, 2008]"
,annotation={
Expanded/corrected version of QRCU.
@ -1709,7 +1709,7 @@ Revised:
,year="2007"
,day=3
,note="Available:
\url{http://lkml.org/lkml/2007/1/3/112}
\url{https://lore.kernel.org/r/20070103152738.GA16063@localdomain}
[Viewed May 28, 2007]"
,annotation={
Patch for list_splice_rcu().
@ -1737,7 +1737,7 @@ Revised:
,year="2007"
,day=28
,note="Available:
\url{http://lkml.org/lkml/2007/1/28/34}
\url{https://lore.kernel.org/r/20070128120509.719287000@programming.kicks-ass.net}
[Viewed March 27, 2008]"
,annotation={
RCU-like implementation for frequent updaters and rare readers(!).
@ -1767,7 +1767,7 @@ Revised:
,year="2007"
,day=24
,note="Available:
\url{http://lkml.org/lkml/2007/2/25/18}
\url{https://lore.kernel.org/r/20070225062349.GA17468@linux.vnet.ibm.com}
[Viewed March 27, 2008]"
,annotation={
Patch for QRCU supplying lock-free fast path.
@ -1846,7 +1846,7 @@ Revised:
,annotation={
LWN article describing Promela and spin, and also using Oleg
Nesterov's QRCU as an example (with Paul McKenney's fastpath).
Merged patch at: http://lkml.org/lkml/2007/2/25/18
Merged patch at: https://lore.kernel.org/r/20070225062349.GA17468@linux.vnet.ibm.com
}
}
@ -1885,7 +1885,7 @@ Revised:
,day="10"
,year="2007"
,note="Available:
\url{http://lkml.org/lkml/2007/9/10/213}
\url{https://lore.kernel.org/r/20070910183004.GA3299@linux.vnet.ibm.com}
[Viewed October 25, 2007]"
,annotation={
Final patch for preemptable RCU to -rt. (Later patches were
@ -1933,7 +1933,7 @@ Revised:
,day="20"
,year="2007"
,note="Available:
\url{http://lkml.org/lkml/2007/12/20/244}
\url{https://lore.kernel.org/r/20071220142540.GB22523@Krystal}
[Viewed March 27, 2008]"
,annotation={
Request for call_rcu_sched() and rcu_barrier_sched().
@ -2013,7 +2013,7 @@ Revised:
,day="29"
,year="2008"
,note="Available:
\url{http://lkml.org/lkml/2008/1/29/208}
\url{https://lore.kernel.org/r/Pine.LNX.4.58.0801291113350.20371@gandalf.stny.rr.com}
[Viewed March 27, 2008]"
,annotation={
Patch that prevents preemptible RCU from unnecessarily waking
@ -2028,7 +2028,7 @@ Revised:
,day="1"
,year="2008"
,note="Available:
\url{http://lkml.org/lkml/2008/2/2/255}
\url{https://lore.kernel.org/r/20080202214124.GA28612@linux.vnet.ibm.com}
[Viewed October 18, 2008]"
,annotation={
Explanation of compilers violating dependency ordering.
@ -2088,7 +2088,7 @@ lot of {Linux} into your technology!!!"
,day="3"
,year="2008"
,note="Available:
\url{http://lkml.org/lkml/2008/6/2/539}
\url{https://lore.kernel.org/r/4844BE83.5010401@cn.fujitsu.com}
[Viewed December 10, 2008]"
,annotation={
Updated RCU classic algorithm. Introduced multi-tailed list
@ -2122,7 +2122,7 @@ lot of {Linux} into your technology!!!"
,day="21"
,year="2008"
,note="Available:
\url{http://lkml.org/lkml/2008/8/21/336}
\url{https://lore.kernel.org/r/48AD8969.7060900@colorfullife.com}
[Viewed December 8, 2008]"
,annotation={
State-based RCU. One key thing that this patch does is to
@ -2137,7 +2137,7 @@ lot of {Linux} into your technology!!!"
,day="6"
,year="2008"
,note="Available:
\url{http://lkml.org/lkml/2008/9/6/86}
\url{https://lore.kernel.org/r/48C2B1D2.5070801@colorfullife.com}
[Viewed December 8, 2008]"
,annotation={
Manfred notes a fix required to my attempt to separate irq
@ -2183,7 +2183,7 @@ lot of {Linux} into your technology!!!"
,day="14"
,year="2009"
,note="Available:
\url{http://lkml.org/lkml/2009/1/14/449}
\url{https://lore.kernel.org/r/20090114202044.GJ6734@linux.vnet.ibm.com}
[Viewed January 15, 2009]"
,annotation={
Small-footprint implementation of RCU for uniprocessor
@ -2218,7 +2218,7 @@ lot of {Linux} into your technology!!!"
git://lttng.org/userspace-rcu.git
http://lttng.org/cgi-bin/gitweb.cgi?p=userspace-rcu.git
http://lttng.org/urcu
http://lkml.org/lkml/2009/2/5/572
https://lore.kernel.org/r/20090206030543.GB8560@Krystal
}
}
@ -2258,7 +2258,7 @@ lot of {Linux} into your technology!!!"
,day="25"
,year="2009"
,note="Available:
\url{http://lkml.org/lkml/2009/6/25/306}
\url{https://lore.kernel.org/r/20090625160706.GA9467@linux.vnet.ibm.com}
[Viewed August 16, 2009]"
,annotation={
First posting of expedited RCU to be accepted into -tip.
@ -2272,7 +2272,7 @@ lot of {Linux} into your technology!!!"
,day="23"
,year="2009"
,note="Available:
\url{http://lkml.org/lkml/2009/7/23/294}
\url{https://lore.kernel.org/r/20090724001429.GA17374@linux.vnet.ibm.com}
[Viewed August 15, 2009]"
,annotation={
First posting of simple and fast preemptable RCU.
@ -2350,7 +2350,7 @@ lot of {Linux} into your technology!!!"
,month="December"
,year="2009"
,note="Available:
\url{http://lkml.org/lkml/2009/10/18/129}
\url{https://lore.kernel.org/r/20091018232918.GA7385@Krystal}
[Viewed December 29, 2009]"
,annotation={
Mathieu proposed defer_rcu() with fixed-size per-thread pool
@ -2518,7 +2518,7 @@ lot of {Linux} into your technology!!!"
,month="January"
,year="2011"
,note="Available:
\url{https://lkml.org/lkml/2011/1/18/322}
\url{https://lore.kernel.org/r/AANLkTimajU0x1v6y3rH2+jr-bZ=tNLs1S_agXdGGAa3S@mail.gmail.com}
[Viewed March 4, 2011]"
,annotation={
"The RCU-based name lookup is at the other end of the spectrum - the

View File

@ -3,8 +3,8 @@ Control Groupstats
==================
Control Groupstats is inspired by the discussion at
http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
https://lore.kernel.org/r/461CF883.2030308@sw.ru and implements per cgroup statistics as
suggested by Andrew Morton in https://lore.kernel.org/r/20070411114927.1277d7c9.akpm@linux-foundation.org.
Per cgroup statistics infrastructure re-uses code from the taskstats
interface. A new set of cgroup operations are registered with commands

View File

@ -226,10 +226,11 @@ Configuring the kernel
all module options to built in (=y) options. You can
also preserve modules by LMC_KEEP.
"make kvmconfig" Enable additional options for kvm guest kernel support.
"make kvm_guest.config" Enable additional options for kvm guest kernel
support.
"make xenconfig" Enable additional options for xen dom0 guest kernel
support.
"make xen.config" Enable additional options for xen dom0 guest kernel
support.
"make tinyconfig" Configure the tiniest possible kernel.

View File

@ -963,21 +963,21 @@ References
2. Singh, Balbir. Memory Controller (RSS Control),
http://lwn.net/Articles/222762/
3. Emelianov, Pavel. Resource controllers based on process cgroups
http://lkml.org/lkml/2007/3/6/198
https://lore.kernel.org/r/45ED7DEC.7010403@sw.ru
4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
http://lkml.org/lkml/2007/4/9/78
https://lore.kernel.org/r/461A3010.90403@sw.ru
5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
http://lkml.org/lkml/2007/5/30/244
https://lore.kernel.org/r/465D9739.8070209@openvz.org
6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
subsystem (v3), http://lwn.net/Articles/235534/
8. Singh, Balbir. RSS controller v2 test results (lmbench),
http://lkml.org/lkml/2007/5/17/232
https://lore.kernel.org/r/464C95D4.7070806@linux.vnet.ibm.com
9. Singh, Balbir. RSS controller v2 AIM9 results
http://lkml.org/lkml/2007/5/18/1
https://lore.kernel.org/r/464D267A.50107@linux.vnet.ibm.com
10. Singh, Balbir. Memory controller v6 test results,
http://lkml.org/lkml/2007/8/19/36
https://lore.kernel.org/r/20070819094658.654.84837.sendpatchset@balbir-laptop
11. Singh, Balbir. Memory controller introduction (v6),
http://lkml.org/lkml/2007/8/17/69
https://lore.kernel.org/r/20070817084228.26003.12568.sendpatchset@balbir-laptop
12. Corbet, Jonathan, Controlling memory use in cgroups,
http://lwn.net/Articles/243795/

View File

@ -1,3 +1,5 @@
.. _cgroup-v2:
================
Control Group v2
================
@ -172,7 +174,6 @@ disabling controllers in v1 and make them always available in v2.
cgroup v2 currently supports the following mount options.
nsdelegate
Consider cgroup namespaces as delegation boundaries. This
option is system wide and can only be set on mount or modified
through remount from the init namespace. The mount option is
@ -180,7 +181,6 @@ cgroup v2 currently supports the following mount options.
Delegation section for details.
memory_localevents
Only populate memory.events with data for the current cgroup,
and not any subtrees. This is legacy behaviour, the default
behaviour without this option is to include subtree counts.
@ -189,7 +189,6 @@ cgroup v2 currently supports the following mount options.
option is ignored on non-init namespace mounts.
memory_recursiveprot
Recursively apply memory.min and memory.low protection to
entire subtrees, without requiring explicit downward
propagation into leaf cgroups. This allows protecting entire
@ -786,7 +785,6 @@ Core Interface Files
All cgroup core files are prefixed with "cgroup."
cgroup.type
A read-write single value file which exists on non-root
cgroups.
@ -954,6 +952,8 @@ All cgroup core files are prefixed with "cgroup."
Controllers
===========
.. _cgroup-v2-cpu:
CPU
---
@ -1259,9 +1259,9 @@ PAGE_SIZE multiple when read back.
can show up in the middle. Don't rely on items remaining in a
fixed position; use the keys to look up specific values!
If the entry has no per-node counter(or not show in the
mempry.numa_stat). We use 'npn'(non-per-node) as the tag
to indicate that it will not show in the mempry.numa_stat.
If the entry has no per-node counter (or is not shown in
memory.numa_stat), we use 'npn' (non-per-node) as the tag
to indicate that it will not show in memory.numa_stat.
anon
Amount of memory used in anonymous mappings such as
@ -1277,11 +1277,11 @@ PAGE_SIZE multiple when read back.
pagetables
Amount of memory allocated for page tables.
percpu(npn)
percpu (npn)
Amount of memory used for storing per-cpu kernel
data structures.
sock(npn)
sock (npn)
Amount of memory used in network transmission buffers
shmem
@ -1329,7 +1329,7 @@ PAGE_SIZE multiple when read back.
Part of "slab" that cannot be reclaimed on memory
pressure.
slab(npn)
slab (npn)
Amount of memory used for storing in-kernel data
structures.
@ -1357,39 +1357,39 @@ PAGE_SIZE multiple when read back.
workingset_nodereclaim
Number of times a shadow node has been reclaimed
pgfault(npn)
pgfault (npn)
Total number of page faults incurred
pgmajfault(npn)
pgmajfault (npn)
Number of major page faults incurred
pgrefill(npn)
pgrefill (npn)
Amount of scanned pages (in an active LRU list)
pgscan(npn)
pgscan (npn)
Amount of scanned pages (in an inactive LRU list)
pgsteal(npn)
pgsteal (npn)
Amount of reclaimed pages
pgactivate(npn)
pgactivate (npn)
Amount of pages moved to the active LRU list
pgdeactivate(npn)
pgdeactivate (npn)
Amount of pages moved to the inactive LRU list
pglazyfree(npn)
pglazyfree (npn)
Amount of pages postponed to be freed under memory pressure
pglazyfreed(npn)
pglazyfreed (npn)
Amount of reclaimed lazyfree pages
thp_fault_alloc(npn)
thp_fault_alloc (npn)
Number of transparent hugepages which were allocated to satisfy
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
is not set.
thp_collapse_alloc(npn)
thp_collapse_alloc (npn)
Number of transparent hugepages which were allocated to allow
collapsing an existing range of pages. This counter is not
present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
@ -1558,7 +1558,7 @@ IO Interface Files
8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021
io.cost.qos
A read-write nested-keyed file with exists only on the root
A read-write nested-keyed file which exists only on the root
cgroup.
This file configures the Quality of Service of the IO cost
@ -1613,7 +1613,7 @@ IO Interface Files
automatic mode can be restored by setting "ctrl" to "auto".
io.cost.model
A read-write nested-keyed file with exists only on the root
A read-write nested-keyed file which exists only on the root
cgroup.
This file configures the cost model of the IO cost model based
@ -2000,10 +2000,12 @@ Cpuset Interface Files
cpuset-enabled cgroups. This flag is owned by the parent cgroup
and is not delegatable.
It accepts only the following input values when written to.
It accepts only the following input values when written to.
"root" - a partition root
"member" - a non-root member of a partition
======== ================================
"root" a partition root
"member" a non-root member of a partition
======== ================================
When set to be a partition root, the current cgroup is the
root of a new partition or scheduling domain that comprises
@ -2044,9 +2046,11 @@ Cpuset Interface Files
root to change. On read, the "cpuset.sched.partition" file
can show the following values.
"member" Non-root member of a partition
"root" Partition root
"root invalid" Invalid partition root
============== ==============================
"member" Non-root member of a partition
"root" Partition root
"root invalid" Invalid partition root
============== ==============================
It is a partition root if the first 2 partition root conditions
above are true and at least one CPU from "cpuset.cpus" is
@ -2219,7 +2223,7 @@ Without cgroup namespace, the "/proc/$PID/cgroup" file shows the
complete path of the cgroup of a process. In a container setup where
a set of cgroups and namespaces are intended to isolate processes the
"/proc/$PID/cgroup" file may leak potential system level information
to the isolated processes. For Example::
to the isolated processes. For example::
# cat /proc/self/cgroup
0::/batchjobs/container_id1

View File

@ -107,7 +107,7 @@ will lead to quite erratic information inside ``/proc/stat``::
References
----------
- http://lkml.org/lkml/2007/2/12/6
- https://lore.kernel.org/r/loom.20070212T063225-663@post.gmane.org
- Documentation/filesystems/proc.rst (1.8)

View File

@ -60,7 +60,7 @@ Note that for the special case of a range one can split the range into equal
sized groups and for each group use some amount from the beginning of that
group:
<cpu number>-cpu number>:<used size>/<group size>
<cpu number>-<cpu number>:<used size>/<group size>
For example one can add to the command line following parameter:

View File

@ -606,7 +606,7 @@
kernel/dma/contiguous.c
cma_pernuma=nn[MG]
[ARM64,KNL]
[ARM64,KNL,CMA]
Sets the size of kernel per-numa memory area for
contiguous memory allocations. A value of 0 disables
per-numa CMA altogether. And if this option is not
@ -1525,12 +1525,12 @@
hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET
registers. Default set by CONFIG_HPET_MMAP_DEFAULT.
hugetlb_cma= [HW] The size of a cma area used for allocation
hugetlb_cma= [HW,CMA] The size of a CMA area used for allocation
of gigantic hugepages.
Format: nn[KMGTPE]
Reserve a cma area of given size and allocate gigantic
hugepages using the cma allocator. If enabled, the
Reserve a CMA area of given size and allocate gigantic
hugepages using the CMA allocator. If enabled, the
boot-time allocation of gigantic hugepages is skipped.
hugepages= [HW] Number of HugeTLB pages to allocate at boot.
@ -3277,9 +3277,14 @@
parameter, xsave area per process might occupy more
memory on xsaves enabled systems.
nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
wfi(ARM) instruction doesn't work correctly and not to
use it. This is also useful when using JTAG debugger.
nohlt [ARM,ARM64,MICROBLAZE,SH] Forces the kernel to busy wait
in do_idle() and not use the arch_cpu_idle()
implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
to be effective. This is useful on platforms where the
sleep(SH) or wfi(ARM,ARM64) instructions do not work
correctly or when doing power measurements to evaluate
the impact of the sleep instructions. This is also
useful when using a JTAG debugger.
no_file_caps Tells the kernel not to honor file capabilities. The
only way then for a file to be executed with privilege
@ -3292,6 +3297,21 @@
in certain environments such as networked servers or
real-time systems.
no_hash_pointers
Force pointers printed to the console or buffers to be
unhashed. By default, when a pointer is printed via %p
format string, that pointer is "hashed", i.e. obscured
by hashing the pointer value. This is a security feature
that hides actual kernel addresses from unprivileged
users, but it also makes debugging the kernel more
difficult since unequal pointers can no longer be
compared. However, if this command-line option is
specified, then all normal pointers will have their true
value printed. Pointers printed via %pK may still be
hashed. This option should only be specified when
debugging the kernel. Please do not use on production
kernels.
nohibernate [HIBERNATION] Disable hibernation and resume.
nohz= [KNL] Boottime enable/disable dynamic ticks

View File

@ -273,7 +273,7 @@ To reduce its OS jitter, do any of the following:
However, there is an RFC patch from Christoph Lameter
(based on an earlier one from Gilad Ben-Yossef) that
reduces or even eliminates vmstat overhead for some
workloads at https://lkml.org/lkml/2013/9/4/379.
workloads at https://lore.kernel.org/r/00000140e9dfd6bd-40db3d4f-c1be-434f-8132-7820f81bb586-000000@email.amazonses.com.
e. If running on high-end powerpc servers, build with
CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS
daemon from running on each CPU every second or so.

View File

@ -72,7 +72,7 @@ monitoring and observability operations, thus, bypass *scope* permissions
checks in the kernel. CAP_PERFMON implements the principle of least
privilege [13]_ (POSIX 1003.1e: 2.2.2.39) for performance monitoring and
observability operations in the kernel and provides a secure approach to
perfomance monitoring and observability in the system.
performance monitoring and observability in the system.
For backward compatibility reasons the access to perf_events monitoring and
observability operations is also open for CAP_SYS_ADMIN privileged

View File

@ -380,5 +380,5 @@ This configuration option sets the maximum number of "watches" that are
allowed for each user.
Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
on a 64bit one.
The current default value for max_user_watches is the 1/32 of the available
low memory, divided for the "watch" cost in bytes.
The current default value for max_user_watches is 1/25 (4%) of the
available low memory, divided by the "watch" cost in bytes.

View File

@ -128,7 +128,7 @@ it. The recommended placement is in the first 16KiB of RAM.
The boot loader must load a device tree image (dtb) into system ram
at a 64bit aligned address and initialize it with the boot data. The
dtb format is documented in Documentation/devicetree/booting-without-of.rst.
dtb format is documented at https://www.devicetree.org/specifications/.
The kernel will look for the dtb magic value of 0xd00dfeed at the dtb
physical address to determine if a dtb has been passed instead of a
tagged list.

View File

@ -33,7 +33,7 @@ SoC-specific documents
ixp4xx
marvel
marvell
microchip
netwinder

View File

@ -127,7 +127,7 @@ EBU Armada family
- 88F6828 Armada 388
- Product infos: http://www.marvell.com/embedded-processors/armada-38x/
- Functional Spec: https://marvellcorp.wufoo.com/forms/marvell-armada-38x-functional-specifications/
- Functional Spec: http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-38x-functional-specifications-2015-11.pdf
Core:
ARM Cortex-A9
@ -183,7 +183,10 @@ EBU Armada family ARMv8
http://www.marvell.com/embedded-processors/armada-3700/
Product Brief:
http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf
http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-product-brief-2016-01.pdf
Hardware Spec:
http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-hardware-specifications-2019-09.pdf
Device tree files:
arch/arm64/boot/dts/marvell/armada-37*

View File

@ -31,7 +31,7 @@ from load_config import loadConfig
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
needs_sphinx = '1.3'
needs_sphinx = '1.7'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
@ -112,19 +112,12 @@ if major >= 3:
else:
extensions.append('cdomain')
if major == 1 and minor < 7:
sys.stderr.write('WARNING: Sphinx 1.7 or greater will be required as of '
'the 5.12 release\n')
# Ensure that autosectionlabel will produce unique names
autosectionlabel_prefix_document = True
autosectionlabel_maxdepth = 2
# The name of the math extension changed on Sphinx 1.4
if (major == 1 and minor > 3) or (major > 1):
extensions.append("sphinx.ext.imgmath")
else:
extensions.append("sphinx.ext.pngmath")
extensions.append("sphinx.ext.imgmath")
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
@ -375,71 +368,9 @@ if cjk_cmd.find("Noto Sans CJK SC") >= 0:
'''
# Fix reference escape troubles with Sphinx 1.4.x
if major == 1 and minor > 3:
if major == 1:
latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
if major == 1 and minor <= 4:
latex_elements['preamble'] += '\\usepackage[margin=0.5in, top=1in, bottom=1in]{geometry}'
elif major == 1 and (minor > 5 or (minor == 5 and patch >= 3)):
latex_elements['sphinxsetup'] = 'hmargin=0.5in, vmargin=1in'
latex_elements['preamble'] += '\\fvset{fontsize=auto}\n'
# Customize notice background colors on Sphinx < 1.6:
if major == 1 and minor < 6:
latex_elements['preamble'] += '''
\\usepackage{ifthen}
% Put notes in color and let them be inside a table
\\definecolor{NoteColor}{RGB}{204,255,255}
\\definecolor{WarningColor}{RGB}{255,204,204}
\\definecolor{AttentionColor}{RGB}{255,255,204}
\\definecolor{ImportantColor}{RGB}{192,255,204}
\\definecolor{OtherColor}{RGB}{204,204,204}
\\newlength{\\mynoticelength}
\\makeatletter\\newenvironment{coloredbox}[1]{%
\\setlength{\\fboxrule}{1pt}
\\setlength{\\fboxsep}{7pt}
\\setlength{\\mynoticelength}{\\linewidth}
\\addtolength{\\mynoticelength}{-2\\fboxsep}
\\addtolength{\\mynoticelength}{-2\\fboxrule}
\\begin{lrbox}{\\@tempboxa}\\begin{minipage}{\\mynoticelength}}{\\end{minipage}\\end{lrbox}%
\\ifthenelse%
{\\equal{\\py@noticetype}{note}}%
{\\colorbox{NoteColor}{\\usebox{\\@tempboxa}}}%
{%
\\ifthenelse%
{\\equal{\\py@noticetype}{warning}}%
{\\colorbox{WarningColor}{\\usebox{\\@tempboxa}}}%
{%
\\ifthenelse%
{\\equal{\\py@noticetype}{attention}}%
{\\colorbox{AttentionColor}{\\usebox{\\@tempboxa}}}%
{%
\\ifthenelse%
{\\equal{\\py@noticetype}{important}}%
{\\colorbox{ImportantColor}{\\usebox{\\@tempboxa}}}%
{\\colorbox{OtherColor}{\\usebox{\\@tempboxa}}}%
}%
}%
}%
}\\makeatother
\\makeatletter
\\renewenvironment{notice}[2]{%
\\def\\py@noticetype{#1}
\\begin{coloredbox}{#1}
\\bf\\it
\\par\\strong{#2}
\\csname py@noticestart@#1\\endcsname
}
{
\\csname py@noticeend@\\py@noticetype\\endcsname
\\end{coloredbox}
}
\\makeatother
'''
# With Sphinx 1.6, it is possible to change the Bg color directly
# by using:
# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}

View File

@ -13,6 +13,7 @@ KUnit - Unit Testing for the Linux Kernel
api/index
style
faq
tips
What is KUnit?
==============
@ -88,6 +89,7 @@ How do I use it?
================
* :doc:`start` - for new users of KUnit
* :doc:`tips` - for short examples of best practices
* :doc:`usage` - for a more detailed explanation of KUnit features
* :doc:`api/index` - for the list of KUnit APIs used for testing
* :doc:`kunit-tool` - for more information on the kunit_tool helper script

View File

@ -196,8 +196,9 @@ Now add the following to ``drivers/misc/Kconfig``:
.. code-block:: kconfig
config MISC_EXAMPLE_TEST
bool "Test for my example"
tristate "Test for my example" if !KUNIT_ALL_TESTS
depends on MISC_EXAMPLE && KUNIT=y
default KUNIT_ALL_TESTS
and the following to ``drivers/misc/Makefile``:
@ -233,5 +234,7 @@ Congrats! You just wrote your first KUnit test!
Next Steps
==========
* Check out the :doc:`usage` page for a more
* Check out the :doc:`tips` page for tips on
writing idiomatic KUnit tests.
* Optional: see the :doc:`usage` page for a more
in-depth explanation of KUnit.

View File

@ -0,0 +1,115 @@
.. SPDX-License-Identifier: GPL-2.0
============================
Tips For Writing KUnit Tests
============================
Exiting early on failed expectations
------------------------------------
``KUNIT_EXPECT_EQ`` and friends will mark the test as failed and continue
execution. In some cases, it's unsafe to continue and you can use the
``KUNIT_ASSERT`` variant to exit on failure.
.. code-block:: c
void example_test_user_alloc_function(struct kunit *test)
{
void *object = alloc_some_object_for_me();
/* Make sure we got a valid pointer back. */
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, object);
do_something_with_object(object);
}
Allocating memory
-----------------
Where you would use ``kzalloc``, you should prefer ``kunit_kzalloc`` instead.
KUnit will ensure the memory is freed once the test completes.
This is particularly useful since it lets you use the ``KUNIT_ASSERT_EQ``
macros to exit early from a test without having to worry about remembering to
call ``kfree``.
Example:
.. code-block:: c
void example_test_allocation(struct kunit *test)
{
char *buffer = kunit_kzalloc(test, 16, GFP_KERNEL);
/* Ensure allocation succeeded. */
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer);
KUNIT_ASSERT_STREQ(test, buffer, "");
}
Testing static functions
------------------------
If you don't want to expose functions or variables just for testing, one option
is to conditionally ``#include`` the test file at the end of your .c file, e.g.
.. code-block:: c
/* In my_file.c */
static int do_interesting_thing();
#ifdef CONFIG_MY_KUNIT_TEST
#include "my_kunit_test.c"
#endif
Injecting test-only code
------------------------
Similarly to the above, it can be useful to add test-specific logic.
.. code-block:: c
/* In my_file.h */
#ifdef CONFIG_MY_KUNIT_TEST
/* Defined in my_kunit_test.c */
void test_only_hook(void);
#else
static inline void test_only_hook(void) { }
#endif
TODO(dlatypov@google.com): add an example of using ``current->kunit_test`` in
such a hook when it's not only updated for ``CONFIG_KASAN=y``.
Customizing error messages
--------------------------
Each of the ``KUNIT_EXPECT`` and ``KUNIT_ASSERT`` macros has a ``_MSG`` variant.
These take a format string and arguments to provide additional context to the automatically generated error messages.
.. code-block:: c
char some_str[41];
generate_sha1_hex_string(some_str);
/* Before. Not easy to tell why the test failed. */
KUNIT_EXPECT_EQ(test, strlen(some_str), 40);
/* After. Now we see the offending string. */
KUNIT_EXPECT_EQ_MSG(test, strlen(some_str), 40, "some_str='%s'", some_str);
Alternatively, one can take full control over the error message by using ``KUNIT_FAIL()``, e.g.
.. code-block:: c
/* Before */
KUNIT_EXPECT_EQ(test, some_setup_function(), 0);
/* After: full control over the failure message. */
if (some_setup_function())
KUNIT_FAIL(test, "Failed to setup thing for testing");
Next Steps
==========
* Optional: see the :doc:`usage` page for a more
in-depth explanation of KUnit.

View File

@ -34,9 +34,11 @@ properties:
items:
- enum:
- qcom,sc7180-smmu-500
- qcom,sc8180x-smmu-500
- qcom,sdm845-smmu-500
- qcom,sm8150-smmu-500
- qcom,sm8250-smmu-500
- qcom,sm8350-smmu-500
- const: arm,mmu-500
- description: Qcom Adreno GPUs implementing "arm,smmu-v2"
items:

View File

@ -1,105 +0,0 @@
* Mediatek IOMMU Architecture Implementation
Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and
this M4U have two generations of HW architecture. Generation one uses flat
pagetable, and only supports 4K size page mapping. Generation two uses the
ARM Short-Descriptor translation table format for address translation.
About the M4U Hardware Block Diagram, please check below:
EMI (External Memory Interface)
|
m4u (Multimedia Memory Management Unit)
|
+--------+
| |
gals0-rx gals1-rx (Global Async Local Sync rx)
| |
| |
gals0-tx gals1-tx (Global Async Local Sync tx)
| | Some SoCs may have GALS.
+--------+
|
SMI Common(Smart Multimedia Interface Common)
|
+----------------+-------
| |
| gals-rx There may be GALS in some larbs.
| |
| |
| gals-tx
| |
SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb).
(display) (vdec)
| |
| |
+-----+-----+ +----+----+
| | | | | |
| | |... | | | ... There are different ports in each larb.
| | | | | |
OVL0 RDMA0 WDMA0 MC PP VLD
As above, The Multimedia HW will go through SMI and M4U while it
access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain
smi local arbiter and smi common. It will control whether the Multimedia
HW should go though the m4u for translation or bypass it and talk
directly with EMI. And also SMI help control the power domain and clocks for
each local arbiter.
Normally we specify a local arbiter(larb) for each multimedia HW
like display, video decode, and camera. And there are different ports
in each larb. Take a example, There are many ports like MC, PP, VLD in the
video decode local arbiter, all these ports are according to the video HW.
In some SoCs, there may be a GALS(Global Async Local Sync) module between
smi-common and m4u, and additional GALS module between smi-larb and
smi-common. GALS can been seen as a "asynchronous fifo" which could help
synchronize for the modules in different clock frequency.
Required properties:
- compatible : must be one of the following string:
"mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW.
"mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW.
"mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW.
"mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses
generation one m4u HW.
"mediatek,mt8167-m4u" for mt8167 which uses generation two m4u HW.
"mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW.
"mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW.
- reg : m4u register base and size.
- interrupts : the interrupt of m4u.
- clocks : must contain one entry for each clock-names.
- clock-names : Only 1 optional clock:
- "bclk": the block clock of m4u.
Here is the list which require this "bclk":
- mt2701, mt2712, mt7623 and mt8173.
Note that m4u use the EMI clock which always has been enabled before kernel
if there is no this "bclk".
- mediatek,larbs : List of phandle to the local arbiters in the current Socs.
Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort
according to the local arbiter index, like larb0, larb1, larb2...
- iommu-cells : must be 1. This is the mtk_m4u_id according to the HW.
Specifies the mtk_m4u_id as defined in
dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623
dt-binding/memory/mt2712-larb-port.h for mt2712,
dt-binding/memory/mt6779-larb-port.h for mt6779,
dt-binding/memory/mt8167-larb-port.h for mt8167,
dt-binding/memory/mt8173-larb-port.h for mt8173, and
dt-binding/memory/mt8183-larb-port.h for mt8183.
Example:
iommu: iommu@10205000 {
compatible = "mediatek,mt8173-m4u";
reg = <0 0x10205000 0 0x1000>;
interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>;
clocks = <&infracfg CLK_INFRA_M4U>;
clock-names = "bclk";
mediatek,larbs = <&larb0 &larb1 &larb2 &larb3 &larb4 &larb5>;
#iommu-cells = <1>;
};
Example for a client device:
display {
compatible = "mediatek,mt8173-disp";
iommus = <&iommu M4U_PORT_DISP_OVL0>,
<&iommu M4U_PORT_DISP_RDMA0>;
...
};

View File

@ -0,0 +1,183 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/iommu/mediatek,iommu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek IOMMU Architecture Implementation
maintainers:
- Yong Wu <yong.wu@mediatek.com>
description: |+
Some MediaTek SOCs contain a Multimedia Memory Management Unit (M4U), and
this M4U has two generations of HW architecture. Generation one uses flat
pagetable, and only supports 4K size page mapping. Generation two uses the
ARM Short-Descriptor translation table format for address translation.
About the M4U Hardware Block Diagram, please check below:
EMI (External Memory Interface)
|
m4u (Multimedia Memory Management Unit)
|
+--------+
| |
gals0-rx gals1-rx (Global Async Local Sync rx)
| |
| |
gals0-tx gals1-tx (Global Async Local Sync tx)
| | Some SoCs may have GALS.
+--------+
|
SMI Common(Smart Multimedia Interface Common)
|
+----------------+-------
| |
| gals-rx There may be GALS in some larbs.
| |
| |
| gals-tx
| |
SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb).
(display) (vdec)
| |
| |
+-----+-----+ +----+----+
| | | | | |
| | |... | | | ... There are different ports in each larb.
| | | | | |
OVL0 RDMA0 WDMA0 MC PP VLD
As above, the Multimedia HW will go through SMI and M4U while it
accesses EMI. SMI is a bridge between m4u and the Multimedia HW. It contains
smi local arbiter and smi common. It will control whether the Multimedia
HW should go through the m4u for translation or bypass it and talk
directly with EMI. SMI also helps control the power domain and clocks for
each local arbiter.
Normally we specify a local arbiter(larb) for each multimedia HW
like display, video decode, and camera. And there are different ports
in each larb. For example, there are many ports like MC, PP, VLD in the
video decode local arbiter, all these ports are according to the video HW.
In some SoCs, there may be a GALS(Global Async Local Sync) module between
smi-common and m4u, and additional GALS module between smi-larb and
smi-common. GALS can be seen as an "asynchronous fifo" which could help
synchronize the modules running at different clock frequencies.
properties:
compatible:
oneOf:
- enum:
- mediatek,mt2701-m4u # generation one
- mediatek,mt2712-m4u # generation two
- mediatek,mt6779-m4u # generation two
- mediatek,mt8167-m4u # generation two
- mediatek,mt8173-m4u # generation two
- mediatek,mt8183-m4u # generation two
- mediatek,mt8192-m4u # generation two
- description: mt7623 generation one
items:
- const: mediatek,mt7623-m4u
- const: mediatek,mt2701-m4u
reg:
maxItems: 1
interrupts:
maxItems: 1
clocks:
items:
- description: bclk is the block clock.
clock-names:
items:
- const: bclk
mediatek,larbs:
$ref: /schemas/types.yaml#/definitions/phandle-array
minItems: 1
maxItems: 32
description: |
List of phandle to the local arbiters in the current SoCs.
Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must sort
according to the local arbiter index, like larb0, larb1, larb2...
'#iommu-cells':
const: 1
description: |
This is the mtk_m4u_id according to the HW. Specifies the mtk_m4u_id as
defined in
dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623,
dt-binding/memory/mt2712-larb-port.h for mt2712,
dt-binding/memory/mt6779-larb-port.h for mt6779,
dt-binding/memory/mt8167-larb-port.h for mt8167,
dt-binding/memory/mt8173-larb-port.h for mt8173,
dt-binding/memory/mt8183-larb-port.h for mt8183,
dt-binding/memory/mt8192-larb-port.h for mt8192.
power-domains:
maxItems: 1
required:
- compatible
- reg
- interrupts
- mediatek,larbs
- '#iommu-cells'
allOf:
- if:
properties:
compatible:
contains:
enum:
- mediatek,mt2701-m4u
- mediatek,mt2712-m4u
- mediatek,mt8173-m4u
- mediatek,mt8192-m4u
then:
required:
- clocks
- if:
properties:
compatible:
enum:
- mediatek,mt8192-m4u
then:
required:
- power-domains
additionalProperties: false
examples:
- |
#include <dt-bindings/clock/mt8173-clk.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
iommu: iommu@10205000 {
compatible = "mediatek,mt8173-m4u";
reg = <0x10205000 0x1000>;
interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>;
clocks = <&infracfg CLK_INFRA_M4U>;
clock-names = "bclk";
mediatek,larbs = <&larb0 &larb1 &larb2
&larb3 &larb4 &larb5>;
#iommu-cells = <1>;
};
- |
#include <dt-bindings/memory/mt8173-larb-port.h>
/* Example for a client device */
display {
compatible = "mediatek,mt8173-disp";
iommus = <&iommu M4U_PORT_DISP_OVL0>,
<&iommu M4U_PORT_DISP_RDMA0>;
};

View File

@ -12,7 +12,7 @@ This article describes how Linux uses the device tree. An overview of
the device tree data format can be found on the device tree usage page
at devicetree.org\ [1]_.
.. [1] https://elinux.org/Device_Tree_Usage
.. [1] https://www.devicetree.org/specifications/
The "Open Firmware Device Tree", or simply Device Tree (DT), is a data
structure and language for describing hardware. More specifically, it

View File

@ -340,16 +340,26 @@ Rendered as:
Cross-referencing
-----------------
Cross-referencing from one documentation page to another can be done by passing
the path to the file starting from the Documentation folder.
For example, to cross-reference to this page (the .rst extension is optional)::
Cross-referencing from one documentation page to another can be done simply by
writing the path to the document file, no special syntax required. The path can
be either absolute or relative. For absolute paths, start it with
"Documentation/". For example, to cross-reference to this page, all the
following are valid options, depending on the current document's directory (note
that the ``.rst`` extension is required)::
See Documentation/doc-guide/sphinx.rst.
See Documentation/doc-guide/sphinx.rst. This always works.
Take a look at sphinx.rst, which is at this same directory.
Read ../sphinx.rst, which is one directory above.
If you want to use a relative path, you need to use Sphinx's ``doc`` directive.
For example, referencing this page from the same directory would be done as::
If you want the link to have a different rendered text other than the document's
title, you need to use Sphinx's ``doc`` role. For example::
See :doc:`sphinx`.
See :doc:`my custom link text for document sphinx <sphinx>`.
For most use cases, the former is preferred, as it is cleaner and more suited
for people reading the source files. If you come across a ``:doc:`` usage that
isn't adding any value, please feel free to convert it to just the document
path.
For information on cross-referencing to kernel-doc functions or types, see
Documentation/doc-guide/kernel-doc.rst.

View File

@ -640,8 +640,8 @@ compliance:
level and edge IRQs
* [1] http://www.spinics.net/lists/linux-omap/msg120425.html
* [2] https://lkml.org/lkml/2015/9/25/494
* [3] https://lkml.org/lkml/2015/9/25/495
* [2] https://lore.kernel.org/r/1443209283-20781-2-git-send-email-grygorii.strashko@ti.com
* [3] https://lore.kernel.org/r/1443209283-20781-3-git-send-email-grygorii.strashko@ti.com
Requesting self-owned GPIO pins

View File

@ -18,6 +18,7 @@ MEN Chameleon Bus
4.1 The driver structure
4.2 Probing and attaching
4.3 Initializing the driver
4.4 Using DMA
Introduction
@ -173,3 +174,14 @@ module at the MCB core::
The module_mcb_driver() macro can be used to reduce the above code::
module_mcb_driver(foo_driver);
Using DMA
---------
To make use of the kernel's DMA API functions, you will need to use the
carrier device's 'struct device'. Fortunately 'struct mcb_device' embeds a
pointer (->dma_dev) to the carrier's device for DMA purposes::
ret = dma_set_mask_and_coherent(mdev->dma_dev, DMA_BIT_MASK(dma_bits));
if (ret)
/* Handle errors */

View File

@ -54,7 +54,7 @@ temperature) and throttle appropriate devices.
trips:
the total number of trip points this thermal zone supports.
mask:
Bit string: If 'n'th bit is set, then trip point 'n' is writeable.
Bit string: If 'n'th bit is set, then trip point 'n' is writable.
devdata:
device private data
ops:
@ -406,7 +406,7 @@ Thermal cooling device sys I/F, created once it's registered::
|---stats/reset: Writing any value resets the statistics
|---stats/time_in_state_ms: Time (msec) spent in various cooling states
|---stats/total_trans: Total number of times cooling state is changed
|---stats/trans_table: Cooing state transition table
|---stats/trans_table: Cooling state transition table
Then next two dynamic attributes are created/removed in pairs. They represent
@ -766,5 +766,5 @@ emergency poweroff kicks in after the delay has elapsed and shuts down
the system.
If set to 0 emergency poweroff will not be supported. So a carefully
profiled non-zero positive value is a must for emergerncy poweroff to be
profiled non-zero positive value is a must for emergency poweroff to be
triggered.

View File

@ -109,7 +109,7 @@ Mountpoints
AFS has a concept of mountpoints. In AFS terms, these are specially formatted
symbolic links (of the same form as the "device name" passed to mount). kAFS
presents these to the user as directories that have a follow-link capability
(ie: symbolic link semantics). If anyone attempts to access them, they will
(i.e.: symbolic link semantics). If anyone attempts to access them, they will
automatically cause the target volume to be mounted (if possible) on that site.
Automatically mounted filesystems will be automatically unmounted approximately
@ -144,7 +144,7 @@ looks up a cell of the same name, for example::
Proc Filesystem
===============
The AFS modules creates a "/proc/fs/afs/" directory and populates it:
The AFS module creates a "/proc/fs/afs/" directory and populates it:
(*) A "cells" file that lists cells currently known to the afs module and
their usage counts::
@ -201,7 +201,7 @@ And then run as::
./klog
Assuming it's successful, this adds a key of type RxRPC, named for the service
and cell, eg: "afs@<cellname>". This can be viewed with the keyctl program or
and cell, e.g.: "afs@<cellname>". This can be viewed with the keyctl program or
by cat'ing /proc/keys::
[root@andromeda ~]# keyctl show
@ -211,7 +211,7 @@ by cat'ing /proc/keys::
111416553 --als--v 0 0 \_ rxrpc: afs@CAMBRIDGE.REDHAT.COM
Currently the username, realm, password and proposed ticket lifetime are
compiled in to the program.
compiled into the program.
It is not required to acquire a key before using AFS facilities, but if one is
not acquired then all operations will be governed by the anonymous user parts

View File

@ -83,20 +83,9 @@ Summary
directories. This has runtime constraints and limitations that are
described in 6) below.
6. When changing the S_DAX policy via toggling the persistent FS_XFLAG_DAX flag,
the change in behaviour for existing regular files may not occur
immediately. If the change must take effect immediately, the administrator
needs to:
a) stop the application so there are no active references to the data set
the policy change will affect
b) evict the data set from kernel caches so it will be re-instantiated when
the application is restarted. This can be achieved by:
i. drop-caches
ii. a filesystem unmount and mount cycle
iii. a system reboot
6. When changing the S_DAX policy via toggling the persistent FS_XFLAG_DAX
flag, the change to existing regular files won't take effect until the
files are closed by all processes.
Details

View File

@ -83,6 +83,7 @@ Documentation for filesystem implementations.
erofs
ext2
ext3
ext4/index
f2fs
gfs2
gfs2-uevents

View File

@ -693,7 +693,10 @@ files are there, and which are missing.
kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4))
kmsg Kernel messages
ksyms Kernel symbol table
loadavg Load average of last 1, 5 & 15 minutes
loadavg Load average of last 1, 5 & 15 minutes;
number of processes currently runnable (running or on ready queue);
total number of processes in system;
last pid created.
locks Kernel locks
meminfo Memory info
misc Miscellaneous

View File

@ -112,7 +112,7 @@ members are defined:
.. code-block:: c
struct file_system_operations {
struct file_system_type {
const char *name;
int fs_flags;
struct dentry *(*mount) (struct file_system_type *, int,

View File

@ -688,7 +688,7 @@ for fbdev.
https://patchwork.freedesktop.org/patch/306579/
- [RFC PATCH v2 00/13] Kernel based bootsplash
https://lkml.org/lkml/2017/12/13/764
https://lore.kernel.org/r/20171213194755.3409-1-mstaudt@suse.de
Contact: Sam Ravnborg

View File

@ -13,7 +13,7 @@ touchscreen/ADC module.
====================
Numbering scheme for channels 0..4 is defined in EP9301 and EP9302 datasheets.
EP9307, EP9312 and EP9312 have 3 channels more (total 8), but the numbering is
EP9307, EP9312 and EP9315 have 3 channels more (total 8), but the numbering is
not defined. So the last three are numbered randomly, let's say.
Assuming ep93xx_adc is IIO device0, you'd find the following entries under

View File

@ -171,17 +171,6 @@ implementation.
x86/index
xtensa/index
Filesystem Documentation
------------------------
The documentation in this section are provided by specific filesystem
subprojects.
.. toctree::
:maxdepth: 2
filesystems/ext4/index
Other documentation
-------------------

View File

@ -236,6 +236,21 @@ A few EV_ABS codes have special meanings:
- Used to describe multitouch input events. Please see
multi-touch-protocol.txt for details.
* ABS_PRESSURE/ABS_MT_PRESSURE:
- For touch devices, many devices converted contact size into pressure.
A finger flattens with pressure, causing a larger contact area and thus
pressure and contact size are directly related. This is not the case
for other devices, for example digitizers and touchpads with a true
pressure sensor ("pressure pads").
A device should set the resolution of the axis to indicate whether the
pressure is in measurable units. If the resolution is zero, the
pressure data is in arbitrary units. If the resolution is nonzero, the
pressure data is in units/gram. For example, a value of 10 with a
resolution of 1 represents 10 gram, a value of 10 with a resolution of
1000 represents 10 milligram.
EV_SW
-----

View File

@ -260,6 +260,10 @@ ABS_MT_PRESSURE
of TOUCH and WIDTH for pressure-based devices or any device with a spatial
signal intensity distribution.
If the resolution is zero, the pressure data is in arbitrary units.
If the resolution is nonzero, the pressure data is in units/gram. See
:ref:`input-event-codes` for details.
ABS_MT_DISTANCE
The distance, in surface units, between the contact and the surface. Zero
distance means the contact is touching the surface. A positive number means

View File

@ -346,8 +346,8 @@ routine.
Before inventing your own cache of often-used objects consider using a
slab cache in ``include/linux/slab.h``
:c:func:`current()`
-------------------
:c:macro:`current`
------------------
Defined in ``include/asm/current.h``

View File

@ -958,7 +958,7 @@ grabs a read lock, searches a list, fails to find what it wants, drops
the read lock, grabs a write lock and inserts the object has a race
condition.
If you don't see why, please stay the fuck away from my code.
If you don't see why, please stay away from my code.
Racing Timers: A Kernel Pastime
-------------------------------

View File

@ -13,6 +13,7 @@ Kernel Livepatching
module-elf-format
shadow-vars
system-state
reliable-stacktrace
.. only:: subproject and html

View File

@ -6,20 +6,7 @@ This document outlines basic information about kernel livepatching.
.. Table of Contents:
1. Motivation
2. Kprobes, Ftrace, Livepatching
3. Consistency model
4. Livepatch module
4.1. New functions
4.2. Metadata
5. Livepatch life-cycle
5.1. Loading
5.2. Enabling
5.3. Replacing
5.4. Disabling
5.5. Removing
6. Sysfs
7. Limitations
.. contents:: :local:
1. Motivation

View File

@ -7,14 +7,8 @@ This document outlines the Elf format requirements that livepatch modules must f
.. Table of Contents
1. Background and motivation
2. Livepatch modinfo field
3. Livepatch relocation sections
3.1 Livepatch relocation section format
4. Livepatch symbols
4.1 A livepatch module's symbol table
4.2 Livepatch symbol format
5. Symbol table and Elf section access
.. contents:: :local:
1. Background and motivation
============================

View File

@ -0,0 +1,309 @@
===================
Reliable Stacktrace
===================
This document outlines basic information about reliable stacktracing.
.. Table of Contents:
.. contents:: :local:
1. Introduction
===============
The kernel livepatch consistency model relies on accurately identifying which
functions may have live state and therefore may not be safe to patch. One way
to identify which functions are live is to use a stacktrace.
Existing stacktrace code may not always give an accurate picture of all
functions with live state, and best-effort approaches which can be helpful for
debugging are unsound for livepatching. Livepatching depends on architectures
to provide a *reliable* stacktrace which ensures it never omits any live
functions from a trace.
2. Requirements
===============
Architectures must implement one of the reliable stacktrace functions.
Architectures using CONFIG_ARCH_STACKWALK must implement
'arch_stack_walk_reliable', and other architectures must implement
'save_stack_trace_tsk_reliable'.
Principally, the reliable stacktrace function must ensure that either:
* The trace includes all functions that the task may be returned to, and the
return code is zero to indicate that the trace is reliable.
* The return code is non-zero to indicate that the trace is not reliable.
.. note::
In some cases it is legitimate to omit specific functions from the trace,
but all other functions must be reported. These cases are described in
further detail below.
Secondly, the reliable stacktrace function must be robust to cases where
the stack or other unwind state is corrupt or otherwise unreliable. The
function should attempt to detect such cases and return a non-zero error
code, and should not get stuck in an infinite loop or access memory in
an unsafe way. Specific cases are described in further detail below.
3. Compile-time analysis
========================
To ensure that kernel code can be correctly unwound in all cases,
architectures may need to verify that code has been compiled in a manner
expected by the unwinder. For example, an unwinder may expect that
functions manipulate the stack pointer in a limited way, or that all
functions use specific prologue and epilogue sequences. Architectures
with such requirements should verify the kernel compilation using
objtool.
In some cases, an unwinder may require metadata to correctly unwind.
Where necessary, this metadata should be generated at build time using
objtool.
4. Considerations
=================
The unwinding process varies across architectures, their respective procedure
call standards, and kernel configurations. This section describes common
details that architectures should consider.
4.1 Identifying successful termination
--------------------------------------
Unwinding may terminate early for a number of reasons, including:
* Stack or frame pointer corruption.
* Missing unwind support for an uncommon scenario, or a bug in the unwinder.
* Dynamically generated code (e.g. eBPF) or foreign code (e.g. EFI runtime
services) not following the conventions expected by the unwinder.
To ensure that this does not result in functions being omitted from the trace,
even if not caught by other checks, it is strongly recommended that
architectures verify that a stacktrace ends at an expected location, e.g.
* Within a specific function that is an entry point to the kernel.
* At a specific location on a stack expected for a kernel entry point.
* On a specific stack expected for a kernel entry point (e.g. if the
architecture has separate task and IRQ stacks).
4.2 Identifying unwindable code
-------------------------------
Unwinding typically relies on code following specific conventions (e.g.
manipulating a frame pointer), but there can be code which may not follow these
conventions and may require special handling in the unwinder, e.g.
* Exception vectors and entry assembly.
* Procedure Linkage Table (PLT) entries and veneer functions.
* Trampoline assembly (e.g. ftrace, kprobes).
* Dynamically generated code (e.g. eBPF, optprobe trampolines).
* Foreign code (e.g. EFI runtime services).
To ensure that such cases do not result in functions being omitted from a
trace, it is strongly recommended that architectures positively identify code
which is known to be reliable to unwind from, and reject unwinding from all
other code.
Kernel code including modules and eBPF can be distinguished from foreign code
using '__kernel_text_address()'. Checking for this also helps to detect stack
corruption.
There are several ways an architecture may identify kernel code which is deemed
unreliable to unwind from, e.g.
* Placing such code into special linker sections, and rejecting unwinding from
any code in these sections.
* Identifying specific portions of code using bounds information.
4.3 Unwinding across interrupts and exceptions
----------------------------------------------
At function call boundaries the stack and other unwind state is expected to be
in a consistent state suitable for reliable unwinding, but this may not be the
case part-way through a function. For example, during a function prologue or
epilogue a frame pointer may be transiently invalid, or during the function
body the return address may be held in an arbitrary general purpose register.
For some architectures this may change at runtime as a result of dynamic
instrumentation.
If an interrupt or other exception is taken while the stack or other unwind
state is in an inconsistent state, it may not be possible to reliably unwind,
and it may not be possible to identify whether such unwinding will be reliable.
See below for examples.
Architectures which cannot identify when it is reliable to unwind such cases
(or where it is never reliable) must reject unwinding across exception
boundaries. Note that it may be reliable to unwind across certain
exceptions (e.g. IRQ) but unreliable to unwind across other exceptions
(e.g. NMI).
Architectures which can identify when it is reliable to unwind such cases (or
have no such cases) should attempt to unwind across exception boundaries, as
doing so can prevent unnecessarily stalling livepatch consistency checks and
permits livepatch transitions to complete more quickly.
4.4 Rewriting of return addresses
---------------------------------
Some trampolines temporarily modify the return address of a function in order
to intercept when that function returns with a return trampoline, e.g.
* An ftrace trampoline may modify the return address so that function graph
tracing can intercept returns.
* A kprobes (or optprobes) trampoline may modify the return address so that
kretprobes can intercept returns.
When this happens, the original return address will not be in its usual
location. For trampolines which are not subject to live patching, where an
unwinder can reliably determine the original return address and no unwind state
is altered by the trampoline, the unwinder may report the original return
address in place of the trampoline and report this as reliable. Otherwise, an
unwinder must report these cases as unreliable.
Special care is required when identifying the original return address, as this
information is not in a consistent location for the duration of the entry
trampoline or return trampoline. For example, considering the x86_64
'return_to_handler' return trampoline:
.. code-block:: none
SYM_CODE_START(return_to_handler)
UNWIND_HINT_EMPTY
subq $24, %rsp
/* Save the return values */
movq %rax, (%rsp)
movq %rdx, 8(%rsp)
movq %rbp, %rdi
call ftrace_return_to_handler
movq %rax, %rdi
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
JMP_NOSPEC rdi
SYM_CODE_END(return_to_handler)
While the traced function runs its return address on the stack points to
the start of return_to_handler, and the original return address is stored in
the task's cur_ret_stack. During this time the unwinder can find the return
address using ftrace_graph_ret_addr().
When the traced function returns to return_to_handler, there is no longer a
return address on the stack, though the original return address is still stored
in the task's cur_ret_stack. Within ftrace_return_to_handler(), the original
return address is removed from cur_ret_stack and is transiently moved
arbitrarily by the compiler before being returned in rax. The return_to_handler
trampoline moves this into rdi before jumping to it.
Architectures might not always be able to unwind such sequences, such as when
ftrace_return_to_handler() has removed the address from cur_ret_stack, and the
location of the return address cannot be reliably determined.
It is recommended that architectures unwind cases where return_to_handler has
not yet been returned to, but architectures are not required to unwind from the
middle of return_to_handler and can report this as unreliable. Architectures
are not required to unwind from other trampolines which modify the return
address.
4.5 Obscuring of return addresses
---------------------------------
Some trampolines do not rewrite the return address in order to intercept
returns, but do transiently clobber the return address or other unwind state.
For example, the x86_64 implementation of optprobes patches the probed function
with a JMP instruction which targets the associated optprobe trampoline. When
the probe is hit, the CPU will branch to the optprobe trampoline, and the
address of the probed function is not held in any register or on the stack.
Similarly, the arm64 implementation of DYNAMIC_FTRACE_WITH_REGS patches traced
functions with the following:
.. code-block:: none
MOV X9, X30
BL <trampoline>
The MOV saves the link register (X30) into X9 to preserve the return address
before the BL clobbers the link register and branches to the trampoline. At the
start of the trampoline, the address of the traced function is in X9 rather
than the link register as would usually be the case.
Architectures must ensure that unwinders either reliably unwind
such cases, or report the unwinding as unreliable.
4.6 Link register unreliability
-------------------------------
On some other architectures, 'call' instructions place the return address into a
link register, and 'return' instructions consume the return address from the
link register without modifying the register. On these architectures software
must save the return address to the stack prior to making a function call. Over
the duration of a function call, the return address may be held in the link
register alone, on the stack alone, or in both locations.
Unwinders typically assume the link register is always live, but this
assumption can lead to unreliable stack traces. For example, consider the
following arm64 assembly for a simple function:
.. code-block:: none
function:
STP X29, X30, [SP, -16]!
MOV X29, SP
BL <other_function>
LDP X29, X30, [SP], #16
RET
At entry to the function, the link register (X30) points to the caller, and the
frame pointer (X29) points to the caller's frame including the caller's return
address. The first two instructions create a new stackframe and update the
frame pointer, and at this point the link register and the frame pointer both
describe this function's return address. A trace at this point may describe
this function twice, and if the function return is being traced, the unwinder
may consume two entries from the fgraph return stack rather than one entry.
The BL invokes 'other_function' with the link register pointing to this
function's LDP and the frame pointer pointing to this function's stackframe.
When 'other_function' returns, the link register is left pointing at the BL,
and so a trace at this point could result in 'function' appearing twice in the
backtrace.
Similarly, a function may deliberately clobber the LR, e.g.
.. code-block:: none
caller:
STP X29, X30, [SP, -16]!
MOV X29, SP
ADR LR, <callee>
BLR LR
LDP X29, X30, [SP], #16
RET
The ADR places the address of 'callee' into the LR, before the BLR branches to
this address. If a trace is made immediately after the ADR, 'callee' will
appear to be the parent of 'caller', rather than the child.
Due to cases such as the above, it may only be possible to reliably consume a
link register value at a function call boundary. Architectures where this is
the case must reject unwinding across exception boundaries unless they can
reliably identify when the LR or stack value should be used (e.g. using
metadata generated by objtool).

View File

@ -134,7 +134,7 @@ Generally speaking, there is a couple of reasons to use the freezing of tasks:
safeguards against race conditions that might occur in such a case.
Although Linus Torvalds doesn't like the freezing of tasks, he said this in one
of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
of the discussions on LKML (https://lore.kernel.org/r/alpine.LFD.0.98.0704271801020.9964@woody.linux-foundation.org):
"RJW:> Why we freeze tasks at all or why we freeze kernel threads?

View File

@ -501,7 +501,7 @@ table, but not from elsewhere in the kernel. If the syscall functionality is
useful to be used within the kernel, needs to be shared between an old and a
new syscall, or needs to be shared between a syscall and its compatibility
variant, it should be implemented by means of a "helper" function (such as
``kern_xyzzy()``). This kernel function may then be called within the
``ksys_xyzzy()``). This kernel function may then be called within the
syscall stub (``sys_xyzzy()``), the compatibility syscall stub
(``compat_sys_xyzzy()``), and/or other kernel code.
@ -548,18 +548,18 @@ References and Sources
https://lwn.net/Articles/486306/
- Recommendation from Andrew Morton that all related information for a new
system call should come in the same email thread:
https://lkml.org/lkml/2014/7/24/641
https://lore.kernel.org/r/20140724144747.3041b208832bbdf9fbce5d96@linux-foundation.org
- Recommendation from Michael Kerrisk that a new system call should come with
a man page: https://lkml.org/lkml/2014/6/13/309
a man page: https://lore.kernel.org/r/CAKgNAkgMA39AfoSoA5Pe1r9N+ZzfYQNvNPvcRN7tOvRb8+v06Q@mail.gmail.com
- Suggestion from Thomas Gleixner that x86 wire-up should be in a separate
commit: https://lkml.org/lkml/2014/11/19/254
commit: https://lore.kernel.org/r/alpine.DEB.2.11.1411191249560.3909@nanos
- Suggestion from Greg Kroah-Hartman that it's good for new system calls to
come with a man-page & selftest: https://lkml.org/lkml/2014/3/19/710
come with a man-page & selftest: https://lore.kernel.org/r/20140320025530.GA25469@kroah.com
- Discussion from Michael Kerrisk of new system call vs. :manpage:`prctl(2)` extension:
https://lkml.org/lkml/2014/6/3/411
https://lore.kernel.org/r/CAHO5Pa3F2MjfTtfNxa8LbnkeeU8=YJ+9tDqxZpw7Gz59E-4AUg@mail.gmail.com
- Suggestion from Ingo Molnar that system calls that involve multiple
arguments should encapsulate those arguments in a struct, which includes a
size field for future extensibility: https://lkml.org/lkml/2015/7/30/117
size field for future extensibility: https://lore.kernel.org/r/20150730083831.GA22182@gmail.com
- Numbering oddities arising from (re-)use of O_* numbering space flags:
- commit 75069f2b5bfb ("vfs: renumber FMODE_NONOTIFY and add to uniqueness
@ -569,9 +569,9 @@ References and Sources
- commit bb458c644a59 ("Safer ABI for O_TMPFILE")
- Discussion from Matthew Wilcox about restrictions on 64-bit arguments:
https://lkml.org/lkml/2008/12/12/187
https://lore.kernel.org/r/20081212152929.GM26095@parisc-linux.org
- Recommendation from Greg Kroah-Hartman that unknown flags should be
policed: https://lkml.org/lkml/2014/7/17/577
policed: https://lore.kernel.org/r/20140717193330.GB4703@kroah.com
- Recommendation from Linus Torvalds that x32 system calls should prefer
compatibility with 64-bit versions rather than 32-bit versions:
https://lkml.org/lkml/2011/8/31/244
https://lore.kernel.org/r/CA+55aFxfmwfB7jbbrXxa=K7VBYPfAvmu3XOkGrLbB1UFjX1+Ew@mail.gmail.com

View File

@ -69,9 +69,26 @@ something to hide:
if (condition) do_this;
do_something_everytime;
Don't use commas to avoid using braces:
.. code-block:: c
if (condition)
do_this(), do_that();
Always use braces for multiple statements:
.. code-block:: c
if (condition) {
do_this();
do_that();
}
Don't put multiple assignments on a single line either. Kernel coding style
is super simple. Avoid tricky expressions.
Outside of comments, documentation and except in Kconfig, spaces are never
used for indentation, and the above example is deliberately broken.
@ -306,8 +323,7 @@ that counts the number of active users, you should call that
Encoding the type of a function into the name (so-called Hungarian
notation) is asinine - the compiler knows the types anyway and can check
those, and it only confuses the programmer. No wonder Microsoft makes buggy
programs.
those, and it only confuses the programmer.
LOCAL variable names should be short, and to the point. If you have
some random integer loop counter, it should probably be called ``i``.

View File

@ -342,16 +342,10 @@ Adventurous testers are very welcome to runtime-test the linux-next.
Bug Reporting
-------------
https://bugzilla.kernel.org is where the Linux kernel developers track kernel
bugs. Users are encouraged to report all bugs that they find in this
tool. For details on how to use the kernel bugzilla, please see:
https://bugzilla.kernel.org/page.cgi?id=faq.html
The file 'Documentation/admin-guide/reporting-issues.rst' in the main kernel
source directory has a good template for how to report a possible kernel bug,
and details what kind of information is needed by the kernel developers to help
track down the problem.
source directory describes how to report a possible kernel bug, and details
what kind of information is needed by the kernel developers to help track
down the problem.
Managing bug reports
@ -364,7 +358,13 @@ improve your skills, and other developers will be aware of your presence.
Fixing bugs is one of the best ways to get merits among other developers,
because not many people like wasting time fixing other people's bugs.
To work in the already reported bug reports, go to https://bugzilla.kernel.org.
To work on already reported bug reports, find a subsystem you are interested in.
Check the MAINTAINERS file where bugs for that subsystem get reported to; often
it will be a mailing list, rarely a bugtracker. Search the archives of said
place for recent reports and help where you see fit. You may also want to check
https://bugzilla.kernel.org for bug reports; only a handful of kernel subsystems
use it actively for reporting or tracking, nevertheless bugs for the whole
kernel get filed there.
Mailing lists

View File

@ -89,30 +89,28 @@ and elsewhere regarding submitting Linux kernel patches.
Patches that change userspace interfaces should be CCed to
linux-api@vger.kernel.org.
19) Check that it all passes ``make headers_check``.
20) Has been checked with injection of at least slab and page-allocation
19) Has been checked with injection of at least slab and page-allocation
failures. See ``Documentation/fault-injection/``.
If the new code is substantial, addition of subsystem-specific fault
injection might be appropriate.
21) Newly-added code has been compiled with ``gcc -W`` (use
20) Newly-added code has been compiled with ``gcc -W`` (use
``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good
for finding bugs like "warning: comparison between signed and unsigned".
22) Tested after it has been merged into the -mm patchset to make sure
21) Tested after it has been merged into the -mm patchset to make sure
that it still works with all of the other queued patches and various
changes in the VM, VFS, and other subsystems.
23) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a
22) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a
comment in the source code that explains the logic of what they are doing
and why.
24) If any ioctl's are added by the patch, then also update
23) If any ioctl's are added by the patch, then also update
``Documentation/userspace-api/ioctl/ioctl-number.rst``.
25) If your modified source code depends on or uses any of the kernel
24) If your modified source code depends on or uses any of the kernel
APIs or features that are related to the following ``Kconfig`` symbols,
then test multiple builds with the related ``Kconfig`` symbols disabled
and/or ``=m`` (if that option is available) [not all of these at the

View File

@ -556,6 +556,11 @@ which stable kernel versions should receive your fix. This is the preferred
method for indicating a bug fixed by the patch. See :ref:`describe_changes`
for more details.
Note: Attaching a Fixes: tag does not subvert the stable kernel rules
process nor the requirement to Cc: stable@vger.kernel.org on all stable
patch candidates. For more information, please read
:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
.. _the_canonical_patch_format:
The canonical patch format
@ -679,6 +684,26 @@ generates appropriate diffstats by default.)
See more details on the proper patch format in the following
references.
Backtraces in commit mesages
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Backtraces help document the call chain leading to a problem. However,
not all backtraces are helpful. For example, early boot call chains are
unique and obvious. Copying the full dmesg output verbatim, however,
adds distracting information like timestamps, module lists, register and
stack dumps.
Therefore, the most useful backtraces should distill the relevant
information from the dump, which makes it easier to focus on the real
issue. Here is an example of a well-trimmed backtrace::
unchecked MSR access error: WRMSR to 0xd51 (tried to write 0x0000000000000064)
at rIP: 0xffffffffae059994 (native_write_msr+0x4/0x20)
Call Trace:
mba_wrmsr
update_domains
rdtgroup_mkdir
.. _explicit_in_reply_to:
Explicit In-Reply-To headers
@ -769,13 +794,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
<http://www.kroah.com/log/linux/maintainer-06.html>
NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
<https://lkml.org/lkml/2005/7/11/336>
<https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
Kernel Documentation/process/coding-style.rst:
:ref:`Documentation/process/coding-style.rst <codingstyle>`
Linus Torvalds's mail on the canonical patch format:
<http://lkml.org/lkml/2005/4/7/183>
<https://lore.kernel.org/r/Pine.LNX.4.58.0504071023190.28951@ppc970.osdl.org>
Andi Kleen, "On submitting kernel patches"
Some strategies to get difficult or controversial changes in.

View File

@ -2,8 +2,9 @@
CFS Bandwidth Control
=====================
[ This document only discusses CPU bandwidth control for SCHED_NORMAL.
The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst ]
.. note::
This document only discusses CPU bandwidth control for SCHED_NORMAL.
The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst
CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
specification of the maximum CPU bandwidth available to a group or hierarchy.
@ -25,9 +26,15 @@ Management
----------
Quota and period are managed within the cpu subsystem via cgroupfs.
cpu.cfs_quota_us: the total available run-time within a period (in microseconds)
cpu.cfs_period_us: the length of a period (in microseconds)
cpu.stat: exports throttling statistics [explained further below]
.. note::
The cgroupfs files described in this section are only applicable
to cgroup v1. For cgroup v2, see
:ref:`Documentation/admin-guide/cgroupv2.rst <cgroup-v2-cpu>`.
- cpu.cfs_quota_us: the total available run-time within a period (in
microseconds)
- cpu.cfs_period_us: the length of a period (in microseconds)
- cpu.stat: exports throttling statistics [explained further below]
The default values are::

View File

@ -707,7 +707,7 @@ Deadline Task Scheduling
and how to prevent non-root users "cheat" the system?
As already discussed, we are planning also to merge this work with the EDF
throttling patches [https://lkml.org/lkml/2010/2/23/239] but we still are in
throttling patches [https://lore.kernel.org/r/cover.1266931410.git.fabio@helm.retis] but we still are in
the preliminary phases of the merge and we really seek feedback that would
help us decide on the direction it should take.

View File

@ -34,9 +34,9 @@ In CFS the virtual runtime is expressed and tracked via the per-task
p->se.vruntime (nanosec-unit) value. This way, it's possible to accurately
timestamp and measure the "expected CPU time" a task should have gotten.
[ small detail: on "ideal" hardware, at any time all tasks would have the same
p->se.vruntime value --- i.e., tasks would execute simultaneously and no task
would ever get "out of balance" from the "ideal" share of CPU time. ]
Small detail: on "ideal" hardware, at any time all tasks would have the same
p->se.vruntime value --- i.e., tasks would execute simultaneously and no task
would ever get "out of balance" from the "ideal" share of CPU time.
CFS's task picking logic is based on this p->se.vruntime value and it is thus
very simple: it always tries to run the task with the smallest p->se.vruntime

View File

@ -2,7 +2,7 @@
Linux Security Module Development
=================================
Based on https://lkml.org/lkml/2007/10/26/215,
Based on https://lore.kernel.org/r/20071026073721.618b4778@laptopd505.fenrus.org,
a new LSM is accepted into the kernel when its intent (a description of
what it tries to protect against and in what cases one would expect to
use it) has been appropriately documented in ``Documentation/admin-guide/LSM/``.

View File

@ -51,7 +51,7 @@ RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
# Detects a reference to a documentation page of the form Documentation/... with
# an optional extension
#
RE_doc = re.compile(r'\bDocumentation(/[\w\-_/]+)(\.\w+)*')
RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)')
RE_namespace = re.compile(r'^\s*..\s*c:namespace::\s*(\S+)\s*$')
@ -234,7 +234,10 @@ def markup_doc_ref(docname, app, match):
#
# Go through the dance of getting an xref out of the std domain
#
target = match.group(1)
absolute = match.group(1)
target = match.group(2)
if absolute:
target = "/" + target
xref = None
pxref = addnodes.pending_xref('', refdomain = 'std', reftype = 'doc',
reftarget = target, modname = None,

View File

@ -236,13 +236,7 @@ class CObject(Base_CObject):
indextext = self.get_index_text(name)
if indextext:
if major == 1 and minor < 4:
# indexnode's tuple changed in 1.4
# https://github.com/sphinx-doc/sphinx/commit/e6a5a3a92e938fcd75866b4227db9e0524d58f7c
self.indexnode['entries'].append(
('single', indextext, targetname, ''))
else:
self.indexnode['entries'].append(
self.indexnode['entries'].append(
('single', indextext, targetname, '', None))
class CDomain(Base_CDomain):

View File

@ -45,17 +45,7 @@ from docutils import nodes, statemachine
from docutils.statemachine import ViewList
from docutils.parsers.rst import directives, Directive
from docutils.utils.error_reporting import ErrorString
#
# AutodocReporter is only good up to Sphinx 1.7
#
import sphinx
Use_SSI = sphinx.__version__[:3] >= '1.7'
if Use_SSI:
from sphinx.util.docutils import switch_source_input
else:
from sphinx.ext.autodoc import AutodocReporter
from sphinx.util.docutils import switch_source_input
__version__ = '1.0'
@ -179,16 +169,5 @@ class KernelCmd(Directive):
return node.children
def do_parse(self, content, node):
if Use_SSI:
with switch_source_input(self.state, content):
self.state.nested_parse(content, 0, node, match_titles=1)
else:
buf = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter
self.state.memo.title_styles = []
self.state.memo.section_level = 0
self.state.memo.reporter = AutodocReporter(content, self.state.memo.reporter)
try:
self.state.nested_parse(content, 0, node, match_titles=1)
finally:
self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = buf
with switch_source_input(self.state, content):
self.state.nested_parse(content, 0, node, match_titles=1)

View File

@ -42,17 +42,7 @@ from docutils import nodes, statemachine
from docutils.statemachine import ViewList
from docutils.parsers.rst import directives, Directive
from docutils.utils.error_reporting import ErrorString
#
# AutodocReporter is only good up to Sphinx 1.7
#
import sphinx
Use_SSI = sphinx.__version__[:3] >= '1.7'
if Use_SSI:
from sphinx.util.docutils import switch_source_input
else:
from sphinx.ext.autodoc import AutodocReporter
from sphinx.util.docutils import switch_source_input
__version__ = '1.0'
@ -154,16 +144,7 @@ class KernelFeat(Directive):
buf = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter
if Use_SSI:
with switch_source_input(self.state, content):
self.state.nested_parse(content, 0, node, match_titles=1)
else:
self.state.memo.title_styles = []
self.state.memo.section_level = 0
self.state.memo.reporter = AutodocReporter(content, self.state.memo.reporter)
try:
self.state.nested_parse(content, 0, node, match_titles=1)
finally:
self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = buf
with switch_source_input(self.state, content):
self.state.nested_parse(content, 0, node, match_titles=1)
return node.children

View File

@ -37,18 +37,8 @@ import glob
from docutils import nodes, statemachine
from docutils.statemachine import ViewList
from docutils.parsers.rst import directives, Directive
#
# AutodocReporter is only good up to Sphinx 1.7
#
import sphinx
Use_SSI = sphinx.__version__[:3] >= '1.7'
if Use_SSI:
from sphinx.util.docutils import switch_source_input
else:
from sphinx.ext.autodoc import AutodocReporter
from sphinx.util.docutils import switch_source_input
import kernellog
__version__ = '1.0'
@ -163,18 +153,8 @@ class KernelDocDirective(Directive):
return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))]
def do_parse(self, result, node):
if Use_SSI:
with switch_source_input(self.state, result):
self.state.nested_parse(result, 0, node, match_titles=1)
else:
save = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter
self.state.memo.reporter = AutodocReporter(result, self.state.memo.reporter)
self.state.memo.title_styles, self.state.memo.section_level = [], 0
try:
self.state.nested_parse(result, 0, node, match_titles=1)
finally:
self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = save
with switch_source_input(self.state, result):
self.state.nested_parse(result, 0, node, match_titles=1)
def setup(app):
app.add_config_value('kerneldoc_bin', None, 'env')

View File

@ -4,29 +4,19 @@
# only goes back to 1.6. So here's a wrapper layer to keep around for
# as long as we support 1.4.
#
# We don't support 1.4 anymore, but we'll keep the wrappers around until
# we change all the code to not use them anymore :)
#
import sphinx
from sphinx.util import logging
if sphinx.__version__[:3] >= '1.6':
UseLogging = True
from sphinx.util import logging
logger = logging.getLogger('kerneldoc')
else:
UseLogging = False
logger = logging.getLogger('kerneldoc')
def warn(app, message):
if UseLogging:
logger.warning(message)
else:
app.warn(message)
logger.warning(message)
def verbose(app, message):
if UseLogging:
logger.verbose(message)
else:
app.verbose(message)
logger.verbose(message)
def info(app, message):
if UseLogging:
logger.info(message)
else:
app.info(message)
logger.info(message)

View File

@ -49,26 +49,14 @@ import os
from os import path
import subprocess
from hashlib import sha1
import sys
from docutils import nodes
from docutils.statemachine import ViewList
from docutils.parsers.rst import directives
from docutils.parsers.rst.directives import images
import sphinx
from sphinx.util.nodes import clean_astext
from six import iteritems
import kernellog
PY3 = sys.version_info[0] == 3
if PY3:
_unicode = str
else:
_unicode = unicode
# Get Sphinx version
major, minor, patch = sphinx.version_info[:3]
if major == 1 and minor > 3:
@ -540,7 +528,7 @@ def add_kernel_figure_to_std_domain(app, doctree):
docname = app.env.docname
labels = std.data["labels"]
for name, explicit in iteritems(doctree.nametypes):
for name, explicit in doctree.nametypes.items():
if not explicit:
continue
labelid = doctree.nameids[name]

View File

@ -61,8 +61,6 @@ class MaintainersInclude(Include):
field_content = ""
for line in open(path):
if sys.version_info.major == 2:
line = unicode(line, 'utf-8')
# Have we reached the end of the preformatted Descriptions text?
if descriptions and line.startswith('Maintainers'):
descriptions = False

View File

@ -1,4 +1,3 @@
docutils
Sphinx==2.4.4
sphinx_rtd_theme
six

View File

@ -42,8 +42,6 @@ u"""
# imports
# ==============================================================================
import sys
from docutils import nodes
from docutils.parsers.rst import directives, roles
from docutils.parsers.rst.directives.tables import Table
@ -55,14 +53,6 @@ from docutils.utils import SystemMessagePropagation
__version__ = '1.0'
PY3 = sys.version_info[0] == 3
PY2 = sys.version_info[0] == 2
if PY3:
# pylint: disable=C0103, W0622
unicode = str
basestring = str
# ==============================================================================
def setup(app):
# ==============================================================================

View File

@ -75,7 +75,7 @@ NON-ATOMIC CONTEXT:
- Why not msleep for (1ms - 20ms)?
Explained originally here:
http://lkml.org/lkml/2007/8/3/250
https://lore.kernel.org/r/15327.1186166232@lwn.net
msleep(1~20) may not do what the caller intends, and
will often sleep longer (~20 ms actual sleep for any

View File

@ -611,21 +611,21 @@ Riferimenti e fonti
https://lwn.net/Articles/486306/
- Raccomandazioni da Andrew Morton circa il fatto che tutte le informazioni
su una nuova chiamata di sistema dovrebbero essere contenute nello stesso
filone di discussione di email: https://lkml.org/lkml/2014/7/24/641
filone di discussione di email: https://lore.kernel.org/r/20140724144747.3041b208832bbdf9fbce5d96@linux-foundation.org
- Raccomandazioni da Michael Kerrisk circa il fatto che le nuove chiamate di
sistema dovrebbero avere una pagina man: https://lkml.org/lkml/2014/6/13/309
sistema dovrebbero avere una pagina man: https://lore.kernel.org/r/CAKgNAkgMA39AfoSoA5Pe1r9N+ZzfYQNvNPvcRN7tOvRb8+v06Q@mail.gmail.com
- Consigli da Thomas Gleixner sul fatto che il collegamento all'architettura
x86 dovrebbe avvenire in un *commit* differente:
https://lkml.org/lkml/2014/11/19/254
https://lore.kernel.org/r/alpine.DEB.2.11.1411191249560.3909@nanos
- Consigli da Greg Kroah-Hartman circa la bontà d'avere una pagina man e un
programma di auto-verifica per le nuove chiamate di sistema:
https://lkml.org/lkml/2014/3/19/710
https://lore.kernel.org/r/20140320025530.GA25469@kroah.com
- Discussione di Michael Kerrisk sulle nuove chiamate di sistema contro
le estensioni :manpage:`prctl(2)`: https://lkml.org/lkml/2014/6/3/411
le estensioni :manpage:`prctl(2)`: https://lore.kernel.org/r/CAHO5Pa3F2MjfTtfNxa8LbnkeeU8=YJ+9tDqxZpw7Gz59E-4AUg@mail.gmail.com
- Consigli da Ingo Molnar che le chiamate di sistema con più argomenti
dovrebbero incapsularli in una struttura che includa un argomento
*size* per garantire l'estensibilità futura:
https://lkml.org/lkml/2015/7/30/117
https://lore.kernel.org/r/20150730083831.GA22182@gmail.com
- Un certo numero di casi strani emersi dall'uso (riuso) dei flag O_*:
- commit 75069f2b5bfb ("vfs: renumber FMODE_NONOTIFY and add to uniqueness
@ -635,9 +635,9 @@ Riferimenti e fonti
- commit bb458c644a59 ("Safer ABI for O_TMPFILE")
- Discussion from Matthew Wilcox about restrictions on 64-bit arguments:
https://lkml.org/lkml/2008/12/12/187
https://lore.kernel.org/r/20081212152929.GM26095@parisc-linux.org
- Raccomandazioni da Greg Kroah-Hartman sul fatto che i flag sconosciuti dovrebbero
essere controllati: https://lkml.org/lkml/2014/7/17/577
essere controllati: https://lore.kernel.org/r/20140717193330.GB4703@kroah.com
- Raccomandazioni da Linus Torvalds che le chiamate di sistema x32 dovrebbero
favorire la compatibilità con le versioni a 64-bit piuttosto che quelle a 32-bit:
https://lkml.org/lkml/2011/8/31/244
https://lore.kernel.org/r/CA+55aFxfmwfB7jbbrXxa=K7VBYPfAvmu3XOkGrLbB1UFjX1+Ew@mail.gmail.com

View File

@ -731,13 +731,13 @@ Greg Kroah-Hartman, "Come scocciare un manutentore di un sottosistema"
<http://www.kroah.com/log/linux/maintainer-06.html>
No!!!! Basta gigantesche bombe patch alle persone sulla lista linux-kernel@vger.kernel.org!
<https://lkml.org/lkml/2005/7/11/336>
<https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
Kernel Documentation/translations/it_IT/process/coding-style.rst:
:ref:`Documentation/translations/it_IT/process/coding-style.rst <it_codingstyle>`
E-mail di Linus Torvalds sul formato canonico di una patch:
<http://lkml.org/lkml/2005/4/7/183>
<https://lore.kernel.org/r/Pine.LNX.4.58.0504071023190.28951@ppc970.osdl.org>
Andi Kleen, "Su come sottomettere patch del kernel"
Alcune strategie su come sottomettere modifiche toste o controverse.

View File

@ -702,13 +702,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
<http://www.kroah.com/log/2006/01/11/>
NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
<https://lkml.org/lkml/2005/7/11/336>
<https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
Kernel Documentation/process/coding-style.rst:
<http://users.sosdg.org/~qiyong/lxr/source/Documentation/process/coding-style.rst>
Linus Torvalds's mail on the canonical patch format:
<http://lkml.org/lkml/2005/4/7/183>
<https://lore.kernel.org/r/Pine.LNX.4.58.0504071023190.28951@ppc970.osdl.org>
Andi Kleen, "On submitting kernel patches"
Some strategies to get difficult or controversial changes in.

View File

@ -345,7 +345,7 @@ https://bugzilla.kernel.org 는 리눅스 커널 개발자들이 커널의 버
https://bugzilla.kernel.org/page.cgi?id=faq.html
메인 커널 소스 디렉토리에 있는 :ref:`admin-guide/reporting-bugs.rst <reportingbugs>`
메인 커널 소스 디렉토리에 있는 'Documentation/admin-guide/reporting-issues.rst'
파일은 커널 버그라고 생각되는 것을 보고하는 방법에 관한 좋은 템플릿이며 문제를
추적하기 위해서 커널 개발자들이 필요로 하는 정보가 무엇들인지를 상세히 설명하고
있다.
@ -583,7 +583,7 @@ Pat이라는 이름을 가진 여자가 있을 수도 있는 것이다. 리눅
"The Perfect Patch"
http://www.ozlabs.org/~akpm/stuff/tpp.txt
https://www.ozlabs.org/~akpm/stuff/tpp.txt
이 모든 것을 하는 것은 매우 어려운 일이다. 완벽히 소화하는 데는 적어도 몇년이

View File

@ -10,3 +10,18 @@
:maxdepth: 1
howto
리눅스 커널 메모리 배리어
-------------------------
.. raw:: latex
\footnotesize
.. include:: ./memory-barriers.txt
:literal:
.. raw:: latex
\normalsize

View File

@ -95,7 +95,7 @@ Linux通过``/proc/stat``和``/proc/uptime``导出各种信息,用户空间工
参考
---
- http://lkml.org/lkml/2007/2/12/6
- https://lore.kernel.org/r/loom.20070212T063225-663@post.gmane.org
- Documentation/filesystems/proc.rst (1.8)

View File

@ -124,7 +124,7 @@ bootloader 必须传递一个系统内存的位置和最小值,以及根文件
bootloader 必须以 64bit 地址对齐的形式加载一个设备树映像(dtb)到系统
RAM 中并用启动数据初始化它。dtb 格式在文档
Documentation/devicetree/booting-without-of.rst 中。内核将会在
https://www.devicetree.org/specifications/ 中。内核将会在
dtb 物理地址处查找 dtb 魔数值0xd00dfeed以确定 dtb 是否已经代替
标签列表被传递进来。

View File

@ -0,0 +1,46 @@
.. include:: ../disclaimer-zh_CN.rst
:Original: :doc:`../../../iio/ep93xx_adc`
:Translator: Yanteng Si <siyanteng@loongson.cn>
.. _cn_iio_ep93xx_adc:
==================================
思睿逻辑 EP93xx 模拟数字转换器驱动
==================================
1. 概述
=======
该驱动同时适用于具有5通道模拟数字转换器的低端 (EP9301, EP9302) 设备和10通道
触摸屏/模拟数字转换器的高端设备(EP9307, EP9312, EP9315)。
2. 通道编号
===========
EP9301和EP9302数据表定义了通道0..4的编号方案。虽然EP9307, EP9312和EP9315多
了3个通道一共8个但是编号并没有定义。所以说最后三个通道是随机编号的。
如果ep93xx_adc是IIO设备0您将在以下位置找到条目
/sys/bus/iio/devices/iio:device0/:
+-----------------+---------------+
| sysfs 入口 | ball/pin 名称 |
+=================+===============+
| in_voltage0_raw | YM |
+-----------------+---------------+
| in_voltage1_raw | SXP |
+-----------------+---------------+
| in_voltage2_raw | SXM |
+-----------------+---------------+
| in_voltage3_raw | SYP |
+-----------------+---------------+
| in_voltage4_raw | SYM |
+-----------------+---------------+
| in_voltage5_raw | XP |
+-----------------+---------------+
| in_voltage6_raw | XM |
+-----------------+---------------+
| in_voltage7_raw | YP |
+-----------------+---------------+

View File

@ -0,0 +1,102 @@
.. include:: ../disclaimer-zh_CN.rst
:Original: :doc:`../../../iio/iio_configfs`
:Translator: Yanteng Si <siyanteng@loongson.cn>
.. _cn_iio_configfs:
=====================
工业 IIO configfs支持
=====================
1. 概述
=======
Configfs是一种内核对象的基于文件系统的管理系统IIO使用一些可以通过
configfs轻松配置的对象例如设备触发器
关于configfs是如何运行的请查阅Documentation/filesystems/configfs.rst
了解更多信息。
2. 用法
=======
为了使configfs支持IIO我们需要在编译时选中config的CONFIG_IIO_CONFIGFS
选项。
然后挂载configfs文件系统(通常在 /config 目录下)::
$ mkdir /config
$ mount -t configfs none /config
此时将创建所有默认IIO组并可以在/config/iio下对其进行访问。 下一章
将介绍可用的IIO配置对象。
3. 软件触发器
=============
IIO默认configfs组之一是“触发器”组。 挂载configfs后可以自动访问它并且可
以在/config/iio/triggers下找到。
IIO软件触发器为创建多种触发器类型提供了支持。 通常在include/linux/iio
/sw_trigger.h中的接口下将新的触发器类型实现为单独的内核模块
::
/*
* drivers/iio/trigger/iio-trig-sample.c
* 一种新触发器类型的内核模块实例
*/
#include <linux/iio/sw_trigger.h>
static struct iio_sw_trigger *iio_trig_sample_probe(const char *name)
{
/*
* 这将分配并注册一个IIO触发器以及其他触发器类型特性的初始化。
*/
}
static int iio_trig_sample_remove(struct iio_sw_trigger *swt)
{
/*
* 这会废弃iio_trig_sample_probe中的操作
*/
}
static const struct iio_sw_trigger_ops iio_trig_sample_ops = {
.probe = iio_trig_sample_probe,
.remove = iio_trig_sample_remove,
};
static struct iio_sw_trigger_type iio_trig_sample = {
.name = "trig-sample",
.owner = THIS_MODULE,
.ops = &iio_trig_sample_ops,
};
module_iio_sw_trigger_driver(iio_trig_sample);
每种触发器类型在/config/iio/triggers下都有其自己的目录。 加载iio-trig-sample
模块将创建“ trig-sample”触发器类型目录/config/iio/triggers/trig-sample.
我们支持以下中断源(触发器类型)
* hrtimer,使用高分辨率定时器作为中断源
3.1 Hrtimer触发器创建与销毁
---------------------------
加载iio-trig-hrtimer模块将注册hrtimer触发器类型从而允许用户在
/config/iio/triggers/hrtimer下创建hrtimer触发器。
例如::
$ mkdir /config/iio/triggers/hrtimer/instance1
$ rmdir /config/iio/triggers/hrtimer/instance1
每个触发器可以具有一个或多个独特的触发器类型的属性。
3.2 "hrtimer" 触发器类型属性
----------------------------
"hrtimer”触发器类型没有来自/config dir的任何可配置属性。

View File

@ -0,0 +1,20 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: ../disclaimer-zh_CN.rst
:Original: :doc:`../../../iio/index`
:Translator: Yanteng Si <siyanteng@loongson.cn>
.. _cn_iio_index:
========
工业 I/O
========
.. toctree::
:maxdepth: 1
iio_configfs
ep93xx_adc

View File

@ -0,0 +1,31 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: ../disclaimer-zh_CN.rst
:Original: :doc:`../../../mips/booting`
:Translator: Yanteng Si <siyanteng@loongson.cn>
.. _cn_booting:
BMIPS设备树引导
------------------------
一些bootloaders只支持在内核镜像开始地址处的单一入口点。而其它
bootloaders将跳转到ELF的开始地址处。两种方案都支持的因为
CONFIG_BOOT_RAW=y and CONFIG_NO_EXCEPT_FILL=y, 所以第一条指令
会立即跳转到kernel_entry()入口处执行。
与arch/arm情况(b)类似dt感知的引导加载程序需要设置以下寄存器:
a0 : 0
a1 : 0xffffffff
a2 : RAM中指向设备树块的物理指针(在chapterII中定义)。
设备树可以位于前512MB物理地址空间(0x00000000 -
0x1fffffff)的任何位置以64位边界对齐。
传统bootloaders不会使用这样的约定并且它们不传入DT块。
在这种情况下Linux将通过选中CONFIG_DT_*查找DTB。
以上约定只在32位系统中定义因为目前没有任何64位的BMIPS实现。

View File

@ -0,0 +1,10 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: ../disclaimer-zh_CN.rst
:Original: :doc:`../../../mips/features`
:Translator: Yanteng Si <siyanteng@loongson.cn>
.. _cn_features:
.. kernel-feat:: $srctree/Documentation/features mips

View File

@ -0,0 +1,26 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: ../disclaimer-zh_CN.rst
:Original: :doc:`../../../mips/index`
:Translator: Yanteng Si <siyanteng@loongson.cn>
===========================
MIPS特性文档
===========================
.. toctree::
:maxdepth: 2
:numbered:
booting
ingenic-tcu
features
.. only:: subproject and html
Indices
=======
* :ref:`genindex`

View File

@ -0,0 +1,69 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: ../disclaimer-zh_CN.rst
:Original: :doc:`../../../mips/ingenic-tcu`
:Translator: Yanteng Si <siyanteng@loongson.cn>
.. _cn_ingenic-tcu:
===============================================
君正 JZ47xx SoC定时器/计数器硬件单元
===============================================
君正 JZ47xx SoC中的定时器/计数器单元(TCU)是一个多功能硬件块。它有多达
8个通道可以用作计数器计时器或脉冲宽度调制器。
- JZ4725B, JZ4750, JZ4755 只有6个TCU通道。其它SoC都有8个通道。
- JZ4725B引入了一个独立的通道称为操作系统计时器(OST)。这是一个32位可
编程定时器。在JZ4760B及以上型号上它是64位的。
- 每个TCU通道都有自己的时钟源可以通过 TCSR 寄存器设置通道的父级时钟
pclk、ext、rtc、开关以及分频。
- 看门狗和OST硬件模块在它们的寄存器空间中也有相同形式的TCSR寄存器。
- 用于关闭/开启的 TCU 寄存器也可以关闭/开启看门狗和 OST 时钟。
- 每个TCU通道在两种模式的其中一种模式下运行
- 模式 TCU1通道无法在睡眠模式下运行但更易于操作。
- 模式 TCU2通道可以在睡眠模式下运行但操作比 TCU1 通道复杂一些。
- 每个 TCU 通道的模式取决于使用的SoC
- 在最老的SoC（JZ4740及之前），八个通道都运行在TCU1模式。
- 在 JZ4725B通道5运行在TCU2,其它通道则运行在TCU1。
- 在最新的SoCJZ4750及之后通道1-2运行在TCU2其它通道则运行
在TCU1。
- 每个通道都可以生成中断。有些通道共享一条中断线而有些没有其在SoC型
号之间的变更:
- 在很老的SoC（JZ4740及更低），通道0和通道1有它们自己的中断线；通
道2-7共享最后一条中断线。
- 在 JZ4725B通道0有它自己的中断线通道1-5共享一条中断线OST
使用最后一条中断线。
- 在比较新的SoC（JZ4750及以后），通道5有它自己的中断线；通
道0-4和（如果是8通道）6-7全部共享一条中断线；OST使用最后一条中
断线。
实现
====
TCU硬件的功能分布在多个驱动程序
============== ===================================
时钟 drivers/clk/ingenic/tcu.c
中断 drivers/irqchip/irq-ingenic-tcu.c
定时器 drivers/clocksource/ingenic-timer.c
OST drivers/clocksource/ingenic-ost.c
脉冲宽度调制器 drivers/pwm/pwm-jz4740.c
看门狗 drivers/watchdog/jz4740_wdt.c
============== ===================================
因为可以从相同的寄存器控制属于不同驱动程序和框架的TCU的各种功能所以
所有这些驱动程序都通过相同的控制总线通用接口访问它们的寄存器。
有关TCU驱动程序的设备树绑定的更多信息请参阅:
Documentation/devicetree/bindings/timer/ingenic,tcu.yaml.

View File

@ -668,13 +668,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
<http://www.kroah.com/log/linux/maintainer-06.html>
NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
<https://lkml.org/lkml/2005/7/11/336>
<https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
Kernel Documentation/process/coding-style.rst:
:ref:`Documentation/translations/zh_CN/process/coding-style.rst <cn_codingstyle>`
Linus Torvalds's mail on the canonical patch format:
<http://lkml.org/lkml/2005/4/7/183>
<https://lore.kernel.org/r/Pine.LNX.4.58.0504071023190.28951@ppc970.osdl.org>
Andi Kleen, "On submitting kernel patches"
Some strategies to get difficult or controversial changes in.

View File

@ -32,7 +32,7 @@ There are helpers to lock/unlock a table and other accessor functions:
Split page table lock for PTE tables is enabled compile-time if
CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less or equal to NR_CPUS.
If split lock is disabled, all tables guaded by mm->page_table_lock.
If split lock is disabled, all tables are guarded by mm->page_table_lock.
Split page table lock for PMD tables is enabled, if it's enabled for PTE
tables and the architecture supports it (see below).

View File

@ -851,7 +851,7 @@ Protocol: 2.09+
struct setup_data {
__u64 next = 0 or <addr_of_next_setup_data_struct>;
__u32 type = SETUP_INDIRECT;
__u32 len = sizeof(setup_data);
__u32 len = sizeof(setup_indirect);
__u8 data[sizeof(setup_indirect)] = struct setup_indirect {
__u32 type = SETUP_INDIRECT | SETUP_E820_EXT;
__u32 reserved = 0;

View File

@ -11182,6 +11182,15 @@ S: Maintained
F: Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt
F: drivers/i2c/busses/i2c-mt65xx.c
MEDIATEK IOMMU DRIVER
M: Yong Wu <yong.wu@mediatek.com>
L: iommu@lists.linux-foundation.org
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/iommu/mediatek*
F: drivers/iommu/mtk_iommu*
F: include/dt-bindings/memory/mt*-port.h
MEDIATEK JPEG DRIVER
M: Rick Chang <rick.chang@mediatek.com>
M: Bin Liu <bin.liu@mediatek.com>

View File

@ -124,8 +124,8 @@ config HAVE_64BIT_ALIGNED_ACCESS
accesses are required to be 64 bit aligned in this way even
though it is not a 64 bit architecture.
See Documentation/unaligned-memory-access.txt for more
information on the topic of unaligned memory accesses.
See Documentation/core-api/unaligned-memory-access.rst for
more information on the topic of unaligned memory accesses.
config HAVE_EFFICIENT_UNALIGNED_ACCESS
bool

View File

@ -41,7 +41,7 @@
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
@ -567,80 +567,29 @@ void blk_mq_end_request(struct request *rq, blk_status_t error)
}
EXPORT_SYMBOL(blk_mq_end_request);
/*
* Softirq action handler - move entries to local list and loop over them
* while passing them to the queue registered handler.
*/
static __latent_entropy void blk_done_softirq(struct softirq_action *h)
static void blk_complete_reqs(struct llist_head *list)
{
struct list_head *cpu_list, local_list;
struct llist_node *entry = llist_reverse_order(llist_del_all(list));
struct request *rq, *next;
local_irq_disable();
cpu_list = this_cpu_ptr(&blk_cpu_done);
list_replace_init(cpu_list, &local_list);
local_irq_enable();
while (!list_empty(&local_list)) {
struct request *rq;
rq = list_entry(local_list.next, struct request, ipi_list);
list_del_init(&rq->ipi_list);
llist_for_each_entry_safe(rq, next, entry, ipi_list)
rq->q->mq_ops->complete(rq);
}
}
static void blk_mq_trigger_softirq(struct request *rq)
static __latent_entropy void blk_done_softirq(struct softirq_action *h)
{
struct list_head *list;
unsigned long flags;
local_irq_save(flags);
list = this_cpu_ptr(&blk_cpu_done);
list_add_tail(&rq->ipi_list, list);
/*
* If the list only contains our just added request, signal a raise of
* the softirq. If there are already entries there, someone already
* raised the irq but it hasn't run yet.
*/
if (list->next == &rq->ipi_list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
local_irq_restore(flags);
blk_complete_reqs(this_cpu_ptr(&blk_cpu_done));
}
static int blk_softirq_cpu_dead(unsigned int cpu)
{
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_done, cpu),
this_cpu_ptr(&blk_cpu_done));
raise_softirq_irqoff(BLOCK_SOFTIRQ);
local_irq_enable();
blk_complete_reqs(&per_cpu(blk_cpu_done, cpu));
return 0;
}
static void __blk_mq_complete_request_remote(void *data)
{
struct request *rq = data;
/*
* For most of single queue controllers, there is only one irq vector
* for handling I/O completion, and the only irq's affinity is set
* to all possible CPUs. On most of ARCHs, this affinity means the irq
* is handled on one specific CPU.
*
* So complete I/O requests in softirq context in case of single queue
* devices to avoid degrading I/O performance due to irqsoff latency.
*/
if (rq->q->nr_hw_queues == 1)
blk_mq_trigger_softirq(rq);
else
rq->q->mq_ops->complete(rq);
__raise_softirq_irqoff(BLOCK_SOFTIRQ);
}
static inline bool blk_mq_complete_need_ipi(struct request *rq)
@ -669,6 +618,30 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
return cpu_online(rq->mq_ctx->cpu);
}
/*
 * Queue @rq on the blk_cpu_done list of the CPU recorded in rq->mq_ctx and,
 * when llist_add() returns true, send that CPU an asynchronous single-CPU
 * function call running __blk_mq_complete_request_remote().
 *
 * When llist_add() returns false nothing further is sent from here.
 */
static void blk_mq_complete_send_ipi(struct request *rq)
{
	unsigned int target_cpu = rq->mq_ctx->cpu;

	if (!llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, target_cpu)))
		return;

	INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
	smp_call_function_single_async(target_cpu, &rq->csd);
}
static void blk_mq_raise_softirq(struct request *rq)
{
struct llist_head *list;
preempt_disable();
list = this_cpu_ptr(&blk_cpu_done);
if (llist_add(&rq->ipi_list, list))
raise_softirq(BLOCK_SOFTIRQ);
preempt_enable();
}
bool blk_mq_complete_request_remote(struct request *rq)
{
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
@ -681,15 +654,15 @@ bool blk_mq_complete_request_remote(struct request *rq)
return false;
if (blk_mq_complete_need_ipi(rq)) {
INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
} else {
if (rq->q->nr_hw_queues > 1)
return false;
blk_mq_trigger_softirq(rq);
blk_mq_complete_send_ipi(rq);
return true;
}
return true;
if (rq->q->nr_hw_queues == 1) {
blk_mq_raise_softirq(rq);
return true;
}
return false;
}
EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
@ -3957,7 +3930,7 @@ static int __init blk_mq_init(void)
int i;
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
init_llist_head(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,

View File

@ -41,6 +41,7 @@ config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
depends on MMU
select DMA_SHARED_BUFFER
default y
config INFINIBAND_ON_DEMAND_PAGING

View File

@ -40,5 +40,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_srq.o \
uverbs_std_types_wq.o \
uverbs_std_types_qp.o
ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o

View File

@ -669,11 +669,10 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
* rdma_find_gid_by_port - Returns the GID entry attributes when it finds
* a valid GID entry for given search parameters. It searches for the specified
* GID value in the local software cache.
* @device: The device to query.
* @ib_dev: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
* @port_num: The port number of the device where the GID value should be
* searched.
* @port: The port number of the device where the GID value should be searched.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
*
* Returns sgid attributes if the GID is found with valid reference or
@ -719,7 +718,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
/**
* rdma_find_gid_by_filter - Returns the GID table attribute where a
* specified GID value occurs
* @device: The device to query.
* @ib_dev: The device to query.
* @gid: The GID value to search for.
* @port: The port number of the device where the GID value could be
* searched.
@ -728,6 +727,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
* @context: Private data to pass into the call-back.
*
* rdma_find_gid_by_filter() searches for the specified GID value
* of which the filter function returns true in the port's GID table.
@ -1253,7 +1253,6 @@ EXPORT_SYMBOL(rdma_get_gid_attr);
* @entries: Entries where GID entries are returned.
* @max_entries: Maximum number of entries that can be returned.
* Entries array must be allocated to hold max_entries number of entries.
* @num_entries: Updated to the number of entries that were successfully read.
*
* Returns number of entries on success or appropriate error code.
*/

View File

@ -4333,7 +4333,7 @@ static int cm_add_one(struct ib_device *ib_device)
unsigned long flags;
int ret;
int count = 0;
u8 i;
unsigned int i;
cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
GFP_KERNEL);
@ -4345,7 +4345,7 @@ static int cm_add_one(struct ib_device *ib_device)
cm_dev->going_down = 0;
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
for (i = 1; i <= ib_device->phys_port_cnt; i++) {
rdma_for_each_port (ib_device, i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
@ -4431,7 +4431,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
.clr_port_cap_mask = IB_PORT_CM_SUP
};
unsigned long flags;
int i;
unsigned int i;
write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
@ -4441,7 +4441,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
cm_dev->going_down = 1;
spin_unlock_irq(&cm.lock);
for (i = 1; i <= ib_device->phys_port_cnt; i++) {
rdma_for_each_port (ib_device, i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;

View File

@ -352,7 +352,13 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
struct cma_multicast {
struct rdma_id_private *id_priv;
struct ib_sa_multicast *sa_mc;
union {
struct ib_sa_multicast *sa_mc;
struct {
struct work_struct work;
struct rdma_cm_event event;
} iboe_join;
};
struct list_head list;
void *context;
struct sockaddr_storage addr;
@ -1823,6 +1829,8 @@ static void destroy_mc(struct rdma_id_private *id_priv,
cma_igmp_send(ndev, &mgid, false);
dev_put(ndev);
}
cancel_work_sync(&mc->iboe_join.work);
}
kfree(mc);
}
@ -2683,6 +2691,28 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv,
return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
/*
 * Work handler for the iboe_join work item embedded in struct cma_multicast.
 *
 * Under id_priv->handler_mutex, passes the event stored in
 * mc->iboe_join.event to cma_cm_event_handler(), unless the id is in the
 * RDMA_CM_DESTROYING or RDMA_CM_DEVICE_REMOVAL state. A non-zero handler
 * return only triggers WARN_ON().
 *
 * After the mutex is dropped, the AH attribute of a
 * RDMA_CM_EVENT_MULTICAST_JOIN event is destroyed, whether or not the
 * event was delivered.
 */
static void cma_iboe_join_work_handler(struct work_struct *work)
{
	struct cma_multicast *mc =
		container_of(work, struct cma_multicast, iboe_join.work);
	struct rdma_id_private *id_priv = mc->id_priv;
	struct rdma_cm_event *event = &mc->iboe_join.event;

	mutex_lock(&id_priv->handler_mutex);
	if (READ_ONCE(id_priv->state) != RDMA_CM_DESTROYING &&
	    READ_ONCE(id_priv->state) != RDMA_CM_DEVICE_REMOVAL) {
		int ret = cma_cm_event_handler(id_priv, event);

		WARN_ON(ret);
	}
	mutex_unlock(&id_priv->handler_mutex);

	if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
		rdma_destroy_ah_attr(&event->param.ud.ah_attr);
}
static void cma_work_handler(struct work_struct *_work)
{
struct cma_work *work = container_of(_work, struct cma_work, work);
@ -4478,10 +4508,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
cma_make_mc_event(status, id_priv, multicast, &event, mc);
ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
destroy_id_handler_unlock(id_priv);
return 0;
}
WARN_ON(ret);
out:
mutex_unlock(&id_priv->handler_mutex);
@ -4542,17 +4569,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
(!ib_sa_sendonly_fullmem_support(&sa_client,
id_priv->id.device,
id_priv->id.port_num))) {
dev_warn(
&id_priv->id.device->dev,
"RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
id_priv->id.port_num);
return -EOPNOTSUPP;
}
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
@ -4604,7 +4620,6 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct cma_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
@ -4618,10 +4633,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (cma_zero_addr(addr))
return -EINVAL;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
@ -4632,10 +4643,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!ndev) {
err = -ENODEV;
goto err_free;
}
if (!ndev)
return -ENODEV;
ib.rec.rate = iboe_get_rate(ndev);
ib.rec.hop_limit = 1;
ib.rec.mtu = iboe_get_mtu(ndev->mtu);
@ -4653,24 +4663,15 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -ENOTSUPP;
}
dev_put(ndev);
if (err || !ib.rec.mtu) {
if (!err)
err = -EINVAL;
goto err_free;
}
if (err || !ib.rec.mtu)
return err ?: -EINVAL;
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&ib.rec.port_gid);
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
cma_make_mc_event(0, id_priv, &ib, &work->event, mc);
/* Balances with cma_id_put() in cma_work_handler */
cma_id_get(id_priv);
queue_work(cma_wq, &work->work);
INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc);
queue_work(cma_wq, &mc->iboe_join.work);
return 0;
err_free:
kfree(work);
return err;
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,

View File

@ -204,7 +204,6 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
unsigned int i;
unsigned int ports_num;
struct cma_dev_port_group *ports;
int err;
ibdev = cma_get_ib_dev(cma_dev);
@ -215,10 +214,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
GFP_KERNEL);
if (!ports) {
err = -ENOMEM;
goto free;
}
if (!ports)
return -ENOMEM;
for (i = 0; i < ports_num; i++) {
char port_str[10];
@ -234,12 +231,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
}
cma_dev_group->ports = ports;
return 0;
free:
kfree(ports);
cma_dev_group->ports = NULL;
return err;
}
static void release_cma_dev(struct config_item *item)

View File

@ -10,30 +10,35 @@
#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
static int __counter_set_mode(struct rdma_counter_mode *curr,
static int __counter_set_mode(struct rdma_port_counter *port_counter,
enum rdma_nl_counter_mode new_mode,
enum rdma_nl_counter_mask new_mask)
{
if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
(curr->mode != RDMA_COUNTER_MODE_NONE)))
return -EINVAL;
if (new_mode == RDMA_COUNTER_MODE_AUTO && port_counter->num_counters)
if (new_mask & ~ALL_AUTO_MODE_MASKS ||
port_counter->mode.mode != RDMA_COUNTER_MODE_NONE)
return -EINVAL;
curr->mode = new_mode;
curr->mask = new_mask;
port_counter->mode.mode = new_mode;
port_counter->mode.mask = new_mask;
return 0;
}
/**
/*
* rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
*
* When @on is true, the @mask must be set; When @on is false, it goes
* into manual mode if there's any counter, so that the user is able to
* manually access them.
* @dev: Device to operate
* @port: Port to use
* @mask: Mask to configure
* @extack: Message to the user
*
* Return 0 on success.
*/
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
bool on, enum rdma_nl_counter_mask mask)
enum rdma_nl_counter_mask mask,
struct netlink_ext_ack *extack)
{
enum rdma_nl_counter_mode mode = RDMA_COUNTER_MODE_AUTO;
struct rdma_port_counter *port_counter;
int ret;
@ -42,23 +47,23 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
return -EOPNOTSUPP;
mutex_lock(&port_counter->lock);
if (on) {
ret = __counter_set_mode(&port_counter->mode,
RDMA_COUNTER_MODE_AUTO, mask);
} else {
if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
ret = -EINVAL;
goto out;
}
if (port_counter->num_counters)
ret = __counter_set_mode(&port_counter->mode,
RDMA_COUNTER_MODE_MANUAL, 0);
else
ret = __counter_set_mode(&port_counter->mode,
RDMA_COUNTER_MODE_NONE, 0);
if (mask) {
ret = __counter_set_mode(port_counter, mode, mask);
if (ret)
NL_SET_ERR_MSG(
extack,
"Turning on auto mode is not allowed when there is bound QP");
goto out;
}
if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
ret = -EINVAL;
goto out;
}
mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
RDMA_COUNTER_MODE_NONE;
ret = __counter_set_mode(port_counter, mode, 0);
out:
mutex_unlock(&port_counter->lock);
return ret;
@ -122,8 +127,8 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port,
mutex_lock(&port_counter->lock);
switch (mode) {
case RDMA_COUNTER_MODE_MANUAL:
ret = __counter_set_mode(&port_counter->mode,
RDMA_COUNTER_MODE_MANUAL, 0);
ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
0);
if (ret) {
mutex_unlock(&port_counter->lock);
goto err_mode;
@ -170,8 +175,7 @@ static void rdma_counter_free(struct rdma_counter *counter)
port_counter->num_counters--;
if (!port_counter->num_counters &&
(port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
__counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
0);
__counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);
mutex_unlock(&port_counter->lock);
@ -227,7 +231,7 @@ static void counter_history_stat_update(struct rdma_counter *counter)
port_counter->hstats->value[i] += counter->stats->value[i];
}
/**
/*
* rdma_get_counter_auto_mode - Find the counter that @qp should be bound
* with in auto mode
*
@ -274,7 +278,7 @@ static void counter_release(struct kref *kref)
rdma_counter_free(counter);
}
/**
/*
* rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
* the auto-mode rule
*/
@ -311,7 +315,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
return 0;
}
/**
/*
* rdma_counter_unbind_qp - Unbind a qp from a counter
* @force:
* true - Decrease the counter ref-count anyway (e.g., qp destroy)
@ -380,7 +384,7 @@ static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
return sum;
}
/**
/*
* rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
* specific port, including the running ones and history data
*/
@ -436,7 +440,7 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
return counter;
}
/**
/*
* rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
*/
int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
@ -485,7 +489,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
return ret;
}
/**
/*
* rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
* The id of new counter is returned in @counter_id
*/
@ -533,7 +537,7 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
return ret;
}
/**
/*
* rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
*/
int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,

View File

@ -848,6 +848,20 @@ static int setup_port_data(struct ib_device *device)
return 0;
}
/**
* ib_port_immutable_read() - Read rdma port's immutable data
* @dev: IB device
* @port: port number whose immutable data to read. It starts with index 1 and
* valid upto including rdma_end_port().
*/
const struct ib_port_immutable*
ib_port_immutable_read(struct ib_device *dev, unsigned int port)
{
WARN_ON(!rdma_is_port_valid(dev, port));
return &dev->port_data[port].immutable;
}
EXPORT_SYMBOL(ib_port_immutable_read);
void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
if (dev->ops.get_dev_fw_str)
@ -1887,9 +1901,9 @@ static int __ib_get_client_nl_info(struct ib_device *ibdev,
/**
* ib_get_client_nl_info - Fetch the nl_info from a client
* @device - IB device
* @client_name - Name of the client
* @res - Result of the query
* @ibdev: IB device
* @client_name: Name of the client
* @res: Result of the query
*/
int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
struct ib_client_nl_info *res)
@ -2317,7 +2331,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
up_read(&devices_rwsem);
}
/**
/*
* ib_enum_all_devs - enumerate all ib_devices
* @cb: Callback to call for each found ib_device
*
@ -2681,6 +2695,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, read_counters);
SET_DEVICE_OP(dev_ops, reg_dm_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
SET_DEVICE_OP(dev_ops, req_ncomp_notif);
SET_DEVICE_OP(dev_ops, req_notify_cq);
SET_DEVICE_OP(dev_ops, rereg_user_mr);

View File

@ -392,7 +392,7 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
/**
* iwpm_register_pid_cb - Process the port mapper response to
* iwpm_register_pid query
* @skb:
* @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* If successful, the function receives the userspace port mapper pid
@ -468,7 +468,7 @@ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
/**
* iwpm_add_mapping_cb - Process the port mapper response to
* iwpm_add_mapping request
* @skb:
* @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
@ -545,7 +545,7 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] =
/**
* iwpm_add_and_query_mapping_cb - Process the port mapper response to
* iwpm_add_and_query_mapping request
* @skb:
* @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
@ -627,7 +627,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
/**
* iwpm_remote_info_cb - Process remote connecting peer address info, which
* the port mapper has received from the connecting peer
* @skb:
* @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Stores the IPv4/IPv6 address info in a hash table
@ -706,7 +706,7 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
/**
* iwpm_mapping_info_cb - Process a notification that the userspace
* port mapper daemon is started
* @skb:
* @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send all the local mapping
@ -766,7 +766,7 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
/**
* iwpm_ack_mapping_info_cb - Process the port mapper ack for
* the provided local mapping info records
* @skb:
* @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
@ -796,7 +796,7 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
/**
* iwpm_mapping_error_cb - Process port mapper notification for error
*
* @skb:
* @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
@ -841,7 +841,7 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
/**
* iwpm_hello_cb - Process a hello message from iwpmd
*
* @skb:
* @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send the kernel's abi_version

View File

@ -127,8 +127,8 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
/**
* iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
* info in a hash table
* @local_addr: Local ip/tcp address
* @mapped_addr: Mapped local ip/tcp address
* @local_sockaddr: Local ip/tcp address
* @mapped_sockaddr: Mapped local ip/tcp address
* @nl_client: The index of the netlink client
* @map_flags: IWPM mapping flags
*/
@ -174,7 +174,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
/**
* iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
* info from the hash table
* @local_addr: Local ip/tcp address
* @local_sockaddr: Local ip/tcp address
* @mapped_local_addr: Mapped local ip/tcp address
*
* Returns err code if mapping info is not found in the hash table,

View File

@ -721,6 +721,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
* member record and gid of the device.
* @device: RDMA device
* @port_num: Port of the rdma device to consider
* @rec: Multicast member record to use
* @ndev: Optional netdevice, applicable only for RoCE
* @gid_type: GID type to consider
* @ah_attr: AH attribute to fillup on successful completion

View File

@ -1768,9 +1768,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
mask = nla_get_u32(
tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
ret = rdma_counter_set_auto_mode(device, port,
mask ? true : false, mask);
ret = rdma_counter_set_auto_mode(device, port, mask, extack);
if (ret)
goto err_msg;
} else {

View File

@ -201,8 +201,8 @@ EXPORT_SYMBOL(rdma_restrack_parent_name);
/**
* rdma_restrack_new() - Initializes new restrack entry to allow _put() interface
* to release memory in fully automatic way.
* @res - Entry to initialize
* @type - REstrack type
* @res: Entry to initialize
* @type: REstrack type
*/
void rdma_restrack_new(struct rdma_restrack_entry *res,
enum rdma_restrack_type type)

View File

@ -505,7 +505,7 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
*
* @device: the rdma device
* @ib_dev: the rdma device
*/
void rdma_roce_rescan_device(struct ib_device *ib_dev)
{

View File

@ -410,7 +410,7 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
ctx->type = RDMA_RW_SIG_MR;
ctx->nr_ops = 1;
ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);
ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
goto out_unmap_prot_sg;

View File

@ -1434,7 +1434,7 @@ enum opa_pr_supported {
PR_IB_SUPPORTED
};
/**
/*
* opa_pr_query_possible - Check if current PR query can be an OPA query.
*
* Returns PR_NOT_SUPPORTED if a path record query is not
@ -1951,30 +1951,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
struct ib_device *device,
u8 port_num)
{
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
bool ret = false;
unsigned long flags;
if (!sa_dev)
return ret;
port = &sa_dev->port[port_num - sa_dev->start_port];
spin_lock_irqsave(&port->classport_lock, flags);
if ((port->classport_info.valid) &&
(port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
& IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
spin_unlock_irqrestore(&port->classport_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
struct ib_classport_info_context {
struct completion done;
struct ib_sa_query *sa_query;

View File

@ -2,6 +2,7 @@
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2020 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -278,6 +279,8 @@ void ib_umem_release(struct ib_umem *umem)
{
if (!umem)
return;
if (umem->is_dmabuf)
return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
if (umem->is_odp)
return ib_umem_odp_release(to_ib_umem_odp(umem));

Some files were not shown because too many files have changed in this diff Show More