diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 9bccf16736f7..3b0876c77355 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt @@ -683,7 +683,7 @@ Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni" ,month="October" ,year="2001" ,note="Available: -\url{http://lkml.org/lkml/2001/10/13/105} +\url{https://lore.kernel.org/r/Pine.LNX.4.33.0110131015410.8707-100000@penguin.transmeta.com} [Viewed August 21, 2004]" ,annotation={ } @@ -826,7 +826,7 @@ Symposium on Distributed Computing} ,month="October" ,year="2002" ,note="Available: -\url{https://lkml.org/lkml/2002/10/24/262} +\url{https://lore.kernel.org/r/3DB86B05.447E7410@us.ibm.com} [Viewed February 15, 2014]" ,annotation={ Mingming Cao's patch to introduce RCU to SysV IPC. @@ -839,7 +839,7 @@ Symposium on Distributed Computing} ,month="March" ,year="2003" ,note="Available: -\url{http://lkml.org/lkml/2003/3/9/205} +\url{https://lore.kernel.org/r/Pine.LNX.4.44.0303091831560.2129-100000@home.transmeta.com} [Viewed March 13, 2006]" ,annotation={ Linus suggests replacing brlock with RCU and/or seqlocks: @@ -1036,15 +1036,15 @@ Add per-cpu batch counter" ,annotation={ RCU runs reasonably on a 512-CPU SGI using Manfred Spraul's patches, which may be found at: - https://lkml.org/lkml/2004/5/20/49 (split vars into cachelines) - https://lkml.org/lkml/2004/5/22/114 (cpu_quiet() patch) - https://lkml.org/lkml/2004/5/25/24 (0/5) - https://lkml.org/lkml/2004/5/25/23 (1/5) - https://lkml.org/lkml/2004/5/25/265 (works for Jack) - https://lkml.org/lkml/2004/5/25/20 (2/5) - https://lkml.org/lkml/2004/5/25/22 (3/5) - https://lkml.org/lkml/2004/5/25/19 (4/5) - https://lkml.org/lkml/2004/5/25/21 (5/5) + https://lore.kernel.org/r/40AC9823.6020709@colorfullife.com (split vars into cachelines) + https://lore.kernel.org/r/Pine.LNX.4.44.0405222141260.11106-100000@dbl.q-ag.de (cpu_quiet() patch) + https://lore.kernel.org/r/200405250535.i4P5ZJo8017583@dbl.q-ag.de (0/5) + 
https://lore.kernel.org/r/200405250535.i4P5ZKAQ017591@dbl.q-ag.de (1/5) + https://lore.kernel.org/r/20040525203215.GB5127@sgi.com (works for Jack) + https://lore.kernel.org/r/200405250535.i4P5ZLiR017599@dbl.q-ag.de (2/5) + https://lore.kernel.org/r/200405250535.i4P5ZMFt017607@dbl.q-ag.de (3/5) + https://lore.kernel.org/r/200405250535.i4P5ZN6g017615@dbl.q-ag.de (4/5) + https://lore.kernel.org/r/200405250535.i4P5ZO7I017623@dbl.q-ag.de (5/5) } } @@ -1106,7 +1106,7 @@ Oregon Health and Sciences University" ,month="August" ,year="2004" ,note="Available: -\url{http://lkml.org/lkml/2004/8/6/237} +\url{https://lore.kernel.org/r/20040807192424.GF3936@in.ibm.com} [Viewed June 8, 2010]" ,annotation={ Introduce rcu_dereference(). @@ -1119,7 +1119,7 @@ Oregon Health and Sciences University" ,month="August" ,year="2004" ,note="Available: -\url{http://lkml.org/lkml/2004/8/30/87} +\url{https://lore.kernel.org/r/1093873222.984.12.camel@new.localdomain} [Viewed February 17, 2005]" ,annotation={ Uses active code in rcu_read_lock() and rcu_read_unlock() to @@ -1186,7 +1186,7 @@ Oregon Health and Sciences University" ,month="October" ,year="2004" ,note="Available: -\url{http://lkml.org/lkml/2004/10/23/241} +\url{https://lore.kernel.org/r/20041023202723.GA1930@us.ibm.com} [Viewed June 8, 2010]" ,annotation={ Introduce rcu_assign_pointer(). @@ -1203,7 +1203,7 @@ Oregon Health and Sciences University" ,annotation={ James Morris posts Kaigai Kohei's patch to LKML. 
[Viewed December 10, 2004] - Kaigai's patch is at https://lkml.org/lkml/2004/9/27/52 + Kaigai's patch is at https://lore.kernel.org/r/200409271057.i8RAvcA1007873@mailsv.bs1.fc.nec.co.jp } } @@ -1241,7 +1241,7 @@ Oregon Health and Sciences University" ,year="2005" ,day="17" ,note="Available: -\url{http://lkml.org/lkml/2005/3/17/199} +\url{https://lore.kernel.org/r/20050318002026.GA2693@us.ibm.com} [Viewed September 5, 2005]" ,annotation={ First posting showing how RCU can be safely adapted for @@ -1256,7 +1256,7 @@ Oregon Health and Sciences University" ,year="2005" ,day="18" ,note="Available: -\url{http://lkml.org/lkml/2005/3/18/122} +\url{https://lore.kernel.org/r/Pine.OSF.4.05.10503181336310.2466-100000@da410.phys.au.dk} [Viewed March 30, 2006]" ,annotation={ Esben Neilsen suggests read-side suppression of grace-period @@ -1302,7 +1302,7 @@ Data Structures" ,month="May" ,year="2005" ,note="Available: -\url{http://lkml.org/lkml/2005/5/9/185} +\url{https://lore.kernel.org/r/20050510012444.GA3011@us.ibm.com} [Viewed May 13, 2005]" ,annotation={ First publication of working lock-based deferred free patches @@ -1385,7 +1385,7 @@ Data Structures" ,day="1" ,year="2005" ,note="Available: -\url{http://lkml.org/lkml/2005/8/1/155} +\url{https://lore.kernel.org/r/20050801171137.GA1754@us.ibm.com} [Viewed March 14, 2006]" ,annotation={ First operating counter-based realtime RCU patch posted to LKML. @@ -1399,7 +1399,7 @@ Data Structures" ,day="8" ,year="2005" ,note="Available: -\url{http://lkml.org/lkml/2005/8/8/108} +\url{https://lore.kernel.org/r/20050808144216.GA1307@us.ibm.com} [Viewed March 14, 2006]" ,annotation={ First operating counter-based realtime RCU patch posted to LKML, @@ -1415,7 +1415,7 @@ Data Structures" ,day="1" ,year="2005" ,note="Available: -\url{http://lkml.org/lkml/2005/10/1/70} +\url{https://lore.kernel.org/r/20051001182056.GA1613@us.ibm.com} [Viewed March 14, 2006]" ,annotation={ First rcutorture patch. 
@@ -1429,7 +1429,7 @@ Data Structures" ,day="6" ,year="2006" ,note="Available: -\url{https://lkml.org/lkml/2006/1/7/22} +\url{https://lore.kernel.org/r/20060106.231054.43576567.davem@davemloft.net} [Viewed February 29, 2012]" ,annotation={ David Miller's view on hashed arrays of locks: used to really @@ -1464,7 +1464,7 @@ Distributed Processing Symposium" ,day="20" ,year="2006" ,note="Available: -\url{http://lkml.org/lkml/2006/6/20/238} +\url{https://lore.kernel.org/r/20060408134707.22479.33814.sendpatchset@linux.site} [Viewed March 25, 2008]" ,annotation={ RCU-protected radix tree. @@ -1554,7 +1554,7 @@ Revised: ,day="28" ,year="2006" ,note="Available: -\url{http://lkml.org/lkml/2006/9/28/160} +\url{https://lore.kernel.org/r/20060928142616.GA20185@infradead.org} [Viewed March 27, 2008]" } @@ -1593,7 +1593,7 @@ Revised: ,year="2006" ,day=26 ,note="Available: -\url{http://lkml.org/lkml/2006/10/26/73} +\url{https://lore.kernel.org/r/20061026105731.GE11803@in.ibm.com} [Viewed January 26, 2009]" ,annotation={ RCU-based reader-writer lock that allows readers to proceed with @@ -1612,12 +1612,12 @@ Revised: ,year="2006" ,day=17 ,note="Available: -\url{http://lkml.org/lkml/2006/11/17/56} +\url{https://lore.kernel.org/r/20061117092925.GT7164@kernel.dk} [Viewed May 28, 2007]" ,annotation={ SRCU's grace periods are too slow for Jens, even after a factor-of-three speedup. - Sped-up version of SRCU at http://lkml.org/lkml/2006/11/17/359. + Sped-up version of SRCU at https://lore.kernel.org/r/20061118002845.GF2632@us.ibm.com. } } @@ -1629,7 +1629,7 @@ Revised: ,year="2006" ,day=19 ,note="Available: -\url{http://lkml.org/lkml/2006/11/19/69} +\url{https://lore.kernel.org/r/20061119190027.GA3676@oleg} [Viewed May 28, 2007]" ,annotation={ First cut of QRCU. Expanded/corrected versions followed. 
@@ -1644,7 +1644,7 @@ Revised: ,year="2006" ,day=30 ,note="Available: -\url{http://lkml.org/lkml/2006/11/29/330} +\url{https://lore.kernel.org/r/20061130015714.GC1350@oleg} [Viewed November 26, 2008]" ,annotation={ Expanded/corrected version of QRCU. @@ -1709,7 +1709,7 @@ Revised: ,year="2007" ,day=3 ,note="Available: -\url{http://lkml.org/lkml/2007/1/3/112} +\url{https://lore.kernel.org/r/20070103152738.GA16063@localdomain} [Viewed May 28, 2007]" ,annotation={ Patch for list_splice_rcu(). @@ -1737,7 +1737,7 @@ Revised: ,year="2007" ,day=28 ,note="Available: -\url{http://lkml.org/lkml/2007/1/28/34} +\url{https://lore.kernel.org/r/20070128120509.719287000@programming.kicks-ass.net} [Viewed March 27, 2008]" ,annotation={ RCU-like implementation for frequent updaters and rare readers(!). @@ -1767,7 +1767,7 @@ Revised: ,year="2007" ,day=24 ,note="Available: -\url{http://lkml.org/lkml/2007/2/25/18} +\url{https://lore.kernel.org/r/20070225062349.GA17468@linux.vnet.ibm.com} [Viewed March 27, 2008]" ,annotation={ Patch for QRCU supplying lock-free fast path. @@ -1846,7 +1846,7 @@ Revised: ,annotation={ LWN article describing Promela and spin, and also using Oleg Nesterov's QRCU as an example (with Paul McKenney's fastpath). - Merged patch at: http://lkml.org/lkml/2007/2/25/18 + Merged patch at: https://lore.kernel.org/r/20070225062349.GA17468@linux.vnet.ibm.com } } @@ -1885,7 +1885,7 @@ Revised: ,day="10" ,year="2007" ,note="Available: -\url{http://lkml.org/lkml/2007/9/10/213} +\url{https://lore.kernel.org/r/20070910183004.GA3299@linux.vnet.ibm.com} [Viewed October 25, 2007]" ,annotation={ Final patch for preemptable RCU to -rt. (Later patches were @@ -1933,7 +1933,7 @@ Revised: ,day="20" ,year="2007" ,note="Available: -\url{http://lkml.org/lkml/2007/12/20/244} +\url{https://lore.kernel.org/r/20071220142540.GB22523@Krystal} [Viewed March 27, 2008]" ,annotation={ Request for call_rcu_sched() and rcu_barrier_sched(). 
@@ -2013,7 +2013,7 @@ Revised: ,day="29" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/1/29/208} +\url{https://lore.kernel.org/r/Pine.LNX.4.58.0801291113350.20371@gandalf.stny.rr.com} [Viewed March 27, 2008]" ,annotation={ Patch that prevents preemptible RCU from unnecessarily waking @@ -2028,7 +2028,7 @@ Revised: ,day="1" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/2/2/255} +\url{https://lore.kernel.org/r/20080202214124.GA28612@linux.vnet.ibm.com} [Viewed October 18, 2008]" ,annotation={ Explanation of compilers violating dependency ordering. @@ -2088,7 +2088,7 @@ lot of {Linux} into your technology!!!" ,day="3" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/6/2/539} +\url{https://lore.kernel.org/r/4844BE83.5010401@cn.fujitsu.com} [Viewed December 10, 2008]" ,annotation={ Updated RCU classic algorithm. Introduced multi-tailed list @@ -2122,7 +2122,7 @@ lot of {Linux} into your technology!!!" ,day="21" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/8/21/336} +\url{https://lore.kernel.org/r/48AD8969.7060900@colorfullife.com} [Viewed December 8, 2008]" ,annotation={ State-based RCU. One key thing that this patch does is to @@ -2137,7 +2137,7 @@ lot of {Linux} into your technology!!!" ,day="6" ,year="2008" ,note="Available: -\url{http://lkml.org/lkml/2008/9/6/86} +\url{https://lore.kernel.org/r/48C2B1D2.5070801@colorfullife.com} [Viewed December 8, 2008]" ,annotation={ Manfred notes a fix required to my attempt to separate irq @@ -2183,7 +2183,7 @@ lot of {Linux} into your technology!!!" ,day="14" ,year="2009" ,note="Available: -\url{http://lkml.org/lkml/2009/1/14/449} +\url{https://lore.kernel.org/r/20090114202044.GJ6734@linux.vnet.ibm.com} [Viewed January 15, 2009]" ,annotation={ Small-footprint implementation of RCU for uniprocessor @@ -2218,7 +2218,7 @@ lot of {Linux} into your technology!!!" 
git://lttng.org/userspace-rcu.git http://lttng.org/cgi-bin/gitweb.cgi?p=userspace-rcu.git http://lttng.org/urcu - http://lkml.org/lkml/2009/2/5/572 + https://lore.kernel.org/r/20090206030543.GB8560@Krystal } } @@ -2258,7 +2258,7 @@ lot of {Linux} into your technology!!!" ,day="25" ,year="2009" ,note="Available: -\url{http://lkml.org/lkml/2009/6/25/306} +\url{https://lore.kernel.org/r/20090625160706.GA9467@linux.vnet.ibm.com} [Viewed August 16, 2009]" ,annotation={ First posting of expedited RCU to be accepted into -tip. @@ -2272,7 +2272,7 @@ lot of {Linux} into your technology!!!" ,day="23" ,year="2009" ,note="Available: -\url{http://lkml.org/lkml/2009/7/23/294} +\url{https://lore.kernel.org/r/20090724001429.GA17374@linux.vnet.ibm.com} [Viewed August 15, 2009]" ,annotation={ First posting of simple and fast preemptable RCU. @@ -2350,7 +2350,7 @@ lot of {Linux} into your technology!!!" ,month="December" ,year="2009" ,note="Available: -\url{http://lkml.org/lkml/2009/10/18/129} +\url{https://lore.kernel.org/r/20091018232918.GA7385@Krystal} [Viewed December 29, 2009]" ,annotation={ Mathieu proposed defer_rcu() with fixed-size per-thread pool @@ -2518,7 +2518,7 @@ lot of {Linux} into your technology!!!" ,month="January" ,year="2011" ,note="Available: -\url{https://lkml.org/lkml/2011/1/18/322} +\url{https://lore.kernel.org/r/AANLkTimajU0x1v6y3rH2+jr-bZ=tNLs1S_agXdGGAa3S@mail.gmail.com} [Viewed March 4, 2011]" ,annotation={ "The RCU-based name lookup is at the other end of the spectrum - the diff --git a/Documentation/accounting/cgroupstats.rst b/Documentation/accounting/cgroupstats.rst index b9afc48f4ea2..85186e7d4035 100644 --- a/Documentation/accounting/cgroupstats.rst +++ b/Documentation/accounting/cgroupstats.rst @@ -3,8 +3,8 @@ Control Groupstats ================== Control Groupstats is inspired by the discussion at -http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as -suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263. 
+https://lore.kernel.org/r/461CF883.2030308@sw.ru and implements per cgroup statistics as +suggested by Andrew Morton in https://lore.kernel.org/r/20070411114927.1277d7c9.akpm@linux-foundation.org. Per cgroup statistics infrastructure re-uses code from the taskstats interface. A new set of cgroup operations are registered with commands diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 261b7b4cca1f..35314b63008c 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -226,10 +226,11 @@ Configuring the kernel all module options to built in (=y) options. You can also preserve modules by LMC_KEEP. - "make kvmconfig" Enable additional options for kvm guest kernel support. + "make kvm_guest.config" Enable additional options for kvm guest kernel + support. - "make xenconfig" Enable additional options for xen dom0 guest kernel - support. + "make xen.config" Enable additional options for xen dom0 guest kernel + support. "make tinyconfig" Configure the tiniest possible kernel. diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 52688ae34461..0936412e044e 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -963,21 +963,21 @@ References 2. Singh, Balbir. Memory Controller (RSS Control), http://lwn.net/Articles/222762/ 3. Emelianov, Pavel. Resource controllers based on process cgroups - http://lkml.org/lkml/2007/3/6/198 + https://lore.kernel.org/r/45ED7DEC.7010403@sw.ru 4. Emelianov, Pavel. RSS controller based on process cgroups (v2) - http://lkml.org/lkml/2007/4/9/78 + https://lore.kernel.org/r/461A3010.90403@sw.ru 5. Emelianov, Pavel. RSS controller based on process cgroups (v3) - http://lkml.org/lkml/2007/5/30/244 + https://lore.kernel.org/r/465D9739.8070209@openvz.org 6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ 7. 
Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control subsystem (v3), http://lwn.net/Articles/235534/ 8. Singh, Balbir. RSS controller v2 test results (lmbench), - http://lkml.org/lkml/2007/5/17/232 + https://lore.kernel.org/r/464C95D4.7070806@linux.vnet.ibm.com 9. Singh, Balbir. RSS controller v2 AIM9 results - http://lkml.org/lkml/2007/5/18/1 + https://lore.kernel.org/r/464D267A.50107@linux.vnet.ibm.com 10. Singh, Balbir. Memory controller v6 test results, - http://lkml.org/lkml/2007/8/19/36 + https://lore.kernel.org/r/20070819094658.654.84837.sendpatchset@balbir-laptop 11. Singh, Balbir. Memory controller introduction (v6), - http://lkml.org/lkml/2007/8/17/69 + https://lore.kernel.org/r/20070817084228.26003.12568.sendpatchset@balbir-laptop 12. Corbet, Jonathan, Controlling memory use in cgroups, http://lwn.net/Articles/243795/ diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 1de8695c264b..c513eafaddea 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1,3 +1,5 @@ +.. _cgroup-v2: + ================ Control Group v2 ================ @@ -172,7 +174,6 @@ disabling controllers in v1 and make them always available in v2. cgroup v2 currently supports the following mount options. nsdelegate - Consider cgroup namespaces as delegation boundaries. This option is system wide and can only be set on mount or modified through remount from the init namespace. The mount option is @@ -180,7 +181,6 @@ cgroup v2 currently supports the following mount options. Delegation section for details. memory_localevents - Only populate memory.events with data for the current cgroup, and not any subtrees. This is legacy behaviour, the default behaviour without this option is to include subtree counts. @@ -189,7 +189,6 @@ cgroup v2 currently supports the following mount options. option is ignored on non-init namespace mounts. 
memory_recursiveprot - Recursively apply memory.min and memory.low protection to entire subtrees, without requiring explicit downward propagation into leaf cgroups. This allows protecting entire @@ -786,7 +785,6 @@ Core Interface Files All cgroup core files are prefixed with "cgroup." cgroup.type - A read-write single value file which exists on non-root cgroups. @@ -954,6 +952,8 @@ All cgroup core files are prefixed with "cgroup." Controllers =========== +.. _cgroup-v2-cpu: + CPU --- @@ -1259,9 +1259,9 @@ PAGE_SIZE multiple when read back. can show up in the middle. Don't rely on items remaining in a fixed position; use the keys to look up specific values! - If the entry has no per-node counter(or not show in the - mempry.numa_stat). We use 'npn'(non-per-node) as the tag - to indicate that it will not show in the mempry.numa_stat. + If the entry has no per-node counter (or not show in the + memory.numa_stat). We use 'npn' (non-per-node) as the tag + to indicate that it will not show in the memory.numa_stat. anon Amount of memory used in anonymous mappings such as @@ -1277,11 +1277,11 @@ PAGE_SIZE multiple when read back. pagetables Amount of memory allocated for page tables. - percpu(npn) + percpu (npn) Amount of memory used for storing per-cpu kernel data structures. - sock(npn) + sock (npn) Amount of memory used in network transmission buffers shmem @@ -1329,7 +1329,7 @@ PAGE_SIZE multiple when read back. Part of "slab" that cannot be reclaimed on memory pressure. - slab(npn) + slab (npn) Amount of memory used for storing in-kernel data structures. @@ -1357,39 +1357,39 @@ PAGE_SIZE multiple when read back. 
workingset_nodereclaim Number of times a shadow node has been reclaimed - pgfault(npn) + pgfault (npn) Total number of page faults incurred - pgmajfault(npn) + pgmajfault (npn) Number of major page faults incurred - pgrefill(npn) + pgrefill (npn) Amount of scanned pages (in an active LRU list) - pgscan(npn) + pgscan (npn) Amount of scanned pages (in an inactive LRU list) - pgsteal(npn) + pgsteal (npn) Amount of reclaimed pages - pgactivate(npn) + pgactivate (npn) Amount of pages moved to the active LRU list - pgdeactivate(npn) + pgdeactivate (npn) Amount of pages moved to the inactive LRU list - pglazyfree(npn) + pglazyfree (npn) Amount of pages postponed to be freed under memory pressure - pglazyfreed(npn) + pglazyfreed (npn) Amount of reclaimed lazyfree pages - thp_fault_alloc(npn) + thp_fault_alloc (npn) Number of transparent hugepages which were allocated to satisfy a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE is not set. - thp_collapse_alloc(npn) + thp_collapse_alloc (npn) Number of transparent hugepages which were allocated to allow collapsing an existing range of pages. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE is not set. @@ -1558,7 +1558,7 @@ IO Interface Files 8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021 io.cost.qos - A read-write nested-keyed file with exists only on the root + A read-write nested-keyed file which exists only on the root cgroup. This file configures the Quality of Service of the IO cost @@ -1613,7 +1613,7 @@ IO Interface Files automatic mode can be restored by setting "ctrl" to "auto". io.cost.model - A read-write nested-keyed file with exists only on the root + A read-write nested-keyed file which exists only on the root cgroup. This file configures the cost model of the IO cost model based @@ -2000,10 +2000,12 @@ Cpuset Interface Files cpuset-enabled cgroups. This flag is owned by the parent cgroup and is not delegatable. 
- It accepts only the following input values when written to. + It accepts only the following input values when written to. - "root" - a partition root - "member" - a non-root member of a partition + ======== ================================ + "root" a partition root + "member" a non-root member of a partition + ======== ================================ When set to be a partition root, the current cgroup is the root of a new partition or scheduling domain that comprises @@ -2044,9 +2046,11 @@ Cpuset Interface Files root to change. On read, the "cpuset.sched.partition" file can show the following values. - "member" Non-root member of a partition - "root" Partition root - "root invalid" Invalid partition root + ============== ============================== + "member" Non-root member of a partition + "root" Partition root + "root invalid" Invalid partition root + ============== ============================== It is a partition root if the first 2 partition root conditions above are true and at least one CPU from "cpuset.cpus" is @@ -2219,7 +2223,7 @@ Without cgroup namespace, the "/proc/$PID/cgroup" file shows the complete path of the cgroup of a process. In a container setup where a set of cgroups and namespaces are intended to isolate processes the "/proc/$PID/cgroup" file may leak potential system level information -to the isolated processes. For Example:: +to the isolated processes. 
For example:: # cat /proc/self/cgroup 0::/batchjobs/container_id1 diff --git a/Documentation/admin-guide/cpu-load.rst b/Documentation/admin-guide/cpu-load.rst index f3ada90e9ca8..21a984337080 100644 --- a/Documentation/admin-guide/cpu-load.rst +++ b/Documentation/admin-guide/cpu-load.rst @@ -107,7 +107,7 @@ will lead to quite erratic information inside ``/proc/stat``:: References ---------- -- http://lkml.org/lkml/2007/2/12/6 +- https://lore.kernel.org/r/loom.20070212T063225-663@post.gmane.org - Documentation/filesystems/proc.rst (1.8) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 682ab28b5c94..1132796a8d96 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -60,7 +60,7 @@ Note that for the special case of a range one can split the range into equal sized groups and for each group use some amount from the beginning of that group: - -cpu number>:/ + -:/ For example one can add to the command line following parameter: diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b51b7f964d80..3020a02f48a7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -606,7 +606,7 @@ kernel/dma/contiguous.c cma_pernuma=nn[MG] - [ARM64,KNL] + [ARM64,KNL,CMA] Sets the size of kernel per-numa memory area for contiguous memory allocations. A value of 0 disables per-numa CMA altogether. And If this option is not @@ -1525,12 +1525,12 @@ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET registers. Default set by CONFIG_HPET_MMAP_DEFAULT. - hugetlb_cma= [HW] The size of a cma area used for allocation + hugetlb_cma= [HW,CMA] The size of a CMA area used for allocation of gigantic hugepages. Format: nn[KMGTPE] - Reserve a cma area of given size and allocate gigantic - hugepages using the cma allocator. 
If enabled, the + Reserve a CMA area of given size and allocate gigantic + hugepages using the CMA allocator. If enabled, the boot-time allocation of gigantic hugepages is skipped. hugepages= [HW] Number of HugeTLB pages to allocate at boot. @@ -3277,9 +3277,14 @@ parameter, xsave area per process might occupy more memory on xsaves enabled systems. - nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or - wfi(ARM) instruction doesn't work correctly and not to - use it. This is also useful when using JTAG debugger. + nohlt [ARM,ARM64,MICROBLAZE,SH] Forces the kernel to busy wait + in do_idle() and not use the arch_cpu_idle() + implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP + to be effective. This is useful on platforms where the + sleep(SH) or wfi(ARM,ARM64) instructions do not work + correctly or when doing power measurements to evaluate + the impact of the sleep instructions. This is also + useful when using JTAG debugger. no_file_caps Tells the kernel not to honor file capabilities. The only way then for a file to be executed with privilege @@ -3292,6 +3297,21 @@ in certain environments such as networked servers or real-time systems. + no_hash_pointers + Force pointers printed to the console or buffers to be + unhashed. By default, when a pointer is printed via %p + format string, that pointer is "hashed", i.e. obscured + by hashing the pointer value. This is a security feature + that hides actual kernel addresses from unprivileged + users, but it also makes debugging the kernel more + difficult since unequal pointers can no longer be + compared. However, if this command-line option is + specified, then all normal pointers will have their true + value printed. Pointers printed via %pK may still be + hashed. This option should only be specified when + debugging the kernel. Please do not use on production + kernels. + nohibernate [HIBERNATION] Disable hibernation and resume. 
nohz= [KNL] Boottime enable/disable dynamic ticks diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst index dc36aeb65d0a..531f689311f2 100644 --- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst +++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst @@ -273,7 +273,7 @@ To reduce its OS jitter, do any of the following: However, there is an RFC patch from Christoph Lameter (based on an earlier one from Gilad Ben-Yossef) that reduces or even eliminates vmstat overhead for some - workloads at https://lkml.org/lkml/2013/9/4/379. + workloads at https://lore.kernel.org/r/00000140e9dfd6bd-40db3d4f-c1be-434f-8132-7820f81bb586-000000@email.amazonses.com. e. If running on high-end powerpc servers, build with CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS daemon from running on each CPU every second or so. diff --git a/Documentation/admin-guide/perf-security.rst b/Documentation/admin-guide/perf-security.rst index 904e4eb37f99..34aa334320ca 100644 --- a/Documentation/admin-guide/perf-security.rst +++ b/Documentation/admin-guide/perf-security.rst @@ -72,7 +72,7 @@ monitoring and observability operations, thus, bypass *scope* permissions checks in the kernel. CAP_PERFMON implements the principle of least privilege [13]_ (POSIX 1003.1e: 2.2.2.39) for performance monitoring and observability operations in the kernel and provides a secure approach to -perfomance monitoring and observability in the system. +performance monitoring and observability in the system. 
For backward compatibility reasons the access to perf_events monitoring and observability operations is also open for CAP_SYS_ADMIN privileged diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst index f48277a0a850..2a501c9ddc55 100644 --- a/Documentation/admin-guide/sysctl/fs.rst +++ b/Documentation/admin-guide/sysctl/fs.rst @@ -380,5 +380,5 @@ This configuration option sets the maximum number of "watches" that are allowed for each user. Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes on a 64bit one. -The current default value for max_user_watches is the 1/32 of the available -low memory, divided for the "watch" cost in bytes. +The current default value for max_user_watches is 1/25 (4%) of the +available low memory, divided by the "watch" cost in bytes. diff --git a/Documentation/arm/booting.rst b/Documentation/arm/booting.rst index a2263451dc2c..5974e37b3d20 100644 --- a/Documentation/arm/booting.rst +++ b/Documentation/arm/booting.rst @@ -128,7 +128,7 @@ it. The recommended placement is in the first 16KiB of RAM. The boot loader must load a device tree image (dtb) into system ram at a 64bit aligned address and initialize it with the boot data. The -dtb format is documented in Documentation/devicetree/booting-without-of.rst. +dtb format is documented at https://www.devicetree.org/specifications/. The kernel will look for the dtb magic value of 0xd00dfeed at the dtb physical address to determine if a dtb has been passed instead of a tagged list. 
diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst index a2e9e1bba7b9..b4bea32472b6 100644 --- a/Documentation/arm/index.rst +++ b/Documentation/arm/index.rst @@ -33,7 +33,7 @@ SoC-specific documents ixp4xx - marvel + marvell microchip netwinder diff --git a/Documentation/arm/marvel.rst b/Documentation/arm/marvell.rst similarity index 97% rename from Documentation/arm/marvel.rst rename to Documentation/arm/marvell.rst index 16ab2eb085b8..94cd73383594 100644 --- a/Documentation/arm/marvel.rst +++ b/Documentation/arm/marvell.rst @@ -127,7 +127,7 @@ EBU Armada family - 88F6828 Armada 388 - Product infos: http://www.marvell.com/embedded-processors/armada-38x/ - - Functional Spec: https://marvellcorp.wufoo.com/forms/marvell-armada-38x-functional-specifications/ + - Functional Spec: http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-38x-functional-specifications-2015-11.pdf Core: ARM Cortex-A9 @@ -183,7 +183,10 @@ EBU Armada family ARMv8 http://www.marvell.com/embedded-processors/armada-3700/ Product Brief: - http://www.marvell.com/embedded-processors/assets/PB-88F3700-FNL.pdf + http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-product-brief-2016-01.pdf + + Hardware Spec: + http://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-37xx-hardware-specifications-2019-09.pdf Device tree files: arch/arm64/boot/dts/marvell/armada-37* diff --git a/Documentation/conf.py b/Documentation/conf.py index 6a767294887e..5bd45d5fb0a0 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -31,7 +31,7 @@ from load_config import loadConfig # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. 
-needs_sphinx = '1.3' +needs_sphinx = '1.7' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -112,19 +112,12 @@ if major >= 3: else: extensions.append('cdomain') - if major == 1 and minor < 7: - sys.stderr.write('WARNING: Sphinx 1.7 or greater will be required as of ' - 'the 5.12 release\n') # Ensure that autosectionlabel will produce unique names autosectionlabel_prefix_document = True autosectionlabel_maxdepth = 2 -# The name of the math extension changed on Sphinx 1.4 -if (major == 1 and minor > 3) or (major > 1): - extensions.append("sphinx.ext.imgmath") -else: - extensions.append("sphinx.ext.pngmath") +extensions.append("sphinx.ext.imgmath") # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -375,71 +368,9 @@ if cjk_cmd.find("Noto Sans CJK SC") >= 0: ''' # Fix reference escape troubles with Sphinx 1.4.x -if major == 1 and minor > 3: +if major == 1: latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n' -if major == 1 and minor <= 4: - latex_elements['preamble'] += '\\usepackage[margin=0.5in, top=1in, bottom=1in]{geometry}' -elif major == 1 and (minor > 5 or (minor == 5 and patch >= 3)): - latex_elements['sphinxsetup'] = 'hmargin=0.5in, vmargin=1in' - latex_elements['preamble'] += '\\fvset{fontsize=auto}\n' - -# Customize notice background colors on Sphinx < 1.6: -if major == 1 and minor < 6: - latex_elements['preamble'] += ''' - \\usepackage{ifthen} - - % Put notes in color and let them be inside a table - \\definecolor{NoteColor}{RGB}{204,255,255} - \\definecolor{WarningColor}{RGB}{255,204,204} - \\definecolor{AttentionColor}{RGB}{255,255,204} - \\definecolor{ImportantColor}{RGB}{192,255,204} - \\definecolor{OtherColor}{RGB}{204,204,204} - \\newlength{\\mynoticelength} - \\makeatletter\\newenvironment{coloredbox}[1]{% - \\setlength{\\fboxrule}{1pt} - \\setlength{\\fboxsep}{7pt} - 
\\setlength{\\mynoticelength}{\\linewidth} - \\addtolength{\\mynoticelength}{-2\\fboxsep} - \\addtolength{\\mynoticelength}{-2\\fboxrule} - \\begin{lrbox}{\\@tempboxa}\\begin{minipage}{\\mynoticelength}}{\\end{minipage}\\end{lrbox}% - \\ifthenelse% - {\\equal{\\py@noticetype}{note}}% - {\\colorbox{NoteColor}{\\usebox{\\@tempboxa}}}% - {% - \\ifthenelse% - {\\equal{\\py@noticetype}{warning}}% - {\\colorbox{WarningColor}{\\usebox{\\@tempboxa}}}% - {% - \\ifthenelse% - {\\equal{\\py@noticetype}{attention}}% - {\\colorbox{AttentionColor}{\\usebox{\\@tempboxa}}}% - {% - \\ifthenelse% - {\\equal{\\py@noticetype}{important}}% - {\\colorbox{ImportantColor}{\\usebox{\\@tempboxa}}}% - {\\colorbox{OtherColor}{\\usebox{\\@tempboxa}}}% - }% - }% - }% - }\\makeatother - - \\makeatletter - \\renewenvironment{notice}[2]{% - \\def\\py@noticetype{#1} - \\begin{coloredbox}{#1} - \\bf\\it - \\par\\strong{#2} - \\csname py@noticestart@#1\\endcsname - } - { - \\csname py@noticeend@\\py@noticetype\\endcsname - \\end{coloredbox} - } - \\makeatother - - ''' - # With Sphinx 1.6, it is possible to change the Bg color directly # by using: # \definecolor{sphinxnoteBgColor}{RGB}{204,255,255} diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst index c234a3ab3c34..848478838347 100644 --- a/Documentation/dev-tools/kunit/index.rst +++ b/Documentation/dev-tools/kunit/index.rst @@ -13,6 +13,7 @@ KUnit - Unit Testing for the Linux Kernel api/index style faq + tips What is KUnit? ============== @@ -88,6 +89,7 @@ How do I use it? 
================ * :doc:`start` - for new users of KUnit +* :doc:`tips` - for short examples of best practices * :doc:`usage` - for a more detailed explanation of KUnit features * :doc:`api/index` - for the list of KUnit APIs used for testing * :doc:`kunit-tool` - for more information on the kunit_tool helper script diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst index 454f307813ea..0e65cabe08eb 100644 --- a/Documentation/dev-tools/kunit/start.rst +++ b/Documentation/dev-tools/kunit/start.rst @@ -196,8 +196,9 @@ Now add the following to ``drivers/misc/Kconfig``: .. code-block:: kconfig config MISC_EXAMPLE_TEST - bool "Test for my example" + tristate "Test for my example" if !KUNIT_ALL_TESTS depends on MISC_EXAMPLE && KUNIT=y + default KUNIT_ALL_TESTS and the following to ``drivers/misc/Makefile``: @@ -233,5 +234,7 @@ Congrats! You just wrote your first KUnit test! Next Steps ========== -* Check out the :doc:`usage` page for a more +* Check out the :doc:`tips` page for tips on + writing idiomatic KUnit tests. +* Optional: see the :doc:`usage` page for a more in-depth explanation of KUnit. diff --git a/Documentation/dev-tools/kunit/tips.rst b/Documentation/dev-tools/kunit/tips.rst new file mode 100644 index 000000000000..a6ca0af14098 --- /dev/null +++ b/Documentation/dev-tools/kunit/tips.rst @@ -0,0 +1,115 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================ +Tips For Writing KUnit Tests +============================ + +Exiting early on failed expectations +------------------------------------ + +``KUNIT_EXPECT_EQ`` and friends will mark the test as failed and continue +execution. In some cases, it's unsafe to continue and you can use the +``KUNIT_ASSERT`` variant to exit on failure. + +.. code-block:: c + + void example_test_user_alloc_function(struct kunit *test) + { + void *object = alloc_some_object_for_me(); + + /* Make sure we got a valid pointer back. 
*/ + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, object); + do_something_with_object(object); + } + +Allocating memory +----------------- + +Where you would use ``kzalloc``, you should prefer ``kunit_kzalloc`` instead. +KUnit will ensure the memory is freed once the test completes. + +This is particularly useful since it lets you use the ``KUNIT_ASSERT_EQ`` +macros to exit early from a test without having to worry about remembering to +call ``kfree``. + +Example: + +.. code-block:: c + + void example_test_allocation(struct kunit *test) + { + char *buffer = kunit_kzalloc(test, 16, GFP_KERNEL); + /* Ensure allocation succeeded. */ + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer); + + KUNIT_ASSERT_STREQ(test, buffer, ""); + } + + +Testing static functions +------------------------ + +If you don't want to expose functions or variables just for testing, one option +is to conditionally ``#include`` the test file at the end of your .c file, e.g. + +.. code-block:: c + + /* In my_file.c */ + + static int do_interesting_thing(); + + #ifdef CONFIG_MY_KUNIT_TEST + #include "my_kunit_test.c" + #endif + +Injecting test-only code +------------------------ + +Similarly to the above, it can be useful to add test-specific logic. + +.. code-block:: c + + /* In my_file.h */ + + #ifdef CONFIG_MY_KUNIT_TEST + /* Defined in my_kunit_test.c */ + void test_only_hook(void); + #else + static inline void test_only_hook(void) { } + #endif + +TODO(dlatypov@google.com): add an example of using ``current->kunit_test`` in +such a hook when it's not only updated for ``CONFIG_KASAN=y``. + +Customizing error messages +-------------------------- + +Each of the ``KUNIT_EXPECT`` and ``KUNIT_ASSERT`` macros has a ``_MSG`` variant. +These take a format string and arguments to provide additional context to the automatically generated error messages. + +.. code-block:: c + + char some_str[41]; + generate_sha1_hex_string(some_str); + + /* Before. Not easy to tell why the test failed.
*/ + KUNIT_EXPECT_EQ(test, strlen(some_str), 40); + + /* After. Now we see the offending string. */ + KUNIT_EXPECT_EQ_MSG(test, strlen(some_str), 40, "some_str='%s'", some_str); + +Alternatively, one can take full control over the error message by using ``KUNIT_FAIL()``, e.g. + +.. code-block:: c + + /* Before */ + KUNIT_EXPECT_EQ(test, some_setup_function(), 0); + + /* After: full control over the failure message. */ + if (some_setup_function()) + KUNIT_FAIL(test, "Failed to setup thing for testing"); + +Next Steps +========== +* Optional: see the :doc:`usage` page for a more + in-depth explanation of KUnit. diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 3b63f2ae24db..6ba161dea4d8 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -34,9 +34,11 @@ properties: items: - enum: - qcom,sc7180-smmu-500 + - qcom,sc8180x-smmu-500 - qcom,sdm845-smmu-500 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 + - qcom,sm8350-smmu-500 - const: arm,mmu-500 - description: Qcom Adreno GPUs implementing "arm,smmu-v2" items: diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt deleted file mode 100644 index ac949f7fe3d4..000000000000 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt +++ /dev/null @@ -1,105 +0,0 @@ -* Mediatek IOMMU Architecture Implementation - - Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and -this M4U have two generations of HW architecture. Generation one uses flat -pagetable, and only supports 4K size page mapping. Generation two uses the -ARM Short-Descriptor translation table format for address translation. 
- - About the M4U Hardware Block Diagram, please check below: - - EMI (External Memory Interface) - | - m4u (Multimedia Memory Management Unit) - | - +--------+ - | | - gals0-rx gals1-rx (Global Async Local Sync rx) - | | - | | - gals0-tx gals1-tx (Global Async Local Sync tx) - | | Some SoCs may have GALS. - +--------+ - | - SMI Common(Smart Multimedia Interface Common) - | - +----------------+------- - | | - | gals-rx There may be GALS in some larbs. - | | - | | - | gals-tx - | | - SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb). - (display) (vdec) - | | - | | - +-----+-----+ +----+----+ - | | | | | | - | | |... | | | ... There are different ports in each larb. - | | | | | | -OVL0 RDMA0 WDMA0 MC PP VLD - - As above, The Multimedia HW will go through SMI and M4U while it -access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain -smi local arbiter and smi common. It will control whether the Multimedia -HW should go though the m4u for translation or bypass it and talk -directly with EMI. And also SMI help control the power domain and clocks for -each local arbiter. - Normally we specify a local arbiter(larb) for each multimedia HW -like display, video decode, and camera. And there are different ports -in each larb. Take a example, There are many ports like MC, PP, VLD in the -video decode local arbiter, all these ports are according to the video HW. - In some SoCs, there may be a GALS(Global Async Local Sync) module between -smi-common and m4u, and additional GALS module between smi-larb and -smi-common. GALS can been seen as a "asynchronous fifo" which could help -synchronize for the modules in different clock frequency. - -Required properties: -- compatible : must be one of the following string: - "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW. - "mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW. - "mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW. 
- "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses - generation one m4u HW. - "mediatek,mt8167-m4u" for mt8167 which uses generation two m4u HW. - "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW. - "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW. -- reg : m4u register base and size. -- interrupts : the interrupt of m4u. -- clocks : must contain one entry for each clock-names. -- clock-names : Only 1 optional clock: - - "bclk": the block clock of m4u. - Here is the list which require this "bclk": - - mt2701, mt2712, mt7623 and mt8173. - Note that m4u use the EMI clock which always has been enabled before kernel - if there is no this "bclk". -- mediatek,larbs : List of phandle to the local arbiters in the current Socs. - Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort - according to the local arbiter index, like larb0, larb1, larb2... -- iommu-cells : must be 1. This is the mtk_m4u_id according to the HW. - Specifies the mtk_m4u_id as defined in - dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623 - dt-binding/memory/mt2712-larb-port.h for mt2712, - dt-binding/memory/mt6779-larb-port.h for mt6779, - dt-binding/memory/mt8167-larb-port.h for mt8167, - dt-binding/memory/mt8173-larb-port.h for mt8173, and - dt-binding/memory/mt8183-larb-port.h for mt8183. - -Example: - iommu: iommu@10205000 { - compatible = "mediatek,mt8173-m4u"; - reg = <0 0x10205000 0 0x1000>; - interrupts = ; - clocks = <&infracfg CLK_INFRA_M4U>; - clock-names = "bclk"; - mediatek,larbs = <&larb0 &larb1 &larb2 &larb3 &larb4 &larb5>; - #iommu-cells = <1>; - }; - -Example for a client device: - display { - compatible = "mediatek,mt8173-disp"; - iommus = <&iommu M4U_PORT_DISP_OVL0>, - <&iommu M4U_PORT_DISP_RDMA0>; - ... 
- }; diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml new file mode 100644 index 000000000000..0f26fe14c8e2 --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -0,0 +1,183 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iommu/mediatek,iommu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek IOMMU Architecture Implementation + +maintainers: + - Yong Wu + +description: |+ + Some MediaTek SOCs contain a Multimedia Memory Management Unit (M4U), and + this M4U have two generations of HW architecture. Generation one uses flat + pagetable, and only supports 4K size page mapping. Generation two uses the + ARM Short-Descriptor translation table format for address translation. + + About the M4U Hardware Block Diagram, please check below: + + EMI (External Memory Interface) + | + m4u (Multimedia Memory Management Unit) + | + +--------+ + | | + gals0-rx gals1-rx (Global Async Local Sync rx) + | | + | | + gals0-tx gals1-tx (Global Async Local Sync tx) + | | Some SoCs may have GALS. + +--------+ + | + SMI Common(Smart Multimedia Interface Common) + | + +----------------+------- + | | + | gals-rx There may be GALS in some larbs. + | | + | | + | gals-tx + | | + SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb). + (display) (vdec) + | | + | | + +-----+-----+ +----+----+ + | | | | | | + | | |... | | | ... There are different ports in each larb. + | | | | | | + OVL0 RDMA0 WDMA0 MC PP VLD + + As above, The Multimedia HW will go through SMI and M4U while it + access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain + smi local arbiter and smi common. It will control whether the Multimedia + HW should go though the m4u for translation or bypass it and talk + directly with EMI. 
SMI also helps control the power domain and clocks for + each local arbiter. + + Normally we specify a local arbiter(larb) for each multimedia HW + like display, video decode, and camera. And there are different ports + in each larb. For example, there are many ports like MC, PP, VLD in the + video decode local arbiter, all these ports are according to the video HW. + + In some SoCs, there may be a GALS(Global Async Local Sync) module between + smi-common and m4u, and an additional GALS module between smi-larb and + smi-common. GALS can be seen as an "asynchronous fifo" which could help + synchronize the modules running at different clock frequencies. + +properties: + compatible: + oneOf: + - enum: + - mediatek,mt2701-m4u # generation one + - mediatek,mt2712-m4u # generation two + - mediatek,mt6779-m4u # generation two + - mediatek,mt8167-m4u # generation two + - mediatek,mt8173-m4u # generation two + - mediatek,mt8183-m4u # generation two + - mediatek,mt8192-m4u # generation two + + - description: mt7623 generation one + items: + - const: mediatek,mt7623-m4u + - const: mediatek,mt2701-m4u + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: bclk is the block clock. + + clock-names: + items: + - const: bclk + + mediatek,larbs: + $ref: /schemas/types.yaml#/definitions/phandle-array + minItems: 1 + maxItems: 32 + description: | + List of phandles to the local arbiters in the current SoC. + Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must be sorted + according to the local arbiter index, like larb0, larb1, larb2... + + '#iommu-cells': + const: 1 + description: | + This is the mtk_m4u_id according to the HW.
Specifies the mtk_m4u_id as + defined in + dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623, + dt-binding/memory/mt2712-larb-port.h for mt2712, + dt-binding/memory/mt6779-larb-port.h for mt6779, + dt-binding/memory/mt8167-larb-port.h for mt8167, + dt-binding/memory/mt8173-larb-port.h for mt8173, + dt-binding/memory/mt8183-larb-port.h for mt8183, + dt-binding/memory/mt8192-larb-port.h for mt8192. + + power-domains: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - mediatek,larbs + - '#iommu-cells' + +allOf: + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt2701-m4u + - mediatek,mt2712-m4u + - mediatek,mt8173-m4u + - mediatek,mt8192-m4u + + then: + required: + - clocks + + - if: + properties: + compatible: + enum: + - mediatek,mt8192-m4u + + then: + required: + - power-domains + +additionalProperties: false + +examples: + - | + #include + #include + + iommu: iommu@10205000 { + compatible = "mediatek,mt8173-m4u"; + reg = <0x10205000 0x1000>; + interrupts = ; + clocks = <&infracfg CLK_INFRA_M4U>; + clock-names = "bclk"; + mediatek,larbs = <&larb0 &larb1 &larb2 + &larb3 &larb4 &larb5>; + #iommu-cells = <1>; + }; + + - | + #include + + /* Example for a client device */ + display { + compatible = "mediatek,mt8173-disp"; + iommus = <&iommu M4U_PORT_DISP_OVL0>, + <&iommu M4U_PORT_DISP_RDMA0>; + }; diff --git a/Documentation/devicetree/usage-model.rst b/Documentation/devicetree/usage-model.rst index e1b42dc63f01..1eb83496ca1e 100644 --- a/Documentation/devicetree/usage-model.rst +++ b/Documentation/devicetree/usage-model.rst @@ -12,7 +12,7 @@ This article describes how Linux uses the device tree. An overview of the device tree data format can be found on the device tree usage page at devicetree.org\ [1]_. -.. [1] https://elinux.org/Device_Tree_Usage +.. [1] https://www.devicetree.org/specifications/ The "Open Firmware Device Tree", or simply Device Tree (DT), is a data structure and language for describing hardware. 
More specifically, it diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 36ac2166ad67..ec3e71f56009 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -340,16 +340,26 @@ Rendered as: Cross-referencing ----------------- -Cross-referencing from one documentation page to another can be done by passing -the path to the file starting from the Documentation folder. -For example, to cross-reference to this page (the .rst extension is optional):: +Cross-referencing from one documentation page to another can be done simply by +writing the path to the document file, no special syntax required. The path can +be either absolute or relative. For absolute paths, start it with +"Documentation/". For example, to cross-reference to this page, all the +following are valid options, depending on the current document's directory (note +that the ``.rst`` extension is required):: - See Documentation/doc-guide/sphinx.rst. + See Documentation/doc-guide/sphinx.rst. This always works. + Take a look at sphinx.rst, which is at this same directory. + Read ../sphinx.rst, which is one directory above. -If you want to use a relative path, you need to use Sphinx's ``doc`` directive. -For example, referencing this page from the same directory would be done as:: +If you want the link to have a different rendered text other than the document's +title, you need to use Sphinx's ``doc`` role. For example:: - See :doc:`sphinx`. + See :doc:`my custom link text for document sphinx <sphinx>`. + +For most use cases, the former is preferred, as it is cleaner and more suited +for people reading the source files. If you come across a ``:doc:`` usage that +isn't adding any value, please feel free to convert it to just the document +path. For information on cross-referencing to kernel-doc functions or types, see Documentation/doc-guide/kernel-doc.rst.
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst index 0fb57e298b41..d6b0d779859b 100644 --- a/Documentation/driver-api/gpio/driver.rst +++ b/Documentation/driver-api/gpio/driver.rst @@ -640,8 +640,8 @@ compliance: level and edge IRQs * [1] http://www.spinics.net/lists/linux-omap/msg120425.html -* [2] https://lkml.org/lkml/2015/9/25/494 -* [3] https://lkml.org/lkml/2015/9/25/495 +* [2] https://lore.kernel.org/r/1443209283-20781-2-git-send-email-grygorii.strashko@ti.com +* [3] https://lore.kernel.org/r/1443209283-20781-3-git-send-email-grygorii.strashko@ti.com Requesting self-owned GPIO pins diff --git a/Documentation/driver-api/men-chameleon-bus.rst b/Documentation/driver-api/men-chameleon-bus.rst index 1b1f048aa748..6f0b9ee47595 100644 --- a/Documentation/driver-api/men-chameleon-bus.rst +++ b/Documentation/driver-api/men-chameleon-bus.rst @@ -18,6 +18,7 @@ MEN Chameleon Bus 4.1 The driver structure 4.2 Probing and attaching 4.3 Initializing the driver + 4.4 Using DMA Introduction @@ -173,3 +174,14 @@ module at the MCB core:: The module_mcb_driver() macro can be used to reduce the above code:: module_mcb_driver(foo_driver); + +Using DMA +--------- + +To make use of the kernel's DMA API functions, you will need to use the +carrier device's 'struct device'. Fortunately 'struct mcb_device' embeds a +pointer (->dma_dev) to the carrier's device for DMA purposes:: + + ret = dma_set_mask_and_coherent(mdev->dma_dev, DMA_BIT_MASK(dma_bits)); + if (ret) + /* Handle errors */ diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst index a4969c474cc3..29fdd817ddb0 100644 --- a/Documentation/driver-api/thermal/sysfs-api.rst +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -54,7 +54,7 @@ temperature) and throttle appropriate devices. trips: the total number of trip points this thermal zone supports.
mask: - Bit string: If 'n'th bit is set, then trip point 'n' is writeable. + Bit string: If 'n'th bit is set, then trip point 'n' is writable. devdata: device private data ops: @@ -406,7 +406,7 @@ Thermal cooling device sys I/F, created once it's registered:: |---stats/reset: Writing any value resets the statistics |---stats/time_in_state_ms: Time (msec) spent in various cooling states |---stats/total_trans: Total number of times cooling state is changed - |---stats/trans_table: Cooing state transition table + |---stats/trans_table: Cooling state transition table Then next two dynamic attributes are created/removed in pairs. They represent @@ -766,5 +766,5 @@ emergency poweroff kicks in after the delay has elapsed and shuts down the system. If set to 0 emergency poweroff will not be supported. So a carefully -profiled non-zero positive value is a must for emergerncy poweroff to be +profiled non-zero positive value is a must for emergency poweroff to be triggered. diff --git a/Documentation/filesystems/afs.rst b/Documentation/filesystems/afs.rst index 0abb155ac666..ca062a7f8ee2 100644 --- a/Documentation/filesystems/afs.rst +++ b/Documentation/filesystems/afs.rst @@ -109,7 +109,7 @@ Mountpoints AFS has a concept of mountpoints. In AFS terms, these are specially formatted symbolic links (of the same form as the "device name" passed to mount). kAFS presents these to the user as directories that have a follow-link capability -(ie: symbolic link semantics). If anyone attempts to access them, they will +(i.e.: symbolic link semantics). If anyone attempts to access them, they will automatically cause the target volume to be mounted (if possible) on that site. 
Automatically mounted filesystems will be automatically unmounted approximately @@ -144,7 +144,7 @@ looks up a cell of the same name, for example:: Proc Filesystem =============== -The AFS modules creates a "/proc/fs/afs/" directory and populates it: +The AFS module creates a "/proc/fs/afs/" directory and populates it: (*) A "cells" file that lists cells currently known to the afs module and their usage counts:: @@ -201,7 +201,7 @@ And then run as:: ./klog Assuming it's successful, this adds a key of type RxRPC, named for the service -and cell, eg: "afs@". This can be viewed with the keyctl program or +and cell, e.g.: "afs@". This can be viewed with the keyctl program or by cat'ing /proc/keys:: [root@andromeda ~]# keyctl show @@ -211,7 +211,7 @@ by cat'ing /proc/keys:: 111416553 --als--v 0 0 \_ rxrpc: afs@CAMBRIDGE.REDHAT.COM Currently the username, realm, password and proposed ticket lifetime are -compiled in to the program. +compiled into the program. It is not required to acquire a key before using AFS facilities, but if one is not acquired then all operations will be governed by the anonymous user parts diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index 8fdb78f3c6c9..e03c20564f3a 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -83,20 +83,9 @@ Summary directories. This has runtime constraints and limitations that are described in 6) below. - 6. When changing the S_DAX policy via toggling the persistent FS_XFLAG_DAX flag, - the change in behaviour for existing regular files may not occur - immediately. If the change must take effect immediately, the administrator - needs to: - - a) stop the application so there are no active references to the data set - the policy change will affect - - b) evict the data set from kernel caches so it will be re-instantiated when - the application is restarted. This can be achieved by: - - i. drop-caches - ii. a filesystem unmount and mount cycle - iii. 
a system reboot + 6. When changing the S_DAX policy via toggling the persistent FS_XFLAG_DAX + flag, the change to existing regular files won't take effect until the + files are closed by all processes. Details diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 7be9b46d85d9..1f76b1cb3348 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -83,6 +83,7 @@ Documentation for filesystem implementations. erofs ext2 ext3 + ext4/index f2fs gfs2 gfs2-uevents diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 66ca585a953d..1bfea7de8055 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -693,7 +693,10 @@ files are there, and which are missing. kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4)) kmsg Kernel messages ksyms Kernel symbol table - loadavg Load average of last 1, 5 & 15 minutes + loadavg Load average of last 1, 5 & 15 minutes; + number of processes currently runnable (running or on ready queue); + total number of processes in system; + last pid created. locks Kernel locks meminfo Memory info misc Miscellaneous diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index ca52c82e5bb5..18d69a4559d6 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -112,7 +112,7 @@ members are defined: .. code-block:: c - struct file_system_operations { + struct file_system_type { const char *name; int fs_flags; struct dentry *(*mount) (struct file_system_type *, int, diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 77fbfe93df56..40ccac61137e 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -688,7 +688,7 @@ for fbdev. 
https://patchwork.freedesktop.org/patch/306579/ - [RFC PATCH v2 00/13] Kernel based bootsplash - https://lkml.org/lkml/2017/12/13/764 + https://lore.kernel.org/r/20171213194755.3409-1-mstaudt@suse.de Contact: Sam Ravnborg diff --git a/Documentation/iio/ep93xx_adc.rst b/Documentation/iio/ep93xx_adc.rst index 4fd8dea3f6b8..0af0e9040457 100644 --- a/Documentation/iio/ep93xx_adc.rst +++ b/Documentation/iio/ep93xx_adc.rst @@ -13,7 +13,7 @@ touchscreen/ADC module. ==================== Numbering scheme for channels 0..4 is defined in EP9301 and EP9302 datasheets. -EP9307, EP9312 and EP9312 have 3 channels more (total 8), but the numbering is +EP9307, EP9312 and EP9315 have 3 channels more (total 8), but the numbering is not defined. So the last three are numbered randomly, let's say. Assuming ep93xx_adc is IIO device0, you'd find the following entries under diff --git a/Documentation/index.rst b/Documentation/index.rst index 5888e8a7272f..31f2adc8542d 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -171,17 +171,6 @@ implementation. x86/index xtensa/index -Filesystem Documentation ------------------------- - -The documentation in this section are provided by specific filesystem -subprojects. - -.. toctree:: - :maxdepth: 2 - - filesystems/ext4/index - Other documentation ------------------- diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst index b24b5343f5eb..3118fc1c1e26 100644 --- a/Documentation/input/event-codes.rst +++ b/Documentation/input/event-codes.rst @@ -236,6 +236,21 @@ A few EV_ABS codes have special meanings: - Used to describe multitouch input events. Please see multi-touch-protocol.txt for details. +* ABS_PRESSURE/ABS_MT_PRESSURE: + + - For touch devices, many devices converted contact size into pressure. + A finger flattens with pressure, causing a larger contact area and thus + pressure and contact size are directly related. 
This is not the case + for other devices, for example digitizers and touchpads with a true + pressure sensor ("pressure pads"). + + A device should set the resolution of the axis to indicate whether the + pressure is in measurable units. If the resolution is zero, the + pressure data is in arbitrary units. If the resolution is nonzero, the + pressure data is in units/gram. For example, a value of 10 with a + resolution of 1 represents 10 grams, a value of 10 with a resolution of + 1000 represents 10 milligrams. + EV_SW ----- diff --git a/Documentation/input/multi-touch-protocol.rst b/Documentation/input/multi-touch-protocol.rst index 307fe22d9668..21c1e6a22888 100644 --- a/Documentation/input/multi-touch-protocol.rst +++ b/Documentation/input/multi-touch-protocol.rst @@ -260,6 +260,10 @@ ABS_MT_PRESSURE of TOUCH and WIDTH for pressure-based devices or any device with a spatial signal intensity distribution. + If the resolution is zero, the pressure data is in arbitrary units. + If the resolution is nonzero, the pressure data is in units/gram. See + :ref:`input-event-codes` for details. + ABS_MT_DISTANCE The distance, in surface units, between the contact and the surface. Zero distance means the contact is touching the surface. A positive number means diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst index eed2136d847f..451523424942 100644 --- a/Documentation/kernel-hacking/hacking.rst +++ b/Documentation/kernel-hacking/hacking.rst @@ -346,8 +346,8 @@ routine.
Before inventing your own cache of often-used objects consider using a slab cache in ``include/linux/slab.h`` -:c:func:`current()` -------------------- +:c:macro:`current` +------------------ Defined in ``include/asm/current.h`` diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index c3448929a824..ed1284c6f078 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -958,7 +958,7 @@ grabs a read lock, searches a list, fails to find what it wants, drops the read lock, grabs a write lock and inserts the object has a race condition. -If you don't see why, please stay the fuck away from my code. +If you don't see why, please stay away from my code. Racing Timers: A Kernel Pastime ------------------------------- diff --git a/Documentation/livepatch/index.rst b/Documentation/livepatch/index.rst index 525944063be7..43cce5fad705 100644 --- a/Documentation/livepatch/index.rst +++ b/Documentation/livepatch/index.rst @@ -13,6 +13,7 @@ Kernel Livepatching module-elf-format shadow-vars system-state + reliable-stacktrace .. only:: subproject and html diff --git a/Documentation/livepatch/livepatch.rst b/Documentation/livepatch/livepatch.rst index c2c598c4ead8..68e3651e8af9 100644 --- a/Documentation/livepatch/livepatch.rst +++ b/Documentation/livepatch/livepatch.rst @@ -6,20 +6,7 @@ This document outlines basic information about kernel livepatching. .. Table of Contents: - 1. Motivation - 2. Kprobes, Ftrace, Livepatching - 3. Consistency model - 4. Livepatch module - 4.1. New functions - 4.2. Metadata - 5. Livepatch life-cycle - 5.1. Loading - 5.2. Enabling - 5.3. Replacing - 5.4. Disabling - 5.5. Removing - 6. Sysfs - 7. Limitations +.. contents:: :local: 1. 
Motivation diff --git a/Documentation/livepatch/module-elf-format.rst b/Documentation/livepatch/module-elf-format.rst index 8c6b894c4661..dbe9b400e39f 100644 --- a/Documentation/livepatch/module-elf-format.rst +++ b/Documentation/livepatch/module-elf-format.rst @@ -7,14 +7,8 @@ This document outlines the Elf format requirements that livepatch modules must f .. Table of Contents - 1. Background and motivation - 2. Livepatch modinfo field - 3. Livepatch relocation sections - 3.1 Livepatch relocation section format - 4. Livepatch symbols - 4.1 A livepatch module's symbol table - 4.2 Livepatch symbol format - 5. Symbol table and Elf section access +.. contents:: :local: + 1. Background and motivation ============================ diff --git a/Documentation/livepatch/reliable-stacktrace.rst b/Documentation/livepatch/reliable-stacktrace.rst new file mode 100644 index 000000000000..67459d2ca2af --- /dev/null +++ b/Documentation/livepatch/reliable-stacktrace.rst @@ -0,0 +1,309 @@ +=================== +Reliable Stacktrace +=================== + +This document outlines basic information about reliable stacktracing. + +.. Table of Contents: + +.. contents:: :local: + +1. Introduction +=============== + +The kernel livepatch consistency model relies on accurately identifying which +functions may have live state and therefore may not be safe to patch. One way +to identify which functions are live is to use a stacktrace. + +Existing stacktrace code may not always give an accurate picture of all +functions with live state, and best-effort approaches which can be helpful for +debugging are unsound for livepatching. Livepatching depends on architectures +to provide a *reliable* stacktrace which ensures it never omits any live +functions from a trace. + + +2. Requirements +=============== + +Architectures must implement one of the reliable stacktrace functions. 
+Architectures using CONFIG_ARCH_STACKWALK must implement +'arch_stack_walk_reliable', and other architectures must implement +'save_stack_trace_tsk_reliable'. + +Principally, the reliable stacktrace function must ensure that either: + +* The trace includes all functions that the task may be returned to, and the + return code is zero to indicate that the trace is reliable. + +* The return code is non-zero to indicate that the trace is not reliable. + +.. note:: + In some cases it is legitimate to omit specific functions from the trace, + but all other functions must be reported. These cases are described in + further detail below. + +Secondly, the reliable stacktrace function must be robust to cases where +the stack or other unwind state is corrupt or otherwise unreliable. The +function should attempt to detect such cases and return a non-zero error +code, and should not get stuck in an infinite loop or access memory in +an unsafe way. Specific cases are described in further detail below. + + +3. Compile-time analysis +======================== + +To ensure that kernel code can be correctly unwound in all cases, +architectures may need to verify that code has been compiled in a manner +expected by the unwinder. For example, an unwinder may expect that +functions manipulate the stack pointer in a limited way, or that all +functions use specific prologue and epilogue sequences. Architectures +with such requirements should verify the kernel compilation using +objtool. + +In some cases, an unwinder may require metadata to correctly unwind. +Where necessary, this metadata should be generated at build time using +objtool. + + +4. Considerations +================= + +The unwinding process varies across architectures, their respective procedure +call standards, and kernel configurations. This section describes common +details that architectures should consider.
+ +4.1 Identifying successful termination +-------------------------------------- + +Unwinding may terminate early for a number of reasons, including: + +* Stack or frame pointer corruption. + +* Missing unwind support for an uncommon scenario, or a bug in the unwinder. + +* Dynamically generated code (e.g. eBPF) or foreign code (e.g. EFI runtime + services) not following the conventions expected by the unwinder. + +To ensure that this does not result in functions being omitted from the trace, +even if not caught by other checks, it is strongly recommended that +architectures verify that a stacktrace ends at an expected location, e.g. + +* Within a specific function that is an entry point to the kernel. + +* At a specific location on a stack expected for a kernel entry point. + +* On a specific stack expected for a kernel entry point (e.g. if the + architecture has separate task and IRQ stacks). + +4.2 Identifying unwindable code +------------------------------- + +Unwinding typically relies on code following specific conventions (e.g. +manipulating a frame pointer), but there can be code which may not follow these +conventions and may require special handling in the unwinder, e.g. + +* Exception vectors and entry assembly. + +* Procedure Linkage Table (PLT) entries and veneer functions. + +* Trampoline assembly (e.g. ftrace, kprobes). + +* Dynamically generated code (e.g. eBPF, optprobe trampolines). + +* Foreign code (e.g. EFI runtime services). + +To ensure that such cases do not result in functions being omitted from a +trace, it is strongly recommended that architectures positively identify code +which is known to be reliable to unwind from, and reject unwinding from all +other code. + +Kernel code including modules and eBPF can be distinguished from foreign code +using '__kernel_text_address()'. Checking for this also helps to detect stack +corruption. 
+ +There are several ways an architecture may identify kernel code which is deemed +unreliable to unwind from, e.g. + +* Placing such code into special linker sections, and rejecting unwinding from + any code in these sections. + +* Identifying specific portions of code using bounds information. + +4.3 Unwinding across interrupts and exceptions +---------------------------------------------- + +At function call boundaries the stack and other unwind state is expected to be +in a consistent state suitable for reliable unwinding, but this may not be the +case part-way through a function. For example, during a function prologue or +epilogue a frame pointer may be transiently invalid, or during the function +body the return address may be held in an arbitrary general purpose register. +For some architectures this may change at runtime as a result of dynamic +instrumentation. + +If an interrupt or other exception is taken while the stack or other unwind +state is in an inconsistent state, it may not be possible to reliably unwind, +and it may not be possible to identify whether such unwinding will be reliable. +See below for examples. + +Architectures which cannot identify when it is reliable to unwind such cases +(or where it is never reliable) must reject unwinding across exception +boundaries. Note that it may be reliable to unwind across certain +exceptions (e.g. IRQ) but unreliable to unwind across other exceptions +(e.g. NMI). + +Architectures which can identify when it is reliable to unwind such cases (or +have no such cases) should attempt to unwind across exception boundaries, as +doing so can prevent unnecessarily stalling livepatch consistency checks and +permits livepatch transitions to complete more quickly. + +4.4 Rewriting of return addresses +--------------------------------- + +Some trampolines temporarily modify the return address of a function in order +to intercept when that function returns with a return trampoline, e.g. 
+ +* An ftrace trampoline may modify the return address so that function graph + tracing can intercept returns. + +* A kprobes (or optprobes) trampoline may modify the return address so that + kretprobes can intercept returns. + +When this happens, the original return address will not be in its usual +location. For trampolines which are not subject to live patching, where an +unwinder can reliably determine the original return address and no unwind state +is altered by the trampoline, the unwinder may report the original return +address in place of the trampoline and report this as reliable. Otherwise, an +unwinder must report these cases as unreliable. + +Special care is required when identifying the original return address, as this +information is not in a consistent location for the duration of the entry +trampoline or return trampoline. For example, considering the x86_64 +'return_to_handler' return trampoline: + +.. code-block:: none + + SYM_CODE_START(return_to_handler) + UNWIND_HINT_EMPTY + subq $24, %rsp + + /* Save the return values */ + movq %rax, (%rsp) + movq %rdx, 8(%rsp) + movq %rbp, %rdi + + call ftrace_return_to_handler + + movq %rax, %rdi + movq 8(%rsp), %rdx + movq (%rsp), %rax + addq $24, %rsp + JMP_NOSPEC rdi + SYM_CODE_END(return_to_handler) + +While the traced function runs its return address on the stack points to +the start of return_to_handler, and the original return address is stored in +the task's cur_ret_stack. During this time the unwinder can find the return +address using ftrace_graph_ret_addr(). + +When the traced function returns to return_to_handler, there is no longer a +return address on the stack, though the original return address is still stored +in the task's cur_ret_stack. Within ftrace_return_to_handler(), the original +return address is removed from cur_ret_stack and is transiently moved +arbitrarily by the compiler before being returned in rax. The return_to_handler +trampoline moves this into rdi before jumping to it. 
+ +Architectures might not always be able to unwind such sequences, such as when +ftrace_return_to_handler() has removed the address from cur_ret_stack, and the +location of the return address cannot be reliably determined. + +It is recommended that architectures unwind cases where return_to_handler has +not yet been returned to, but architectures are not required to unwind from the +middle of return_to_handler and can report this as unreliable. Architectures +are not required to unwind from other trampolines which modify the return +address. + +4.5 Obscuring of return addresses +--------------------------------- + +Some trampolines do not rewrite the return address in order to intercept +returns, but do transiently clobber the return address or other unwind state. + +For example, the x86_64 implementation of optprobes patches the probed function +with a JMP instruction which targets the associated optprobe trampoline. When +the probe is hit, the CPU will branch to the optprobe trampoline, and the +address of the probed function is not held in any register or on the stack. + +Similarly, the arm64 implementation of DYNAMIC_FTRACE_WITH_REGS patches traced +functions with the following: + +.. code-block:: none + + MOV X9, X30 + BL <trampoline> + +The MOV saves the link register (X30) into X9 to preserve the return address +before the BL clobbers the link register and branches to the trampoline. At the +start of the trampoline, the address of the traced function is in X9 rather +than the link register as would usually be the case. + +Architectures must ensure that unwinders either reliably unwind +such cases, or report the unwinding as unreliable. + +4.6 Link register unreliability +------------------------------- + +On some other architectures, 'call' instructions place the return address into a +link register, and 'return' instructions consume the return address from the +link register without modifying the register.
On these architectures software +must save the return address to the stack prior to making a function call. Over +the duration of a function call, the return address may be held in the link +register alone, on the stack alone, or in both locations. + +Unwinders typically assume the link register is always live, but this +assumption can lead to unreliable stack traces. For example, consider the +following arm64 assembly for a simple function: + +.. code-block:: none + + function: + STP X29, X30, [SP, -16]! + MOV X29, SP + BL <other_function> + LDP X29, X30, [SP], #16 + RET + +At entry to the function, the link register (X30) points to the caller, and the +frame pointer (X29) points to the caller's frame including the caller's return +address. The first two instructions create a new stackframe and update the +frame pointer, and at this point the link register and the frame pointer both +describe this function's return address. A trace at this point may describe +this function twice, and if the function return is being traced, the unwinder +may consume two entries from the fgraph return stack rather than one entry. + +The BL invokes 'other_function' with the link register pointing to this +function's LDP and the frame pointer pointing to this function's stackframe. +When 'other_function' returns, the link register is left pointing at the BL, +and so a trace at this point could result in 'function' appearing twice in the +backtrace. + +Similarly, a function may deliberately clobber the LR, e.g. + +.. code-block:: none + + caller: + STP X29, X30, [SP, -16]! + MOV X29, SP + ADR LR, <callee> + BLR LR + LDP X29, X30, [SP], #16 + RET + +The ADR places the address of 'callee' into the LR, before the BLR branches to +this address. If a trace is made immediately after the ADR, 'callee' will +appear to be the parent of 'caller', rather than the child. + +Due to cases such as the above, it may only be possible to reliably consume a +link register value at a function call boundary.
Architectures where this is +the case must reject unwinding across exception boundaries unless they can +reliably identify when the LR or stack value should be used (e.g. using +metadata generated by objtool). diff --git a/Documentation/power/freezing-of-tasks.rst b/Documentation/power/freezing-of-tasks.rst index 8bd693399834..53b6a56c4635 100644 --- a/Documentation/power/freezing-of-tasks.rst +++ b/Documentation/power/freezing-of-tasks.rst @@ -134,7 +134,7 @@ Generally speaking, there is a couple of reasons to use the freezing of tasks: safeguards against race conditions that might occur in such a case. Although Linus Torvalds doesn't like the freezing of tasks, he said this in one -of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608): +of the discussions on LKML (https://lore.kernel.org/r/alpine.LFD.0.98.0704271801020.9964@woody.linux-foundation.org): "RJW:> Why we freeze tasks at all or why we freeze kernel threads? diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst index a3ecb236576c..906c47f1a9e5 100644 --- a/Documentation/process/adding-syscalls.rst +++ b/Documentation/process/adding-syscalls.rst @@ -501,7 +501,7 @@ table, but not from elsewhere in the kernel. If the syscall functionality is useful to be used within the kernel, needs to be shared between an old and a new syscall, or needs to be shared between a syscall and its compatibility variant, it should be implemented by means of a "helper" function (such as -``kern_xyzzy()``). This kernel function may then be called within the +``ksys_xyzzy()``). This kernel function may then be called within the syscall stub (``sys_xyzzy()``), the compatibility syscall stub (``compat_sys_xyzzy()``), and/or other kernel code. 
@@ -548,18 +548,18 @@ References and Sources https://lwn.net/Articles/486306/ - Recommendation from Andrew Morton that all related information for a new system call should come in the same email thread: - https://lkml.org/lkml/2014/7/24/641 + https://lore.kernel.org/r/20140724144747.3041b208832bbdf9fbce5d96@linux-foundation.org - Recommendation from Michael Kerrisk that a new system call should come with - a man page: https://lkml.org/lkml/2014/6/13/309 + a man page: https://lore.kernel.org/r/CAKgNAkgMA39AfoSoA5Pe1r9N+ZzfYQNvNPvcRN7tOvRb8+v06Q@mail.gmail.com - Suggestion from Thomas Gleixner that x86 wire-up should be in a separate - commit: https://lkml.org/lkml/2014/11/19/254 + commit: https://lore.kernel.org/r/alpine.DEB.2.11.1411191249560.3909@nanos - Suggestion from Greg Kroah-Hartman that it's good for new system calls to - come with a man-page & selftest: https://lkml.org/lkml/2014/3/19/710 + come with a man-page & selftest: https://lore.kernel.org/r/20140320025530.GA25469@kroah.com - Discussion from Michael Kerrisk of new system call vs. 
:manpage:`prctl(2)` extension: - https://lkml.org/lkml/2014/6/3/411 + https://lore.kernel.org/r/CAHO5Pa3F2MjfTtfNxa8LbnkeeU8=YJ+9tDqxZpw7Gz59E-4AUg@mail.gmail.com - Suggestion from Ingo Molnar that system calls that involve multiple arguments should encapsulate those arguments in a struct, which includes a - size field for future extensibility: https://lkml.org/lkml/2015/7/30/117 + size field for future extensibility: https://lore.kernel.org/r/20150730083831.GA22182@gmail.com - Numbering oddities arising from (re-)use of O_* numbering space flags: - commit 75069f2b5bfb ("vfs: renumber FMODE_NONOTIFY and add to uniqueness @@ -569,9 +569,9 @@ References and Sources - commit bb458c644a59 ("Safer ABI for O_TMPFILE") - Discussion from Matthew Wilcox about restrictions on 64-bit arguments: - https://lkml.org/lkml/2008/12/12/187 + https://lore.kernel.org/r/20081212152929.GM26095@parisc-linux.org - Recommendation from Greg Kroah-Hartman that unknown flags should be - policed: https://lkml.org/lkml/2014/7/17/577 + policed: https://lore.kernel.org/r/20140717193330.GB4703@kroah.com - Recommendation from Linus Torvalds that x32 system calls should prefer compatibility with 64-bit versions rather than 32-bit versions: - https://lkml.org/lkml/2011/8/31/244 + https://lore.kernel.org/r/CA+55aFxfmwfB7jbbrXxa=K7VBYPfAvmu3XOkGrLbB1UFjX1+Ew@mail.gmail.com diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 98227226c4e5..42969ab37b34 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -69,9 +69,26 @@ something to hide: if (condition) do_this; do_something_everytime; +Don't use commas to avoid using braces: + +.. code-block:: c + + if (condition) + do_this(), do_that(); + +Always use braces for multiple statements: + +.. code-block:: c + + if (condition) { + do_this(); + do_that(); + } + Don't put multiple assignments on a single line either. Kernel coding style is super simple.
Avoid tricky expressions. + Outside of comments, documentation and except in Kconfig, spaces are never used for indentation, and the above example is deliberately broken. @@ -306,8 +323,7 @@ that counts the number of active users, you should call that Encoding the type of a function into the name (so-called Hungarian notation) is asinine - the compiler knows the types anyway and can check -those, and it only confuses the programmer. No wonder Microsoft makes buggy -programs. +those, and it only confuses the programmer. LOCAL variable names should be short, and to the point. If you have some random integer loop counter, it should probably be called ``i``. diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst index 7a5c105e34d4..e4beeca57e5f 100644 --- a/Documentation/process/howto.rst +++ b/Documentation/process/howto.rst @@ -342,16 +342,10 @@ Adventurous testers are very welcome to runtime-test the linux-next. Bug Reporting ------------- -https://bugzilla.kernel.org is where the Linux kernel developers track kernel -bugs. Users are encouraged to report all bugs that they find in this -tool. For details on how to use the kernel bugzilla, please see: - - https://bugzilla.kernel.org/page.cgi?id=faq.html - The file 'Documentation/admin-guide/reporting-issues.rst' in the main kernel -source directory has a good template for how to report a possible kernel bug, -and details what kind of information is needed by the kernel developers to help -track down the problem. +source directory describes how to report a possible kernel bug, and details +what kind of information is needed by the kernel developers to help track +down the problem. Managing bug reports @@ -364,7 +358,13 @@ improve your skills, and other developers will be aware of your presence. Fixing bugs is one of the best ways to get merits among other developers, because not many people like wasting time fixing other people's bugs. 
-To work in the already reported bug reports, go to https://bugzilla.kernel.org. +To work on already reported bug reports, find a subsystem you are interested in. +Check the MAINTAINERS file where bugs for that subsystem get reported to; often +it will be a mailing list, rarely a bugtracker. Search the archives of said +place for recent reports and help where you see fit. You may also want to check +https://bugzilla.kernel.org for bug reports; only a handful of kernel subsystems +use it actively for reporting or tracking, nevertheless bugs for the whole +kernel get filed there. Mailing lists diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst index 230ee42f872f..f709beaf02c9 100644 --- a/Documentation/process/submit-checklist.rst +++ b/Documentation/process/submit-checklist.rst @@ -89,30 +89,28 @@ and elsewhere regarding submitting Linux kernel patches. Patches that change userspace interfaces should be CCed to linux-api@vger.kernel.org. -19) Check that it all passes ``make headers_check``. - -20) Has been checked with injection of at least slab and page-allocation +19) Has been checked with injection of at least slab and page-allocation failures. See ``Documentation/fault-injection/``. If the new code is substantial, addition of subsystem-specific fault injection might be appropriate. -21) Newly-added code has been compiled with ``gcc -W`` (use +20) Newly-added code has been compiled with ``gcc -W`` (use ``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good for finding bugs like "warning: comparison between signed and unsigned". -22) Tested after it has been merged into the -mm patchset to make sure +21) Tested after it has been merged into the -mm patchset to make sure that it still works with all of the other queued patches and various changes in the VM, VFS, and other subsystems. 
-23) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a +22) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a comment in the source code that explains the logic of what they are doing and why. -24) If any ioctl's are added by the patch, then also update +23) If any ioctl's are added by the patch, then also update ``Documentation/userspace-api/ioctl/ioctl-number.rst``. -25) If your modified source code depends on or uses any of the kernel +24) If your modified source code depends on or uses any of the kernel APIs or features that are related to the following ``Kconfig`` symbols, then test multiple builds with the related ``Kconfig`` symbols disabled and/or ``=m`` (if that option is available) [not all of these at the diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 5ba54120bef7..8c991c863628 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -556,6 +556,11 @@ which stable kernel versions should receive your fix. This is the preferred method for indicating a bug fixed by the patch. See :ref:`describe_changes` for more details. +Note: Attaching a Fixes: tag does not subvert the stable kernel rules +process nor the requirement to Cc: stable@vger.kernel.org on all stable +patch candidates. For more information, please read +:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>` + .. _the_canonical_patch_format: The canonical patch format @@ -679,6 +684,26 @@ generates appropriate diffstats by default.) See more details on the proper patch format in the following references. +Backtraces in commit messages +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Backtraces help document the call chain leading to a problem. However, +not all backtraces are helpful. For example, early boot call chains are +unique and obvious.
Copying the full dmesg output verbatim, however, +adds distracting information like timestamps, module lists, register and +stack dumps. + +Therefore, the most useful backtraces should distill the relevant +information from the dump, which makes it easier to focus on the real +issue. Here is an example of a well-trimmed backtrace:: + + unchecked MSR access error: WRMSR to 0xd51 (tried to write 0x0000000000000064) + at rIP: 0xffffffffae059994 (native_write_msr+0x4/0x20) + Call Trace: + mba_wrmsr + update_domains + rdtgroup_mkdir + .. _explicit_in_reply_to: Explicit In-Reply-To headers @@ -769,13 +794,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! - + Kernel Documentation/process/coding-style.rst: :ref:`Documentation/process/coding-style.rst ` Linus Torvalds's mail on the canonical patch format: - + Andi Kleen, "On submitting kernel patches" Some strategies to get difficult or controversial changes in. diff --git a/Documentation/scheduler/sched-bwc.rst b/Documentation/scheduler/sched-bwc.rst index 9801d6b284b1..845eee659199 100644 --- a/Documentation/scheduler/sched-bwc.rst +++ b/Documentation/scheduler/sched-bwc.rst @@ -2,8 +2,9 @@ CFS Bandwidth Control ===================== -[ This document only discusses CPU bandwidth control for SCHED_NORMAL. - The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst ] +.. note:: + This document only discusses CPU bandwidth control for SCHED_NORMAL. + The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the specification of the maximum CPU bandwidth available to a group or hierarchy. @@ -25,9 +26,15 @@ Management ---------- Quota and period are managed within the cpu subsystem via cgroupfs. 
-cpu.cfs_quota_us: the total available run-time within a period (in microseconds) -cpu.cfs_period_us: the length of a period (in microseconds) -cpu.stat: exports throttling statistics [explained further below] +.. note:: + The cgroupfs files described in this section are only applicable + to cgroup v1. For cgroup v2, see + :ref:`Documentation/admin-guide/cgroupv2.rst `. + +- cpu.cfs_quota_us: the total available run-time within a period (in + microseconds) +- cpu.cfs_period_us: the length of a period (in microseconds) +- cpu.stat: exports throttling statistics [explained further below] The default values are:: diff --git a/Documentation/scheduler/sched-deadline.rst b/Documentation/scheduler/sched-deadline.rst index 14a2f7bf63fe..9d9be52f221a 100644 --- a/Documentation/scheduler/sched-deadline.rst +++ b/Documentation/scheduler/sched-deadline.rst @@ -707,7 +707,7 @@ Deadline Task Scheduling and how to prevent non-root users "cheat" the system? As already discussed, we are planning also to merge this work with the EDF - throttling patches [https://lkml.org/lkml/2010/2/23/239] but we still are in + throttling patches [https://lore.kernel.org/r/cover.1266931410.git.fabio@helm.retis] but we still are in the preliminary phases of the merge and we really seek feedback that would help us decide on the direction it should take. diff --git a/Documentation/scheduler/sched-design-CFS.rst b/Documentation/scheduler/sched-design-CFS.rst index a96c72651877..59b2d1fb4dc4 100644 --- a/Documentation/scheduler/sched-design-CFS.rst +++ b/Documentation/scheduler/sched-design-CFS.rst @@ -34,9 +34,9 @@ In CFS the virtual runtime is expressed and tracked via the per-task p->se.vruntime (nanosec-unit) value. This way, it's possible to accurately timestamp and measure the "expected CPU time" a task should have gotten. 
-[ small detail: on "ideal" hardware, at any time all tasks would have the same - p->se.vruntime value --- i.e., tasks would execute simultaneously and no task - would ever get "out of balance" from the "ideal" share of CPU time. ] + Small detail: on "ideal" hardware, at any time all tasks would have the same + p->se.vruntime value --- i.e., tasks would execute simultaneously and no task + would ever get "out of balance" from the "ideal" share of CPU time. CFS's task picking logic is based on this p->se.vruntime value and it is thus very simple: it always tries to run the task with the smallest p->se.vruntime diff --git a/Documentation/security/lsm-development.rst b/Documentation/security/lsm-development.rst index 31d92bc5fdd2..ac53e5065f79 100644 --- a/Documentation/security/lsm-development.rst +++ b/Documentation/security/lsm-development.rst @@ -2,7 +2,7 @@ Linux Security Module Development ================================= -Based on https://lkml.org/lkml/2007/10/26/215, +Based on https://lore.kernel.org/r/20071026073721.618b4778@laptopd505.fenrus.org, a new LSM is accepted into the kernel when its intent (a description of what it tries to protect against and in what cases one would expect to use it) has been appropriately documented in ``Documentation/admin-guide/LSM/``. diff --git a/Documentation/sphinx/automarkup.py b/Documentation/sphinx/automarkup.py index 953b24b6e2b4..acf5473002f3 100644 --- a/Documentation/sphinx/automarkup.py +++ b/Documentation/sphinx/automarkup.py @@ -51,7 +51,7 @@ RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3) # Detects a reference to a documentation page of the form Documentation/... 
with # an optional extension # -RE_doc = re.compile(r'\bDocumentation(/[\w\-_/]+)(\.\w+)*') +RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)') RE_namespace = re.compile(r'^\s*..\s*c:namespace::\s*(\S+)\s*$') @@ -234,7 +234,10 @@ def markup_doc_ref(docname, app, match): # # Go through the dance of getting an xref out of the std domain # - target = match.group(1) + absolute = match.group(1) + target = match.group(2) + if absolute: + target = "/" + target xref = None pxref = addnodes.pending_xref('', refdomain = 'std', reftype = 'doc', reftarget = target, modname = None, diff --git a/Documentation/sphinx/cdomain.py b/Documentation/sphinx/cdomain.py index 014a5229e57a..ca8ac9e59ded 100644 --- a/Documentation/sphinx/cdomain.py +++ b/Documentation/sphinx/cdomain.py @@ -236,13 +236,7 @@ class CObject(Base_CObject): indextext = self.get_index_text(name) if indextext: - if major == 1 and minor < 4: - # indexnode's tuple changed in 1.4 - # https://github.com/sphinx-doc/sphinx/commit/e6a5a3a92e938fcd75866b4227db9e0524d58f7c - self.indexnode['entries'].append( - ('single', indextext, targetname, '')) - else: - self.indexnode['entries'].append( + self.indexnode['entries'].append( ('single', indextext, targetname, '', None)) class CDomain(Base_CDomain): diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index f3da859c9878..efe760e410c4 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -45,17 +45,7 @@ from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive from docutils.utils.error_reporting import ErrorString - -# -# AutodocReporter is only good up to Sphinx 1.7 -# -import sphinx - -Use_SSI = sphinx.__version__[:3] >= '1.7' -if Use_SSI: - from sphinx.util.docutils import switch_source_input -else: - from sphinx.ext.autodoc import AutodocReporter +from sphinx.util.docutils import switch_source_input 
__version__ = '1.0' @@ -179,16 +169,5 @@ class KernelCmd(Directive): return node.children def do_parse(self, content, node): - if Use_SSI: - with switch_source_input(self.state, content): - self.state.nested_parse(content, 0, node, match_titles=1) - else: - buf = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter - - self.state.memo.title_styles = [] - self.state.memo.section_level = 0 - self.state.memo.reporter = AutodocReporter(content, self.state.memo.reporter) - try: - self.state.nested_parse(content, 0, node, match_titles=1) - finally: - self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = buf + with switch_source_input(self.state, content): + self.state.nested_parse(content, 0, node, match_titles=1) diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py index 2fee04f1dedd..c91ea2b27697 100644 --- a/Documentation/sphinx/kernel_feat.py +++ b/Documentation/sphinx/kernel_feat.py @@ -42,17 +42,7 @@ from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive from docutils.utils.error_reporting import ErrorString - -# -# AutodocReporter is only good up to Sphinx 1.7 -# -import sphinx - -Use_SSI = sphinx.__version__[:3] >= '1.7' -if Use_SSI: - from sphinx.util.docutils import switch_source_input -else: - from sphinx.ext.autodoc import AutodocReporter +from sphinx.util.docutils import switch_source_input __version__ = '1.0' @@ -154,16 +144,7 @@ class KernelFeat(Directive): buf = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter - if Use_SSI: - with switch_source_input(self.state, content): - self.state.nested_parse(content, 0, node, match_titles=1) - else: - self.state.memo.title_styles = [] - self.state.memo.section_level = 0 - self.state.memo.reporter = AutodocReporter(content, self.state.memo.reporter) - try: - self.state.nested_parse(content, 0, node, 
match_titles=1) - finally: - self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = buf + with switch_source_input(self.state, content): + self.state.nested_parse(content, 0, node, match_titles=1) return node.children diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index e9857ab904f1..8189c33b9dda 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -37,18 +37,8 @@ import glob from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive - -# -# AutodocReporter is only good up to Sphinx 1.7 -# import sphinx - -Use_SSI = sphinx.__version__[:3] >= '1.7' -if Use_SSI: - from sphinx.util.docutils import switch_source_input -else: - from sphinx.ext.autodoc import AutodocReporter - +from sphinx.util.docutils import switch_source_input import kernellog __version__ = '1.0' @@ -163,18 +153,8 @@ class KernelDocDirective(Directive): return [nodes.error(None, nodes.paragraph(text = "kernel-doc missing"))] def do_parse(self, result, node): - if Use_SSI: - with switch_source_input(self.state, result): - self.state.nested_parse(result, 0, node, match_titles=1) - else: - save = self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter - self.state.memo.reporter = AutodocReporter(result, self.state.memo.reporter) - self.state.memo.title_styles, self.state.memo.section_level = [], 0 - try: - self.state.nested_parse(result, 0, node, match_titles=1) - finally: - self.state.memo.title_styles, self.state.memo.section_level, self.state.memo.reporter = save - + with switch_source_input(self.state, result): + self.state.nested_parse(result, 0, node, match_titles=1) def setup(app): app.add_config_value('kerneldoc_bin', None, 'env') diff --git a/Documentation/sphinx/kernellog.py b/Documentation/sphinx/kernellog.py index 8ac7d274f542..0bc00c138cad 100644 --- 
a/Documentation/sphinx/kernellog.py +++ b/Documentation/sphinx/kernellog.py @@ -4,29 +4,19 @@ # only goes back to 1.6. So here's a wrapper layer to keep around for # as long as we support 1.4. # +# We don't support 1.4 anymore, but we'll keep the wrappers around until +# we change all the code to not use them anymore :) +# import sphinx +from sphinx.util import logging -if sphinx.__version__[:3] >= '1.6': - UseLogging = True - from sphinx.util import logging - logger = logging.getLogger('kerneldoc') -else: - UseLogging = False +logger = logging.getLogger('kerneldoc') def warn(app, message): - if UseLogging: - logger.warning(message) - else: - app.warn(message) + logger.warning(message) def verbose(app, message): - if UseLogging: - logger.verbose(message) - else: - app.verbose(message) + logger.verbose(message) def info(app, message): - if UseLogging: - logger.info(message) - else: - app.info(message) + logger.info(message) diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index 788704886eec..3c78828330be 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -49,26 +49,14 @@ import os from os import path import subprocess from hashlib import sha1 -import sys - from docutils import nodes from docutils.statemachine import ViewList from docutils.parsers.rst import directives from docutils.parsers.rst.directives import images import sphinx - from sphinx.util.nodes import clean_astext -from six import iteritems - import kernellog -PY3 = sys.version_info[0] == 3 - -if PY3: - _unicode = str -else: - _unicode = unicode - # Get Sphinx version major, minor, patch = sphinx.version_info[:3] if major == 1 and minor > 3: @@ -540,7 +528,7 @@ def add_kernel_figure_to_std_domain(app, doctree): docname = app.env.docname labels = std.data["labels"] - for name, explicit in iteritems(doctree.nametypes): + for name, explicit in doctree.nametypes.items(): if not explicit: continue labelid = doctree.nameids[name] diff --git 
a/Documentation/sphinx/maintainers_include.py b/Documentation/sphinx/maintainers_include.py index dc8fed48d3c2..328b3631a585 100755 --- a/Documentation/sphinx/maintainers_include.py +++ b/Documentation/sphinx/maintainers_include.py @@ -61,8 +61,6 @@ class MaintainersInclude(Include): field_content = "" for line in open(path): - if sys.version_info.major == 2: - line = unicode(line, 'utf-8') # Have we reached the end of the preformatted Descriptions text? if descriptions and line.startswith('Maintainers'): descriptions = False diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt index 5030d346d23b..489f6626de67 100644 --- a/Documentation/sphinx/requirements.txt +++ b/Documentation/sphinx/requirements.txt @@ -1,4 +1,3 @@ docutils Sphinx==2.4.4 sphinx_rtd_theme -six diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py index 2019a55f6b18..a3eea0bbe6ba 100755 --- a/Documentation/sphinx/rstFlatTable.py +++ b/Documentation/sphinx/rstFlatTable.py @@ -42,8 +42,6 @@ u""" # imports # ============================================================================== -import sys - from docutils import nodes from docutils.parsers.rst import directives, roles from docutils.parsers.rst.directives.tables import Table @@ -55,14 +53,6 @@ from docutils.utils import SystemMessagePropagation __version__ = '1.0' -PY3 = sys.version_info[0] == 3 -PY2 = sys.version_info[0] == 2 - -if PY3: - # pylint: disable=C0103, W0622 - unicode = str - basestring = str - # ============================================================================== def setup(app): # ============================================================================== diff --git a/Documentation/timers/timers-howto.rst b/Documentation/timers/timers-howto.rst index afb0a43b8cdf..5c169e3d29a8 100644 --- a/Documentation/timers/timers-howto.rst +++ b/Documentation/timers/timers-howto.rst @@ -75,7 +75,7 @@ NON-ATOMIC CONTEXT: - Why not msleep for (1ms - 20ms)? 
Explained originally here: - http://lkml.org/lkml/2007/8/3/250 + https://lore.kernel.org/r/15327.1186166232@lwn.net msleep(1~20) may not do what the caller intends, and will often sleep longer (~20 ms actual sleep for any diff --git a/Documentation/translations/it_IT/process/adding-syscalls.rst b/Documentation/translations/it_IT/process/adding-syscalls.rst index bff0a82bf127..c478b6e8c292 100644 --- a/Documentation/translations/it_IT/process/adding-syscalls.rst +++ b/Documentation/translations/it_IT/process/adding-syscalls.rst @@ -611,21 +611,21 @@ Riferimenti e fonti https://lwn.net/Articles/486306/ - Raccomandazioni da Andrew Morton circa il fatto che tutte le informazioni su una nuova chiamata di sistema dovrebbero essere contenute nello stesso - filone di discussione di email: https://lkml.org/lkml/2014/7/24/641 + filone di discussione di email: https://lore.kernel.org/r/20140724144747.3041b208832bbdf9fbce5d96@linux-foundation.org - Raccomandazioni da Michael Kerrisk circa il fatto che le nuove chiamate di - sistema dovrebbero avere una pagina man: https://lkml.org/lkml/2014/6/13/309 + sistema dovrebbero avere una pagina man: https://lore.kernel.org/r/CAKgNAkgMA39AfoSoA5Pe1r9N+ZzfYQNvNPvcRN7tOvRb8+v06Q@mail.gmail.com - Consigli da Thomas Gleixner sul fatto che il collegamento all'architettura x86 dovrebbe avvenire in un *commit* differente: - https://lkml.org/lkml/2014/11/19/254 + https://lore.kernel.org/r/alpine.DEB.2.11.1411191249560.3909@nanos - Consigli da Greg Kroah-Hartman circa la bontà d'avere una pagina man e un programma di auto-verifica per le nuove chiamate di sistema: - https://lkml.org/lkml/2014/3/19/710 + https://lore.kernel.org/r/20140320025530.GA25469@kroah.com - Discussione di Michael Kerrisk sulle nuove chiamate di sistema contro - le estensioni :manpage:`prctl(2)`: https://lkml.org/lkml/2014/6/3/411 + le estensioni :manpage:`prctl(2)`: https://lore.kernel.org/r/CAHO5Pa3F2MjfTtfNxa8LbnkeeU8=YJ+9tDqxZpw7Gz59E-4AUg@mail.gmail.com - Consigli da 
Ingo Molnar che le chiamate di sistema con più argomenti dovrebbero incapsularli in una struttura che includa un argomento *size* per garantire l'estensibilità futura: - https://lkml.org/lkml/2015/7/30/117 + https://lore.kernel.org/r/20150730083831.GA22182@gmail.com - Un certo numero di casi strani emersi dall'uso (riuso) dei flag O_*: - commit 75069f2b5bfb ("vfs: renumber FMODE_NONOTIFY and add to uniqueness @@ -635,9 +635,9 @@ Riferimenti e fonti - commit bb458c644a59 ("Safer ABI for O_TMPFILE") - Discussion from Matthew Wilcox about restrictions on 64-bit arguments: - https://lkml.org/lkml/2008/12/12/187 + https://lore.kernel.org/r/20081212152929.GM26095@parisc-linux.org - Raccomandazioni da Greg Kroah-Hartman sul fatto che i flag sconosciuti dovrebbero - essere controllati: https://lkml.org/lkml/2014/7/17/577 + essere controllati: https://lore.kernel.org/r/20140717193330.GB4703@kroah.com - Raccomandazioni da Linus Torvalds che le chiamate di sistema x32 dovrebbero favorire la compatibilità con le versioni a 64-bit piuttosto che quelle a 32-bit: - https://lkml.org/lkml/2011/8/31/244 + https://lore.kernel.org/r/CA+55aFxfmwfB7jbbrXxa=K7VBYPfAvmu3XOkGrLbB1UFjX1+Ew@mail.gmail.com diff --git a/Documentation/translations/it_IT/process/submitting-patches.rst b/Documentation/translations/it_IT/process/submitting-patches.rst index 966cd3242a60..ae00352346ed 100644 --- a/Documentation/translations/it_IT/process/submitting-patches.rst +++ b/Documentation/translations/it_IT/process/submitting-patches.rst @@ -731,13 +731,13 @@ Greg Kroah-Hartman, "Come scocciare un manutentore di un sottosistema" No!!!! Basta gigantesche bombe patch alle persone sulla lista linux-kernel@vger.kernel.org! 
- + Kernel Documentation/translations/it_IT/process/coding-style.rst: :ref:`Documentation/translations/it_IT/process/coding-style.rst ` E-mail di Linus Torvalds sul formato canonico di una patch: - + Andi Kleen, "Su come sottomettere patch del kernel" Alcune strategie su come sottomettere modifiche toste o controverse. diff --git a/Documentation/translations/ja_JP/SubmittingPatches b/Documentation/translations/ja_JP/SubmittingPatches index dd0c3280ba5a..6854f5add72e 100644 --- a/Documentation/translations/ja_JP/SubmittingPatches +++ b/Documentation/translations/ja_JP/SubmittingPatches @@ -702,13 +702,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! - + Kernel Documentation/process/coding-style.rst: Linus Torvalds's mail on the canonical patch format: - + Andi Kleen, "On submitting kernel patches" Some strategies to get difficult or controversial changes in. diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst index 240d29be38f2..787f1e85f8a0 100644 --- a/Documentation/translations/ko_KR/howto.rst +++ b/Documentation/translations/ko_KR/howto.rst @@ -345,7 +345,7 @@ https://bugzilla.kernel.org 는 리눅스 커널 개발자들이 커널의 버 https://bugzilla.kernel.org/page.cgi?id=faq.html -메인 커널 소스 디렉토리에 있는 :ref:`admin-guide/reporting-bugs.rst ` +메인 커널 소스 디렉토리에 있는 'Documentation/admin-guide/reporting-issues.rst' 파일은 커널 버그라고 생각되는 것을 보고하는 방법에 관한 좋은 템플릿이며 문제를 추적하기 위해서 커널 개발자들이 필요로 하는 정보가 무엇들인지를 상세히 설명하고 있다. @@ -583,7 +583,7 @@ Pat이라는 이름을 가진 여자가 있을 수도 있는 것이다. 리눅 "The Perfect Patch" - http://www.ozlabs.org/~akpm/stuff/tpp.txt + https://www.ozlabs.org/~akpm/stuff/tpp.txt 이 모든 것을 하는 것은 매우 어려운 일이다. 
완벽히 소화하는 데는 적어도 몇년이 diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst index 27995c4233de..b9e27d20b039 100644 --- a/Documentation/translations/ko_KR/index.rst +++ b/Documentation/translations/ko_KR/index.rst @@ -10,3 +10,18 @@ :maxdepth: 1 howto + + +리눅스 커널 메모리 배리어 +------------------------- + +.. raw:: latex + + \footnotesize + +.. include:: ./memory-barriers.txt + :literal: + +.. raw:: latex + + \normalsize diff --git a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst index c972731c0e57..a73400a054ff 100644 --- a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst +++ b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst @@ -95,7 +95,7 @@ Linux通过``/proc/stat``和``/proc/uptime``导出各种信息,用户空间工 参考 --- -- http://lkml.org/lkml/2007/2/12/6 +- https://lore.kernel.org/r/loom.20070212T063225-663@post.gmane.org - Documentation/filesystems/proc.rst (1.8) diff --git a/Documentation/translations/zh_CN/arm/Booting b/Documentation/translations/zh_CN/arm/Booting index c3d26ce5f6de..5ecea0767893 100644 --- a/Documentation/translations/zh_CN/arm/Booting +++ b/Documentation/translations/zh_CN/arm/Booting @@ -124,7 +124,7 @@ bootloader 必须传递一个系统内存的位置和最小值,以及根文件 bootloader 必须以 64bit 地址对齐的形式加载一个设备树映像(dtb)到系统 RAM 中,并用启动数据初始化它。dtb 格式在文档 -Documentation/devicetree/booting-without-of.rst 中。内核将会在 +https://www.devicetree.org/specifications/ 中。内核将会在 dtb 物理地址处查找 dtb 魔数值(0xd00dfeed),以确定 dtb 是否已经代替 标签列表被传递进来。 diff --git a/Documentation/translations/zh_CN/iio/ep93xx_adc.rst b/Documentation/translations/zh_CN/iio/ep93xx_adc.rst new file mode 100644 index 000000000000..7e91d2197867 --- /dev/null +++ b/Documentation/translations/zh_CN/iio/ep93xx_adc.rst @@ -0,0 +1,46 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../iio/ep93xx_adc` +:Translator: Yanteng Si + +.. 
_cn_iio_ep93xx_adc: + + +================================== +思睿逻辑 EP93xx 模拟数字转换器驱动 +================================== + +1. 概述 +======= + +该驱动同时适用于具有5通道模拟数字转换器的低端 (EP9301, Ep9302) 设备和10通道 +触摸屏/模拟数字转换器的高端设备(EP9307, EP9312, EP9315)。 + +2. 通道编号 +=========== + +EP9301和EP9302数据表定义了通道0..4的编号方案。虽然EP9307, EP9312和EP9315多 +了3个通道(一共8个),但是编号并没有定义。所以说最后三个通道是随机编号的。 + +如果ep93xx_adc是IIO设备0,您将在以下位置找到条目 +/sys/bus/iio/devices/iio:device0/: + + +-----------------+---------------+ + | sysfs 入口 | ball/pin 名称 | + +=================+===============+ + | in_voltage0_raw | YM | + +-----------------+---------------+ + | in_voltage1_raw | SXP | + +-----------------+---------------+ + | in_voltage2_raw | SXM | + +-----------------+---------------+ + | in_voltage3_raw | SYP | + +-----------------+---------------+ + | in_voltage4_raw | SYM | + +-----------------+---------------+ + | in_voltage5_raw | XP | + +-----------------+---------------+ + | in_voltage6_raw | XM | + +-----------------+---------------+ + | in_voltage7_raw | YP | + +-----------------+---------------+ diff --git a/Documentation/translations/zh_CN/iio/iio_configfs.rst b/Documentation/translations/zh_CN/iio/iio_configfs.rst new file mode 100644 index 000000000000..274488e8dce4 --- /dev/null +++ b/Documentation/translations/zh_CN/iio/iio_configfs.rst @@ -0,0 +1,102 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../iio/iio_configfs` +:Translator: Yanteng Si + +.. _cn_iio_configfs: + + +===================== +工业 IIO configfs支持 +===================== + +1. 概述 +======= + +Configfs是一种内核对象的基于文件系统的管理系统,IIO使用一些可以通过 +configfs轻松配置的对象(例如:设备,触发器)。 + +关于configfs是如何运行的,请查阅Documentation/filesystems/configfs.rst +了解更多信息。 + +2. 用法 +======= +为了使configfs支持IIO,我们需要在编译时选中config的CONFIG_IIO_CONFIGFS +选项。 + +然后,挂载configfs文件系统(通常在 /config directory目录下):: + + $ mkdir/config + $ mount -t configfs none/config + +此时,将创建所有默认IIO组,并可以在/ config / iio下对其进行访问。 下一章 +将介绍可用的IIO配置对象。 + +3. 
软件触发器 +============= + +IIO默认configfs组之一是“触发器”组。 挂载configfs后可以自动访问它,并且可 +以在/config/iio/triggers下找到。 + +IIO软件触发器为创建多种触发器类型提供了支持。 通常在include/linux/iio +/sw_trigger.h:中的接口下将新的触发器类型实现为单独的内核模块: +:: + + /* + * drivers/iio/trigger/iio-trig-sample.c + * 一种新触发器类型的内核模块实例 + */ + #include + + + static struct iio_sw_trigger *iio_trig_sample_probe(const char *name) + { + /* + * 这将分配并注册一个IIO触发器以及其他触发器类型特性的初始化。 + */ + } + + static int iio_trig_sample_remove(struct iio_sw_trigger *swt) + { + /* + * 这会废弃iio_trig_sample_probe中的操作 + */ + } + + static const struct iio_sw_trigger_ops iio_trig_sample_ops = { + .probe = iio_trig_sample_probe, + .remove = iio_trig_sample_remove, + }; + + static struct iio_sw_trigger_type iio_trig_sample = { + .name = "trig-sample", + .owner = THIS_MODULE, + .ops = &iio_trig_sample_ops, + }; + +module_iio_sw_trigger_driver(iio_trig_sample); + +每种触发器类型在/config/iio/triggers下都有其自己的目录。 加载iio-trig-sample +模块将创建“ trig-sample”触发器类型目录/config/iio/triggers/trig-sample. + +我们支持以下中断源(触发器类型) + + * hrtimer,使用高分辨率定时器作为中断源 + +3.1 Hrtimer触发器创建与销毁 +--------------------------- + +加载iio-trig-hrtimer模块将注册hrtimer触发器类型,从而允许用户在 +/config/iio/triggers/hrtimer下创建hrtimer触发器。 + +例如:: + + $ mkdir /config/iio/triggers/hrtimer/instance1 + $ rmdir /config/iio/triggers/hrtimer/instance1 + +每个触发器可以具有一个或多个独特的触发器类型的属性。 + +3.2 "hrtimer" 触发器类型属性 +---------------------------- + +"hrtimer”触发器类型没有来自/config dir的任何可配置属性。 diff --git a/Documentation/translations/zh_CN/iio/index.rst b/Documentation/translations/zh_CN/iio/index.rst new file mode 100644 index 000000000000..7087076a10f6 --- /dev/null +++ b/Documentation/translations/zh_CN/iio/index.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../iio/index` +:Translator: Yanteng Si + +.. _cn_iio_index: + + +======== +工业 I/O +======== + +.. 
toctree:: + :maxdepth: 1 + + iio_configfs + + ep93xx_adc diff --git a/Documentation/translations/zh_CN/mips/booting.rst b/Documentation/translations/zh_CN/mips/booting.rst new file mode 100644 index 000000000000..96453e1b962e --- /dev/null +++ b/Documentation/translations/zh_CN/mips/booting.rst @@ -0,0 +1,31 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../mips/booting` +:Translator: Yanteng Si + +.. _cn_booting: + +BMIPS设备树引导 +------------------------ + + 一些bootloaders只支持在内核镜像开始地址处的单一入口点。而其它 + bootloaders将跳转到ELF的开始地址处。两种方案都支持的;因为 + CONFIG_BOOT_RAW=y and CONFIG_NO_EXCEPT_FILL=y, 所以第一条指令 + 会立即跳转到kernel_entry()入口处执行。 + + 与arch/arm情况(b)类似,dt感知的引导加载程序需要设置以下寄存器: + + a0 : 0 + + a1 : 0xffffffff + + a2 : RAM中指向设备树块的物理指针(在chapterII中定义)。 + 设备树可以位于前512MB物理地址空间(0x00000000 - + 0x1fffffff)的任何位置,以64位边界对齐。 + + 传统bootloaders不会使用这样的约定,并且它们不传入DT块。 + 在这种情况下,Linux将通过选中CONFIG_DT_*查找DTB。 + + 以上约定只在32位系统中定义,因为目前没有任何64位的BMIPS实现。 diff --git a/Documentation/translations/zh_CN/mips/features.rst b/Documentation/translations/zh_CN/mips/features.rst new file mode 100644 index 000000000000..93d93d06b1b3 --- /dev/null +++ b/Documentation/translations/zh_CN/mips/features.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../mips/features` +:Translator: Yanteng Si + +.. _cn_features: + +.. kernel-feat:: $srctree/Documentation/features mips diff --git a/Documentation/translations/zh_CN/mips/index.rst b/Documentation/translations/zh_CN/mips/index.rst new file mode 100644 index 000000000000..b85033f9d67c --- /dev/null +++ b/Documentation/translations/zh_CN/mips/index.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../mips/index` +:Translator: Yanteng Si + +=========================== +MIPS特性文档 +=========================== + +.. 
toctree:: + :maxdepth: 2 + :numbered: + + booting + ingenic-tcu + + features + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/translations/zh_CN/mips/ingenic-tcu.rst b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst new file mode 100644 index 000000000000..f04ba407384a --- /dev/null +++ b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst @@ -0,0 +1,69 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: :doc:`../../../mips/ingenic-tcu` +:Translator: Yanteng Si + +.. _cn_ingenic-tcu: + +=============================================== +君正 JZ47xx SoC定时器/计数器硬件单元 +=============================================== + +君正 JZ47xx SoC中的定时器/计数器单元(TCU)是一个多功能硬件块。它有多达 +8个通道,可以用作计数器,计时器,或脉冲宽度调制器。 + +- JZ4725B, JZ4750, JZ4755 只有6个TCU通道。其它SoC都有8个通道。 + +- JZ4725B引入了一个独立的通道,称为操作系统计时器(OST)。这是一个32位可 + 编程定时器。在JZ4760B及以上型号上,它是64位的。 + +- 每个TCU通道都有自己的时钟源,可以通过 TCSR 寄存器设置通道的父级时钟 + 源(pclk、ext、rtc)、开关以及分频。 + + - 看门狗和OST硬件模块在它们的寄存器空间中也有相同形式的TCSR寄存器。 + - 用于关闭/开启的 TCU 寄存器也可以关闭/开启看门狗和 OST 时钟。 + +- 每个TCU通道在两种模式的其中一种模式下运行: + + - 模式 TCU1:通道无法在睡眠模式下运行,但更易于操作。 + - 模式 TCU2:通道可以在睡眠模式下运行,但操作比 TCU1 通道复杂一些。 + +- 每个 TCU 通道的模式取决于使用的SoC: + + - 在最老的SoC(高于JZ4740),八个通道都运行在TCU1模式。 + - 在 JZ4725B,通道5运行在TCU2,其它通道则运行在TCU1。 + - 在最新的SoC(JZ4750及之后),通道1-2运行在TCU2,其它通道则运行 + 在TCU1。 + +- 每个通道都可以生成中断。有些通道共享一条中断线,而有些没有,其在SoC型 + 号之间的变更: + + - 在很老的SoC(JZ4740及更低),通道0和通道1有它们自己的中断线;通 + 道2-7共享最后一条中断线。 + - 在 JZ4725B,通道0有它自己的中断线;通道1-5共享一条中断线;OST + 使用最后一条中断线。 + - 在比较新的SoC(JZ4750及以后),通道5有它自己的中断线;通 + 道0-4和(如果是8通道)6-7全部共享一条中断线;OST使用最后一条中 + 断线。 + +实现 +==== + +TCU硬件的功能分布在多个驱动程序: + +============== =================================== +时钟 drivers/clk/ingenic/tcu.c +中断 drivers/irqchip/irq-ingenic-tcu.c +定时器 drivers/clocksource/ingenic-timer.c +OST drivers/clocksource/ingenic-ost.c +脉冲宽度调制器 drivers/pwm/pwm-jz4740.c +看门狗 drivers/watchdog/jz4740_wdt.c +============== =================================== + +因为可以从相同的寄存器控制属于不同驱动程序和框架的TCU的各种功能,所以 
+所有这些驱动程序都通过相同的控制总线通用接口访问它们的寄存器。 + +有关TCU驱动程序的设备树绑定的更多信息,请参阅: +Documentation/devicetree/bindings/timer/ingenic,tcu.yaml. diff --git a/Documentation/translations/zh_CN/process/submitting-patches.rst b/Documentation/translations/zh_CN/process/submitting-patches.rst index 2e7dbaad4028..4fc6d16f5196 100644 --- a/Documentation/translations/zh_CN/process/submitting-patches.rst +++ b/Documentation/translations/zh_CN/process/submitting-patches.rst @@ -668,13 +668,13 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! - + Kernel Documentation/process/coding-style.rst: :ref:`Documentation/translations/zh_CN/process/coding-style.rst ` Linus Torvalds's mail on the canonical patch format: - + Andi Kleen, "On submitting kernel patches" Some strategies to get difficult or controversial changes in. diff --git a/Documentation/vm/split_page_table_lock.rst b/Documentation/vm/split_page_table_lock.rst index ff51f4a5494d..c08919662704 100644 --- a/Documentation/vm/split_page_table_lock.rst +++ b/Documentation/vm/split_page_table_lock.rst @@ -32,7 +32,7 @@ There are helpers to lock/unlock a table and other accessor functions: Split page table lock for PTE tables is enabled compile-time if CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less or equal to NR_CPUS. -If split lock is disabled, all tables guaded by mm->page_table_lock. +If split lock is disabled, all tables are guarded by mm->page_table_lock. Split page table lock for PMD tables is enabled, if it's enabled for PTE tables and the architecture supports it (see below). 
diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst index abb9fc164657..fc844913dece 100644 --- a/Documentation/x86/boot.rst +++ b/Documentation/x86/boot.rst @@ -851,7 +851,7 @@ Protocol: 2.09+ struct setup_data { __u64 next = 0 or ; __u32 type = SETUP_INDIRECT; - __u32 len = sizeof(setup_data); + __u32 len = sizeof(setup_indirect); __u8 data[sizeof(setup_indirect)] = struct setup_indirect { __u32 type = SETUP_INDIRECT | SETUP_E820_EXT; __u32 reserved = 0; diff --git a/MAINTAINERS b/MAINTAINERS index 3d32298b3881..4b327e24d30f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11182,6 +11182,15 @@ S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt F: drivers/i2c/busses/i2c-mt65xx.c +MEDIATEK IOMMU DRIVER +M: Yong Wu +L: iommu@lists.linux-foundation.org +L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) +S: Supported +F: Documentation/devicetree/bindings/iommu/mediatek* +F: drivers/iommu/mtk_iommu* +F: include/dt-bindings/memory/mt*-port.h + MEDIATEK JPEG DRIVER M: Rick Chang M: Bin Liu diff --git a/arch/Kconfig b/arch/Kconfig index baf67e7f059a..9fc4eb8bfdbc 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -124,8 +124,8 @@ config HAVE_64BIT_ALIGNED_ACCESS accesses are required to be 64 bit aligned in this way even though it is not a 64 bit architecture. - See Documentation/unaligned-memory-access.txt for more - information on the topic of unaligned memory accesses. + See Documentation/core-api/unaligned-memory-access.rst for + more information on the topic of unaligned memory accesses. 
config HAVE_EFFICIENT_UNALIGNED_ACCESS bool diff --git a/block/blk-mq.c b/block/blk-mq.c index f21d922ecfaf..d4d7c1caa439 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -41,7 +41,7 @@ #include "blk-mq-sched.h" #include "blk-rq-qos.h" -static DEFINE_PER_CPU(struct list_head, blk_cpu_done); +static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); @@ -567,80 +567,29 @@ void blk_mq_end_request(struct request *rq, blk_status_t error) } EXPORT_SYMBOL(blk_mq_end_request); -/* - * Softirq action handler - move entries to local list and loop over them - * while passing them to the queue registered handler. - */ -static __latent_entropy void blk_done_softirq(struct softirq_action *h) +static void blk_complete_reqs(struct llist_head *list) { - struct list_head *cpu_list, local_list; + struct llist_node *entry = llist_reverse_order(llist_del_all(list)); + struct request *rq, *next; - local_irq_disable(); - cpu_list = this_cpu_ptr(&blk_cpu_done); - list_replace_init(cpu_list, &local_list); - local_irq_enable(); - - while (!list_empty(&local_list)) { - struct request *rq; - - rq = list_entry(local_list.next, struct request, ipi_list); - list_del_init(&rq->ipi_list); + llist_for_each_entry_safe(rq, next, entry, ipi_list) rq->q->mq_ops->complete(rq); - } } -static void blk_mq_trigger_softirq(struct request *rq) +static __latent_entropy void blk_done_softirq(struct softirq_action *h) { - struct list_head *list; - unsigned long flags; - - local_irq_save(flags); - list = this_cpu_ptr(&blk_cpu_done); - list_add_tail(&rq->ipi_list, list); - - /* - * If the list only contains our just added request, signal a raise of - * the softirq. If there are already entries there, someone already - * raised the irq but it hasn't run yet. 
- */ - if (list->next == &rq->ipi_list) - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_restore(flags); + blk_complete_reqs(this_cpu_ptr(&blk_cpu_done)); } static int blk_softirq_cpu_dead(unsigned int cpu) { - /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq - */ - local_irq_disable(); - list_splice_init(&per_cpu(blk_cpu_done, cpu), - this_cpu_ptr(&blk_cpu_done)); - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_enable(); - + blk_complete_reqs(&per_cpu(blk_cpu_done, cpu)); return 0; } - static void __blk_mq_complete_request_remote(void *data) { - struct request *rq = data; - - /* - * For most of single queue controllers, there is only one irq vector - * for handling I/O completion, and the only irq's affinity is set - * to all possible CPUs. On most of ARCHs, this affinity means the irq - * is handled on one specific CPU. - * - * So complete I/O requests in softirq context in case of single queue - * devices to avoid degrading I/O performance due to irqsoff latency. 
- */ - if (rq->q->nr_hw_queues == 1) - blk_mq_trigger_softirq(rq); - else - rq->q->mq_ops->complete(rq); + __raise_softirq_irqoff(BLOCK_SOFTIRQ); } static inline bool blk_mq_complete_need_ipi(struct request *rq) @@ -669,6 +618,30 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq) return cpu_online(rq->mq_ctx->cpu); } +static void blk_mq_complete_send_ipi(struct request *rq) +{ + struct llist_head *list; + unsigned int cpu; + + cpu = rq->mq_ctx->cpu; + list = &per_cpu(blk_cpu_done, cpu); + if (llist_add(&rq->ipi_list, list)) { + INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); + smp_call_function_single_async(cpu, &rq->csd); + } +} + +static void blk_mq_raise_softirq(struct request *rq) +{ + struct llist_head *list; + + preempt_disable(); + list = this_cpu_ptr(&blk_cpu_done); + if (llist_add(&rq->ipi_list, list)) + raise_softirq(BLOCK_SOFTIRQ); + preempt_enable(); +} + bool blk_mq_complete_request_remote(struct request *rq) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); @@ -681,15 +654,15 @@ bool blk_mq_complete_request_remote(struct request *rq) return false; if (blk_mq_complete_need_ipi(rq)) { - INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); - smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd); - } else { - if (rq->q->nr_hw_queues > 1) - return false; - blk_mq_trigger_softirq(rq); + blk_mq_complete_send_ipi(rq); + return true; } - return true; + if (rq->q->nr_hw_queues == 1) { + blk_mq_raise_softirq(rq); + return true; + } + return false; } EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote); @@ -3957,7 +3930,7 @@ static int __init blk_mq_init(void) int i; for_each_possible_cpu(i) - INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); + init_llist_head(&per_cpu(blk_cpu_done, i)); open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 9325e189a215..04a78d9f8fe3 100644 --- a/drivers/infiniband/Kconfig +++ 
b/drivers/infiniband/Kconfig @@ -41,6 +41,7 @@ config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n depends on MMU + select DMA_SHARED_BUFFER default y config INFINIBAND_ON_DEMAND_PAGING diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index ccf2670ef45e..8ab4eea5a0a5 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -40,5 +40,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_srq.o \ uverbs_std_types_wq.o \ uverbs_std_types_qp.o -ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o +ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 7989b7e1d1c0..5c9fac7cf420 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -669,11 +669,10 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, * rdma_find_gid_by_port - Returns the GID entry attributes when it finds * a valid GID entry for given search parameters. It searches for the specified * GID value in the local software cache. - * @device: The device to query. + * @ib_dev: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. - * @port_num: The port number of the device where the GID value should be - * searched. + * @port: The port number of the device where the GID value should be searched. * @ndev: In RoCE, the net device of the device. NULL means ignore. * * Returns sgid attributes if the GID is found with valid reference or @@ -719,7 +718,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port); /** * rdma_find_gid_by_filter - Returns the GID table attribute where a * specified GID value occurs - * @device: The device to query. + * @ib_dev: The device to query. * @gid: The GID value to search for. 
* @port: The port number of the device where the GID value could be * searched. @@ -728,6 +727,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port); * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. + * @context: Private data to pass into the call-back. * * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. @@ -1253,7 +1253,6 @@ EXPORT_SYMBOL(rdma_get_gid_attr); * @entries: Entries where GID entries are returned. * @max_entries: Maximum number of entries that can be returned. * Entries array must be allocated to hold max_entries number of entries. - * @num_entries: Updated to the number of entries that were successfully read. * * Returns number of entries on success or appropriate error code. */ diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 98165589c8ab..be996dba040c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4333,7 +4333,7 @@ static int cm_add_one(struct ib_device *ib_device) unsigned long flags; int ret; int count = 0; - u8 i; + unsigned int i; cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt), GFP_KERNEL); @@ -4345,7 +4345,7 @@ static int cm_add_one(struct ib_device *ib_device) cm_dev->going_down = 0; set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { + rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4431,7 +4431,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) .clr_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; - int i; + unsigned int i; write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); @@ -4441,7 +4441,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) 
cm_dev->going_down = 1; spin_unlock_irq(&cm.lock); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { + rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c51b84b2d2f3..94096511599f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -352,7 +352,13 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) struct cma_multicast { struct rdma_id_private *id_priv; - struct ib_sa_multicast *sa_mc; + union { + struct ib_sa_multicast *sa_mc; + struct { + struct work_struct work; + struct rdma_cm_event event; + } iboe_join; + }; struct list_head list; void *context; struct sockaddr_storage addr; @@ -1823,6 +1829,8 @@ static void destroy_mc(struct rdma_id_private *id_priv, cma_igmp_send(ndev, &mgid, false); dev_put(ndev); } + + cancel_work_sync(&mc->iboe_join.work); } kfree(mc); } @@ -2683,6 +2691,28 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; } +static void cma_iboe_join_work_handler(struct work_struct *work) +{ + struct cma_multicast *mc = + container_of(work, struct cma_multicast, iboe_join.work); + struct rdma_cm_event *event = &mc->iboe_join.event; + struct rdma_id_private *id_priv = mc->id_priv; + int ret; + + mutex_lock(&id_priv->handler_mutex); + if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || + READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) + goto out_unlock; + + ret = cma_cm_event_handler(id_priv, event); + WARN_ON(ret); + +out_unlock: + mutex_unlock(&id_priv->handler_mutex); + if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN) + rdma_destroy_ah_attr(&event->param.ud.ah_attr); +} + static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); @@ -4478,10 +4508,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) cma_make_mc_event(status, id_priv, multicast, &event, mc); ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); - if (ret) { - destroy_id_handler_unlock(id_priv); - return 0; - } + WARN_ON(ret); out: mutex_unlock(&id_priv->handler_mutex); @@ -4542,17 +4569,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = mc->join_state; - if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) && - (!ib_sa_sendonly_fullmem_support(&sa_client, - id_priv->id.device, - id_priv->id.port_num))) { - dev_warn( - &id_priv->id.device->dev, - "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n", - id_priv->id.port_num); - return -EOPNOTSUPP; - } - comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | @@ -4604,7 +4620,6 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, static 
int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { - struct cma_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; @@ -4618,10 +4633,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (cma_zero_addr(addr)) return -EINVAL; - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); @@ -4632,10 +4643,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (!ndev) { - err = -ENODEV; - goto err_free; - } + if (!ndev) + return -ENODEV; + ib.rec.rate = iboe_get_rate(ndev); ib.rec.hop_limit = 1; ib.rec.mtu = iboe_get_mtu(ndev->mtu); @@ -4653,24 +4663,15 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, err = -ENOTSUPP; } dev_put(ndev); - if (err || !ib.rec.mtu) { - if (!err) - err = -EINVAL; - goto err_free; - } + if (err || !ib.rec.mtu) + return err ?: -EINVAL; + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &ib.rec.port_gid); - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - cma_make_mc_event(0, id_priv, &ib, &work->event, mc); - /* Balances with cma_id_put() in cma_work_handler */ - cma_id_get(id_priv); - queue_work(cma_wq, &work->work); + INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler); + cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc); + queue_work(cma_wq, &mc->iboe_join.work); return 0; - -err_free: - kfree(work); - return err; } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 97a77ea8d3c9..e0d5e3bae458 100644 --- 
a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -204,7 +204,6 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, unsigned int i; unsigned int ports_num; struct cma_dev_port_group *ports; - int err; ibdev = cma_get_ib_dev(cma_dev); @@ -215,10 +214,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports), GFP_KERNEL); - if (!ports) { - err = -ENOMEM; - goto free; - } + if (!ports) + return -ENOMEM; for (i = 0; i < ports_num; i++) { char port_str[10]; @@ -234,12 +231,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, } cma_dev_group->ports = ports; - return 0; -free: - kfree(ports); - cma_dev_group->ports = NULL; - return err; } static void release_cma_dev(struct config_item *item) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 92745522250e..f3a7c1f404af 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -10,30 +10,35 @@ #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) -static int __counter_set_mode(struct rdma_counter_mode *curr, +static int __counter_set_mode(struct rdma_port_counter *port_counter, enum rdma_nl_counter_mode new_mode, enum rdma_nl_counter_mask new_mask) { - if ((new_mode == RDMA_COUNTER_MODE_AUTO) && - ((new_mask & (~ALL_AUTO_MODE_MASKS)) || - (curr->mode != RDMA_COUNTER_MODE_NONE))) - return -EINVAL; + if (new_mode == RDMA_COUNTER_MODE_AUTO && port_counter->num_counters) + if (new_mask & ~ALL_AUTO_MODE_MASKS || + port_counter->mode.mode != RDMA_COUNTER_MODE_NONE) + return -EINVAL; - curr->mode = new_mode; - curr->mask = new_mask; + port_counter->mode.mode = new_mode; + port_counter->mode.mask = new_mask; return 0; } -/** +/* * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode * - * When @on is true, the @mask must be set; When @on is false, it goes - * into manual mode if there's any 
counter, so that the user is able to - * manually access them. + * @dev: Device to operate + * @port: Port to use + * @mask: Mask to configure + * @extack: Message to the user + * + * Return 0 on success. */ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, - bool on, enum rdma_nl_counter_mask mask) + enum rdma_nl_counter_mask mask, + struct netlink_ext_ack *extack) { + enum rdma_nl_counter_mode mode = RDMA_COUNTER_MODE_AUTO; struct rdma_port_counter *port_counter; int ret; @@ -42,23 +47,23 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, return -EOPNOTSUPP; mutex_lock(&port_counter->lock); - if (on) { - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_AUTO, mask); - } else { - if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { - ret = -EINVAL; - goto out; - } - - if (port_counter->num_counters) - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_MANUAL, 0); - else - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_NONE, 0); + if (mask) { + ret = __counter_set_mode(port_counter, mode, mask); + if (ret) + NL_SET_ERR_MSG( + extack, + "Turning on auto mode is not allowed when there is bound QP"); + goto out; } + if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { + ret = -EINVAL; + goto out; + } + + mode = (port_counter->num_counters) ? 
RDMA_COUNTER_MODE_MANUAL : + RDMA_COUNTER_MODE_NONE; + ret = __counter_set_mode(port_counter, mode, 0); out: mutex_unlock(&port_counter->lock); return ret; @@ -122,8 +127,8 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, mutex_lock(&port_counter->lock); switch (mode) { case RDMA_COUNTER_MODE_MANUAL: - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_MANUAL, 0); + ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL, + 0); if (ret) { mutex_unlock(&port_counter->lock); goto err_mode; @@ -170,8 +175,7 @@ static void rdma_counter_free(struct rdma_counter *counter) port_counter->num_counters--; if (!port_counter->num_counters && (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) - __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, - 0); + __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0); mutex_unlock(&port_counter->lock); @@ -227,7 +231,7 @@ static void counter_history_stat_update(struct rdma_counter *counter) port_counter->hstats->value[i] += counter->stats->value[i]; } -/** +/* * rdma_get_counter_auto_mode - Find the counter that @qp should be bound * with in auto mode * @@ -274,7 +278,7 @@ static void counter_release(struct kref *kref) rdma_counter_free(counter); } -/** +/* * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on * the auto-mode rule */ @@ -311,7 +315,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) return 0; } -/** +/* * rdma_counter_unbind_qp - Unbind a qp from a counter * @force: * true - Decrease the counter ref-count anyway (e.g., qp destroy) @@ -380,7 +384,7 @@ static u64 get_running_counters_hwstat_sum(struct ib_device *dev, return sum; } -/** +/* * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a * specific port, including the running ones and history data */ @@ -436,7 +440,7 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, return counter; } -/** +/* * rdma_counter_bind_qpn() - 
Bind QP @qp_num to counter @counter_id */ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, @@ -485,7 +489,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, return ret; } -/** +/* * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it * The id of new counter is returned in @counter_id */ @@ -533,7 +537,7 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, return ret; } -/** +/* * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter */ int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e96f979e6d52..aac0fe14e1d9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -848,6 +848,20 @@ static int setup_port_data(struct ib_device *device) return 0; } +/** + * ib_port_immutable_read() - Read rdma port's immutable data + * @dev: IB device + * @port: port number whose immutable data to read. It starts with index 1 and + * is valid up to and including rdma_end_port(). 
+ */ +const struct ib_port_immutable* +ib_port_immutable_read(struct ib_device *dev, unsigned int port) +{ + WARN_ON(!rdma_is_port_valid(dev, port)); + return &dev->port_data[port].immutable; +} +EXPORT_SYMBOL(ib_port_immutable_read); + void ib_get_device_fw_str(struct ib_device *dev, char *str) { if (dev->ops.get_dev_fw_str) @@ -1887,9 +1901,9 @@ static int __ib_get_client_nl_info(struct ib_device *ibdev, /** * ib_get_client_nl_info - Fetch the nl_info from a client - * @device - IB device - * @client_name - Name of the client - * @res - Result of the query + * @ibdev: IB device + * @client_name: Name of the client + * @res: Result of the query */ int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, struct ib_client_nl_info *res) @@ -2317,7 +2331,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, up_read(&devices_rwsem); } -/** +/* * ib_enum_all_devs - enumerate all ib_devices * @cb: Callback to call for each found ib_device * @@ -2681,6 +2695,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, read_counters); SET_DEVICE_OP(dev_ops, reg_dm_mr); SET_DEVICE_OP(dev_ops, reg_user_mr); + SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf); SET_DEVICE_OP(dev_ops, req_ncomp_notif); SET_DEVICE_OP(dev_ops, req_notify_cq); SET_DEVICE_OP(dev_ops, rereg_user_mr); diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 46686990a827..30a0ff76b332 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -392,7 +392,7 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = { /** * iwpm_register_pid_cb - Process the port mapper response to * iwpm_register_pid query - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * If successful, the function receives the userspace port mapper pid @@ -468,7 +468,7 @@ static const struct nla_policy 
resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = { /** * iwpm_add_mapping_cb - Process the port mapper response to * iwpm_add_mapping request - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -545,7 +545,7 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = /** * iwpm_add_and_query_mapping_cb - Process the port mapper response to * iwpm_add_and_query_mapping request - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, @@ -627,7 +627,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, /** * iwpm_remote_info_cb - Process remote connecting peer address info, which * the port mapper has received from the connecting peer - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Stores the IPv4/IPv6 address info in a hash table @@ -706,7 +706,7 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { /** * iwpm_mapping_info_cb - Process a notification that the userspace * port mapper daemon is started - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Using the received port mapper pid, send all the local mapping @@ -766,7 +766,7 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = { /** * iwpm_ack_mapping_info_cb - Process the port mapper ack for * the provided local mapping info records - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -796,7 +796,7 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = { /** * iwpm_mapping_error_cb - Process port mapper notification for error * - * 
@skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -841,7 +841,7 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = { /** * iwpm_hello_cb - Process a hello message from iwpmd * - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Using the received port mapper pid, send the kernel's abi_version diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 13495b43dbc1..f80e5550b51f 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -127,8 +127,8 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, /** * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address * info in a hash table - * @local_addr: Local ip/tcp address - * @mapped_addr: Mapped local ip/tcp address + * @local_sockaddr: Local ip/tcp address + * @mapped_sockaddr: Mapped local ip/tcp address * @nl_client: The index of the netlink client * @map_flags: IWPM mapping flags */ @@ -174,7 +174,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, /** * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address * info from the hash table - * @local_addr: Local ip/tcp address + * @local_sockaddr: Local ip/tcp address * @mapped_local_addr: Mapped local ip/tcp address * * Returns err code if mapping info is not found in the hash table, diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 740f03ecc05d..57519ca6cd2c 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -721,6 +721,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec); * member record and gid of the device. 
* @device: RDMA device * @port_num: Port of the rdma device to consider + * @rec: Multicast member record to use * @ndev: Optional netdevice, applicable only for RoCE * @gid_type: GID type to consider * @ah_attr: AH attribute to fillup on successful completion diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 08366e254b1d..d306049c22a2 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1768,9 +1768,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) mask = nla_get_u32( tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); - - ret = rdma_counter_set_auto_mode(device, port, - mask ? true : false, mask); + ret = rdma_counter_set_auto_mode(device, port, mask, extack); if (ret) goto err_msg; } else { diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index ff1551b3cf61..ffabaf327242 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -201,8 +201,8 @@ EXPORT_SYMBOL(rdma_restrack_parent_name); /** * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface * to release memory in fully automatic way. - * @res - Entry to initialize - * @type - REstrack type + * @res: Entry to initialize + * @type: REstrack type */ void rdma_restrack_new(struct rdma_restrack_entry *res, enum rdma_restrack_type type) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 6b8364bb032d..34fff94eaa38 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -505,7 +505,7 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. 
* - * @device: the rdma device + * @ib_dev: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ib_dev) { diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index a96030b784eb..31156e22d3e7 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -410,7 +410,7 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, ctx->type = RDMA_RW_SIG_MR; ctx->nr_ops = 1; - ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL); + ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL); if (!ctx->reg) { ret = -ENOMEM; goto out_unmap_prot_sg; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 89a831fa1885..9ef1a355131b 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1434,7 +1434,7 @@ enum opa_pr_supported { PR_IB_SUPPORTED }; -/** +/* * opa_pr_query_possible - Check if current PR query can be an OPA query. * * Retuns PR_NOT_SUPPORTED if a path record query is not @@ -1951,30 +1951,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); -bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, - struct ib_device *device, - u8 port_num) -{ - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - bool ret = false; - unsigned long flags; - - if (!sa_dev) - return ret; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - - spin_lock_irqsave(&port->classport_lock, flags); - if ((port->classport_info.valid) && - (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB)) - ret = ib_get_cpi_capmask2(&port->classport_info.data.ib) - & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT; - spin_unlock_irqrestore(&port->classport_lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support); - struct ib_classport_info_context { struct completion done; struct ib_sa_query *sa_query; diff --git 
a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 917338db7ac1..2dde99a9ba07 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -2,6 +2,7 @@ * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -278,6 +279,8 @@ void ib_umem_release(struct ib_umem *umem) { if (!umem) return; + if (umem->is_dmabuf) + return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem)); if (umem->is_odp) return ib_umem_odp_release(to_ib_umem_odp(umem)); diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c new file mode 100644 index 000000000000..f9b5162d9260 --- /dev/null +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright (c) 2020 Intel Corporation. All rights reserved. 
+ */ + +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> +#include <linux/dma-mapping.h> + +#include "uverbs.h" + +int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + struct sg_table *sgt; + struct scatterlist *sg; + struct dma_fence *fence; + unsigned long start, end, cur = 0; + unsigned int nmap = 0; + int i; + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (umem_dmabuf->sgt) + goto wait_fence; + + sgt = dma_buf_map_attachment(umem_dmabuf->attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + /* modify the sg list in-place to match umem address and length */ + + start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE); + end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length, + PAGE_SIZE); + for_each_sgtable_dma_sg(sgt, sg, i) { + if (start < cur + sg_dma_len(sg) && cur < end) + nmap++; + if (cur <= start && start < cur + sg_dma_len(sg)) { + unsigned long offset = start - cur; + + umem_dmabuf->first_sg = sg; + umem_dmabuf->first_sg_offset = offset; + sg_dma_address(sg) += offset; + sg_dma_len(sg) -= offset; + cur += offset; + } + if (cur < end && end <= cur + sg_dma_len(sg)) { + unsigned long trim = cur + sg_dma_len(sg) - end; + + umem_dmabuf->last_sg = sg; + umem_dmabuf->last_sg_trim = trim; + sg_dma_len(sg) -= trim; + break; + } + cur += sg_dma_len(sg); + } + + umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg; + umem_dmabuf->umem.sg_head.nents = nmap; + umem_dmabuf->umem.nmap = nmap; + umem_dmabuf->sgt = sgt; + +wait_fence: + /* + * Although the sg list is valid now, the content of the pages + * may not be up-to-date. Wait for the exporter to finish + * the migration. 
+ */ + fence = dma_resv_get_excl(umem_dmabuf->attach->dmabuf->resv); + if (fence) + return dma_fence_wait(fence, false); + + return 0; +} +EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); + +void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (!umem_dmabuf->sgt) + return; + + /* restore the original sg list */ + if (umem_dmabuf->first_sg) { + sg_dma_address(umem_dmabuf->first_sg) -= + umem_dmabuf->first_sg_offset; + sg_dma_len(umem_dmabuf->first_sg) += + umem_dmabuf->first_sg_offset; + umem_dmabuf->first_sg = NULL; + umem_dmabuf->first_sg_offset = 0; + } + if (umem_dmabuf->last_sg) { + sg_dma_len(umem_dmabuf->last_sg) += + umem_dmabuf->last_sg_trim; + umem_dmabuf->last_sg = NULL; + umem_dmabuf->last_sg_trim = 0; + } + + dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt, + DMA_BIDIRECTIONAL); + + umem_dmabuf->sgt = NULL; +} +EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages); + +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops) +{ + struct dma_buf *dmabuf; + struct ib_umem_dmabuf *umem_dmabuf; + struct ib_umem *umem; + unsigned long end; + struct ib_umem_dmabuf *ret = ERR_PTR(-EINVAL); + + if (check_add_overflow(offset, (unsigned long)size, &end)) + return ret; + + if (unlikely(!ops || !ops->move_notify)) + return ret; + + dmabuf = dma_buf_get(fd); + if (IS_ERR(dmabuf)) + return ERR_CAST(dmabuf); + + if (dmabuf->size < end) + goto out_release_dmabuf; + + umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL); + if (!umem_dmabuf) { + ret = ERR_PTR(-ENOMEM); + goto out_release_dmabuf; + } + + umem = &umem_dmabuf->umem; + umem->ibdev = device; + umem->length = size; + umem->address = offset; + umem->writable = ib_access_writable(access); + umem->is_dmabuf = 1; + + if (!ib_umem_num_pages(umem)) + goto out_free_umem; + + umem_dmabuf->attach = dma_buf_dynamic_attach( + dmabuf, 
+ device->dma_device, + ops, + umem_dmabuf); + if (IS_ERR(umem_dmabuf->attach)) { + ret = ERR_CAST(umem_dmabuf->attach); + goto out_free_umem; + } + return umem_dmabuf; + +out_free_umem: + kfree(umem_dmabuf); + +out_release_dmabuf: + dma_buf_put(dmabuf); + return ret; +} +EXPORT_SYMBOL(ib_umem_dmabuf_get); + +void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) +{ + struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; + + dma_buf_detach(dmabuf, umem_dmabuf->attach); + dma_buf_put(dmabuf); + kfree(umem_dmabuf); +} diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 19104a675691..dd7f3b437c6b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -379,6 +379,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, mutex_lock(&file->mutex); + if (file->agents_dead) { + mutex_unlock(&file->mutex); + return -EIO; + } + while (list_empty(&file->recv_list)) { mutex_unlock(&file->mutex); @@ -392,6 +397,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, mutex_lock(&file->mutex); } + if (file->agents_dead) { + mutex_unlock(&file->mutex); + return -EIO; + } + packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); @@ -524,7 +534,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { - ret = -EINVAL; + ret = -EIO; goto err_up; } @@ -653,10 +663,14 @@ static __poll_t ib_umad_poll(struct file *filp, struct poll_table_struct *wait) /* we will always be able to post a MAD send */ __poll_t mask = EPOLLOUT | EPOLLWRNORM; + mutex_lock(&file->mutex); poll_wait(filp, &file->recv_wait, wait); if (!list_empty(&file->recv_list)) mask |= EPOLLIN | EPOLLRDNORM; + if (file->agents_dead) + mask = EPOLLERR; + mutex_unlock(&file->mutex); return mask; } @@ -1336,6 +1350,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port) 
list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); file->agents_dead = 1; + wake_up_interruptible(&file->recv_wait); mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 98a5d36813ff..f5b8be3bedde 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1382,7 +1382,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (has_sq) scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, attrs); - if (!ind_tbl) + if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI) rcq = rcq ?: scq; pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index dd4e76b26c74..f782d5e1aa25 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -182,6 +183,86 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)( return IS_UVERBS_COPY_ERR(ret) ? 
ret : 0; } +static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE); + struct ib_device *ib_dev = pd->device; + + u64 offset, length, iova; + u32 fd, access_flags; + struct ib_mr *mr; + int ret; + + if (!ib_dev->ops.reg_user_mr_dmabuf) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&offset, attrs, + UVERBS_ATTR_REG_DMABUF_MR_OFFSET); + if (ret) + return ret; + + ret = uverbs_copy_from(&length, attrs, + UVERBS_ATTR_REG_DMABUF_MR_LENGTH); + if (ret) + return ret; + + ret = uverbs_copy_from(&iova, attrs, + UVERBS_ATTR_REG_DMABUF_MR_IOVA); + if (ret) + return ret; + + if ((offset & ~PAGE_MASK) != (iova & ~PAGE_MASK)) + return -EINVAL; + + ret = uverbs_copy_from(&fd, attrs, + UVERBS_ATTR_REG_DMABUF_MR_FD); + if (ret) + return ret; + + ret = uverbs_get_flags32(&access_flags, attrs, + UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_ATOMIC | + IB_ACCESS_RELAXED_ORDERING); + if (ret) + return ret; + + ret = ib_check_mr_access(ib_dev, access_flags); + if (ret) + return ret; + + mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd, + access_flags, + &attrs->driver_udata); + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->device = pd->device; + mr->pd = pd; + mr->type = IB_MR_TYPE_USER; + mr->uobject = uobj; + atomic_inc(&pd->usecnt); + + uobj->object = mr; + + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + &mr->lkey, sizeof(mr->lkey)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, + &mr->rkey, sizeof(mr->rkey)); + return ret; +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_ADVISE_MR, 
UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, @@ -247,6 +328,37 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_REG_DMABUF_MR, + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_IOVA, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_FD, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + enum ib_access_flags), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_MR_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE, @@ -257,10 +369,11 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_MR, UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), - &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR)); + &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR), + &UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR)); const struct uapi_definition uverbs_def_obj_mr[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9137a25bb521..28464c58738c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2248,7 +2248,7 @@ static bool 
is_valid_mcast_lid(struct ib_qp *qp, u16 lid) struct ib_qp_init_attr init_attr = {}; struct ib_qp_attr attr = {}; int num_eth_ports = 0; - int port; + unsigned int port; /* If QP state >= init, it is assigned to a port and we can check this * port only. @@ -2263,7 +2263,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) } /* Can't get a quick answer, iterate over all ports */ - for (port = 0; port < qp->device->phys_port_cnt; port++) + rdma_for_each_port(qp->device, port) if (rdma_port_get_link_layer(qp->device, port) != IB_LINK_LAYER_INFINIBAND) num_eth_ports++; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 401bdc9e931e..ba515efd4fdc 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -469,7 +469,6 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) struct bnxt_re_mr *mr = NULL; dma_addr_t dma_addr = 0; struct ib_mw *mw; - u64 pbl_tbl; int rc; dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES, @@ -504,9 +503,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->qplib_mr.va = (u64)(unsigned long)fence->va; mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES; - pbl_tbl = dma_addr; - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl, - BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE); + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, + BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n"); goto fail; @@ -3589,7 +3587,6 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; - u64 pbl = 0; int rc; mr = kzalloc(sizeof(*mr), GFP_KERNEL); @@ -3608,7 +3605,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) mr->qplib_mr.hwq.level = 
PBL_LVL_MAX; mr->qplib_mr.total_size = -1; /* Infinte length */ - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false, + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0, PAGE_SIZE); if (rc) goto fail_mr; @@ -3779,19 +3776,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) return rc; } -static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, - int page_shift) -{ - u64 *pbl_tbl = pbl_tbl_orig; - u64 page_size = BIT_ULL(page_shift); - struct ib_block_iter biter; - - rdma_umem_for_each_dma_block(umem, &biter, page_size) - *pbl_tbl++ = rdma_block_iter_dma_address(&biter); - - return pbl_tbl - pbl_tbl_orig; -} - /* uverbs */ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, @@ -3801,7 +3785,6 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; struct ib_umem *umem; - u64 *pbl_tbl = NULL; unsigned long page_size; int umem_pgs, rc; @@ -3846,39 +3829,19 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, } mr->qplib_mr.total_size = length; - if (page_size == BNXT_RE_PAGE_SIZE_4K && - length > BNXT_RE_MAX_MR_SIZE_LOW) { - ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", - length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); - rc = -EINVAL; - goto free_umem; - } - umem_pgs = ib_umem_num_dma_blocks(umem, page_size); - pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL); - if (!pbl_tbl) { - rc = -ENOMEM; - goto free_umem; - } - - /* Map umem buf ptrs to the PBL */ - umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size)); - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl, - umem_pgs, false, page_size); + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, + umem_pgs, page_size); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register user MR"); - goto fail; + goto free_umem; } - kfree(pbl_tbl); - mr->ib_mr.lkey = 
mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->qplib_mr.lkey; atomic_inc(&rdev->mr_count); return &mr->ib_mr; -fail: - kfree(pbl_tbl); free_umem: ib_umem_release(umem); free_mrw: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 6316179583a6..049b3576302b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -650,42 +650,32 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, } int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, - u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size) + struct ib_umem *umem, int num_pbls, u32 buf_pg_size) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; struct creq_register_mr_resp resp; struct cmdq_register_mr req; - int pg_ptrs, pages, i, rc; u16 cmd_flags = 0, level; - dma_addr_t **pbl_ptr; + int pages, rc; u32 pg_size; if (num_pbls) { + pages = roundup_pow_of_two(num_pbls); /* Allocate memory for the non-leaf pages to store buf ptrs. 
* Non-leaf pages always uses system PAGE_SIZE */ - pg_ptrs = roundup_pow_of_two(num_pbls); - pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT; - if (!pages) - pages++; - - if (pages > MAX_PBL_LVL_1_PGS) { - dev_err(&res->pdev->dev, - "SP: Reg MR: pages requested (0x%x) exceeded max (0x%x)\n", - pages, MAX_PBL_LVL_1_PGS); - return -ENOMEM; - } /* Free the hwq if it already exist, must be a rereg */ if (mr->hwq.max_elements) bnxt_qplib_free_hwq(res, &mr->hwq); /* Use system PAGE_SIZE */ hwq_attr.res = res; hwq_attr.depth = pages; - hwq_attr.stride = PAGE_SIZE; + hwq_attr.stride = buf_pg_size; hwq_attr.type = HWQ_TYPE_MR; hwq_attr.sginfo = &sginfo; + hwq_attr.sginfo->umem = umem; hwq_attr.sginfo->npages = pages; hwq_attr.sginfo->pgsize = PAGE_SIZE; hwq_attr.sginfo->pgshft = PAGE_SHIFT; @@ -695,11 +685,6 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, "SP: Reg MR memory allocation failed\n"); return -ENOMEM; } - /* Write to the hwq */ - pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr; - for (i = 0; i < num_pbls; i++) - pbl_ptr[PTR_PG(i)][PTR_IDX(i)] = - (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID; } RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags); @@ -711,7 +696,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, req.pbl = 0; pg_size = PAGE_SIZE; } else { - level = mr->hwq.level + 1; + level = mr->hwq.level; req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]); } pg_size = buf_pg_size ? 
buf_pg_size : PAGE_SIZE; @@ -728,7 +713,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, req.mr_size = cpu_to_le64(mr->total_size); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, block); + (void *)&resp, NULL, false); if (rc) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 967890cd81f2..bc228340684f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -254,7 +254,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, bool block); int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, - u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size); + struct ib_umem *umem, int num_pbls, u32 buf_pg_size); int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr); int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, int max); diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index b32e6516d65f..ff645b955a08 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -209,7 +209,7 @@ int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data; if (!epcp) return 0; - uep = kcalloc(1, sizeof(*uep), GFP_KERNEL); + uep = kzalloc(sizeof(*uep), GFP_KERNEL); if (!uep) return 0; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index b199e4ac6cf9..fa38b34eddb8 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_CMDS_H_ @@ -161,8 +161,8 @@ struct efa_admin_create_qp_resp { u32 qp_handle; /* - * QP number in the given EFA virtual device. Least-significant bits - * (as needed according to max_qp) carry unique QP ID + * QP number in the given EFA virtual device. Least-significant bits (as + * needed according to max_qp) carry unique QP ID */ u16 qp_num; @@ -465,7 +465,7 @@ struct efa_admin_create_cq_cmd { /* * number of sub cqs - must be equal to sub_cqs_per_cq of queue - * attributes. + * attributes. */ u16 num_sub_cqs; @@ -563,12 +563,8 @@ struct efa_admin_acq_get_stats_resp { }; struct efa_admin_get_set_feature_common_desc { - /* - * 1:0 : select - 0x1 - current value; 0x3 - default - * value - * 7:3 : reserved3 - MBZ - */ - u8 flags; + /* MBZ */ + u8 reserved0; /* as appears in efa_admin_aq_feature_id */ u8 feature_id; @@ -823,12 +819,6 @@ enum efa_admin_aenq_group { EFA_ADMIN_AENQ_GROUPS_NUM = 5, }; -enum efa_admin_aenq_notification_syndrom { - EFA_ADMIN_SUSPEND = 0, - EFA_ADMIN_RESUME = 1, - EFA_ADMIN_UPDATE_HINTS = 2, -}; - struct efa_admin_mmio_req_read_less_resp { u16 req_id; @@ -909,9 +899,6 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) -/* get_set_feature_common_desc */ -#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) - /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index 29d53ed63b3e..78ff9389ae25 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. 
or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_H_ @@ -82,7 +82,7 @@ struct efa_admin_acq_common_desc { /* * indicates to the driver which AQ entry has been consumed by the - * device and could be reused + * device and could be reused */ u16 sq_head_indx; }; diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 336bc2c57bb1..0d523ad736c7 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" @@ -20,9 +20,6 @@ #define EFA_CTRL_MINOR 0 #define EFA_CTRL_SUB_MINOR 1 -#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) -#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) - enum efa_cmd_status { EFA_CMD_SUBMITTED, EFA_CMD_COMPLETED, @@ -33,8 +30,6 @@ struct efa_comp_ctx { struct efa_admin_acq_entry *user_cqe; u32 comp_size; enum efa_cmd_status status; - /* status from the device */ - u8 comp_status; u8 cmd_opcode; u8 occupied; }; @@ -140,8 +135,8 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev) sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr); + addr_high = upper_32_bits(sq->dma_addr); + addr_low = lower_32_bits(sq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); @@ -174,8 +169,8 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev) cq->cc = 0; cq->phase = 1; - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr); + addr_high = 
upper_32_bits(cq->dma_addr); + addr_low = lower_32_bits(cq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); @@ -215,8 +210,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev, aenq->cc = 0; aenq->phase = 1; - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + addr_low = lower_32_bits(aenq->dma_addr); + addr_high = upper_32_bits(aenq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); @@ -421,9 +416,7 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a } comp_ctx->status = EFA_CMD_COMPLETED; - comp_ctx->comp_status = cqe->acq_common_descriptor.status; - if (comp_ctx->user_cqe) - memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); + memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) complete(&comp_ctx->wait_event); @@ -521,7 +514,7 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c msleep(aq->poll_interval); } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -569,7 +562,7 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com goto out; } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -641,8 +634,8 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, aq->efa_dev, "Failed to process command %s (opcode %u) comp_status %d err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, comp_ctx->comp_status, - err); + 
cmd->aq_common_descriptor.opcode, + comp_ctx->user_cqe->acq_common_descriptor.status, err); atomic64_inc(&aq->stats.cmd_err); } @@ -795,7 +788,7 @@ int efa_com_admin_init(struct efa_com_dev *edev, * This method goes over the admin completion queue and wakes up * all the pending threads that wait on the commands wait event. * - * @note: Should be called after MSI-X interrupt. + * Note: Should be called after MSI-X interrupt. */ void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev) { diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c87b94ea2939..993cbf37e0b9 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1323,8 +1323,8 @@ CNTR_ELEM(#name, \ /** * hfi_addr_from_offset - return addr for readq/writeq - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * This routine selects the appropriate base address * based on the indicated offset. 
@@ -1340,8 +1340,8 @@ static inline void __iomem *hfi1_addr_from_offset( /** * read_csr - read CSR at the indicated offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * Return: the value read or all FF's if there * is no mapping @@ -1355,9 +1355,9 @@ u64 read_csr(const struct hfi1_devdata *dd, u32 offset) /** * write_csr - write CSR at the indicated offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 - * @value - value to write + * @dd: the dd device + * @offset: the offset of the CSR within bar0 + * @value: value to write */ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) { @@ -1373,8 +1373,8 @@ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) /** * get_csr_addr - return te iomem address for offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * Return: The iomem address to use in subsequent * writeq/readq operations. @@ -8433,7 +8433,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) return hfi1_rcd_head(rcd) != tail; } -/** +/* * Common code for receive contexts interrupt handlers. * Update traces, increment kernel IRQ counter and * setup ASPM when needed. @@ -8447,7 +8447,7 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd) aspm_ctx_disable(rcd); } -/** +/* * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt * when there are packets present in the queue. When calling * with interrupts enabled please use hfi1_rcd_eoi_intr. 
@@ -8484,8 +8484,8 @@ static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) /** * hfi1_netdev_rx_napi - napi poll function to move eoi inline - * @napi - pointer to napi object - * @budget - netdev budget + * @napi: pointer to napi object + * @budget: netdev budget */ int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget) { @@ -10142,7 +10142,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd) /* * Set Send Length - * @ppd - per port data + * @ppd: per port data * * Set the MTU by limiting how many DWs may be sent. The SendLenCheck* * registers compare against LRH.PktLen, so use the max bytes included @@ -14200,9 +14200,9 @@ u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx) /** * init_qpmap_table - * @dd - device data - * @first_ctxt - first context - * @last_ctxt - first context + * @dd: device data + * @first_ctxt: first context + * @last_ctxt: last context * * This return sets the qpn mapping table that * is indexed by qpn[8:1]. @@ -14383,8 +14383,8 @@ static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, /** * init_qos - init RX qos - * @dd - device data - * @rmt - RSM map table + * @dd: device data + * @rmt: RSM map table * * This routine initializes Rule 0 and the RSM map table to implement * quality of service (qos). @@ -15022,8 +15022,7 @@ static int check_int_registers(struct hfi1_devdata *dd) /** * hfi1_init_dd() - Initialize most of the dd structure. - * @dev: the pci_dev for hfi1_ib device - * @ent: pci_device_id struct for this dev + * @dd: the dd device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. 
@@ -15378,10 +15377,11 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate, /** * create_pbc - build a pbc for transmission + * @ppd: info of physical Hfi port * @flags: special case flags or-ed in built pbc - * @srate: static rate + * @srate_mbs: static rate * @vl: vl - * @dwlen: dword length (header words + data words + pbc words) + * @dw_len: dword length (header words + data words + pbc words) * * Create a PBC with the given flags, rate, VL, and length. * diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index e9d5cc8b771a..91f13140ddf2 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -50,7 +50,7 @@ /** * exp_tid_group_init - initialize exp_tid_set - * @set - the set + * @set: the set */ static void hfi1_exp_tid_set_init(struct exp_tid_set *set) { @@ -60,7 +60,7 @@ static void hfi1_exp_tid_set_init(struct exp_tid_set *set) /** * hfi1_exp_tid_group_init - initialize rcd expected receive - * @rcd - the rcd + * @rcd: the rcd */ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) { @@ -71,7 +71,7 @@ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) /** * alloc_ctxt_rcv_groups - initialize expected receive groups - * @rcd - the context to add the groupings to + * @rcd: the context to add the groupings to */ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { @@ -101,7 +101,7 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) /** * free_ctxt_rcv_groups - free expected receive groups - * @rcd - the context to free + * @rcd: the context to free * * The routine dismantles the expect receive linked * list and clears any tids associated with the receive diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 329ee4f48d95..3b7bbc7b9d10 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1522,7 +1522,7 @@ int hfi1_set_uevent_bits(struct 
hfi1_pportdata *ppd, const int evtbit) * manage_rcvq - manage a context's receive queue * @uctxt: the context * @subctxt: the sub-context - * @start_stop: action to carry out + * @arg: start/stop action to carry out * * start_stop == 0 disables receive on the context, for use in queue * overflow conditions. start_stop==1 re-enables, to be used to diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 387305b768e9..5ba5c11459e7 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -91,9 +91,9 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) /** * format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer */ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) { @@ -104,11 +104,11 @@ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) /** * hfi1_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - * @msg message buffer - * @msgl message buffer length + * @hwerrs: hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length */ void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t msgl) diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c index 5836fe7b2817..111489802614 100644 --- a/drivers/infiniband/hw/hfi1/iowait.c +++ b/drivers/infiniband/hw/hfi1/iowait.c @@ -26,7 +26,7 @@ inline void iowait_clear_flag(struct iowait *wait, u32 flag) clear_bit(flag, &wait->flags); } -/** +/* * iowait_init() - initialize wait structure * @wait: wait 
struct to initialize * @tx_limit: limit for overflow queuing @@ -88,7 +88,7 @@ void iowait_cancel_work(struct iowait *w) /** * iowait_set_work_flag - set work flag based on leg - * @w - the iowait work struct + * @w: the iowait work struct */ int iowait_set_work_flag(struct iowait_work *w) { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 3222e3acb79c..e2f2f7847aed 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1341,7 +1341,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, return 0; } -/** +/* * subn_set_opa_portinfo - set port information * @smp: the incoming SM packet * @ibdev: the infiniband device @@ -4902,6 +4902,8 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, * @in_grh: the global route header for this packet * @in_mad: the incoming MAD * @out_mad: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: used to pass back the packet key index * * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not * interested in processing. diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index d61ee853d215..cf3040bb177f 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -103,8 +103,8 @@ int msix_initialize(struct hfi1_devdata *dd) * @arg: context information for the IRQ * @handler: IRQ handler * @thread: IRQ thread handler (could be NULL) - * @idx: zero base idx if multiple devices are needed * @type: affinty IRQ type + * @name: IRQ name * * Allocated an MSIx vector if available, and then create the appropriate * meta data needed to keep track of the pci IRQ request. 
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 6d263c9749b3..1fb6e1a0e4e1 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -467,7 +467,7 @@ void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id) * hfi1_netdev_get_first_dat - Gets first entry with greater or equal id. * * @dd: hfi1 dev data - * @id: requested integer id up to INT_MAX + * @start_id: requested integer id up to INT_MAX */ void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id) { diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 18d32f053d26..6f06e9920503 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -334,7 +334,7 @@ int pcie_speeds(struct hfi1_devdata *dd) return 0; } -/** +/* * Restore command and BARs after a reset has wiped them out * * Returns 0 on success, otherwise a negative error value @@ -393,7 +393,7 @@ int restore_pci_variables(struct hfi1_devdata *dd) return pcibios_err_to_errno(ret); } -/** +/* * Save BARs and command to rewrite after device reset * * Returns 0 on success, otherwise a negative error value diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 4a4ec2397857..14bfd8287f4a 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -55,6 +55,7 @@ /** * pio_copy - copy data block to MMIO space + * @dd: hfi1 dev data * @pbuf: a number of blocks allocated within a PIO send context * @pbc: PBC to send * @from: source, must be 8 byte aligned diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 681bb4e918c9..e037df911512 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -186,7 +186,7 @@ static void flush_iowait(struct rvt_qp *qp) write_sequnlock_irqrestore(lock, flags); } -/** +/* * This function is what we would push to the core 
layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs * to blindly pass the MTU enum value from the PathRecord to us. @@ -289,9 +289,9 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, /** * hfi1_setup_wqe - set up the wqe - * @qp - The qp - * @wqe - The built wqe - * @call_send - Determine if the send should be posted or scheduled. + * @qp: The qp + * @wqe: The built wqe + * @call_send: Determine if the send should be posted or scheduled. * * Perform setup of the wqe. This is called * prior to inserting the wqe into the ring but after @@ -595,7 +595,7 @@ struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5) return sde; } -/* +/** * qp_to_send_context - map a qp to a send context * @qp: the QP * @sc5: the 5 bit sc @@ -912,8 +912,8 @@ void notify_error_qp(struct rvt_qp *qp) /** * hfi1_qp_iter_cb - callback for iterator - * @qp - the qp - * @v - the sl in low bits of v + * @qp: the qp + * @v: the sl in low bits of v * * This is called from the iterator callback to work * on an individual qp. 
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 8386c84c2d92..38f311f855b5 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -242,7 +242,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, msgs[0].buf = offset_bytes; msgs[1].addr = slave_addr; - msgs[1].flags = I2C_M_NOSTART, + msgs[1].flags = I2C_M_NOSTART; msgs[1].len = len; msgs[1].buf = data; break; @@ -290,7 +290,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, msgs[0].buf = offset_bytes; msgs[1].addr = slave_addr; - msgs[1].flags = I2C_M_RD, + msgs[1].flags = I2C_M_RD; msgs[1].len = len; msgs[1].buf = data; break; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1bb5f57152d3..0174b8ee9f00 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -421,6 +421,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, /** * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP + * @ps: the current packet state * * Assumes s_lock is held. * @@ -1375,9 +1376,8 @@ static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = { [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B }; -/** +/* * hfi1_send_rc_ack - Construct an ACK packet and send it - * @qp: a pointer to the QP * * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). 
* Note that RDMA reads and atomics are handled in the @@ -1992,7 +1992,7 @@ static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn, } } -/** +/* * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK @@ -2541,6 +2541,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * @opcode: the opcode for this packet * @psn: the packet sequence number for this packet * @diff: the difference between the PSN and the expected PSN + * @rcd: the receive context * * This is called from hfi1_rc_rcv() to process an unexpected * incoming RC packet for the given QP. diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 23ac6057b211..c3fa1814c6a8 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -260,6 +260,7 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, * @qp: the queue pair * @ohdr: a pointer to the destination header memory * @bth0: bth0 passed in from the RC/UC builder + * @bth1: bth1 passed in from the RC/UC builder * @bth2: bth2 passed in from the RC/UC builder * @middle: non zero implies indicates ahg "could" be used * @ps: the current packet state @@ -348,6 +349,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, * @qp: the queue pair * @ohdr: a pointer to the destination header memory * @bth0: bth0 passed in from the RC/UC builder + * @bth1: bth1 passed in from the RC/UC builder * @bth2: bth2 passed in from the RC/UC builder * @middle: non zero implies indicates ahg "could" be used * @ps: the current packet state @@ -455,11 +457,10 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /** * hfi1_schedule_send_yield - test for a yield required for QP * send engine - * @timeout: Final time for timeout slice for jiffies * @qp: a pointer to QP * @ps: a pointer to a structure with commonly lookup values for * the the send engine progress - * @tid - true if it is the tid leg + 
* @tid: true if it is the tid leg * * This routine checks if the time slice for the QP has expired * for RC QPs, if so an additional work entry is queued. At this diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index a307d4c8b15a..46b5290b2839 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1740,7 +1740,7 @@ static inline u16 sdma_gethead(struct sdma_engine *sde) sane = (hwhead == swhead); if (unlikely(!sane)) { - dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n", + dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n", sde->this_idx, use_dmahead ? "dma" : "kreg", hwhead, swhead, swtail, cnt); @@ -2448,11 +2448,11 @@ int sdma_send_txreq(struct sdma_engine *sde, * @sde: sdma engine to use * @wait: SE wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit - * @count: pointer to a u16 which, after return will contain the total number of - * sdma_txreqs removed from the tx_list. This will include sdma_txreqs - * whose SDMA descriptors are submitted to the ring and the sdma_txreqs - * which are added to SDMA engine flush list if the SDMA engine state is - * not running. + * @count_out: pointer to a u16 which, after return will contain the total number of + * sdma_txreqs removed from the tx_list. This will include sdma_txreqs + * whose SDMA descriptors are submitted to the ring and the sdma_txreqs + * which are added to SDMA engine flush list if the SDMA engine state is + * not running. * * The call submits the list into the ring. 
* diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 92aa2a9b3b5a..0b1f9e4d038b 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -309,7 +309,8 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit) /** * qp_to_rcd - determine the receive context used by a qp - * @qp - the qp + * @rdi: rvt dev struct + * @qp: the qp * * This routine returns the receive context associated * with a a qp's qpn. @@ -484,6 +485,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd, /** * kernel_tid_waiters - determine rcd wait * @rcd: the receive context + * @queue: the queue to operate on * @qp: the head of the qp being processed * * This routine will return false IFF @@ -517,7 +519,9 @@ static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd, /** * dequeue_tid_waiter - dequeue the qp from the list - * @qp - the qp to remove the wait list + * @rcd: the receive context + * @queue: the queue to operate on + * @qp: the qp to remove the wait list * * This routine removes the indicated qp from the * wait list if it is there. @@ -549,6 +553,7 @@ static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd, /** * queue_qp_for_tid_wait - suspend QP on tid space * @rcd: the receive context + * @queue: the queue to operate on * @qp: the qp * * The qp is inserted at the tail of the rcd @@ -593,7 +598,7 @@ static void __trigger_tid_waiter(struct rvt_qp *qp) /** * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp - * @qp - the qp + * @qp: the qp * * trigger a schedule or a waiting qp in a deadlock * safe manner. The qp reference is held prior @@ -630,7 +635,7 @@ static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp) /** * tid_rdma_trigger_resume - field a trigger work request - * @work - the work item + * @work: the work item * * Complete the off qp trigger processing by directly * calling the progress routine. 
@@ -654,7 +659,7 @@ static void tid_rdma_trigger_resume(struct work_struct *work) rvt_put_qp(qp); } -/** +/* * tid_rdma_flush_wait - unwind any tid space wait * * This is called when resetting a qp to @@ -693,8 +698,8 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp) /* Flow functions */ /** * kern_reserve_flow - allocate a hardware flow - * @rcd - the context to use for allocation - * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to + * @rcd: the context to use for allocation + * @last: the index of the preferred flow. Use RXE_NUM_TID_FLOWS to * signify "don't care". * * Use a bit mask based allocation to reserve a hardware @@ -860,9 +865,10 @@ static u8 trdma_pset_order(struct tid_rdma_pageset *s) /** * tid_rdma_find_phys_blocks_4k - get groups base on mr info - * @npages - number of pages - * @pages - pointer to an array of page structs - * @list - page set array to return + * @flow: overall info for a TID RDMA segment + * @pages: pointer to an array of page structs + * @npages: number of pages + * @list: page set array to return * * This routine returns the number of groups associated with * the current sge information. This implementation is based @@ -949,10 +955,10 @@ static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow, /** * tid_flush_pages - dump out pages into pagesets - * @list - list of pagesets - * @idx - pointer to current page index - * @pages - number of pages to dump - * @sets - current number of pagesset + * @list: list of pagesets + * @idx: pointer to current page index + * @pages: number of pages to dump + * @sets: current number of pagesset * * This routine flushes out accumuated pages. 
* @@ -990,9 +996,10 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list, /** * tid_rdma_find_phys_blocks_8k - get groups base on mr info - * @pages - pointer to an array of page structs - * @npages - number of pages - * @list - page set array to return + * @flow: overall info for a TID RDMA segment + * @pages: pointer to an array of page structs + * @npages: number of pages + * @list: page set array to return * * This routine parses an array of pages to compute pagesets * in an 8k compatible way. @@ -1064,7 +1071,7 @@ static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow, return sets; } -/** +/* * Find pages for one segment of a sge array represented by @ss. The function * does not check the sge, the sge must have been checked for alignment with a * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of @@ -1598,7 +1605,7 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req) /** * hfi1_kern_exp_rcv_free_flows - free priviously allocated flow information - * @req - the tid rdma request to be cleaned + * @req: the tid rdma request to be cleaned */ static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req) { @@ -3435,7 +3442,7 @@ static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg) return 0; } -/** +/* * Central place for resource allocation at TID write responder, * is called from write_req and write_data interrupt handlers as * well as the send thread when a queued QP is scheduled for diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 1fb918399da0..5b0f536b34e0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -55,6 +55,7 @@ /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP + * @ps: the current packet state * * Assume s_lock is held. 
* @@ -291,12 +292,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /** * hfi1_uc_rcv - handle an incoming UC packet - * @ibp: the port the packet came in on - * @hdr: the header of the packet - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the length of the packet - * @qp: the QP for this packet. + * @packet: the packet structure * * This is called from qp_rcv() to process an incoming UC packet * for the given QP. diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index e804af71b629..6ecb984c85fa 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -468,6 +468,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /** * hfi1_make_ud_req - construct a UD request packet * @qp: the QP + * @ps: the current packet state * * Assume s_lock is held. * @@ -840,12 +841,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, /** * hfi1_ud_rcv - receive an incoming UD packet - * @ibp: the port the packet came in on - * @hdr: the packet header - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the packet length - * @qp: the QP the packet came on + * @packet: the packet structure * * This is called from qp_rcv() to process an incoming UD packet * for the given QP. diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b94fc7fd75a9..58dcab2679d9 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -154,12 +154,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } -/** +/* * Release pinned receive buffer pages. * - * @mapped - true if the pages have been DMA mapped. false otherwise. - * @idx - Index of the first page to unpin. - * @npages - No of pages to unpin. + * @mapped: true if the pages have been DMA mapped. false otherwise. 
+ * @idx: Index of the first page to unpin. + * @npages: No of pages to unpin. * * If the pages have been DMA mapped (indicated by mapped parameter), their * info will be passed via a struct tid_rb_node. If they haven't been mapped, @@ -189,7 +189,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, fd->tid_n_pinned -= npages; } -/** +/* * Pin receive buffer pages. */ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 3591923abebb..0dd4bb0a5a7e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -729,7 +729,7 @@ static noinline int build_verbs_ulp_payload( /** * update_tx_opstats - record stats by opcode - * @qp; the qp + * @qp: the qp * @ps: transmit packet state * @plen: the plen in dwords * @@ -1145,7 +1145,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) * egress_pkey_check - check P_KEY of a packet * @ppd: Physical IB port data * @slid: SLID for packet - * @bkey: PKEY for header + * @pkey: PKEY for header * @sc5: SC for packet * @s_pkey_index: It will be used for look up optimization for kernel contexts * only. 
If it is negative value, then it means user contexts is calling this @@ -1206,7 +1206,7 @@ int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, return 1; } -/** +/* * get_send_routine - choose an egress routine * * Choose an egress routine based on QP type diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 5afee04fb02c..23c438cef40d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -32,6 +32,7 @@ #ifndef _HNS_ROCE_COMMON_H #define _HNS_ROCE_COMMON_H +#include #define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg)) #define roce_read(dev, reg) readl((dev)->reg_base + (reg)) @@ -65,6 +66,27 @@ #define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) +#define _hr_reg_clear(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) &= \ + cpu_to_le32( \ + ~GENMASK((field_h) % 32, (field_l) % 32)) + \ + BUILD_BUG_ON_ZERO(((field_h) / 32) != \ + ((field_l) / 32)); \ + }) + +#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field) + +#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \ + ({ \ + _hr_reg_clear(ptr, field_type, field_h, field_l); \ + *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \ + GENMASK((field_h) % 32, (field_l) % 32), val)); \ + }) + +#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 @@ -342,8 +364,8 @@ #define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000 #define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004 #define ROCEE_TX_CMQ_DEPTH_REG 0x07008 -#define ROCEE_TX_CMQ_TAIL_REG 0x07010 -#define ROCEE_TX_CMQ_HEAD_REG 0x07014 +#define ROCEE_TX_CMQ_HEAD_REG 0x07010 +#define ROCEE_TX_CMQ_TAIL_REG 0x07014 #define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018 #define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c diff --git 
a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 8533fc2d8df2..74fc4940b03a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -38,11 +38,74 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" +static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct hns_roce_bank *bank; + u8 bankid; + int id; + + mutex_lock(&cq_table->bank_mutex); + bankid = get_least_load_bankid_for_cq(cq_table->bank); + bank = &cq_table->bank[bankid]; + + id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); + if (id < 0) { + mutex_unlock(&cq_table->bank_mutex); + return id; + } + + /* the lower 2 bits is bankid */ + hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid; + bank->inuse++; + mutex_unlock(&cq_table->bank_mutex); + + return 0; +} + +static inline u8 get_cq_bankid(unsigned long cqn) +{ + /* The lower 2 bits of CQN are used to hash to different banks */ + return (u8)(cqn & GENMASK(1, 0)); +} + +static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct hns_roce_bank *bank; + + bank = &cq_table->bank[get_cq_bankid(cqn)]; + + ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT); + + mutex_lock(&cq_table->bank_mutex); + bank->inuse--; + mutex_unlock(&cq_table->bank_mutex); +} + static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - struct 
hns_roce_cq_table *cq_table; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; int ret; @@ -54,13 +117,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) return -EINVAL; } - cq_table = &hr_dev->cq_table; - ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); - if (ret) { - ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); - return ret; - } - /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { @@ -110,7 +166,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); err_out: - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); return ret; } @@ -138,7 +193,6 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) wait_for_completion(&hr_cq->free); hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, @@ -152,7 +206,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, @@ -298,11 +351,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; } + ret = alloc_cqn(hr_dev, hr_cq); + if (ret) { + ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); + goto err_cq_db; + } + ret = alloc_cqc(hr_dev, hr_cq); if (ret) { ibdev_err(ibdev, "failed to alloc CQ context, ret = %d.\n", ret); - goto err_cq_db; + goto err_cqn; } /* @@ -326,6 +385,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, 
err_cqc: free_cqc(hr_dev, hr_cq); +err_cqn: + free_cqn(hr_dev, hr_cq->cqn); err_cq_db: free_cq_db(hr_dev, hr_cq, udata); err_cq_buf: @@ -341,9 +402,11 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) if (hr_dev->hw->destroy_cq) hr_dev->hw->destroy_cq(ib_cq, udata); - free_cq_buf(hr_dev, hr_cq); - free_cq_db(hr_dev, hr_cq, udata); free_cqc(hr_dev, hr_cq); + free_cqn(hr_dev, hr_cq->cqn); + free_cq_db(hr_dev, hr_cq, udata); + free_cq_buf(hr_dev, hr_cq); + return 0; } @@ -402,18 +465,33 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) complete(&hr_cq->free); } -int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) +void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + unsigned int reserved_from_bot; + unsigned int i; + mutex_init(&cq_table->bank_mutex); xa_init(&cq_table->array); - return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs, - hr_dev->caps.num_cqs - 1, - hr_dev->caps.reserved_cqs, 0); + reserved_from_bot = hr_dev->caps.reserved_cqs; + + for (i = 0; i < reserved_from_bot; i++) { + cq_table->bank[get_cq_bankid(i)].inuse++; + cq_table->bank[get_cq_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) { + ida_init(&cq_table->bank[i].ida); + cq_table->bank[i].max = hr_dev->caps.num_cqs / + HNS_ROCE_CQ_BANK_NUM - 1; + } } void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) { - hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) + ida_destroy(&hr_dev->cq_table.bank[i].ida); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ad8253245a85..3d6b7a2db496 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -54,6 +54,7 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 +#define 
HNS_ROCE_MIN_SRQ_WQE_NUM 1 /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 @@ -65,6 +66,8 @@ #define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 #define HNS_ROCE_MIN_CQE_CNT 16 +#define HNS_ROCE_RESERVED_SGE 1 + #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 @@ -90,6 +93,7 @@ #define HNS_ROCE_MAX_PORTS 6 #define HNS_ROCE_GID_SIZE 16 #define HNS_ROCE_SGE_SIZE 16 +#define HNS_ROCE_DWQE_SIZE 65536 #define HNS_ROCE_HOP_NUM_0 0xff @@ -119,6 +123,9 @@ #define SRQ_DB_REG 0x230 #define HNS_ROCE_QP_BANK_NUM 8 +#define HNS_ROCE_CQ_BANK_NUM 4 + +#define CQ_BANKID_SHIFT 2 /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth @@ -163,44 +170,6 @@ enum hns_roce_event { HNS_ROCE_EVENT_TYPE_FLR = 0x15, }; -/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ -enum { - HNS_ROCE_LWQCE_QPC_ERROR = 1, - HNS_ROCE_LWQCE_MTU_ERROR = 2, - HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3, - HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4, - HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5, - HNS_ROCE_LWQCE_SL_ERROR = 6, - HNS_ROCE_LWQCE_PORT_ERROR = 7, -}; - -/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ -enum { - HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, - HNS_ROCE_LAVWQE_LENGTH_ERROR = 2, - HNS_ROCE_LAVWQE_VA_ERROR = 3, - HNS_ROCE_LAVWQE_PD_ERROR = 4, - HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5, - HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6, - HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7, -}; - -/* DOORBELL overflow subtype */ -enum { - HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2, - HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6, -}; - -enum { - /* RQ&SRQ related operations */ - HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, - HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, -}; - #define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 enum { @@ -253,9 +222,6 @@ enum { #define HNS_ROCE_CMD_SUCCESS 1 -#define HNS_ROCE_PORT_DOWN 0 -#define 
HNS_ROCE_PORT_UP 1 - /* The minimum page size is 4K for hardware */ #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) @@ -332,7 +298,6 @@ struct hns_roce_buf_attr { } region[HNS_ROCE_MAX_BT_REGION]; unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ - bool fixed_page; /* decide page shift is fixed-size or maximum size */ unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; @@ -393,6 +358,7 @@ struct hns_roce_wq { spinlock_t lock; u32 wqe_cnt; /* WQE num */ u32 max_gs; + u32 rsv_sge; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -489,6 +455,8 @@ struct hns_roce_idx_que { struct hns_roce_mtr mtr; int entry_shift; unsigned long *bitmap; + u32 head; + u32 tail; }; struct hns_roce_srq { @@ -496,7 +464,9 @@ struct hns_roce_srq { unsigned long srqn; u32 wqe_cnt; int max_gs; + u32 rsv_sge; int wqe_shift; + u32 cqn; void __iomem *db_reg_l; atomic_t refcount; @@ -507,8 +477,6 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - u16 head; - u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; @@ -536,9 +504,10 @@ struct hns_roce_qp_table { }; struct hns_roce_cq_table { - struct hns_roce_bitmap bitmap; struct xarray array; struct hns_roce_hem_table table; + struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; + struct mutex bank_mutex; }; struct hns_roce_srq_table { @@ -640,6 +609,10 @@ struct hns_roce_work { u32 queue_num; }; +enum { + HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5), +}; + struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_wq rq; @@ -647,7 +620,7 @@ struct hns_roce_qp { struct hns_roce_db sdb; unsigned long en_flags; u32 doorbell_qpn; - u32 sq_signal_bits; + enum ib_sig_type sq_signal_bits; struct hns_roce_wq sq; struct hns_roce_mtr mtr; @@ -779,7 +752,7 @@ struct hns_roce_caps { u32 max_cqes; u32 min_cqes; u32 min_wqes; - int reserved_cqs; + 
u32 reserved_cqs; int reserved_srqs; int num_aeq_vectors; int num_comp_vectors; @@ -911,8 +884,7 @@ struct hns_roce_hw { int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx); int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, int flags, u32 pdn, - int mr_access_flags, u64 iova, u64 size, + struct hns_roce_mr *mr, int flags, void *mb_buf); int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr); @@ -945,11 +917,7 @@ struct hns_roce_hw { int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); - void (*write_srqc)(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn, - void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx, - dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx); + int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); @@ -982,6 +950,7 @@ struct hns_roce_dev { struct mutex pgdir_mutex; int irq[HNS_ROCE_MAX_IRQ_NUM]; u8 __iomem *reg_base; + void __iomem *mem_base; struct hns_roce_caps caps; struct xarray qp_table_xa; @@ -1067,7 +1036,7 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { - __raw_writeq(*(u64 *) val, dest); + writeq(*(u64 *)val, dest); } static inline struct hns_roce_qp @@ -1164,7 +1133,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); -int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); +void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); int 
hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); @@ -1281,7 +1250,6 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); - int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index edc9d6b98d95..cfd2e1b60c7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1075,9 +1075,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, return NULL; if (exist_bt) { - hem->addr = dma_alloc_coherent(hr_dev->dev, - count * BA_BYTE_LEN, - &hem->dma_addr, GFP_KERNEL); + hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN, + &hem->dma_addr, GFP_KERNEL); if (!hem->addr) { kfree(hem); return NULL; @@ -1336,6 +1335,10 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, if (ba_num < 1) return -ENOMEM; + if (ba_num > unit) + return -ENOBUFS; + + ba_num = min_t(int, ba_num, unit); INIT_LIST_HEAD(&temp_root); offset = r->offset; /* indicate to last region */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index f68585ff8e8a..5346fdca9473 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -43,6 +43,22 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v1.h" +/** + * hns_get_gid_index - Get gid index. + * @hr_dev: pointer to structure hns_roce_dev. 
+ * @port: port, value range: 0 ~ MAX + * @gid_index: gid_index, value range: 0 ~ MAX + * Description: + * N ports shared gids, allocation method as follow: + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * And so on + */ +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +{ + return gid_index * hr_dev->caps.num_ports + port; +} + static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); @@ -314,8 +330,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, /* Set DB return */ if (likely(nreq)) { qp->sq.head += nreq; - /* Memory barrier */ - wmb(); roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, SQ_DOORBELL_U32_4_SQ_HEAD_S, @@ -395,8 +409,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, out: if (likely(nreq)) { hr_qp->rq.head += nreq; - /* Memory barrier */ - wmb(); if (ibqp->qp_type == IB_QPT_GSI) { __le32 tmp; @@ -1391,7 +1403,7 @@ static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) /** * hns_roce_v1_reset - reset RoCE * @hr_dev: RoCE device struct pointer - * @enable: true -- drop reset, false -- reset + * @dereset: true -- drop reset, false -- reset * return 0 - success , negative --fail */ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) @@ -1968,12 +1980,6 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; - /* - * Make sure update of buffer contents is done before - * updating consumer index. 
- */ - wmb(); - hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); } } @@ -2314,8 +2320,6 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) *hr_cq->tptr_addr = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); - /* Memroy barrier */ - wmb(); hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); } @@ -3204,9 +3208,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, * need to hw to flash RQ HEAD by DB again */ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - /* Memory barrier */ - wmb(); - roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M, RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head); roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 46ab0a321d21..84383236e47d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -193,6 +193,49 @@ #define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0 #define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0) +/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ +enum { + HNS_ROCE_LWQCE_QPC_ERROR = 1, + HNS_ROCE_LWQCE_MTU_ERROR, + HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR, + HNS_ROCE_LWQCE_WQE_ADDR_ERROR, + HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR, + HNS_ROCE_LWQCE_SL_ERROR, + HNS_ROCE_LWQCE_PORT_ERROR, +}; + +/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ +enum { + HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, + HNS_ROCE_LAVWQE_LENGTH_ERROR, + HNS_ROCE_LAVWQE_VA_ERROR, + HNS_ROCE_LAVWQE_PD_ERROR, + HNS_ROCE_LAVWQE_RW_ACC_ERROR, + HNS_ROCE_LAVWQE_KEY_STATE_ERROR, + HNS_ROCE_LAVWQE_MR_OPERATION_ERROR, +}; + +/* DOORBELL overflow subtype */ +enum { + HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF, + HNS_ROCE_DB_SUBTYPE_ODB_OVF, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP, +}; + +enum { + /* RQ&SRQ related operations */ + 
HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, + HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE, +}; + +enum { + HNS_ROCE_PORT_DOWN = 0, + HNS_ROCE_PORT_UP, +}; + struct hns_roce_cq_context { __le32 cqc_byte_4; __le32 cq_bt_l; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 833e1f259936..c3934abeb260 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -48,8 +48,8 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v2.h" -static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, - struct ib_sge *sg) +static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, + struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); dseg->addr = cpu_to_le64(sg->addr); @@ -99,16 +99,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, u64 pbl_ba; /* use ib_access_flags */ - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, - wr->access & IB_ACCESS_MW_BIND ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S, - wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S, - wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S, - wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S, - wr->access & IB_ACCESS_LOCAL_WRITE ? 
1 : 0); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S, + !!(wr->access & IB_ACCESS_MW_BIND)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S, + !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RR_S, + !!(wr->access & IB_ACCESS_REMOTE_READ)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RW_S, + !!(wr->access & IB_ACCESS_REMOTE_WRITE)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_LW_S, + !!(wr->access & IB_ACCESS_LOCAL_WRITE)); /* Data structure reuse may lead to confusion */ pbl_ba = mr->pbl_mtr.hem_cfg.root_ba; @@ -121,12 +121,10 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->va = cpu_to_le64(wr->mr->iova); fseg->pbl_size = cpu_to_le32(mr->npages); - roce_set_field(fseg->mode_buf_pg_sz, - V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, + roce_set_field(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); - roce_set_bit(fseg->mode_buf_pg_sz, - V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } static void set_atomic_seg(const struct ib_send_wr *wr, @@ -361,7 +359,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { @@ -469,7 +467,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); - memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); ret = set_ud_opcode(ud_sq_wqe, wr); if (WARN_ON(ret)) @@ -503,6 +500,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (ret) return ret; + qp->sl = 
to_hr_ah(ud_wr(wr)->ah)->av.sl; + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); /* @@ -521,10 +520,12 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, return 0; } -static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, +static int set_rc_opcode(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, const struct ib_send_wr *wr) { u32 ib_op = wr->opcode; + int ret = 0; rc_sq_wqe->immtdata = get_immtdata(wr); @@ -544,7 +545,10 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); break; case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + else + ret = -EOPNOTSUPP; break; case IB_WR_LOCAL_INV: roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); @@ -553,19 +557,23 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; default: - return -EINVAL; + ret = -EINVAL; } + if (unlikely(ret)) + return ret; + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); - return 0; + return ret; } static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; unsigned int valid_num_sge; @@ -573,11 +581,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); - memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); rc_sq_wqe->msg_len = cpu_to_le32(msg_len); - ret = set_rc_opcode(rc_sq_wqe, wr); + ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr); if (WARN_ON(ret)) return ret; @@ -635,6 +642,8 @@ static inline void update_sq_db(struct hns_roce_dev 
*hr_dev, V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); + /* indicates data on new BAR, 0 : SQ doorbell, 1 : DWQE */ + roce_set_bit(sq_db.byte_4, V2_DB_FLAG_S, 0); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, V2_DB_PARAMETER_IDX_S, qp->sq.head); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, @@ -644,6 +653,38 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, } } +static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val, + u64 __iomem *dest) +{ +#define HNS_ROCE_WRITE_TIMES 8 + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + int i; + + if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle)) + for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++) + writeq_relaxed(*(val + i), dest + i); +} + +static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + void *wqe) +{ + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + + /* All kinds of DirectWQE have the same header field layout */ + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M, + V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M, + V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M, + V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); + + hns_roce_write512(hr_dev, wqe, hr_dev->mem_base + + HNS_ROCE_DWQE_SIZE * qp->ibqp.qp_num); +} + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) @@ -708,9 +749,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, if (likely(nreq)) { qp->sq.head += nreq; qp->next_sge = sge_idx; - /* Memory barrier */ - wmb(); - update_sq_db(hr_dev, qp); + + if (nreq 
== 1 && qp->sq.head == qp->sq.tail + 1 && + (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) + write_dwqe(hr_dev, qp, wqe); + else + update_sq_db(hr_dev, qp); } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -721,14 +765,74 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, static int check_recv_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_qp *ibqp = &hr_qp->ibqp; + + if (unlikely(ibqp->qp_type != IB_QPT_RC && + ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_UD)) { + ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n", + ibqp->qp_type); + return -EOPNOTSUPP; + } + if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) return -EIO; - else if (hr_qp->state == IB_QPS_RESET) + + if (hr_qp->state == IB_QPS_RESET) return -EINVAL; return 0; } +static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe, + u32 max_sge, bool rsv) +{ + struct hns_roce_v2_wqe_data_seg *dseg = wqe; + u32 i, cnt; + + for (i = 0, cnt = 0; i < wr->num_sge; i++) { + /* Skip zero-length sge */ + if (!wr->sg_list[i].length) + continue; + set_data_seg_v2(dseg + cnt, wr->sg_list + i); + cnt++; + } + + /* Fill a reserved sge to make hw stop reading remaining segments */ + if (rsv) { + dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg[cnt].addr = 0; + dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + } else { + /* Clear remaining segments to make ROCEE ignore sges */ + if (cnt < max_sge) + memset(dseg + cnt, 0, + (max_sge - cnt) * HNS_ROCE_SGE_SIZE); + } +} + +static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr, + u32 wqe_idx, u32 max_sge) +{ + struct hns_roce_rinl_sge *sge_list; + void *wqe = NULL; + u32 i; + + wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); + fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge); + + /* rq support inline data */ + if (hr_qp->rq_inl_buf.wqe_cnt) { + sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; + 
hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; + for (i = 0; i < wr->num_sge; i++) { + sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr; + sge_list[i].len = wr->sg_list[i].length; + } + } +} + static int hns_roce_v2_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) @@ -736,14 +840,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_rinl_sge *sge_list; + u32 wqe_idx, nreq, max_sge; unsigned long flags; - void *wqe = NULL; - u32 wqe_idx; - int nreq; int ret; - int i; spin_lock_irqsave(&hr_qp->rq.lock, flags); @@ -754,6 +853,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } + max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq, hr_qp->ibqp.recv_cq))) { @@ -762,50 +862,22 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } - wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - - if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + if (unlikely(wr->num_sge > max_sge)) { ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", - wr->num_sge, hr_qp->rq.max_gs); + wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; goto out; } - wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - for (i = 0; i < wr->num_sge; i++) { - if (!wr->sg_list[i].length) - continue; - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - } - - if (wr->num_sge < hr_qp->rq.max_gs) { - dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg->addr = 0; - } - - /* rq support inline data */ - if (hr_qp->rq_inl_buf.wqe_cnt) { - sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; - hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = - (u32)wr->num_sge; - for (i = 0; i < 
wr->num_sge; i++) { - sge_list[i].addr = - (void *)(u64)wr->sg_list[i].addr; - sge_list[i].len = wr->sg_list[i].length; - } - } - + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge); hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: if (likely(nreq)) { hr_qp->rq.head += nreq; - /* Memory barrier */ - wmb(); /* * Hip08 hardware cannot flush the WQEs in RQ if the QP state @@ -829,41 +901,82 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, return ret; } -static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n) { return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } -static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n) { return hns_roce_buf_offset(idx_que->mtr.kmem, n << idx_que->entry_shift); } -static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index) { /* always called with interrupts disabled. 
*/ spin_lock(&srq->lock); bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); - srq->tail++; + srq->idx_que.tail++; spin_unlock(&srq->lock); } -static int find_empty_entry(struct hns_roce_idx_que *idx_que, - unsigned long size) +static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq) { - int wqe_idx; + struct hns_roce_idx_que *idx_que = &srq->idx_que; - if (unlikely(bitmap_full(idx_que->bitmap, size))) + return idx_que->head - idx_que->tail >= srq->wqe_cnt; +} + +static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge, + const struct ib_recv_wr *wr) +{ + struct ib_device *ib_dev = srq->ibsrq.device; + + if (unlikely(wr->num_sge > max_sge)) { + ibdev_err(ib_dev, + "failed to check sge, wr->num_sge = %d, max_sge = %u.\n", + wr->num_sge, max_sge); + return -EINVAL; + } + + if (unlikely(hns_roce_srqwq_overflow(srq))) { + ibdev_err(ib_dev, + "failed to check srqwq status, srqwq is full.\n"); + return -ENOMEM; + } + + return 0; +} + +static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + u32 pos; + + pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt); + if (unlikely(pos == srq->wqe_cnt)) return -ENOSPC; - wqe_idx = find_first_zero_bit(idx_que->bitmap, size); + bitmap_set(idx_que->bitmap, pos, 1); + *wqe_idx = pos; + return 0; +} - bitmap_set(idx_que->bitmap, wqe_idx, 1); +static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + unsigned int head; + __le32 *buf; - return wqe_idx; + head = idx_que->head & (srq->wqe_cnt - 1); + + buf = get_idx_buf(idx_que, head); + *buf = cpu_to_le32(wqe_idx); + + idx_que->head++; } static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, @@ -872,77 +985,42 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_v2_wqe_data_seg *dseg; struct 
hns_roce_v2_db srq_db; unsigned long flags; - unsigned int ind; - __le32 *srq_idx; int ret = 0; - int wqe_idx; + u32 max_sge; + u32 wqe_idx; void *wqe; - int nreq; - int i; + u32 nreq; spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->wqe_cnt - 1); - + max_sge = srq->max_gs - srq->rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge >= srq->max_gs)) { - ret = -EINVAL; + ret = check_post_srq_valid(srq, max_sge, wr); + if (ret) { *bad_wr = wr; break; } - if (unlikely(srq->head == srq->tail)) { - ret = -ENOMEM; + ret = get_srq_wqe_idx(srq, &wqe_idx); + if (unlikely(ret)) { *bad_wr = wr; break; } - wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); - if (unlikely(wqe_idx < 0)) { - ret = -ENOMEM; - *bad_wr = wr; - break; - } - - wqe = get_srq_wqe(srq, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - - for (i = 0; i < wr->num_sge; ++i) { - dseg[i].len = cpu_to_le32(wr->sg_list[i].length); - dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); - dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); - } - - if (wr->num_sge < srq->max_gs) { - dseg[i].len = 0; - dseg[i].lkey = cpu_to_le32(0x100); - dseg[i].addr = 0; - } - - srq_idx = get_idx_buf(&srq->idx_que, ind); - *srq_idx = cpu_to_le32(wqe_idx); - + wqe = get_srq_wqe_buf(srq, wqe_idx); + fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge); + fill_wqe_idx(srq, wqe_idx); srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->wqe_cnt - 1); } if (likely(nreq)) { - srq->head += nreq; - - /* - * Make sure that descriptors are written before - * doorbell record. 
- */ - wmb(); - srq_db.byte_4 = cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | (srq->srqn & V2_DB_BYTE_4_TAG_M)); srq_db.parameter = - cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M); + cpu_to_le32(srq->idx_que.head & V2_DB_PARAMETER_IDX_M); hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); } @@ -1059,15 +1137,6 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) return 0; } -static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring) -{ - int ntu = ring->next_to_use; - int ntc = ring->next_to_clean; - int used = (ntu - ntc + ring->desc_num) % ring->desc_num; - - return ring->desc_num - used - 1; -} - static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev, struct hns_roce_v2_cmq_ring *ring) { @@ -1107,8 +1176,7 @@ static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) &priv->cmq.csq : &priv->cmq.crq; ring->flag = ring_type; - ring->next_to_clean = 0; - ring->next_to_use = 0; + ring->head = 0; return hns_roce_alloc_cmq_desc(hr_dev, ring); } @@ -1207,34 +1275,10 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) { - u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); + u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); struct hns_roce_v2_priv *priv = hr_dev->priv; - return head == priv->cmq.csq.next_to_use; -} - -static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v2_priv *priv = hr_dev->priv; - struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - struct hns_roce_cmq_desc *desc; - u16 ntc = csq->next_to_clean; - u32 head; - int clean = 0; - - desc = &csq->desc[ntc]; - head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); - while (head != ntc) { - memset(desc, 0, sizeof(*desc)); - ntc++; - if (ntc == csq->desc_num) - ntc = 0; - desc = &csq->desc[ntc]; - clean++; - } - csq->next_to_clean = ntc; - - return clean; + return tail == priv->cmq.csq.head; } static int 
__hns_roce_cmq_send(struct hns_roce_dev *hr_dev, @@ -1242,42 +1286,26 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - struct hns_roce_cmq_desc *desc_to_use; - bool complete = false; u32 timeout = 0; - int handle = 0; u16 desc_ret; - int ret = 0; - int ntc; + u32 tail; + int ret; + int i; spin_lock_bh(&csq->lock); - if (num > hns_roce_cmq_space(csq)) { - spin_unlock_bh(&csq->lock); - return -EBUSY; - } + tail = csq->head; - /* - * Record the location of desc in the cmq for this time - * which will be use for hardware to write back - */ - ntc = csq->next_to_use; - - while (handle < num) { - desc_to_use = &csq->desc[csq->next_to_use]; - *desc_to_use = desc[handle]; - dev_dbg(hr_dev->dev, "set cmq desc:\n"); - csq->next_to_use++; - if (csq->next_to_use == csq->desc_num) - csq->next_to_use = 0; - handle++; + for (i = 0; i < num; i++) { + csq->desc[csq->head++] = desc[i]; + if (csq->head == csq->desc_num) + csq->head = 0; } /* Write to hardware */ - roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, csq->next_to_use); + roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, csq->head); - /* - * If the command is sync, wait for the firmware to write back, + /* If the command is sync, wait for the firmware to write back, * if multi descriptors to be sent, use the first one to check */ if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) { @@ -1285,39 +1313,34 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (hns_roce_cmq_csq_done(hr_dev)) break; udelay(1); - timeout++; - } while (timeout < priv->cmq.tx_timeout); + } while (++timeout < priv->cmq.tx_timeout); } if (hns_roce_cmq_csq_done(hr_dev)) { - complete = true; - handle = 0; - while (handle < num) { - /* get the result of hardware write back */ - desc_to_use = &csq->desc[ntc]; - desc[handle] = *desc_to_use; - dev_dbg(hr_dev->dev, "Get cmq desc:\n"); - desc_ret = le16_to_cpu(desc[handle].retval); - if 
(desc_ret == CMD_EXEC_SUCCESS) - ret = 0; - else - ret = -EIO; - priv->cmq.last_status = desc_ret; - ntc++; - handle++; - if (ntc == csq->desc_num) - ntc = 0; + for (ret = 0, i = 0; i < num; i++) { + /* check the result of hardware write back */ + desc[i] = csq->desc[tail++]; + if (tail == csq->desc_num) + tail = 0; + + desc_ret = le16_to_cpu(desc[i].retval); + if (likely(desc_ret == CMD_EXEC_SUCCESS)) + continue; + + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = %x, return = %x\n", + desc->opcode, desc_ret); + ret = -EIO; } - } + } else { + /* FW/HW reset or incorrect number of desc */ + tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); + dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n", + csq->head, tail); + csq->head = tail; - if (!complete) ret = -EAGAIN; - - /* clean the command send queue */ - handle = hns_roce_cmq_csq_clean(hr_dev); - if (handle != num) - dev_warn(hr_dev->dev, "Cleaned %d, need to clean %d\n", - handle, num); + } spin_unlock_bh(&csq->lock); @@ -1530,7 +1553,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S, 0x3e8); roce_set_field(req->time_cfg_udp_port, CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M, - CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, 0x12b7); + CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, + ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -1541,17 +1565,13 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) struct hns_roce_pf_res_a *req_a; struct hns_roce_pf_res_b *req_b; int ret; - int i; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_QUERY_PF_RES, true); + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_QUERY_PF_RES, + true); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[1], 
HNS_ROCE_OPC_QUERY_PF_RES, + true); ret = hns_roce_cmq_send(hr_dev, desc, 2); if (ret) @@ -1644,19 +1664,16 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc[2]; struct hns_roce_vf_res_a *req_a; struct hns_roce_vf_res_b *req_b; - int i; req_a = (struct hns_roce_vf_res_a *)desc[0].data; req_b = (struct hns_roce_vf_res_b *)desc[1].data; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_ALLOC_VF_RES, false); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_ALLOC_VF_RES, + false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_ALLOC_VF_RES, + false); roce_set_field(req_a->vf_qpc_bt_idx_num, VF_RES_A_DATA_1_VF_QPC_BT_IDX_M, @@ -1866,7 +1883,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | - HNS_ROCE_CAP_FLAG_RQ_INLINE | HNS_ROCE_CAP_FLAG_RECORD_DB | HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; @@ -1999,10 +2015,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline); caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); + caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); + caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; caps->num_other_vectors = resp_a->num_other_vectors; caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; @@ -2336,7 +2354,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, 
struct hns_roce_link_table_entry *entry; enum hns_roce_opcode_type opcode; u32 page_num; - int i; switch (type) { case TSQ_LINK_TABLE: @@ -2354,14 +2371,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, page_num = link_tbl->npages; entry = link_tbl->table.buf; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); + hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false); req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff); req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32); @@ -2880,36 +2893,20 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num == - HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); + hr_reg_enable(mpt_entry, MPT_L_INV_EN); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, - (mr->access & IB_ACCESS_MW_BIND ? 
1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, - mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, - (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, - (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, - (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); - - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, - mr->type == MR_TYPE_MR ? 0 : 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S, - 1); + hr_reg_write(mpt_entry, MPT_BIND_EN, + !!(mr->access & IB_ACCESS_MW_BIND)); + hr_reg_write(mpt_entry, MPT_ATOMIC_EN, + !!(mr->access & IB_ACCESS_REMOTE_ATOMIC)); + hr_reg_write(mpt_entry, MPT_RR_EN, + !!(mr->access & IB_ACCESS_REMOTE_READ)); + hr_reg_write(mpt_entry, MPT_RW_EN, + !!(mr->access & IB_ACCESS_REMOTE_WRITE)); + hr_reg_write(mpt_entry, MPT_LW_EN, + !!((mr->access & IB_ACCESS_LOCAL_WRITE))); mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); @@ -2917,9 +2914,19 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + if (mr->type != MR_TYPE_MR) + hr_reg_enable(mpt_entry, MPT_PA); + if (mr->type == MR_TYPE_DMA) return 0; + if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0) + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num); + + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); + ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); return ret; @@ -2927,20 +2934,17 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, - u32 pdn, int 
mr_access_flags, u64 iova, - u64 size, void *mb_buf) + void *mb_buf) { struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf; + u32 mr_access_flags = mr->access; int ret = 0; roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - if (flags & IB_MR_REREG_PD) { - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, pdn); - mr->pd = pdn; - } + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, + V2_MPT_BYTE_4_PD_S, mr->pd); if (flags & IB_MR_REREG_ACCESS) { roce_set_bit(mpt_entry->byte_8_mw_cnt_en, @@ -2958,13 +2962,10 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, } if (flags & IB_MR_REREG_TRANS) { - mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova)); - mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova)); - mpt_entry->len_l = cpu_to_le32(lower_32_bits(size)); - mpt_entry->len_h = cpu_to_le32(upper_32_bits(size)); - - mr->iova = iova; - mr->size = size; + mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); + mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); + mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); } @@ -3126,11 +3127,6 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; - /* - * Make sure update of buffer contents is done before - * updating consumer index. 
- */ - wmb(); hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); } } @@ -3639,11 +3635,8 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, break; } - if (npolled) { - /* Memory barrier */ - wmb(); + if (npolled) hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); - } out: spin_unlock_irqrestore(&hr_cq->lock, flags); @@ -4235,7 +4228,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; @@ -4243,7 +4235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t irrl_ba; enum ib_mtu mtu; u8 lp_pktn_ini; - u8 port_num; u64 *mtts; u8 *dmac; u8 *smac; @@ -4324,15 +4315,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); } - /* Configure GID index */ - port_num = rdma_ah_get_port_num(&attr->ah_attr); - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, - hns_get_gid_index(hr_dev, port_num - 1, - grh->sgid_index)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); - memcpy(&(context->dmac), dmac, sizeof(u32)); roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); @@ -5083,7 +5065,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, done: qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; - qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; @@ -5174,6 +5156,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev 
*hr_dev, struct hns_roce_cmq_desc desc; int ret, i; + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + mutex_lock(&hr_dev->qp_table.scc_mutex); /* set scc ctx clear done flag */ @@ -5220,98 +5205,96 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, return ret; } -static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, - u32 cqn, void *mb_buf, u64 *mtts_wqe, - u64 *mtts_idx, dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx) +#define DMA_IDX_SHIFT 3 +#define DMA_WQE_SHIFT 3 + +static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, + struct hns_roce_srq_context *ctx) { - struct hns_roce_srq_context *srq_context; + struct hns_roce_idx_que *idx_que = &srq->idx_que; + struct ib_device *ibdev = srq->ibsrq.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + u64 mtts_idx[MTT_MIN_COUNT] = {}; + dma_addr_t dma_handle_idx = 0; + int ret; - srq_context = mb_buf; - memset(srq_context, 0, sizeof(*srq_context)); + /* Get physical address of idx que buf */ + ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx, + ARRAY_SIZE(mtts_idx), &dma_handle_idx); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); + return -ENOBUFS; + } - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, - SRQC_BYTE_4_SRQ_ST_S, 1); + hr_reg_write(ctx, SRQC_IDX_HOP_NUM, + to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt)); - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, - to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, - srq->wqe_cnt)); - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->wqe_cnt)); + hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT); + hr_reg_write(ctx, SRQC_IDX_BT_BA_H, + upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT)); - 
roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, - SRQC_BYTE_4_SRQN_S, srq->srqn); + hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ, + to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ, + to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift)); - roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L, + to_hr_hw_page_addr(mtts_idx[0])); + hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); - roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, - SRQC_BYTE_12_SRQ_XRCD_S, xrcd); + hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L, + to_hr_hw_page_addr(mtts_idx[1])); + hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); - srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); + return 0; +} - roce_set_field(srq_context->byte_24_wqe_bt_ba, - SRQC_BYTE_24_SRQ_WQE_BT_BA_M, - SRQC_BYTE_24_SRQ_WQE_BT_BA_S, - dma_handle_wqe >> 35); +static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) +{ + struct ib_device *ibdev = srq->ibsrq.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + struct hns_roce_srq_context *ctx = mb_buf; + u64 mtts_wqe[MTT_MIN_COUNT] = {}; + dma_addr_t dma_handle_wqe = 0; + int ret; - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, - SRQC_BYTE_28_PD_S, pdn); - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, - SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 
0 : - fls(srq->max_gs - 1)); + memset(ctx, 0, sizeof(*ctx)); - srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); - roce_set_field(srq_context->rsv_idx_bt_ba, - SRQC_BYTE_36_SRQ_IDX_BT_BA_M, - SRQC_BYTE_36_SRQ_IDX_BT_BA_S, - dma_handle_idx >> 35); + /* Get the physical address of srq buf */ + ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, + ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); + return -ENOBUFS; + } - srq_context->idx_cur_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, - to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, - srq->wqe_cnt)); + hr_reg_write(ctx, SRQC_SRQ_ST, 1); + hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn); + hr_reg_write(ctx, SRQC_SRQN, srq->srqn); + hr_reg_write(ctx, SRQC_XRCD, 0); + hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn); + hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt)); + hr_reg_write(ctx, SRQC_RQWS, + srq->max_gs <= 0 ? 
0 : fls(srq->max_gs - 1)); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); + hr_reg_write(ctx, SRQC_WQE_HOP_NUM, + to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, + srq->wqe_cnt)); - srq_context->idx_nxt_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); - roce_set_field(srq_context->rsv_idxnxtblkaddr, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, - cqn); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); + hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT); + hr_reg_write(ctx, SRQC_WQE_BT_BA_H, + upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT)); - roce_set_bit(srq_context->db_record_addr_record_en, - SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); + hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); + + return hns_roce_v2_write_srqc_index_queue(srq, ctx); } static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, @@ -5331,7 +5314,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, return -EINVAL; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->wqe_cnt) + if (srq_attr->srq_limit > 
srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -5394,8 +5377,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs; + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs - srq->rsv_sge; out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -5626,9 +5609,6 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; aeqe_found = 1; - if (eq->cons_index > (2 * eq->entries - 1)) - eq->cons_index = 0; - hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); @@ -5671,9 +5651,6 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqe_found = 1; - if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) - eq->cons_index = 0; - ceqe = next_ceqe_sw_v2(eq); } @@ -5948,7 +5925,6 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) buf_attr.region[0].size = eq->entries * eq->eqe_size; buf_attr.region[0].hopnum = eq->hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, hr_dev->caps.eqe_ba_pg_sz + @@ -6286,6 +6262,7 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, /* Get info from NIC driver. 
*/ hr_dev->reg_base = handle->rinfo.roce_io_base; + hr_dev->mem_base = handle->rinfo.roce_mem_base; hr_dev->caps.num_ports = 1; hr_dev->iboe.netdevs[0] = handle->rinfo.netdev; hr_dev->iboe.phy_port[0] = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index bdaccf86460d..39621fb6ec16 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -96,7 +96,8 @@ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 -#define HNS_ROCE_INVALID_LKEY 0x100 +#define HNS_ROCE_INVALID_LKEY 0x0 +#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 @@ -366,24 +367,61 @@ struct hns_roce_v2_cq_context { #define CQC_STASH CQC_FIELD_LOC(63, 63) struct hns_roce_srq_context { - __le32 byte_4_srqn_srqst; - __le32 byte_8_limit_wl; - __le32 byte_12_xrcd; - __le32 byte_16_pi_ci; - __le32 wqe_bt_ba; - __le32 byte_24_wqe_bt_ba; - __le32 byte_28_rqws_pd; - __le32 idx_bt_ba; - __le32 rsv_idx_bt_ba; - __le32 idx_cur_blk_addr; - __le32 byte_44_idxbufpgsz_addr; - __le32 idx_nxt_blk_addr; - __le32 rsv_idxnxtblkaddr; - __le32 byte_56_xrc_cqn; - __le32 db_record_addr_record_en; - __le32 db_record_addr; + __le32 byte_4_srqn_srqst; + __le32 byte_8_limit_wl; + __le32 byte_12_xrcd; + __le32 byte_16_pi_ci; + __le32 wqe_bt_ba; + __le32 byte_24_wqe_bt_ba; + __le32 byte_28_rqws_pd; + __le32 idx_bt_ba; + __le32 rsv_idx_bt_ba; + __le32 idx_cur_blk_addr; + __le32 byte_44_idxbufpgsz_addr; + __le32 idx_nxt_blk_addr; + __le32 rsv_idxnxtblkaddr; + __le32 byte_56_xrc_cqn; + __le32 db_record_addr_record_en; + __le32 db_record_addr; }; +#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l) + +#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0) +#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2) +#define SRQC_SHIFT 
SRQC_FIELD_LOC(7, 4) +#define SRQC_SRQN SRQC_FIELD_LOC(31, 8) +#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32) +#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48) +#define SRQC_XRCD SRQC_FIELD_LOC(87, 64) +#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88) +#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96) +#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112) +#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128) +#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160) +#define SRQC_RSV2 SRQC_FIELD_LOC(191, 189) +#define SRQC_PD SRQC_FIELD_LOC(215, 192) +#define SRQC_RQWS SRQC_FIELD_LOC(219, 216) +#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220) +#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224) +#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256) +#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285) +#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288) +#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320) +#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340) +#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342) +#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344) +#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348) +#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352) +#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384) +#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404) +#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416) +#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440) +#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444) +#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448) +#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449) +#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480) + #define SRQC_BYTE_4_SRQ_ST_S 0 #define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0) @@ -993,6 +1031,45 @@ struct hns_roce_v2_mpt_entry { __le32 byte_64_buf_pa1; }; +#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l) + +#define MPT_ST MPT_FIELD_LOC(1, 0) +#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2) +#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4) +#define MPT_PD MPT_FIELD_LOC(31, 8) +#define MPT_RA_EN MPT_FIELD_LOC(32, 32) +#define 
MPT_R_INV_EN MPT_FIELD_LOC(33, 33) +#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34) +#define MPT_BIND_EN MPT_FIELD_LOC(35, 35) +#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36) +#define MPT_RR_EN MPT_FIELD_LOC(37, 37) +#define MPT_RW_EN MPT_FIELD_LOC(38, 38) +#define MPT_LW_EN MPT_FIELD_LOC(39, 39) +#define MPT_MW_CNT MPT_FIELD_LOC(63, 40) +#define MPT_FRE MPT_FIELD_LOC(64, 64) +#define MPT_PA MPT_FIELD_LOC(65, 65) +#define MPT_ZBVA MPT_FIELD_LOC(66, 66) +#define MPT_SHARE MPT_FIELD_LOC(67, 67) +#define MPT_MR_MW MPT_FIELD_LOC(68, 68) +#define MPT_BPD MPT_FIELD_LOC(69, 69) +#define MPT_BQP MPT_FIELD_LOC(70, 70) +#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71) +#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72) +#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96) +#define MPT_LEN MPT_FIELD_LOC(191, 128) +#define MPT_LKEY MPT_FIELD_LOC(223, 192) +#define MPT_VA MPT_FIELD_LOC(287, 224) +#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) +#define MPT_PBL_BA MPT_FIELD_LOC(380, 320) +#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) +#define MPT_RSV0 MPT_FIELD_LOC(383, 382) +#define MPT_PA0 MPT_FIELD_LOC(441, 384) +#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) +#define MPT_PA1 MPT_FIELD_LOC(505, 448) +#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) +#define MPT_RSV2 MPT_FIELD_LOC(507, 507) +#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) + #define V2_MPT_BYTE_4_MPT_ST_S 0 #define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0) @@ -1059,6 +1136,8 @@ struct hns_roce_v2_mpt_entry { #define V2_DB_BYTE_4_CMD_S 24 #define V2_DB_BYTE_4_CMD_M GENMASK(27, 24) +#define V2_DB_FLAG_S 31 + #define V2_DB_PARAMETER_IDX_S 0 #define V2_DB_PARAMETER_IDX_M GENMASK(15, 0) @@ -1155,6 +1234,15 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5 +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) + +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13 +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) + +#define 
V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15 +#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) + #define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 #define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 @@ -1167,15 +1255,17 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 -#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19 +#define V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S 10 -#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20 +#define V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S 11 -#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21 +#define V2_RC_FRMR_WQE_BYTE_40_RR_S 12 -#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22 +#define V2_RC_FRMR_WQE_BYTE_40_RW_S 13 -#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23 +#define V2_RC_FRMR_WQE_BYTE_40_LW_S 14 + +#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) @@ -1190,7 +1280,7 @@ struct hns_roce_v2_rc_send_wqe { struct hns_roce_wqe_frmr_seg { __le32 pbl_size; - __le32 mode_buf_pg_sz; + __le32 byte_40; }; #define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4 @@ -1786,12 +1876,8 @@ struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct hns_roce_cmq_desc *desc; u32 head; - u32 tail; - u16 buf_size; u16 desc_num; - int next_to_use; - int next_to_clean; u8 flag; spinlock_t lock; /* command queue lock */ }; @@ -1800,7 +1886,6 @@ struct hns_roce_v2_cmq { struct hns_roce_v2_cmq_ring csq; struct hns_roce_v2_cmq_ring crq; u16 tx_timeout; - u16 last_status; }; enum hns_roce_link_table_type { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d9179bae4989..c9c0836394a2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -42,22 +42,6 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" -/** - * hns_get_gid_index - Get gid index. - * @hr_dev: pointer to structure hns_roce_dev. 
- * @port: port, value range: 0 ~ MAX - * @gid_index: gid_index, value range: 0 ~ MAX - * Description: - * N ports shared gids, allocation method as follow: - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * And so on - */ -u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) -{ - return gid_index * hr_dev->caps.num_ports + port; -} - static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; @@ -217,7 +201,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_srq_sge = hr_dev->caps.max_srq_sges; } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR && + hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA; } @@ -748,11 +733,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) goto err_pd_table_free; } - ret = hns_roce_init_cq_table(hr_dev); - if (ret) { - dev_err(dev, "Failed to init completion queue table.\n"); - goto err_mr_table_free; - } + hns_roce_init_cq_table(hr_dev); ret = hns_roce_init_qp_table(hr_dev); if (ret) { @@ -772,13 +753,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) return 0; err_qp_table_free: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) - hns_roce_cleanup_qp_table(hr_dev); + hns_roce_cleanup_qp_table(hr_dev); err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); - -err_mr_table_free: hns_roce_cleanup_mr_table(hr_dev); err_pd_table_free: diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1bcffd93ff3e..79b3c3023fe7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -66,8 +66,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct 
hns_roce_mr *mr, - u32 pd, u64 iova, u64 size, u32 access) +static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct ib_device *ibdev = &hr_dev->ib_dev; unsigned long obj = 0; @@ -82,11 +81,6 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, return -ENOMEM; } - mr->iova = iova; /* MR va starting addr */ - mr->size = size; /* MR addr range */ - mr->pd = pd; /* MR num */ - mr->access = access; /* MR access permit */ - mr->enabled = 0; /* MR active status */ mr->key = hw_index_to_key(obj); /* MR key */ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj); @@ -110,8 +104,7 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) } static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, - size_t length, struct ib_udata *udata, u64 start, - int access) + struct ib_udata *udata, u64 start) { struct ib_device *ibdev = &hr_dev->ib_dev; bool is_fast = mr->type == MR_TYPE_FRMR; @@ -121,11 +114,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; buf_attr.page_shift = is_fast ? 
PAGE_SHIFT : hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT; - buf_attr.region[0].size = length; + buf_attr.region[0].size = mr->size; buf_attr.region[0].hopnum = mr->pbl_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; - buf_attr.user_access = access; + buf_attr.user_access = mr->access; /* fast MR's buffer is alloced before mapping, not at creation */ buf_attr.mtt_only = is_fast; @@ -197,9 +189,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, } mr->enabled = 1; - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return 0; err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -237,14 +226,16 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = acc; /* Allocate memory region key */ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; - ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr); + ret = hns_roce_mr_enable(hr_dev, mr); if (ret) goto err_mr; @@ -271,13 +262,17 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); + mr->iova = virt_addr; + mr->size = length; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = access_flags; mr->type = MR_TYPE_MR; - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length, - access_flags); + + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_alloc_mr; - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); if (ret) goto err_alloc_key; @@ -299,35 +294,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(ret); } -static int rereg_mr_trans(struct ib_mr *ibmr, int flags, - u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct hns_roce_cmd_mailbox *mailbox, - u32 pdn, struct ib_udata *udata) -{ - struct 
hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_mr *mr = to_hr_mr(ibmr); - int ret; - - free_mr_pbl(hr_dev, mr); - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags); - if (ret) { - ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret); - return ret; - } - - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) { - ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret); - free_mr_pbl(hr_dev, mr); - } - - return ret; -} - struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, @@ -338,7 +304,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; unsigned long mtpt_idx; - u32 pdn = 0; int ret; if (!mr->enabled) @@ -360,23 +325,29 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); mr->enabled = 0; + mr->iova = virt_addr; + mr->size = length; if (flags & IB_MR_REREG_PD) - pdn = to_hr_pd(pd)->pdn; + mr->pd = to_hr_pd(pd)->pdn; + + if (flags & IB_MR_REREG_ACCESS) + mr->access = mr_access_flags; if (flags & IB_MR_REREG_TRANS) { - ret = rereg_mr_trans(ibmr, flags, - start, length, - virt_addr, mr_access_flags, - mailbox, pdn, udata); - if (ret) - goto free_cmd_mbox; - } else { - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) + free_mr_pbl(hr_dev, mr); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); + if (ret) { + ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n", + ret); goto free_cmd_mbox; + } + } + + ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf); + if (ret) { + ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", 
ret); + goto free_cmd_mbox; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); @@ -386,12 +357,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, } mr->enabled = 1; - if (flags & IB_MR_REREG_ACCESS) - mr->access = mr_access_flags; - - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -421,7 +386,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; - u64 length; int ret; if (mr_type != IB_MR_TYPE_MEM_REG) @@ -438,14 +402,15 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_FRMR; + mr->pd = to_hr_pd(pd)->pdn; + mr->size = max_num_sg * (1 << PAGE_SHIFT); /* Allocate memory region key */ - length = max_num_sg * (1 << PAGE_SHIFT); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; - ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0); + ret = alloc_mr_pbl(hr_dev, mr, NULL, 0); if (ret) goto err_key; @@ -454,7 +419,7 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, goto err_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->ibmr.length = length; + mr->ibmr.length = mr->size; return &mr->ibmr; @@ -631,30 +596,26 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) } static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, struct hns_roce_buf_region *region) + struct hns_roce_buf_region *region, dma_addr_t *pages, + int max_count) { + int count, npage; + int offset, end; __le64 *mtts; - int offset; - int count; - int npage; u64 addr; - int end; int i; - /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ - if (!region->hopnum) - return 0; - offset = region->offset; end = offset + region->count; npage = 
0; - while (offset < end) { + while (offset < end && npage < max_count) { + count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset, &count, NULL); if (!mtts) return -ENOBUFS; - for (i = 0; i < count; i++) { + for (i = 0; i < count && npage < max_count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) addr = to_hr_hw_page_addr(pages[npage]); else @@ -666,7 +627,7 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, offset += count; } - return 0; + return npage; } static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) @@ -729,25 +690,15 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) } static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr *buf_attr, bool is_direct, + struct hns_roce_buf_attr *buf_attr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - unsigned int best_pg_shift; - int all_pg_count = 0; size_t total_size; - int ret; total_size = mtr_bufs_size(buf_attr); - if (total_size < 1) { - ibdev_err(ibdev, "failed to check mtr size\n."); - return -EINVAL; - } if (udata) { - unsigned long pgsz_bitmap; - unsigned long page_size; - mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); @@ -756,76 +707,67 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, PTR_ERR(mtr->umem)); return -ENOMEM; } - if (buf_attr->fixed_page) - pgsz_bitmap = 1 << buf_attr->page_shift; - else - pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT); - - page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap, - user_addr); - if (!page_size) - return -EINVAL; - best_pg_shift = order_base_2(page_size); - all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size); - ret = 0; } else { mtr->umem = NULL; - mtr->kmem = - hns_roce_buf_alloc(hr_dev, total_size, - buf_attr->page_shift, - is_direct ? 
HNS_ROCE_BUF_DIRECT : 0); + mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + mtr->hem_cfg.is_direct ? + HNS_ROCE_BUF_DIRECT : 0); if (IS_ERR(mtr->kmem)) { ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", PTR_ERR(mtr->kmem)); return PTR_ERR(mtr->kmem); } - - best_pg_shift = buf_attr->page_shift; - all_pg_count = mtr->kmem->npages; } - /* must bigger than minimum hardware page shift */ - if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { - ret = -EINVAL; - ibdev_err(ibdev, - "failed to check mtr, page shift = %u count = %d.\n", - best_pg_shift, all_pg_count); - goto err_alloc_mem; - } - - mtr->hem_cfg.buf_pg_shift = best_pg_shift; - mtr->hem_cfg.buf_pg_count = all_pg_count; - return 0; -err_alloc_mem: - mtr_free_bufs(hr_dev, mtr); - return ret; } -static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int count, unsigned int page_shift) +static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int page_count, unsigned int page_shift) { struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages; int npage; - int err; + int ret; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) + return -ENOMEM; if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0, mtr->umem, page_shift); else - npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0, mtr->kmem); + if (npage != page_count) { + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage, + page_count); + ret = -ENOBUFS; + goto err_alloc_list; + } + if (mtr->hem_cfg.is_direct && npage > 1) { - err = mtr_check_direct_pages(pages, npage, page_shift); - if (err) { - ibdev_err(ibdev, "Failed to check %s direct page-%d\n", - mtr->umem ? 
"user" : "kernel", err); - npage = err; + ret = mtr_check_direct_pages(pages, npage, page_shift); + if (ret) { + ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n", + mtr->umem ? "user" : "kernel", ret); + ret = -ENOBUFS; + goto err_alloc_list; } } - return npage; + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); + +err_alloc_list: + kvfree(pages); + + return ret; } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -833,8 +775,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; - unsigned int i; - int err; + unsigned int i, mapped_cnt; + int ret; /* * Only use the first page address as root ba when hopnum is 0, this @@ -845,26 +787,42 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return 0; } - for (i = 0; i < mtr->hem_cfg.region_count; i++) { + for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && + mapped_cnt < page_cnt; i++) { r = &mtr->hem_cfg.region[i]; + /* if hopnum is 0, no need to map pages in this region */ + if (!r->hopnum) { + mapped_cnt += r->count; + continue; + } + if (r->offset + r->count > page_cnt) { - err = -EINVAL; + ret = -EINVAL; ibdev_err(ibdev, "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); - return err; + return ret; } - err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); - if (err) { + ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset], + page_cnt - mapped_cnt); + if (ret < 0) { ibdev_err(ibdev, "failed to map mtr%u offset %u, ret = %d.\n", - i, r->offset, err); - return err; + i, r->offset, ret); + return ret; } + mapped_cnt += ret; + ret = 0; } - return 0; + if (mapped_cnt < page_cnt) { + ret = -ENOBUFS; + ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n", + mapped_cnt, page_cnt); + } + + return ret; } int 
hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -928,68 +886,92 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_buf_attr *attr, struct hns_roce_hem_cfg *cfg, - unsigned int *buf_page_shift) + unsigned int *buf_page_shift, int unalinged_size) { struct hns_roce_buf_region *r; + int first_region_padding; + int page_cnt, region_cnt; unsigned int page_shift; - int page_cnt = 0; size_t buf_size; - int region_cnt; + /* If mtt is disabled, all pages must be within a continuous range */ + cfg->is_direct = !mtr_has_mtt(attr); + buf_size = mtr_bufs_size(attr); if (cfg->is_direct) { - buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; - page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); - /* - * When HEM buffer use level-0 addressing, the page size equals - * the buffer size, and the the page size = 4K * 2^N. + /* When HEM buffer uses 0-level addressing, the page size is + * equal to the whole buffer size, and we split the buffer into + * small pages which is used to check whether the adjacent + * units are in the continuous space and its size is fixed to + * 4K based on hns ROCEE's requirement. */ - cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); - if (attr->region_count > 1) { - cfg->buf_pg_count = page_cnt; - page_shift = HNS_HW_PAGE_SHIFT; - } else { - cfg->buf_pg_count = 1; - page_shift = cfg->buf_pg_shift; - if (buf_size != 1 << page_shift) { - ibdev_err(&hr_dev->ib_dev, - "failed to check direct size %zu shift %d.\n", - buf_size, page_shift); - return -EINVAL; - } - } + page_shift = HNS_HW_PAGE_SHIFT; + + /* The ROCEE requires the page size to be 4K * 2 ^ N. 
*/ + cfg->buf_pg_count = 1; + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + + order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE)); + first_region_padding = 0; } else { - page_shift = cfg->buf_pg_shift; + page_shift = attr->page_shift; + cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size, + 1 << page_shift); + cfg->buf_pg_shift = page_shift; + first_region_padding = unalinged_size; } - /* convert buffer size to page index and page count */ - for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && - region_cnt < attr->region_count && + /* Convert buffer size to page index and page count for each region and + * the buffer's offset needs to be appended to the first region. + */ + for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count && region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { r = &cfg->region[region_cnt]; r->offset = page_cnt; - buf_size = hr_hw_page_align(attr->region[region_cnt].size); + buf_size = hr_hw_page_align(attr->region[region_cnt].size + + first_region_padding); r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + first_region_padding = 0; page_cnt += r->count; r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, r->count); } - if (region_cnt < 1) { - ibdev_err(&hr_dev->ib_dev, - "failed to check mtr region count, pages = %d.\n", - cfg->buf_pg_count); - return -ENOBUFS; - } - cfg->region_count = region_cnt; *buf_page_shift = page_shift; return page_cnt; } +static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + unsigned int ba_page_shift) +{ + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int ret; + + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) + return ret; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; + } + + return 0; +} + +static void mtr_free_mtt(struct 
hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +} + /** * hns_roce_mtr_create - Create hns memory translate region. * + * @hr_dev: RoCE device struct pointer * @mtr: memory translate region * @buf_attr: buffer attribute for creating mtr * @ba_page_shift: page shift for multi-hop base address table @@ -1001,95 +983,51 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, unsigned int ba_page_shift, struct ib_udata *udata, unsigned long user_addr) { - struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int buf_page_shift = 0; - dma_addr_t *pages = NULL; - int all_pg_cnt; - int get_pg_cnt; - int ret = 0; + int buf_page_cnt; + int ret; - /* if disable mtt, all pages must in a continuous address range */ - cfg->is_direct = !mtr_has_mtt(buf_attr); + buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg, + &buf_page_shift, + udata ? user_addr & ~PAGE_MASK : 0); + if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) { + ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %d.\n", + buf_page_cnt, buf_page_shift); + return -EINVAL; + } - /* if buffer only need mtt, just init the hem cfg */ + ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift); + if (ret) { + ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret); + return ret; + } + + /* The caller has its own buffer list and invokes the hns_roce_mtr_map() + * to finish the MTT configuration. 
+ */ if (buf_attr->mtt_only) { - cfg->buf_pg_shift = buf_attr->page_shift; - cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> - buf_attr->page_shift; mtr->umem = NULL; mtr->kmem = NULL; - } else { - ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct, - udata, user_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc mtr bufs, ret = %d.\n", ret); - return ret; - } + return 0; } - all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); - if (all_pg_cnt < 1) { - ret = -ENOBUFS; - ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); - goto err_alloc_bufs; + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr); + if (ret) { + ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret); + goto err_alloc_mtt; } - hns_roce_hem_list_init(&mtr->hem_list); - if (!cfg->is_direct) { - ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - cfg->region, cfg->region_count, - ba_page_shift); - if (ret) { - ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", - ret); - goto err_alloc_bufs; - } - cfg->root_ba = mtr->hem_list.root_ba; - cfg->ba_pg_shift = ba_page_shift; - } else { - cfg->ba_pg_shift = cfg->buf_pg_shift; - } - - /* no buffer to map */ - if (buf_attr->mtt_only) + /* Write buffer's dma address to MTT */ + ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift); + if (ret) + ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret); + else return 0; - /* alloc a tmp array to store buffer's dma address */ - pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", - all_pg_cnt); - goto err_alloc_hem_list; - } - - get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, - buf_page_shift); - if (get_pg_cnt != all_pg_cnt) { - ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", - get_pg_cnt, all_pg_cnt); - ret = -ENOBUFS; - goto err_alloc_page_list; - } - - /* write buffer's dma address to BA table */ - ret = 
hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); - if (ret) { - ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); - goto err_alloc_page_list; - } - - /* drop tmp array */ - kvfree(pages); - return 0; -err_alloc_page_list: - kvfree(pages); -err_alloc_hem_list: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -err_alloc_bufs: mtr_free_bufs(hr_dev, mtr); +err_alloc_mtt: + mtr_free_mtt(hr_dev, mtr); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 1116371adf74..004aca9086ab 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) mutex_unlock(&hr_dev->qp_table.bank_mutex); } -static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp, int has_rq) +static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, + bool user) { + u32 max_sge = dev->caps.max_rq_sg; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. 
+ */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_qp->rq.rsv_sge = 1; + + return max_sge; +} + +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, + struct hns_roce_qp *hr_qp, int has_rq, bool user) +{ + u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user); u32 cnt; /* If srq exist, set zero for relative number of rq */ @@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, /* Check the validity of QP support capacity */ if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || - cap->max_recv_sge > hr_dev->caps.max_rq_sg) { - ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n", + cap->max_recv_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "RQ config error, depth = %u, sge = %u\n", cap->max_recv_wr, cap->max_recv_sge); return -EINVAL; } @@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return -EINVAL; } - hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + + hr_qp->rq.rsv_sge); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = cnt; - cap->max_recv_sge = hr_qp->rq.max_gs; + cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; return 0; } @@ -599,7 +624,6 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, return -EINVAL; buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; - buf_attr->fixed_page = true; buf_attr->region_count = idx; return 0; @@ -919,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, - hns_roce_qp_has_rq(init_attr)); + hns_roce_qp_has_rq(init_attr), 
!!udata); if (ret) { ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n", ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index c4ae57e4173a..d5a6de0e7095 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Hisilicon Limited. */ +#include #include #include "hns_roce_device.h" #include "hns_roce_cmd.h" @@ -76,40 +77,16 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, - u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr) +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - u64 mtts_wqe[MTT_MIN_COUNT] = { 0 }; - u64 mtts_idx[MTT_MIN_COUNT] = { 0 }; - dma_addr_t dma_handle_wqe = 0; - dma_addr_t dma_handle_idx = 0; int ret; - /* Get the physical address of srq buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, - ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", - ret); - return -ENOBUFS; - } - - /* Get physical address of idx que buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, - ARRAY_SIZE(mtts_idx), &dma_handle_idx); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", - ret); - return -ENOBUFS; - } - ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ number, ret = %d.\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ number.\n"); return -ENOMEM; } @@ -127,34 +104,36 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { - ret = -ENOMEM; 
ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); + ret = -ENOMEM; goto err_xa; } - hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf, - mtts_wqe, mtts_idx, dma_handle_wqe, - dma_handle_idx); + ret = hr_dev->hw->write_srqc(srq, mailbox->buf); + if (ret) { + ibdev_err(ibdev, "failed to write SRQC.\n"); + goto err_mbox; + } ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); - hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); - goto err_xa; + goto err_mbox; } - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - return ret; + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return 0; + +err_mbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); err_xa: xa_erase(&srq_table->xa, srq->srqn); - err_put: hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); - err_out: hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); + return ret; } @@ -178,46 +157,13 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); } -static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, - struct ib_udata *udata, unsigned long addr) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_buf_attr buf_attr = {}; - int err; - - srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, - HNS_ROCE_SGE_SIZE * - srq->max_gs))); - - buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; - buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, - srq->wqe_shift); - buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; - buf_attr.region_count = 1; - buf_attr.fixed_page = true; - - err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, - hr_dev->caps.srqwqe_ba_pg_sz + - HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - ibdev_err(ibdev, - "failed to alloc SRQ buf mtr, ret = %d.\n", err); - - return err; -} - -static void 
free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) -{ - hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); -} - static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, struct ib_udata *udata, unsigned long addr) { struct hns_roce_idx_que *idx_que = &srq->idx_que; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); @@ -226,31 +172,33 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->idx_que.entry_shift); buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; - err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) { + if (ret) { ibdev_err(ibdev, - "failed to alloc SRQ idx mtr, ret = %d.\n", err); - return err; + "failed to alloc SRQ idx mtr, ret = %d.\n", ret); + return ret; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); - err = -ENOMEM; + ret = -ENOMEM; goto err_idx_mtr; } } + idx_que->head = 0; + idx_que->tail = 0; + return 0; err_idx_mtr: hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); - return err; + return ret; } static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) @@ -262,10 +210,42 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); } +static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int ret; + + srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * + srq->max_gs))); + + 
buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, + srq->wqe_shift); + buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; + buf_attr.region_count = 1; + + ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + HNS_HW_PAGE_SHIFT, udata, addr); + if (ret) + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", ret); + + return ret; +} + +static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq) +{ + hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); +} + static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - srq->head = 0; - srq->tail = srq->wqe_cnt - 1; srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) return -ENOMEM; @@ -279,6 +259,126 @@ static void free_srq_wrid(struct hns_roce_srq *srq) srq->wrid = NULL; } +static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq, + bool user) +{ + u32 max_sge = dev->caps.max_srq_sges; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. 
+ */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_srq->rsv_sge = 1; + + return max_sge; +} + +static int set_srq_basic_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); + struct ib_srq_attr *attr = &init_attr->attr; + u32 max_sge; + + max_sge = proc_srq_sge(hr_dev, srq, !!udata); + if (attr->max_wr > hr_dev->caps.max_srq_wrs || + attr->max_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "invalid SRQ attr, depth = %u, sge = %u.\n", + attr->max_wr, attr->max_sge); + return -EINVAL; + } + + attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM); + srq->wqe_cnt = roundup_pow_of_two(attr->max_wr); + srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); + + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs - srq->rsv_sge; + attr->srq_limit = 0; + + return 0; +} + +static void set_srq_ext_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr) +{ + srq->cqn = ib_srq_has_cq(init_attr->srq_type) ? 
+ to_hr_cq(init_attr->ext.cq)->cqn : 0; +} + +static int set_srq_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + int ret; + + ret = set_srq_basic_param(srq, init_attr, udata); + if (ret) + return ret; + + set_srq_ext_param(srq, init_attr); + + return 0; +} + +static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata) +{ + struct hns_roce_ib_create_srq ucmd = {}; + int ret; + + if (udata) { + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to copy SRQ udata, ret = %d.\n", + ret); + return ret; + } + } + + ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); + if (ret) + return ret; + + ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr); + if (ret) + goto err_idx; + + if (!udata) { + ret = alloc_srq_wrid(hr_dev, srq); + if (ret) + goto err_wqe_buf; + } + + return 0; + +err_wqe_buf: + free_srq_wqe_buf(hr_dev, srq); +err_idx: + free_srq_idx(hr_dev, srq); + + return ret; +} + +static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + free_srq_wrid(srq); + free_srq_wqe_buf(hr_dev, srq); + free_srq_idx(hr_dev, srq); +} + int hns_roce_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata) @@ -286,89 +386,44 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); struct hns_roce_ib_create_srq_resp resp = {}; struct hns_roce_srq *srq = to_hr_srq(ib_srq); - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_ib_create_srq ucmd = {}; int ret; - u32 cqn; - - if (init_attr->srq_type != IB_SRQT_BASIC && - init_attr->srq_type != IB_SRQT_XRC) - return -EOPNOTSUPP; - - /* Check the actual SRQ wqe and SRQ sge num */ - if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) - return -EINVAL; mutex_init(&srq->mutex); 
spin_lock_init(&srq->lock); - srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); - srq->max_gs = init_attr->attr.max_sge; - - if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, - min(udata->inlen, sizeof(ucmd))); - if (ret) { - ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", - ret); - return ret; - } - } - - ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ buffer, ret = %d.\n", ret); + ret = set_srq_param(srq, init_attr, udata); + if (ret) return ret; - } - ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); - if (ret) { - ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); - goto err_buf_alloc; - } + ret = alloc_srq_buf(hr_dev, srq, udata); + if (ret) + return ret; - if (!udata) { - ret = alloc_srq_wrid(hr_dev, srq); - if (ret) { - ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", - ret); - goto err_idx_alloc; + ret = alloc_srqc(hr_dev, srq); + if (ret) + goto err_srq_buf; + + if (udata) { + resp.srqn = srq->srqn; + if (ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp)))) { + ret = -EFAULT; + goto err_srqc; } } - cqn = ib_srq_has_cq(init_attr->srq_type) ? 
- to_hr_cq(init_attr->ext.cq)->cqn : 0; srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; - - ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); - if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ context, ret = %d.\n", ret); - goto err_wrid_alloc; - } - srq->event = hns_roce_ib_srq_event; - resp.srqn = srq->srqn; - - if (udata) { - ret = ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp))); - if (ret) - goto err_srqc_alloc; - } + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); return 0; -err_srqc_alloc: +err_srqc: free_srqc(hr_dev, srq); -err_wrid_alloc: - free_srq_wrid(srq); -err_idx_alloc: - free_srq_idx(hr_dev, srq); -err_buf_alloc: +err_srq_buf: free_srq_buf(hr_dev, srq); + return ret; } @@ -378,8 +433,6 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) struct hns_roce_srq *srq = to_hr_srq(ibsrq); free_srqc(hr_dev, srq); - free_srq_idx(hr_dev, srq); - free_srq_wrid(srq); free_srq_buf(hr_dev, srq); return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 9acc0ecc9a43..ac65c8237b2e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -70,7 +70,7 @@ static void i40iw_disconnect_worker(struct work_struct *work); /** * i40iw_free_sqbuf - put back puda buffer if refcount = 0 * @vsi: pointer to vsi structure - * @buf: puda buffer to free + * @bufp: puda buffer to free */ void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp) { @@ -729,6 +729,7 @@ static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node, /** * i40iw_build_mpa_v1 - build a MPA V1 frame * @cm_node: connection's node + * @start_addr: MPA frame start address * @mpa_key: to do read0 or write0 */ static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node, @@ -1040,7 +1041,7 @@ static int i40iw_parse_mpa(struct i40iw_cm_node *cm_node, u8 *buffer, u32 *type, /** * i40iw_schedule_cm_timer - * @@cm_node: connection's node + * 
@cm_node: connection's node * @sqbuf: buffer to send * @type: if it is send or close * @send_retrans: if rexmits to be done @@ -1205,7 +1206,7 @@ static void i40iw_build_timer_list(struct list_head *timer_list, /** * i40iw_cm_timer_tick - system's timer expired callback - * @pass: Pointing to cm_core + * @t: Timer instance to fetch the cm_core pointer from */ static void i40iw_cm_timer_tick(struct timer_list *t) { @@ -1463,6 +1464,7 @@ struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core, * @cm_core: cm's core * @dst_port: listener tcp port num * @dst_addr: listener ip addr + * @vlan_id: vlan id for the given address * @listener_state: state to match with listen node's */ static struct i40iw_cm_listener *i40iw_find_listener( @@ -1521,7 +1523,7 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, /** * i40iw_find_port - find port that matches reference port * @hte: ptr to accelerated or non-accelerated list - * @accelerated_list: flag for accelerated vs non-accelerated list + * @port: port number to locate */ static bool i40iw_find_port(struct list_head *hte, u16 port) { @@ -1834,6 +1836,7 @@ static enum i40iw_status_code i40iw_add_mqh_4( /** * i40iw_dec_refcnt_listen - delete listener and associated cm nodes * @cm_core: cm's core + * @listener: passive connection's listener * @free_hanging_nodes: to free associated cm_nodes * @apbvt_del: flag to delete the apbvt */ @@ -2029,7 +2032,7 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev, return rc; } -/** +/* * i40iw_get_dst_ipv6 */ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr, @@ -2051,7 +2054,8 @@ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr, /** * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address * @iwdev: iwarp device structure - * @dst_ip: remote ip address + * @src: source ip address + * @dest: remote ip address * @arpindex: if there is an arp entry */ static int i40iw_addr_resolve_neigh_ipv6(struct 
i40iw_device *iwdev, @@ -3004,7 +3008,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node( /** * i40iw_cm_reject - reject and teardown a connection * @cm_node: connection's node - * @pdate: ptr to private data for reject + * @pdata: ptr to private data for reject * @plen: size of private data */ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen) @@ -4302,7 +4306,7 @@ static void i40iw_qhash_ctrl(struct i40iw_device *iwdev, * i40iw_cm_teardown_connections - teardown QPs * @iwdev: device pointer * @ipaddr: Pointer to IPv4 or IPv6 address - * @ipv4: flag indicating IPv4 when true + * @nfo: cm info node * @disconnect_all: flag indicating disconnect all QPs * teardown QPs where source or destination addr matches ip addr */ @@ -4358,6 +4362,7 @@ void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr, /** * i40iw_ifdown_notify - process an ifdown on an interface * @iwdev: device pointer + * @netdev: network interface device structure * @ipaddr: Pointer to IPv4 or IPv6 address * @ipv4: flag indicating IPv4 when true * @ifup: flag indicating interface up when true diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index c943d491b72b..eaea5d545eb8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -181,7 +181,7 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( * i40iw_sc_decode_fpm_query() - Decode a 64 bit value into max count and size * @buf: ptr to fpm query buffer * @buf_idx: index into buf - * @info: ptr to i40iw_hmc_obj_info struct + * @obj_info: ptr to i40iw_hmc_obj_info struct * @rsrc_idx: resource index into info * * Decode a 64 bit value from fpm query buffer into max count and size @@ -205,7 +205,7 @@ static u64 i40iw_sc_decode_fpm_query(u64 *buf, /** * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer * @buf: ptr to fpm query buffer - * @info: ptr to i40iw_hmc_obj_info struct + * 
@hmc_info: ptr to i40iw_hmc_obj_info struct * @hmc_fpm_misc: ptr to fpm data * * parses fpm query buffer and copy max_cnt and @@ -775,7 +775,7 @@ static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info( * i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ * @cqp: struct for cqp hw * @op_code: cqp opcode for completion - * @info: completion q entry to return + * @compl_info: completion q entry to return */ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done( struct i40iw_sc_cqp *cqp, @@ -933,7 +933,7 @@ static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cq * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @hmc_fn_id: hmc function id - * @commit_fpm_mem; Memory for fpm values + * @commit_fpm_mem: Memory for fpm values * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ @@ -1026,7 +1026,7 @@ i40iw_sc_query_rdma_features(struct i40iw_sc_cqp *cqp, /** * i40iw_get_rdma_features - get RDMA features - * @dev - sc device struct + * @dev: sc device struct */ enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev) { @@ -1456,7 +1456,7 @@ static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry( * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @entry_idx: index of mac entry - * @ ignore_ref_count: to force mac adde delete + * @ignore_ref_count: to force mac adde delete * @post_sq: flag for cqp db to ring */ static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry( @@ -2304,7 +2304,7 @@ static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq, * i40iw_sc_cq_modify - modify a Completion Queue * @cq: cq struct * @info: modification info struct - * @scratch: + * @scratch: u64 saved to be used during cqp completion * @post_sq: flag to post to sq */ static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq, @@ -3673,7 +3673,7 @@ static enum 
i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev /** * cqp_sds_wqe_fill - fill cqp wqe doe sd * @cqp: struct for cqp hw - * @info; sd info for wqe + * @info: sd info for wqe * @scratch: u64 saved to be used during cqp completion */ static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp, @@ -4884,7 +4884,7 @@ void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf) /** * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs. - * @stat: pestat struct + * @stats: pestat struct * @index: index in HW stats table which contains offset reg-addr * @value: hw stats value */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c index 5484cbf55f0f..8bd72af9e099 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hmc.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.c @@ -46,7 +46,7 @@ * i40iw_find_sd_index_limit - finds segment descriptor index limit * @hmc_info: pointer to the HMC configuration information structure * @type: type of HMC resources we're searching - * @index: starting index for the object + * @idx: starting index for the object * @cnt: number of objects we're trying to create * @sd_idx: pointer to return index of the segment descriptor in question * @sd_limit: pointer to return the maximum number of segment descriptors @@ -78,7 +78,7 @@ static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info, * @type: HMC resource type we're examining * @idx: starting index for the object * @cnt: number of objects we're trying to create - * @pd_index: pointer to return page descriptor index + * @pd_idx: pointer to return page descriptor index * @pd_limit: pointer to return page descriptor index limit * * Calculates the page descriptor index and index limit for the resource diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 56fdc161f6f8..d167ac10c751 100644 --- 
a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -165,7 +165,7 @@ static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq /** * i40iw_iwarp_ce_handler - handle iwarp completions * @iwdev: iwarp device - * @iwcp: iwarp cq receiving event + * @iwcq: iwarp cq receiving event */ static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *iwcq) @@ -519,6 +519,7 @@ enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev, * @iwdev: iwarp device * @mac_addr: mac address ptr * @ip_addr: ip addr for arp cache + * @ipv4: flag indicating IPv4 when true * @action: add, delete or modify */ void i40iw_manage_arp_cache(struct i40iw_device *iwdev, @@ -581,7 +582,6 @@ static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u * @mtype: type of qhash * @cmnode: cmnode associated with connection * @wait: wait for completion - * @user_pri:user pri of the connection */ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, struct i40iw_cm_info *cminfo, diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 584932d3cc44..ab4cb11950dc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -186,7 +186,7 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id) /** * i40iw_dpc - tasklet for aeq and ceq 0 - * @data: iwarp device + * @t: Timer context to fetch pointer to iwarp device */ static void i40iw_dpc(struct tasklet_struct *t) { @@ -200,7 +200,7 @@ static void i40iw_dpc(struct tasklet_struct *t) /** * i40iw_ceq_dpc - dpc handler for CEQ - * @data: data points to CEQ + * @t: Timer context to fetch pointer to CEQ data */ static void i40iw_ceq_dpc(struct tasklet_struct *t) { @@ -227,7 +227,7 @@ static irqreturn_t i40iw_irq_handler(int irq, void *data) /** * i40iw_destroy_cqp - destroy control qp * @iwdev: iwarp device - * @create_done: 1 if cqp 
create poll was success + * @free_hwcqp: 1 if CQP should be destroyed * * Issue destroy cqp request and * free the resources associated with the cqp @@ -253,7 +253,7 @@ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp) /** * i40iw_disable_irqs - disable device interrupts * @dev: hardware control device structure - * @msic_vec: msix vector to disable irq + * @msix_vec: msix vector to disable irq * @dev_id: parameter to pass to free_irq (used during irq setup) * * The function is called when destroying aeq/ceq @@ -394,8 +394,9 @@ static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = { /** * i40iw_close_hmc_objects_type - delete hmc objects of a given type - * @iwdev: iwarp device + * @dev: iwarp device * @obj_type: the hmc object type to be deleted + * @hmc_info: pointer to the HMC configuration information * @is_pf: true if the function is PF otherwise false * @reset: true if called before reset */ @@ -437,6 +438,7 @@ static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev, /** * i40iw_ceq_handler - interrupt handler for ceq + * @irq: interrupt request number * @data: ceq pointer */ static irqreturn_t i40iw_ceq_handler(int irq, void *data) @@ -1777,6 +1779,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli /** * i40iw_close - client interface operation close for iwarp/uda device * @ldev: lan device information + * @reset: true if called before reset * @client: client to close * * Called by the lan driver during the processing of client unregister diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 5f97643e22e5..53e5cd1a2bd6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -54,6 +54,7 @@ static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chun /** * i40iw_destroy_pble_pool - destroy pool during module unload + * @dev: i40iw_sc_dev struct * @pble_rsrc: pble resources */ void 
i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc) @@ -112,8 +113,8 @@ enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev, /** * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address - * @ pble_rsrc: structure containing fpm address - * @ idx: where to return indexes + * @pble_rsrc: structure containing fpm address + * @idx: where to return indexes */ static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 924be4b03c9a..d1c8cc0a6236 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -511,7 +511,8 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_qp_wqe - setup wqe for qp create - * @rsrc: resource for qp + * @dev: iwarp device + * @qp: resource for qp */ static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) { @@ -623,7 +624,8 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_cq_wqe - setup wqe for cq create - * @rsrc: resource for cq + * @dev: iwarp device + * @cq: cq to setup */ static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq) { @@ -782,7 +784,7 @@ static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_dele_resources - delete all resources during close - * @dev: iwarp device + * @vsi: pointer to vsi structure * @type: type of resource to dele * @reset: true if reset chip */ @@ -876,7 +878,7 @@ static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc, /** * i40iw_puda_create_rsrc - create resouce (ilq or ieq) - * @dev: iwarp device + * @vsi: pointer to vsi structure * @info: resource information */ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, @@ -1121,6 
+1123,7 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq, /** * i40iw_ieq_create_pbufl - create buffer list for single fpdu + * @pfpdu: partial management per user qp * @rxlist: resource list for receive ieq buffes * @pbufl: temp. list for buffers for fpddu * @buf: first receive buffer @@ -1434,7 +1437,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, /** * i40iw_ieq_receive - received exception buffer - * @dev: iwarp device + * @vsi: pointer to vsi structure * @buf: exception buffer received */ static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi, diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index c3633c9944db..f521be16bf31 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -119,6 +119,8 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp) * @qp: hw qp ptr * @wqe_idx: return wqe index * @wqe_size: size of sq wqe + * @total_size: work request length + * @wr_id: work request id */ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx, @@ -717,7 +719,6 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq, * i40iw_cq_poll_completion - get cq completion info * @cq: hw cq * @info: cq poll information returned - * @post_cq: update cq tail */ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, struct i40iw_cq_poll_info *info) @@ -1051,7 +1052,7 @@ void i40iw_device_init_uk(struct i40iw_dev_uk *dev) /** * i40iw_clean_cq - clean cq entries - * @ queue completion context + * @queue: completion context * @cq: cq to clean */ void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 644f8c641aa0..76f052b12c14 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -55,6 +55,7 @@ * i40iw_arp_table - manage arp table * @iwdev: 
iwarp device * @ip_addr: ip address for device + * @ipv4: flag indicating IPv4 when true * @mac_addr: mac address ptr * @action: modify, delete or add */ @@ -138,7 +139,7 @@ inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg) /** * i40iw_inetaddr_event - system notifier for ipv4 addr events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: if address */ @@ -214,7 +215,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, /** * i40iw_inet6addr_event - system notifier for ipv6 addr events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: if address */ @@ -265,7 +266,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, /** * i40iw_net_event - system notifier for netevents - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: neighbor */ @@ -310,7 +311,7 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * /** * i40iw_netdevice_event - system notifier for netdev events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: netdev */ @@ -652,6 +653,7 @@ struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn) * i40iw_debug_buf - print debug msg and buffer is mask set * @dev: hardware control device structure * @mask: mask to compare if to print debug buffer + * @desc: identifying string * @buf: points buffer addr * @size: saize of buffer to print */ @@ -784,7 +786,7 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, /** * i40iw_cqp_sds_cmd - create cqp command for sd * @dev: hardware control device structure - * @sd_info: information for sd cqp + * @sdinfo: information for sd cqp * */ enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev, @@ -889,7 +891,7 @@ void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred) /** * i40iw_terminate_imeout - timeout happened - * @context: points to iwarp qp + * @t: points to iwarp qp */ static void 
i40iw_terminate_timeout(struct timer_list *t) { @@ -943,7 +945,7 @@ static void i40iw_cqp_generic_worker(struct work_struct *work) /** * i40iw_cqp_spawn_worker - spawn worket thread - * @iwdev: device struct pointer + * @dev: device struct pointer * @work_info: work request info * @iw_vf_idx: virtual function index */ @@ -1048,7 +1050,7 @@ enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev, /** * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm - * @iwdev: function device struct + * @dev: function device struct * @values_mem: buffer for fpm * @hmc_fn_id: function id for fpm */ @@ -1114,7 +1116,7 @@ enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev, /** * i40iw_vf_wait_vchnl_resp - wait for channel msg - * @iwdev: function's device struct + * @dev: function's device struct */ enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) { @@ -1461,7 +1463,7 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in /** * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats - * @vsi: pointer to the vsi structure + * @t: Timer context containing pointer to the vsi structure */ static void i40iw_hw_stats_timeout(struct timer_list *t) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 65aedfe57e77..f18d146a6079 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -265,9 +265,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, /** * i40iw_free_qp_resources - free up memory resources for qp - * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) - * @qp_num: qp number assigned */ void i40iw_free_qp_resources(struct i40iw_qp *iwqp) { @@ -302,6 +300,7 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) /** * i40iw_destroy_qp - destroy qp * @ibqp: qp's ib pointer also to get to device's qp address + * @udata: 
user data */ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { @@ -338,8 +337,8 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) /** * i40iw_setup_virt_qp - setup for allocation of virtual qp - * @dev: iwarp device - * @qp: qp ptr + * @iwdev: iwarp device + * @iwqp: qp ptr * @init_info: initialize info to return */ static int i40iw_setup_virt_qp(struct i40iw_device *iwdev, @@ -1241,7 +1240,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous * @arr: lvl1 pbl array * @npages: page count - * pg_size: page size + * @pg_size: page size * */ static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) @@ -1258,7 +1257,7 @@ static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) /** * i40iw_check_mr_contiguous - check if MR is physically contiguous * @palloc: pbl allocation struct - * pg_size: page size + * @pg_size: page size */ static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size) { @@ -1533,6 +1532,7 @@ static int i40iw_set_page(struct ib_mr *ibmr, u64 addr) * @ibmr: ib mem to access iwarp mr pointer * @sg: scatter gather list for fmr * @sg_nents: number of sg pages + * @sg_offset: scatter gather offset */ static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) @@ -1881,6 +1881,7 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, /** * i40iw_dereg_mr - deregister mr * @ib_mr: mr ptr for dereg + * @udata: user data */ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { @@ -1945,7 +1946,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) return 0; } -/** +/* * hw_rev_show */ static ssize_t hw_rev_show(struct device *dev, @@ -1959,7 +1960,7 @@ static ssize_t hw_rev_show(struct device *dev, } static DEVICE_ATTR_RO(hw_rev); -/** +/* * hca_type_show */ static ssize_t 
hca_type_show(struct device *dev, @@ -1969,7 +1970,7 @@ static ssize_t hca_type_show(struct device *dev, } static DEVICE_ATTR_RO(hca_type); -/** +/* * board_id_show */ static ssize_t board_id_show(struct device *dev, diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 48fd327f876b..aca9061688ae 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -119,7 +119,7 @@ static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev return ret_code; } -/** +/* * vchnl_vf_send_add_hmc_objs_req - Add HMC objects * @dev: IWARP device pointer * @vchnl_req: Virtual channel message request pointer @@ -158,9 +158,9 @@ static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev * vchnl_vf_send_del_hmc_objs_req - del HMC objects * @dev: IWARP device pointer * @vchnl_req: Virtual channel message request pointer - * @ rsrc_type - resource type to delete - * @ start_index - starting index for resource - * @ rsrc_count - number of resource type to delete + * @rsrc_type: resource type to delete + * @start_index: starting index for resource + * @rsrc_count: number of resource type to delete */ static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req *vchnl_req, @@ -222,6 +222,7 @@ static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev, * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message * @vchnl_msg: Virtual channel message buffer pointer + * @hmc_fcn: HMC function index pointer */ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev, u32 vf_id, @@ -276,6 +277,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message * @vchnl_msg: Virtual channel message buffer pointer + * @op_ret_code: I40IW_ERR_* status code */ 
static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id, struct i40iw_virtchnl_op_buf *vchnl_msg, @@ -297,8 +299,9 @@ static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id, /** * pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn - * @cqp_req_param: CQP Request param value - * @not_used: unused CQP callback parameter + * @dev: IWARP device pointer + * @callback_param: unused CQP callback parameter + * @cqe_info: CQE information pointer */ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param, struct i40iw_ccq_cqe_info *cqe_info) @@ -331,7 +334,7 @@ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback /** * pf_add_hmc_obj - Callback for Add HMC Object - * @vf_dev: pointer to the VF Device + * @work_vf_dev: pointer to the VF Device */ static void pf_add_hmc_obj_callback(void *work_vf_dev) { @@ -404,7 +407,7 @@ static void pf_del_hmc_obj_callback(void *work_vf_dev) /** * i40iw_vf_init_pestat - Initialize stats for VF - * @devL pointer to the VF Device + * @dev: pointer to the VF Device * @stats: Statistics structure pointer * @index: Stats index */ diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e3cd402c079a..f26a0d920842 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1699,7 +1699,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); - if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt) + if (!rdma_is_port_valid(qp->device, flow_attr->port)) return ERR_PTR(-EINVAL); if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 1b5891130aab..24ee79aa2122 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -798,7 +798,7 @@ static void 
unregister_pkey_tree(struct mlx4_ib_dev *device) int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) { - int i; + unsigned int i; int ret = 0; if (!mlx4_is_master(dev->dev)) @@ -817,7 +817,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) goto err_ports; } - for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) { + rdma_for_each_port(&dev->ib_dev, i) { ret = add_port_entries(dev, i); if (ret) goto err_add_entries; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 819c142857d6..ebc2a4355fa5 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -94,13 +94,13 @@ struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; u32 dinlen; - u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; + u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)]; }; struct devx_umem_reg_cmd { void *in; u32 inlen; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 out[MLX5_ST_SZ_DW(create_umem_out)]; }; static struct mlx5_ib_ucontext * @@ -111,8 +111,8 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { - u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; void *uctx; int err; u16 uid; @@ -138,14 +138,14 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) if (err) return err; - uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + uid = MLX5_GET(create_uctx_out, out, uid); return uid; } void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); MLX5_SET(destroy_uctx_in, in, uid, uid); @@ -288,6 +288,80 @@ 
static u64 get_enc_obj_id(u32 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } +static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode) +{ + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + return MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + case MLX5_CMD_OP_CREATE_UMEM: + return MLX5_GET(create_umem_out, out, umem_id); + case MLX5_CMD_OP_CREATE_MKEY: + return MLX5_GET(create_mkey_out, out, mkey_index); + case MLX5_CMD_OP_CREATE_CQ: + return MLX5_GET(create_cq_out, out, cqn); + case MLX5_CMD_OP_ALLOC_PD: + return MLX5_GET(alloc_pd_out, out, pd); + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + return MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + case MLX5_CMD_OP_CREATE_RMP: + return MLX5_GET(create_rmp_out, out, rmpn); + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_GET(create_sq_out, out, sqn); + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_GET(create_rq_out, out, rqn); + case MLX5_CMD_OP_CREATE_RQT: + return MLX5_GET(create_rqt_out, out, rqtn); + case MLX5_CMD_OP_CREATE_TIR: + return MLX5_GET(create_tir_out, out, tirn); + case MLX5_CMD_OP_CREATE_TIS: + return MLX5_GET(create_tis_out, out, tisn); + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + return MLX5_GET(alloc_q_counter_out, out, counter_set_id); + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + return MLX5_GET(create_flow_table_out, out, table_id); + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + return MLX5_GET(create_flow_group_out, out, group_id); + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + return MLX5_GET(set_fte_in, in, flow_index); + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + return MLX5_GET(alloc_packet_reformat_context_out, out, + packet_reformat_id); + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + return MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + return 
MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + return MLX5_GET(set_l2_table_entry_in, in, table_index); + case MLX5_CMD_OP_CREATE_QP: + return MLX5_GET(create_qp_out, out, qpn); + case MLX5_CMD_OP_CREATE_SRQ: + return MLX5_GET(create_srq_out, out, srqn); + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + case MLX5_CMD_OP_CREATE_DCT: + return MLX5_GET(create_dct_out, out, dctn); + case MLX5_CMD_OP_CREATE_XRQ: + return MLX5_GET(create_xrq_out, out, xrqn); + case MLX5_CMD_OP_ATTACH_TO_MCG: + return MLX5_GET(attach_to_mcg_in, in, qpn); + case MLX5_CMD_OP_ALLOC_XRCD: + return MLX5_GET(alloc_xrcd_out, out, xrcd); + case MLX5_CMD_OP_CREATE_PSV: + return MLX5_GET(create_psv_out, out, psv0_index); + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + return 0; + } +} + static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); @@ -399,8 +473,8 @@ static u64 devx_get_obj_id(const void *in) break; case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT, - MLX5_GET(general_obj_in_cmd_hdr, in, - obj_id)); + MLX5_GET(query_modify_header_context_in, + in, modify_header_id)); break; case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, @@ -1019,63 +1093,76 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, u32 *dinlen, u32 *obj_id) { - u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *obj_id = devx_get_created_obj_id(in, out, opcode); *dinlen = 
MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); - - MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); - switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + switch (opcode) { case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, + MLX5_GET(general_obj_in_cmd_hdr, in, obj_type)); break; case MLX5_CMD_OP_CREATE_UMEM: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_umem_in, din, opcode, MLX5_CMD_OP_DESTROY_UMEM); + MLX5_SET(destroy_umem_in, din, umem_id, *obj_id); break; case MLX5_CMD_OP_CREATE_MKEY: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, din, opcode, + MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, *obj_id); break; case MLX5_CMD_OP_CREATE_CQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, *obj_id); break; case MLX5_CMD_OP_ALLOC_PD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, pd, *obj_id); break; case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_transport_domain_in, din, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, din, transport_domain, + *obj_id); break; case MLX5_CMD_OP_CREATE_RMP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SQ: - 
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, sqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, rqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIR: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, tirn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIS: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, tisn, *obj_id); break; case MLX5_CMD_OP_ALLOC_Q_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_q_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id); break; case MLX5_CMD_OP_CREATE_FLOW_TABLE: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); - *obj_id = MLX5_GET(create_flow_table_out, out, table_id); MLX5_SET(destroy_flow_table_in, din, other_vport, MLX5_GET(create_flow_table_in, in, other_vport)); MLX5_SET(destroy_flow_table_in, din, vport_number, @@ -1083,12 +1170,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_table_in, din, table_type, MLX5_GET(create_flow_table_in, in, table_type)); MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_table_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); break; case 
MLX5_CMD_OP_CREATE_FLOW_GROUP: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); - *obj_id = MLX5_GET(create_flow_group_out, out, group_id); MLX5_SET(destroy_flow_group_in, din, other_vport, MLX5_GET(create_flow_group_in, in, other_vport)); MLX5_SET(destroy_flow_group_in, din, vport_number, @@ -1098,12 +1184,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_group_in, din, table_id, MLX5_GET(create_flow_group_in, in, table_id)); MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_group_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); break; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); - *obj_id = MLX5_GET(set_fte_in, in, flow_index); MLX5_SET(delete_fte_in, din, other_vport, MLX5_GET(set_fte_in, in, other_vport)); MLX5_SET(delete_fte_in, din, vport_number, @@ -1113,63 +1198,70 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(delete_fte_in, din, table_id, MLX5_GET(set_fte_in, in, table_id)); MLX5_SET(delete_fte_in, din, flow_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_fte_in, din, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); break; case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_flow_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id, + *obj_id); break; case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_packet_reformat_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, din, + packet_reformat_id, *obj_id); break; case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_modify_header_context_in, din, opcode, 
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, din, + modify_header_id, *obj_id); break; case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); - *obj_id = MLX5_GET(create_scheduling_element_out, out, - scheduling_element_id); MLX5_SET(destroy_scheduling_element_in, din, scheduling_hierarchy, MLX5_GET(create_scheduling_element_in, in, scheduling_hierarchy)); MLX5_SET(destroy_scheduling_element_in, din, scheduling_element_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_scheduling_element_in, din, opcode, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); break; case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); - *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_vxlan_udp_dport_in, din, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); break; case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); - *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_l2_table_entry_in, din, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); break; case MLX5_CMD_OP_CREATE_QP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRC_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_xrc_srq_in, din, opcode, 
MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_DCT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, dctn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id); break; case MLX5_CMD_OP_ATTACH_TO_MCG: *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); @@ -1178,16 +1270,19 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, opcode, + MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_ALLOC_XRCD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, opcode, + MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id); break; case MLX5_CMD_OP_CREATE_PSV: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_psv_in, din, opcode, MLX5_CMD_OP_DESTROY_PSV); - MLX5_SET(destroy_psv_in, din, psvn, - MLX5_GET(create_psv_out, out, psv0_index)); + MLX5_SET(destroy_psv_in, din, psvn, *obj_id); break; default: /* The entry must match to one of the devx_is_obj_create_cmd */ @@ -1215,9 +1310,9 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->size = MLX5_GET64(mkc, mkc, len); mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + init_waitqueue_head(&mkey->wait); - return xa_err(xa_store(&dev->odp_mkeys, 
mlx5_base_mkey(mkey->key), mkey, - GFP_KERNEL)); + return mlx5r_store_odp_mkey(dev, mkey); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1290,16 +1385,15 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY && + xa_erase(&obj->ib_dev->odp_mkeys, + mlx5_base_mkey(obj->devx_mr.mmkey.key))) /* * The pagefault_single_data_segment() does commands against * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. */ - xa_erase(&obj->ib_dev->odp_mkeys, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey); if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct); @@ -1345,6 +1439,16 @@ static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) rcu_read_unlock(); } +static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in) +{ + if (!MLX5_CAP_GEN(dev->mdev, apu) || + !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + apu_thread_cq)) + return false; + + return true; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -1398,7 +1502,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( obj->flags |= DEVX_OBJ_FLAGS_DCT; err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); - } else if (opcode == MLX5_CMD_OP_CREATE_CQ) { + } else if (opcode == MLX5_CMD_OP_CREATE_CQ && + !is_apu_thread_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9bb9bb058932..652c6ccf1881 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ 
b/drivers/infiniband/hw/mlx5/mad.c @@ -48,7 +48,7 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED && in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return true; - return dev->mdev->port_caps[port_num - 1].has_smi; + return dev->port_caps[port_num - 1].has_smi; } static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, @@ -279,7 +279,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; @@ -299,7 +299,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? + dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? 
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: @@ -308,8 +308,8 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) return err; } -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad) +static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, + struct ib_smp *out_mad) { struct ib_smp *in_mad = NULL; int err = -ENOMEM; @@ -549,7 +549,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); - props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len; + props->pkey_tbl_len = dev->pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -589,7 +589,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (mdev->port_caps[port - 1].ext_port_cap & + if (dev->port_caps[port - 1].ext_port_cap & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aabdc07e4753..0d69a697d75f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. 
*/ #include @@ -461,7 +462,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; bool put_mdev = true; - u16 qkey_viol_cntr; u32 eth_prot_oper; u8 mdev_port_num; bool ext; @@ -499,20 +499,22 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width, ext); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->ip_gids = true; + if (!dev->is_rep && mlx5_is_roce_enabled(mdev)) { + u16 qkey_viol_cntr; - props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, - roce_address_table_size); + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); + props->qkey_viol_cntr = qkey_viol_cntr; + } props->max_mtu = IB_MTU_4096; props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; - mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); - props->qkey_viol_cntr = qkey_viol_cntr; - /* If this is a stub query for an unaffiliated port stop here */ if (!put_mdev) goto out; @@ -815,9 +817,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (err) return err; - err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); - if (err) - return err; + props->max_pkeys = dev->pkey_table_len; err = mlx5_query_vendor_id(ibdev, &props->vendor_id); if (err) @@ -1384,19 +1384,17 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - int ret; + return mlx5_query_port_roce(ibdev, port, props); +} - /* Only link layer == ethernet is valid for representors - * and we always use port 1 +static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 
*pkey) +{ + /* Default special Pkey for representor device port as per the + * IB specification 1.3 section 10.9.1.2. */ - ret = mlx5_query_port_roce(ibdev, port, props); - if (ret || !props) - return ret; - - /* We don't support GIDS */ - props->gid_tbl_len = 0; - - return ret; + *pkey = 0xffff; + return 0; } static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, @@ -2935,8 +2933,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { - dev->mdev->port_caps[port - 1].has_smi = false; + for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) { + dev->port_caps[port - 1].has_smi = false; if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) { if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { @@ -2948,10 +2946,10 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) port, err); return err; } - dev->mdev->port_caps[port - 1].has_smi = + dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; } else { - dev->mdev->port_caps[port - 1].has_smi = true; + dev->port_caps[port - 1].has_smi = true; } } } @@ -2960,63 +2958,12 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - int port; + unsigned int port; - for (port = 1; port <= dev->num_ports; port++) + rdma_for_each_port (&dev->ib_dev, port) mlx5_query_ext_port_caps(dev, port); } -static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - struct ib_device_attr *dprops = NULL; - struct ib_port_attr *pprops = NULL; - int err = -ENOMEM; - - pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); - if (!pprops) - goto out; - - dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); - if (!dprops) - goto out; - - err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); - if (err) { - mlx5_ib_warn(dev, "query_device failed %d\n", err); - goto out; - } - - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); - if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", 
- port, err); - goto out; - } - - dev->mdev->port_caps[port - 1].pkey_table_len = - dprops->max_pkeys; - dev->mdev->port_caps[port - 1].gid_table_len = - pprops->gid_tbl_len; - mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", - port, dprops->max_pkeys, pprops->gid_tbl_len); - -out: - kfree(pprops); - kfree(dprops); - - return err; -} - -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - /* For representors use port 1, is this is the only native - * port - */ - if (dev->is_rep) - return __get_port_caps(dev, 1); - return __get_port_caps(dev, port); -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ -3488,10 +3435,6 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, if (err) goto unbind; - err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); - if (err) - goto unbind; - err = mlx5_add_netdev_notifier(ibdev, port_num); if (err) { mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", @@ -3569,11 +3512,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) break; } } - if (!bound) { - get_port_caps(dev, i + 1); + if (!bound) mlx5_ib_dbg(dev, "no free port found for port %d\n", i + 1); - } } list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); @@ -3926,8 +3867,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); - cleanup_srcu_struct(&dev->odp_srcu); - + mutex_destroy(&dev->cap_mask_mutex); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -3938,6 +3878,12 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) int err; int i; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; + for (i = 0; i < 
dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); rwlock_init(&dev->port[i].roce.netdev_lock); @@ -3956,27 +3902,14 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (err) goto err_mp; - if (!mlx5_core_mp_enabled(mdev)) { - for (i = 1; i <= dev->num_ports; i++) { - err = get_port_caps(dev, i); - if (err) - break; - } - } else { - err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); - } + err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len); if (err) goto err_mp; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; - dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); - dev->ib_dev.dev.parent = mdev->device; - dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); @@ -3987,17 +3920,11 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - - err = init_srcu_struct(&dev->odp_srcu); - if (err) - goto err_mp; - return 0; err_mp: mlx5_ib_cleanup_multiport_master(dev); - - return -ENOMEM; + return err; } static int mlx5_ib_enable_driver(struct ib_device *dev) @@ -4067,6 +3994,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_srq = mlx5_ib_query_srq, .query_ucontext = mlx5_ib_query_ucontext, .reg_user_mr = mlx5_ib_reg_user_mr, + .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, @@ -4207,6 +4135,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .get_port_immutable = mlx5_port_rep_immutable, .query_port = mlx5_ib_rep_query_port, + .query_pkey = mlx5_ib_rep_query_pkey, }; static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) diff 
--git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b0fdc1b08e06..88cc26e008fc 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. */ #ifndef MLX5_IB_H @@ -683,11 +684,8 @@ struct mlx5_ib_mr { u64 pi_iova; /* For ODP and implicit */ - atomic_t num_deferred_work; - wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { - struct rcu_head rcu; struct list_head elm; struct work_struct work; } odp_destroy; @@ -703,6 +701,12 @@ static inline bool is_odp_mr(struct mlx5_ib_mr *mr) mr->umem->is_odp; } +static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_dmabuf; +} + struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; @@ -1029,6 +1033,11 @@ struct mlx5_var_table { u64 num_var_hw_entries; }; +struct mlx5_port_caps { + bool has_smi; + u8 ext_port_cap; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -1056,11 +1065,6 @@ struct mlx5_ib_dev { u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; - /* - * Sleepable RCU that prevents destruction of MRs while they are still - * being used by a page fault handler. 
- */ - struct srcu_struct odp_srcu; struct xarray odp_mkeys; u32 null_mkey; @@ -1089,6 +1093,8 @@ struct mlx5_ib_dev { struct mlx5_var_table var_table; struct xarray sig_mrs; + struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; + u16 pkey_table_len; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1243,6 +1249,10 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int mlx5_ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, @@ -1253,11 +1263,13 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr); struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1279,9 +1291,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad); +int mlx5_query_ext_port_caps(struct 
mlx5_ib_dev *dev, unsigned int port); int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid); int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, @@ -1345,6 +1355,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1370,6 +1381,10 @@ static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } +static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; @@ -1576,6 +1591,29 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, return true; } +static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mmkey) +{ + refcount_set(&mmkey->usecount, 1); + + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key), + mmkey, GFP_KERNEL)); +} + +/* deref an mkey that can participate in ODP flow */ +static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + if (refcount_dec_and_test(&mmkey->usecount)) + wake_up(&mmkey->wait); +} + +/* deref an mkey that can participate in ODP flow and wait for release */ +static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + mlx5r_deref_odp_mkey(mmkey); + wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0); +} + int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 24f8d59a42ea..db05b0e0a8d7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++
b/drivers/infiniband/hw/mlx5/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,6 +37,8 @@ #include #include #include +#include +#include #include #include #include @@ -155,6 +158,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.type = MLX5_MKEY_MR; mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); + init_waitqueue_head(&mr->mmkey.wait); WRITE_ONCE(dev->cache.last_add, jiffies); @@ -935,6 +939,17 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } +static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, + u64 iova) +{ + /* + * The alignment of iova has already been checked upon entering + * UVERBS_METHOD_REG_DMABUF_MR + */ + umem->iova = iova; + return PAGE_SIZE; +} + static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags) @@ -944,7 +959,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5_ib_mr *mr; unsigned int page_size; - page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (umem->is_dmabuf) + page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); + else + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, + 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); ent = mr_cache_ent_from_order( @@ -980,7 +999,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); - mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags); return mr; @@ -1201,8 +1219,10 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int 
npages, /* * Send the DMA list to the HW for a normal MR using UMR. + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP + * flag may be used. */ -static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) { struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; @@ -1244,6 +1264,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) cur_mtt->ptag = cpu_to_be64(rdma_block_iter_dma_address(&biter) | MLX5_IB_MTT_PRESENT); + + if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) + cur_mtt->ptag = 0; + cur_mtt++; } @@ -1528,10 +1552,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, } odp->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); if (err) goto err_dereg_mr; @@ -1567,6 +1588,81 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return create_real_mr(pd, umem, iova, access_flags); } +static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) +{ + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; + struct mlx5_ib_mr *mr = umem_dmabuf->private; + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (!umem_dmabuf->sgt) + return; + + mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); + ib_umem_dmabuf_unmap_pages(umem_dmabuf); +} + +static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { + .allow_peer2peer = 1, + .move_notify = mlx5_ib_dmabuf_invalidate_cb, +}; + +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr = NULL; 
+ struct ib_umem_dmabuf *umem_dmabuf; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || + !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, + "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n", + offset, virt_addr, length, fd, access_flags); + + /* dmabuf requires xlt update via umr to work. */ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, + access_flags, + &mlx5_ib_dmabuf_attach_ops); + if (IS_ERR(umem_dmabuf)) { + mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n", + PTR_ERR(umem_dmabuf)); + return ERR_CAST(umem_dmabuf); + } + + mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, + access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&umem_dmabuf->umem); + return ERR_CAST(mr); + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); + + atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); + umem_dmabuf->private = mr; + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_dmabuf_mr(mr); + if (err) + goto err_dereg_mr; + return &mr->ibmr; + +err_dereg_mr: + dereg_mr(dev, mr); + return ERR_PTR(err); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1740,8 +1836,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return ERR_PTR(err); return NULL; } - /* DM or ODP MR's don't have a umem so we can't re-use it */ - if (!mr->umem || is_odp_mr(mr)) + /* DM or ODP MR's don't have a normal umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; /* @@ -1760,10 +1856,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } /* - * DM doesn't have a PAS list so we can't re-use it, odp does but the - * logic around releasing the umem is different + * DM doesn't have a PAS list so we 
can't re-use it, odp/dmabuf does + * but the logic around releasing the umem is different */ - if (!mr->umem || is_odp_mr(mr)) + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && @@ -1876,6 +1972,8 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Stop all DMA */ if (is_odp_mr(mr)) mlx5_ib_fence_odp_mr(mr); + else if (is_dmabuf_mr(mr)) + mlx5_ib_fence_dmabuf_mr(mr); else clean_mr(dev, mr); @@ -2227,9 +2325,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) } if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, - GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mw->mmkey); if (err) goto free_mkey; } @@ -2249,14 +2345,13 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) /* - * pagefault_single_data_segment() may be accessing mmw under - * SRCU if the user bound an ODP MR to this MW. + * pagefault_single_data_segment() may be accessing mmw + * if the user bound an ODP MR to this MW. 
*/ - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&mmw->mmkey); return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index aa2413b50adc..374698186662 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "mlx5_ib.h" #include "cmd.h" @@ -113,7 +115,6 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * - * srcu_read_lock() * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() @@ -124,12 +125,9 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the xa_store() visible to a racing thread. While SRCU is not - * technically required, using it gives consistent use of the SRCU - * locking around the xarray. + * the xa_store() visible to a racing thread. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -205,8 +203,8 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) } /* - * This must be called after the mr has been removed from implicit_children - * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * This must be called after the mr has been removed from implicit_children. + * NOTE: The MR does not necessarily have to be * empty here, parallel page faults could have raced with the free process and * added pages to it. 
*/ @@ -216,19 +214,15 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - int srcu_key; - /* implicit_child_mr's are not allowed to have deferred work */ - WARN_ON(atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); @@ -236,26 +230,16 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); - if (atomic_dec_and_test(&imr->num_deferred_work)) - wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) { struct mlx5_ib_mr *mr = container_of(work, struct mlx5_ib_mr, odp_destroy.work); + struct mlx5_ib_mr *imr = mr->parent; free_implicit_child_mr(mr, true); -} - -static void free_implicit_child_mr_rcu(struct rcu_head *head) -{ - struct mlx5_ib_mr *mr = - container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); - - /* Freeing a MR is a sleeping operation, so bounce to a work queue */ - INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); - queue_work(system_unbound_wq, &mr->odp_destroy.work); + mlx5r_deref_odp_mkey(&imr->mmkey); } static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) @@ -264,21 +248,14 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *imr = mr->parent; - xa_lock(&imr->implicit_children); - /* - * This can race with mlx5_ib_free_implicit_mr(), 
the first one to - * reach the xa lock wins the race and destroys the MR. - */ - if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != - mr) - goto out_unlock; + if (!refcount_inc_not_zero(&imr->mmkey.usecount)) + return; - atomic_inc(&imr->num_deferred_work); - call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, - free_implicit_child_mr_rcu); + xa_erase(&imr->implicit_children, idx); -out_unlock: - xa_unlock(&imr->implicit_children); + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); } static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, @@ -490,6 +467,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->parent = imr; odp->private = mr; + /* + * First refcount is owned by the xarray and second refconut + * is returned to the caller. + */ + refcount_set(&mr->mmkey.usecount, 2); + err = mlx5_ib_update_xlt(mr, 0, MLX5_IMR_MTT_ENTRIES, PAGE_SHIFT, @@ -500,27 +483,28 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - /* - * Once the store to either xarray completes any error unwind has to - * use synchronize_srcu(). Avoid this with xa_reserve() - */ - ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, - GFP_KERNEL); + xa_lock(&imr->implicit_children); + ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); - goto out_mr; + goto out_lock; } /* * Another thread beat us to creating the child mr, use * theirs. 
*/ - goto out_mr; + refcount_inc(&ret->mmkey.usecount); + goto out_lock; } + xa_unlock(&imr->implicit_children); mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; +out_lock: + xa_unlock(&imr->implicit_children); out_mr: mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: @@ -559,8 +543,6 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; - atomic_set(&imr->num_deferred_work, 0); - init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -572,8 +554,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (err) goto out_mr; - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), - &imr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &imr->mmkey); if (err) goto out_mr; @@ -591,51 +572,24 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct mlx5_ib_dev *dev = mr_to_mdev(imr); - struct list_head destroy_list; struct mlx5_ib_mr *mtt; - struct mlx5_ib_mr *tmp; unsigned long idx; - INIT_LIST_HEAD(&destroy_list); - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); - /* - * This stops the SRCU protected page fault path from touching either - * the imr or any children. The page fault path can only reach the - * children xarray via the imr. - */ - synchronize_srcu(&dev->odp_srcu); - /* * All work on the prefetch list must be completed, xa_erase() prevented * new work from being created. */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - + mlx5r_deref_wait_odp_mkey(&imr->mmkey); /* * At this point it is forbidden for any other thread to enter * pagefault_mr() on this imr. It is already forbidden to call * pagefault_mr() on an implicit child. Due to this additions to * implicit_children are prevented. 
+ * In addition, any new call to destroy_unused_implicit_child_mr() + * may return immediately. */ - /* - * Block destroy_unused_implicit_child_mr() from incrementing - * num_deferred_work. - */ - xa_lock(&imr->implicit_children); - xa_for_each (&imr->implicit_children, idx, mtt) { - __xa_erase(&imr->implicit_children, idx); - list_add(&mtt->odp_destroy.elm, &destroy_list); - } - xa_unlock(&imr->implicit_children); - - /* - * Wait for any concurrent destroy_unused_implicit_child_mr() to - * complete. - */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - /* * Fence the imr before we destroy the children. This allows us to * skip updating the XLT of the imr during destroy of the child mkey @@ -643,8 +597,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) */ mlx5_mr_cache_invalidate(imr); - list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + xa_for_each(&imr->implicit_children, idx, mtt) { + xa_erase(&imr->implicit_children, idx); free_implicit_child_mr(mtt, false); + } mlx5_mr_cache_free(dev, imr); ib_umem_odp_release(odp_imr); @@ -663,13 +619,39 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); - - wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); dma_fence_odp_mr(mr); } +/** + * mlx5_ib_fence_dmabuf_mr - Stop all access to the dmabuf MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. 
+ */ +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + mlx5r_deref_wait_odp_mkey(&mr->mmkey); + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + mlx5_mr_cache_invalidate(mr); + umem_dmabuf->private = NULL; + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (!mr->cache_ent) { + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) #define MLX5_PF_FLAGS_SNAPSHOT BIT(2) #define MLX5_PF_FLAGS_ENABLE BIT(3) @@ -747,8 +729,10 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *mtt; u64 len; + xa_lock(&imr->implicit_children); mtt = xa_load(&imr->implicit_children, idx); if (unlikely(!mtt)) { + xa_unlock(&imr->implicit_children); mtt = implicit_get_child_mr(imr, idx); if (IS_ERR(mtt)) { ret = PTR_ERR(mtt); @@ -756,6 +740,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, } upd_start_idx = min(upd_start_idx, idx); upd_len = idx - upd_start_idx + 1; + } else { + refcount_inc(&mtt->mmkey.usecount); + xa_unlock(&imr->implicit_children); } umem_odp = to_ib_umem_odp(mtt->umem); @@ -764,6 +751,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, ret = pagefault_real_mr(mtt, umem_odp, user_va, len, bytes_mapped, flags); + + mlx5r_deref_odp_mkey(&mtt->mmkey); + if (ret < 0) goto out; user_va += len; @@ -803,6 +793,44 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, return ret; } +static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + u32 xlt_flags = 0; + int err; + unsigned int page_size; + + if (flags & MLX5_PF_FLAGS_ENABLE) + xlt_flags |= 
MLX5_IB_UPD_XLT_ENABLE; + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) { + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + return err; + } + + page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc, + log_page_size, 0, + umem_dmabuf->umem.iova); + if (unlikely(page_size < PAGE_SIZE)) { + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + err = -EINVAL; + } else { + err = mlx5_ib_update_mr_pas(mr, xlt_flags); + } + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (err) + return err; + + if (bytes_mapped) + *bytes_mapped += bcnt; + + return ib_umem_num_pages(mr->umem); +} + /* * Returns: * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are @@ -817,10 +845,12 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; + if (mr->umem->is_dmabuf) + return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags); + if (!odp->is_implicit_odp) { u64 user_va; @@ -847,6 +877,16 @@ int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) return ret >= 0 ? 0 : ret; } +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + int ret; + + ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL, + MLX5_PF_FLAGS_ENABLE); + + return ret >= 0 ? 
0 : ret; +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -896,7 +936,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -905,14 +945,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->odp_srcu); - io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); if (!mmkey) { + xa_unlock(&dev->odp_mkeys); mlx5_ib_dbg( dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", @@ -925,12 +965,15 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, * faulted. */ ret = 0; - goto srcu_unlock; + goto end; } + refcount_inc(&mmkey->usecount); + xa_unlock(&dev->odp_mkeys); + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; - goto srcu_unlock; + goto end; } switch (mmkey->type) { @@ -939,7 +982,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) - goto srcu_unlock; + goto end; mlx5_update_odp_stats(mr, faults, ret); @@ -954,7 +997,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); ret = -EFAULT; - goto srcu_unlock; + goto end; } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + @@ -965,7 +1008,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, out = kzalloc(outlen, GFP_KERNEL); if (!out) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } cur_outlen = outlen; } @@ -975,7 +1018,7 @@ static int 
pagefault_single_data_segment(struct mlx5_ib_dev *dev, ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); if (ret) - goto srcu_unlock; + goto end; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); @@ -989,7 +1032,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, frame = kzalloc(sizeof(*frame), GFP_KERNEL); if (!frame) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } frame->key = be32_to_cpu(pklm->key); @@ -1008,7 +1051,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, default: mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type); ret = -EFAULT; - goto srcu_unlock; + goto end; } if (head) { @@ -1021,10 +1064,13 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, depth = frame->depth; kfree(frame); + mlx5r_deref_odp_mkey(mmkey); goto next_mr; } -srcu_unlock: +end: + if (mmkey) + mlx5r_deref_odp_mkey(mmkey); while (head) { frame = head; head = frame->next; @@ -1032,7 +1078,6 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, } kfree(out); - srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } @@ -1040,16 +1085,18 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, /** * Parse a series of data segments for page fault handling. * - * @pfault contains page fault information. - * @wqe points at the first data segment in the WQE. - * @wqe_end points after the end of the WQE. - * @bytes_mapped receives the number of bytes that the function was able to - * map. This allows the caller to decide intelligently whether - * enough memory was mapped to resolve the page fault - * successfully (e.g. enough for the next MTU, or the entire - * WQE). - * @total_wqe_bytes receives the total data size of this WQE in bytes (minus - * the committed bytes). + * @dev: Pointer to mlx5 IB device + * @pfault: contains page fault information. + * @wqe: points at the first data segment in the WQE. 
+ * @wqe_end: points after the end of the WQE. + * @bytes_mapped: receives the number of bytes that the function was able to + * map. This allows the caller to decide intelligently whether + * enough memory was mapped to resolve the page fault + * successfully (e.g. enough for the next MTU, or the entire + * WQE). + * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus + * the committed bytes). + * @receive_queue: receive WQE end of sg list * * Returns the number of pages loaded if positive, zero for an empty WQE, or a * negative error code. @@ -1738,8 +1785,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) - wake_up(&work->frags[i].mr->q_deferred_work); + mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey); + kvfree(work); } @@ -1749,27 +1796,30 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_core_mkey *mmkey; - struct ib_umem_odp *odp; - struct mlx5_ib_mr *mr; - - lockdep_assert_held(&dev->odp_srcu); + struct mlx5_ib_mr *mr = NULL; + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) - return NULL; + goto end; mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) - return NULL; - - odp = to_ib_umem_odp(mr->umem); + if (mr->ibmr.pd != pd) { + mr = NULL; + goto end; + } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && - !odp->umem.writable) - return NULL; + !mr->umem->writable) { + mr = NULL; + goto end; + } + refcount_inc(&mmkey->usecount); +end: + xa_unlock(&dev->odp_mkeys); return mr; } @@ -1777,17 +1827,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { struct prefetch_mr_work *work = container_of(w, struct 
prefetch_mr_work, work); - struct mlx5_ib_dev *dev; u32 bytes_mapped = 0; - int srcu_key; int ret; u32 i; /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ WARN_ON(!work->num_sge); - dev = mr_to_mdev(work->frags[0].mr); - /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, work->frags[i].length, &bytes_mapped, @@ -1796,7 +1841,6 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) continue; mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); } - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); } @@ -1820,9 +1864,6 @@ static bool init_prefetch_work(struct ib_pd *pd, work->num_sge = i; return false; } - - /* Keep the MR pointer will valid outside the SRCU */ - atomic_inc(&work->frags[i].mr->num_deferred_work); } work->num_sge = num_sge; return true; @@ -1833,42 +1874,35 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 bytes_mapped = 0; - int srcu_key; int ret = 0; u32 i; - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < num_sge; ++i) { struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) { - ret = -ENOENT; - goto out; - } + if (!mr) + return -ENOENT; ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); - if (ret < 0) - goto out; + if (ret < 0) { + mlx5r_deref_odp_mkey(&mr->mmkey); + return ret; + } mlx5_update_odp_stats(mr, prefetch, ret); + mlx5r_deref_odp_mkey(&mr->mmkey); } - ret = 0; -out: - srcu_read_unlock(&dev->odp_srcu, srcu_key); - return ret; + return 0; } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 
pf_flags = 0; struct prefetch_mr_work *work; - int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1884,13 +1918,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - srcu_key = srcu_read_lock(&dev->odp_srcu); if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); return -EINVAL; } queue_work(system_unbound_wq, &work->work); - srcu_read_unlock(&dev->odp_srcu, srcu_key); return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0cb7cc642d87..ec4b3f6a8222 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1078,6 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev, qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); MLX5_SET(qpc, qpc, uar_page, uar_index); + MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT); MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); /* Set "fast registration enabled" for all kernel QPs */ @@ -1172,10 +1173,72 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq) sq->flow_rule = NULL; } +static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + 
MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq) +{ + bool fr_supported = + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT; + + if (recv_cq && + recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (send_cq && + send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (ts_format == MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING && + !fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return ts_format; +} + static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct mlx5_ib_sq *sq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer; __be64 *pas; @@ -1187,6 +1250,11 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int err; unsigned int page_offset_quantized; unsigned long page_size; + int ts_format; + + ts_format = get_sq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, ubuffer->buf_size, 0); @@ -1215,6 +1283,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (MLX5_CAP_ETH(dev->mdev, 
multi_pkt_send_wqe)) MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, ts_format, ts_format); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); @@ -1263,7 +1332,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; @@ -1274,9 +1343,14 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct ib_umem *umem = rq->base.ubuffer.umem; unsigned int page_offset_quantized; unsigned long page_size = 0; + int ts_format; size_t inlen; int err; + ts_format = get_rq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, @@ -1296,6 +1370,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(rqc, rqc, vsd, 1); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, ts_format, ts_format); MLX5_SET(rqc, rqc, flush_in_error_en, 1); MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); @@ -1393,10 +1468,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u32 *in, size_t inlen, - struct ib_pd *pd, + u32 *in, size_t inlen, struct ib_pd *pd, struct ib_udata *udata, - struct mlx5_ib_create_qp_resp *resp) + struct mlx5_ib_create_qp_resp *resp, + struct ib_qp_init_attr *init_attr) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; @@ 
-1415,7 +1490,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) return err; - err = create_raw_packet_qp_sq(dev, udata, sq, in, pd); + err = create_raw_packet_qp_sq(dev, udata, sq, in, pd, + to_mcq(init_attr->send_cq)); if (err) goto err_destroy_tis; @@ -1437,7 +1513,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd, + to_mcq(init_attr->recv_cq)); if (err) goto err_destroy_sq; @@ -1907,6 +1984,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_cq *recv_cq; unsigned long flags; struct mlx5_ib_qp_base *base; + int ts_format; int mlx5_st; void *qpc; u32 *in; @@ -1944,6 +2022,13 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz))) return -EINVAL; + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + if (ts_format < 0) + return ts_format; + } + err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, ¶ms->resp, &inlen, base, ucmd); if (err) @@ -1992,6 +2077,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + MLX5_SET(qpc, qpc, ts_format, ts_format); + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) { @@ -2046,7 +2134,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, - ¶ms->resp); + ¶ms->resp, init_attr); 
} else err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); @@ -2432,9 +2520,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case MLX5_IB_QPT_HW_GSI: case IB_QPT_DRIVER: case IB_QPT_GSI: - if (dev->profile == &raw_eth_profile) - goto out; - fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2629,10 +2714,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int create_flags = attr->create_flags; bool cond; - if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile) - if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST) - return -EINVAL; - if (qp_type == MLX5_IB_QPT_DCT) return (create_flags) ? -EINVAL : 0; @@ -3076,6 +3157,8 @@ static int ib_to_mlx5_rate_map(u8 rate) return 4; case IB_RATE_50_GBPS: return 5; + case IB_RATE_400_GBPS: + return 6; default: return rate + MLX5_STAT_RATE_OFFSET; } @@ -3183,11 +3266,13 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, alt ? attr->alt_pkey_index : attr->pkey_index); if (ah_flags & IB_AH_GRH) { - if (grh->sgid_index >= - dev->mdev->port_caps[port - 1].gid_table_len) { + const struct ib_port_immutable *immutable; + + immutable = ib_port_immutable_read(&dev->ib_dev, port); + if (grh->sgid_index >= immutable->gid_tbl_len) { pr_err("sgid_index (%u) too large. 
max is %d\n", grh->sgid_index, - dev->mdev->port_caps[port - 1].gid_table_len); + immutable->gid_tbl_len); return -EINVAL; } } @@ -4211,6 +4296,23 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } +static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + enum ib_qp_type qp_type) +{ + if (dev->profile != &raw_eth_profile) + return true; + + if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR) + return true; + + /* Internal QP used for wc testing, with NOPs in wq */ + if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) + return true; + + return false; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4221,7 +4323,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; - int port; + + if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type)) + return -EOPNOTSUPP; if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; @@ -4263,10 +4367,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { - port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - } - if (qp->flags & IB_QP_CREATE_SOURCE_QPN) { if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", @@ -4295,14 +4395,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (attr_mask & IB_QP_PKEY_INDEX) { - port = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; - if (attr->pkey_index >= - dev->mdev->port_caps[port - 1].pkey_table_len) { - mlx5_ib_dbg(dev, "invalid pkey index %d\n", - attr->pkey_index); - goto out; - } + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->pkey_table_len) { + mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index); + goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && @@ -5376,7 +5472,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) handle_drain_completion(cq, &rdrain, dev); } -/** +/* * Bind a qp to a counter. If @counter is NULL then bind the qp to * the default counter */ diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index d6038fb6c50c..cf2852cba45c 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -1369,7 +1369,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, handle_qpt_uc(wr, &seg, &size); break; case IB_QPT_SMI: - if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { + if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) { mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); err = -EPERM; *bad_wr = wr; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 9dde70373a55..3cb4febaad0f 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -617,18 +617,18 @@ static inline bool qedr_qp_has_srq(struct qedr_qp *qp) static inline bool qedr_qp_has_sq(struct qedr_qp *qp) { if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT) - return 0; + return false; - return 1; + return true; } static inline bool qedr_qp_has_rq(struct qedr_qp *qp) { if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp)) - return 0; + return false; - return 1; + return true; } static inline struct qedr_user_mmap_entry * diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 
f5542d703ef9..13e5e6bbec99 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -586,8 +586,8 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; qedr_inc_sw_prod(&qp->sq); DP_DEBUG(qp->dev, QEDR_MSG_GSI, - "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n", - wr->opcode, in_irq(), irqs_disabled(), wr->wr_id); + "gsi post send: opcode=%d, wr_id=%llx\n", wr->opcode, + wr->wr_id); } else { DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc); rc = -EAGAIN; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 92eeea5679e2..84fc4dcc5399 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -151,7 +151,7 @@ int qib_count_units(int *npresentp, int *nupp) /** * qib_wait_linkstate - wait for an IB link state change to occur - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @state: the state to wait for * @msecs: the number of milliseconds to wait * diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 5838b3bf34b9..bf660c001b6d 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -47,7 +47,7 @@ * qib_eeprom_read - receives bytes from the eeprom via I2C * @dd: the qlogic_ib device * @eeprom_offset: address to read from - * @buffer: where to store result + * @buff: where to store result * @len: number of bytes to receive */ int qib_eeprom_read(struct qib_devdata *dd, u8 eeprom_offset, @@ -94,7 +94,7 @@ static int eeprom_write_with_enable(struct qib_devdata *dd, u8 offset, * qib_eeprom_write - writes data to the eeprom via I2C * @dd: the qlogic_ib device * @eeprom_offset: where to place data - * @buffer: data to write + * @buff: data to write * @len: number of bytes to write */ int qib_eeprom_write(struct qib_devdata *dd, u8 
eeprom_offset, diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 44150be215bf..b35e1174be22 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1223,7 +1223,7 @@ static void qib_set_ib_6120_lstate(struct qib_pportdata *ppd, u16 linkcmd, /** * qib_6120_bringup_serdes - bring up the serdes - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device */ static int qib_6120_bringup_serdes(struct qib_pportdata *ppd) { @@ -1412,7 +1412,7 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) /** * qib_6120_setup_setextled - set the state of the two external LEDs - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @on: whether the link is up or not * * The exact combo of LEDs if on is true is determined by looking @@ -1823,7 +1823,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) * qib_6120_put_tid - write a TID in chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) * for expected * @pa: physical address of in memory buffer; tidinvalid if freeing * @@ -1890,7 +1890,7 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, * qib_6120_put_tid_2 - write a TID in chip, Revision 2 or higher * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) * for expected * @pa: physical address of in memory buffer; tidinvalid if freeing * @@ -1932,7 +1932,7 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_6120_clear_tids - clear all TID entries for a context, expected and eager * @dd: the qlogic_ib device - * @ctxt: the context 
+ * @rcd: the context * * clear all TID entries for a context, expected and eager. * Used from qib_close(). On this chip, TIDs are only 32 bits, @@ -2008,7 +2008,7 @@ int __attribute__((weak)) qib_unordered_wc(void) /** * qib_6120_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithms. @@ -2270,8 +2270,8 @@ static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_6120 - read a per-port counter - * @dd: the qlogic_ib device - * @creg: the counter to snapshot + * @ppd: the qlogic_ib device + * @reg: the counter to snapshot */ static u64 qib_portcntr_6120(struct qib_pportdata *ppd, u32 reg) { @@ -2610,7 +2610,7 @@ static void qib_chk_6120_errormask(struct qib_devdata *dd) /** * qib_get_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * This needs more work; in particular, decision on whether we really * need traffic_wds done the way it is diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 0a6f26d4cb31..229dcd6ead95 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1701,7 +1701,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) /** * qib_setup_7220_setextled - set the state of the two external LEDs - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @on: whether the link is up or not * * The exact combo of LEDs if on is true is determined by looking @@ -2146,7 +2146,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) * qib_7220_put_tid - write a TID to the chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * 
@tidtype: 0 for eager, 1 for expected + * @type: 0 for eager, 1 for expected * @pa: physical address of in memory buffer; tidinvalid if freeing */ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, @@ -2180,7 +2180,7 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_7220_clear_tids - clear all TID entries for a ctxt, expected and eager * @dd: the qlogic_ib device - * @ctxt: the ctxt + * @rcd: the ctxt * * clear all TID entries for a ctxt, expected and eager. * Used from qib_close(). On this chip, TIDs are only 32 bits, @@ -2238,7 +2238,7 @@ static void qib_7220_tidtemplate(struct qib_devdata *dd) /** * qib_init_7220_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithims. @@ -2896,8 +2896,8 @@ static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_7220 - read a per-port counter - * @dd: the qlogic_ib device - * @creg: the counter to snapshot + * @ppd: the qlogic_ib device + * @reg: the counter to snapshot */ static u64 qib_portcntr_7220(struct qib_pportdata *ppd, u32 reg) { @@ -3232,7 +3232,7 @@ static u32 qib_read_7220portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, /** * qib_get_7220_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * This needs more work; in particular, decision on whether we really * need traffic_wds done the way it is @@ -4468,7 +4468,7 @@ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) /** * qib_init_iba7220_funcs - set up the chip-specific function pointers - * @dev: the pci_dev for qlogic_ib device + * @pdev: the pci_dev for qlogic_ib device * @ent: pci_device_id struct for this 
dev * * This is global, and is called directly at init to set up the diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 189a0ce6056a..9fe6ea75b45e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2514,7 +2514,7 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) /** * qib_7322_quiet_serdes - set serdes to txidle - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * Called when driver is being unloaded */ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) @@ -3760,7 +3760,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) * qib_7322_put_tid - write a TID to the chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: 0 for eager, 1 for expected + * @type: 0 for eager, 1 for expected * @pa: physical address of in memory buffer; tidinvalid if freeing */ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, @@ -3796,7 +3796,7 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_7322_clear_tids - clear all TID entries for a ctxt, expected and eager * @dd: the qlogic_ib device - * @ctxt: the ctxt + * @rcd: the ctxt * * clear all TID entries for a ctxt, expected and eager. * Used from qib_close(). @@ -3861,7 +3861,7 @@ static void qib_7322_tidtemplate(struct qib_devdata *dd) /** * qib_init_7322_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithims. 
@@ -4724,7 +4724,7 @@ static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_7322 - read a per-port chip counter * @ppd: the qlogic_ib pport - * @creg: the counter to read (not a chip offset) + * @reg: the counter to read (not a chip offset) */ static u64 qib_portcntr_7322(struct qib_pportdata *ppd, u32 reg) { @@ -5096,7 +5096,7 @@ static u32 qib_read_7322portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, /** * qib_get_7322_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * VESTIGIAL IBA7322 has no "small fast counters", so the only * real purpose of this function is to maintain the notion of @@ -7175,7 +7175,7 @@ static int qib_7322_tempsense_rd(struct qib_devdata *dd, int regnum) /** * qib_init_iba7322_funcs - set up the chip-specific function pointers - * @dev: the pci_dev for qlogic_ib device + * @pdev: the pci_dev for qlogic_ib device * @ent: pci_device_id struct for this dev * * Also allocates, inits, and returns the devdata struct for this diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 65c3b964ad1b..85c3187d796d 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -40,9 +40,9 @@ /** * qib_format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer */ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) { @@ -53,11 +53,11 @@ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) /** * qib_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - 
* @msg message buffer - * @msgl message buffer length + * @hwerrs: hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length */ void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t msgl) diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index f83e331977f8..44e2f813024a 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -886,7 +886,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, /** * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @key: the PKEY index * * Return true if this was the last reference and the hardware table entry @@ -916,7 +916,7 @@ static int rm_pkey(struct qib_pportdata *ppd, u16 key) /** * add_pkey - add the given PKEY to the hardware table - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @key: the PKEY * * Return an error code if unable to add the entry, zero if no change, @@ -2346,8 +2346,10 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, * @port: the port number this packet came in on * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply + * @in: the incoming MAD + * @out: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: unused * * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not * interested in processing. 
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 2e07b3749b88..cb2a02d671e2 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -181,7 +181,7 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) pci_set_drvdata(dd->pcidev, NULL); } -/** +/* * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that * correctly. diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 8d0563ef5be1..ca39a029e4af 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -207,7 +207,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, return ret; } -/** +/* * qib_free_all_qps - check for QPs still in use */ unsigned qib_free_all_qps(struct rvt_dev_info *rdi) @@ -376,9 +376,9 @@ void qib_flush_qp_waiters(struct rvt_qp *qp) /** * qib_check_send_wqe - validate wr/wqe - * @qp - The qp - * @wqe - The built wqe - * @call_send - Determine if the send should be posted or scheduled + * @qp: The qp + * @wqe: The built wqe + * @call_send: Determine if the send should be posted or scheduled * * Returns 0 on success, -EINVAL on failure */ @@ -418,8 +418,8 @@ static const char * const qp_type_str[] = { /** * qib_qp_iter_print - print information to seq_file - * @s - the seq_file - * @iter - the iterator + * @s: the seq_file + * @iter: the iterator */ void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 3915e5b4a9bc..a1c20ffb4490 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -207,6 +207,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, /** * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP + * @flags: unused * * Assumes the 
s_lock is held. * @@ -992,7 +993,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, return wqe; } -/** +/* * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK @@ -1259,6 +1260,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, * @psn: the packet sequence number for this packet * @hdrsize: the header length * @pmtu: the path MTU + * @rcd: the context pointer * * This is called from qib_rc_rcv() to process an incoming RC response * packet for the given QP. @@ -1480,6 +1482,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, * @opcode: the opcode for this packet * @psn: the packet sequence number for this packet * @diff: the difference between the PSN and the expected PSN + * @rcd: the context pointer * * This is called from qib_rc_rcv() to process an unexpected * incoming RC packet for the given QP. diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index f5698664419b..97b8a2bf5c69 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -168,6 +168,7 @@ static void stop_cmd(struct qib_devdata *dd); /** * rd_byte - read a byte, sending STOP on last, else ACK * @dd: the qlogic_ib device + * @last: identifies the last read * * Returns byte shifted out of device */ diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 29785eb84646..6a8148851f21 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -377,6 +377,7 @@ void qib_sendbuf_done(struct qib_devdata *dd, unsigned n) * @start: the starting send buffer number * @len: the number of send buffers * @avail: true if the buffers are available for kernel use, false otherwise + * @rcd: the context pointer */ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, unsigned len, u32 avail, struct qib_ctxtdata *rcd) diff --git 
a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 554af4273a13..8e2bda77d8b9 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -40,6 +40,7 @@ /** * qib_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP + * @flags: unused * * Assumes the s_lock is held. * diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 93ca21347959..81eda94bd279 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -222,6 +222,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) /** * qib_make_ud_req - construct a UD request packet * @qp: the QP + * @flags: flags to modify and pass back to caller * * Assumes the s_lock is held. * diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 4c24e83f3175..5d6cf7427431 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -43,7 +43,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, unpin_user_pages_dirty_lock(p, num_pages, dirty); } -/** +/* * qib_map_page - a safety wrapper around pci_map_page() * * A dma_addr of all 0's is interpreted by the chip as "disabled". diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index f6c01bad5a74..8e0de265ad57 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1067,7 +1067,7 @@ int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, /** * qib_get_counters - get various chip counters - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @cntrs: counters are placed here * * Return the counters needed by recv_pma_get_portcounters(). 
@@ -1675,7 +1675,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) /** * _qib_schedule_send - schedule progress - * @qp - the qp + * @qp: the qp * * This schedules progress w/o regard to the s_flags. * @@ -1694,7 +1694,7 @@ bool _qib_schedule_send(struct rvt_qp *qp) /** * qib_schedule_send - schedule progress - * @qp - the qp + * @qp: the qp * * This schedules qp progress. The s_lock * should be held. diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 00a330909bb3..4b6019e7de67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -474,7 +474,6 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / sizeof(struct pvrdma_cqne); unsigned int head; - unsigned long flags; dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n"); @@ -483,11 +482,11 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) struct pvrdma_cq *cq; cqne = get_cqne(dev, head); - spin_lock_irqsave(&dev->cq_tbl_lock, flags); + spin_lock(&dev->cq_tbl_lock); cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; if (cq) refcount_inc(&cq->refcnt); - spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + spin_unlock(&dev->cq_tbl_lock); if (cq && cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 20cc0799ac4b..5138afca067f 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -371,7 +371,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) return ret; } -/** +/* * rvt_resize_cq - change the size of the CQ * @ibcq: the completion queue * diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 108c71e3ac23..fa5be13a4394 100644 --- 
a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -56,8 +56,11 @@ * @port_num: the port number this packet came in on, 1 based from ib core * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply + * @in: the incoming MAD + * @in_mad_size: size of the incoming MAD reply + * @out: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: unused * * Note that the verbs framework has already done the MAD sanity checks, * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 5233a63d99a6..951abac13dbb 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -180,7 +180,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, } EXPORT_SYMBOL(rvt_mcast_find); -/** +/* * rvt_mcast_add - insert mcast GID into table and attach QP struct * @mcast: the mcast GID table * @mqp: the QP to attach diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 90fc234f489a..601d18dda1f5 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -369,6 +369,7 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) * @pd: protection domain for this memory region * @start: starting userspace address * @length: length of region to register + * @virt_addr: associated virtual address * @mr_access_flags: access flags for this memory region * @udata: unused by the driver * @@ -438,8 +439,8 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, /** * rvt_dereg_clean_qp_cb - callback from iterator - * @qp - the qp - * @v - the mregion (as u64) + * @qp: the qp + * @v: the mregion (as u64) * * This routine fields the callback for all QPs and * for QPs in the same 
PD as the MR will call the @@ -457,7 +458,7 @@ static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v) /** * rvt_dereg_clean_qps - find QPs for reference cleanup - * @mr - the MR that is being deregistered + * @mr: the MR that is being deregistered * * This routine iterates RC QPs looking for references * to the lkey noted in mr. @@ -471,8 +472,8 @@ static void rvt_dereg_clean_qps(struct rvt_mregion *mr) /** * rvt_check_refs - check references - * @mr - the megion - * @t - the caller identification + * @mr: the megion + * @t: the caller identification * * This routine checks MRs holding a reference during * when being de-registered. @@ -506,8 +507,8 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t) /** * rvt_mr_has_lkey - is MR - * @mr - the mregion - * @lkey - the lkey + * @mr: the mregion + * @lkey: the lkey */ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) { @@ -516,8 +517,8 @@ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) /** * rvt_ss_has_lkey - is mr in sge tests - * @ss - the sge state - * @lkey + * @ss: the sge state + * @lkey: the lkey * * This code tests for an MR in the indicated * sge state. @@ -540,7 +541,7 @@ bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) /** * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free - * + * @udata: unused by the driver * * Note that this is called to free MRs created by rvt_get_dma_mr() * or rvt_reg_user_mr(). 
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 22fa9bde5419..9d13db68283c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -156,7 +156,7 @@ void rvt_wss_exit(struct rvt_dev_info *rdi) rdi->wss = NULL; } -/** +/* * rvt_wss_init - Init wss data structures * * Return: 0 on success @@ -323,6 +323,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, /** * init_qpn_table - initialize the QP number table for a device + * @rdi: rvt dev struct * @qpt: the QPN table */ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) @@ -524,6 +525,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * IB_QPT_SMI/IB_QPT_GSI * @rdi: rvt device info structure * @qpt: queue pair number table pointer + * @type: the QP type * @port_num: IB port number, 1 based, comes from core * @exclude_prefix: prefix of special queue pair number being allocated * @@ -655,8 +657,8 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) /** * rvt_swqe_has_lkey - return true if lkey is used by swqe - * @wqe - the send wqe - * @lkey - the lkey + * @wqe: the send wqe + * @lkey: the lkey * * Test the swqe for using lkey */ @@ -675,8 +677,8 @@ static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey) /** * rvt_qp_sends_has_lkey - return true is qp sends use lkey - * @qp - the rvt_qp - * @lkey - the lkey + * @qp: the rvt_qp + * @lkey: the lkey */ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) { @@ -699,8 +701,8 @@ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) /** * rvt_qp_acks_has_lkey - return true if acks have lkey - * @qp - the qp - * @lkey - the lkey + * @qp: the qp + * @lkey: the lkey */ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) { @@ -716,10 +718,10 @@ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) return false; } -/* +/** * rvt_qp_mr_clean - clean up remote ops for lkey - * @qp - the qp - * @lkey - the lkey 
that is being de-registered + * @qp: the qp + * @lkey: the lkey that is being de-registered * * This routine checks if the lkey is being used by * the qp. @@ -853,6 +855,7 @@ int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node, /** * rvt_init_qp - initialize the QP state to the reset state + * @rdi: rvt dev struct * @qp: the QP to init or reinit * @type: the QP type * @@ -907,6 +910,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /** * _rvt_reset_qp - initialize the QP state to the reset state + * @rdi: rvt dev struct * @qp: the QP to reset * @type: the QP type * @@ -1726,6 +1730,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy + * @udata: unused by the driver * * Note that this can be called while the QP is actively sending or * receiving! @@ -1901,9 +1906,9 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, /** * rvt_qp_valid_operation - validate post send wr request - * @qp - the qp - * @post-parms - the post send table for the driver - * @wr - the work request + * @qp: the qp + * @post_parms: the post send table for the driver + * @wr: the work request * * The routine validates the operation based on the * validation table an returns the length of the operation @@ -2013,6 +2018,7 @@ static inline int rvt_qp_is_avail( * rvt_post_one_wr - post one RC, UC, or UD send work request * @qp: the QP to post on * @wr: the work request to send + * @call_send: kick the send engine into gear */ static int rvt_post_one_wr(struct rvt_qp *qp, const struct ib_send_wr *wr, @@ -2612,7 +2618,7 @@ EXPORT_SYMBOL(rvt_stop_rc_timers); /** * rvt_stop_rnr_timer - stop an rnr timer - * @qp - the QP + * @qp: the QP * * stop an rnr timer and return if the timer * had been pending. 
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 64d98bf238ab..2a7c2f12d372 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -67,7 +67,7 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi) /** * rvt_create_srq - create a shared receive queue - * @ibpd: the protection domain of the SRQ to create + * @ibsrq: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ * @@ -311,7 +311,8 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return ret; } -/** rvt_query_srq - query srq data +/** + * rvt_query_srq - query srq data * @ibsrq: srq to query * @attr: return info in attr * @@ -330,7 +331,7 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) /** * rvt_destroy_srq - destory an srq * @ibsrq: srq object to destroy - * + * @udata: user data for libibverbs.so */ int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 49cec85a372a..8fd0128a9336 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -294,7 +294,7 @@ static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) /** * rvt_dealloc_ucontext - Free a user context - * @context - Free this + * @context: Unused */ static void rvt_dealloc_ucontext(struct ib_ucontext *context) { diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 0a1e6393250b..a8ac791a1bb9 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -515,6 +515,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) while ((skb = skb_dequeue(&qp->resp_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } while ((wqe = queue_head(qp->sq.queue))) { @@ -527,6 +528,17 @@ static void 
rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) } } +static void free_pkt(struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + struct rxe_qp *qp = pkt->qp; + struct ib_device *dev = qp->ibqp.device; + + kfree_skb(skb); + rxe_drop_ref(qp); + ib_device_put(dev); +} + int rxe_completer(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; @@ -624,11 +636,8 @@ int rxe_completer(void *arg) break; case COMPST_DONE: - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } + if (pkt) + free_pkt(pkt); goto done; case COMPST_EXIT: @@ -671,12 +680,8 @@ int rxe_completer(void *arg) */ if (qp->comp.started_retry && !qp->comp.timeout_retry) { - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto done; } @@ -699,13 +704,8 @@ int rxe_completer(void *arg) qp->comp.started_retry = 1; rxe_run_task(&qp->req.task, 0); } - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto done; } else { @@ -726,9 +726,7 @@ int rxe_completer(void *arg) mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; + free_pkt(pkt); goto exit; } else { rxe_counter_inc(rxe, @@ -742,13 +740,8 @@ int rxe_completer(void *arg) WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto exit; } } diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index 3b483b75dfe3..e432f9e37795 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -22,7 +22,6 @@ struct rxe_pkt_info { u16 paylen; /* length of bth - icrc */ u8 port_num; /* port pkt received on */ u8 opcode; /* bth opcode of packet */ - u8 offset; /* bth offset from pkt->hdr */ }; /* Macros should be used only for received 
skb */ @@ -280,134 +279,134 @@ static inline void __bth_set_psn(void *arg, u32 psn) static inline u8 bth_opcode(struct rxe_pkt_info *pkt) { - return __bth_opcode(pkt->hdr + pkt->offset); + return __bth_opcode(pkt->hdr); } static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode) { - __bth_set_opcode(pkt->hdr + pkt->offset, opcode); + __bth_set_opcode(pkt->hdr, opcode); } static inline u8 bth_se(struct rxe_pkt_info *pkt) { - return __bth_se(pkt->hdr + pkt->offset); + return __bth_se(pkt->hdr); } static inline void bth_set_se(struct rxe_pkt_info *pkt, int se) { - __bth_set_se(pkt->hdr + pkt->offset, se); + __bth_set_se(pkt->hdr, se); } static inline u8 bth_mig(struct rxe_pkt_info *pkt) { - return __bth_mig(pkt->hdr + pkt->offset); + return __bth_mig(pkt->hdr); } static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig) { - __bth_set_mig(pkt->hdr + pkt->offset, mig); + __bth_set_mig(pkt->hdr, mig); } static inline u8 bth_pad(struct rxe_pkt_info *pkt) { - return __bth_pad(pkt->hdr + pkt->offset); + return __bth_pad(pkt->hdr); } static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad) { - __bth_set_pad(pkt->hdr + pkt->offset, pad); + __bth_set_pad(pkt->hdr, pad); } static inline u8 bth_tver(struct rxe_pkt_info *pkt) { - return __bth_tver(pkt->hdr + pkt->offset); + return __bth_tver(pkt->hdr); } static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver) { - __bth_set_tver(pkt->hdr + pkt->offset, tver); + __bth_set_tver(pkt->hdr, tver); } static inline u16 bth_pkey(struct rxe_pkt_info *pkt) { - return __bth_pkey(pkt->hdr + pkt->offset); + return __bth_pkey(pkt->hdr); } static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey) { - __bth_set_pkey(pkt->hdr + pkt->offset, pkey); + __bth_set_pkey(pkt->hdr, pkey); } static inline u32 bth_qpn(struct rxe_pkt_info *pkt) { - return __bth_qpn(pkt->hdr + pkt->offset); + return __bth_qpn(pkt->hdr); } static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn) { - 
__bth_set_qpn(pkt->hdr + pkt->offset, qpn); + __bth_set_qpn(pkt->hdr, qpn); } static inline int bth_fecn(struct rxe_pkt_info *pkt) { - return __bth_fecn(pkt->hdr + pkt->offset); + return __bth_fecn(pkt->hdr); } static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn) { - __bth_set_fecn(pkt->hdr + pkt->offset, fecn); + __bth_set_fecn(pkt->hdr, fecn); } static inline int bth_becn(struct rxe_pkt_info *pkt) { - return __bth_becn(pkt->hdr + pkt->offset); + return __bth_becn(pkt->hdr); } static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn) { - __bth_set_becn(pkt->hdr + pkt->offset, becn); + __bth_set_becn(pkt->hdr, becn); } static inline u8 bth_resv6a(struct rxe_pkt_info *pkt) { - return __bth_resv6a(pkt->hdr + pkt->offset); + return __bth_resv6a(pkt->hdr); } static inline void bth_set_resv6a(struct rxe_pkt_info *pkt) { - __bth_set_resv6a(pkt->hdr + pkt->offset); + __bth_set_resv6a(pkt->hdr); } static inline int bth_ack(struct rxe_pkt_info *pkt) { - return __bth_ack(pkt->hdr + pkt->offset); + return __bth_ack(pkt->hdr); } static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack) { - __bth_set_ack(pkt->hdr + pkt->offset, ack); + __bth_set_ack(pkt->hdr, ack); } static inline void bth_set_resv7(struct rxe_pkt_info *pkt) { - __bth_set_resv7(pkt->hdr + pkt->offset); + __bth_set_resv7(pkt->hdr); } static inline u32 bth_psn(struct rxe_pkt_info *pkt) { - return __bth_psn(pkt->hdr + pkt->offset); + return __bth_psn(pkt->hdr); } static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn) { - __bth_set_psn(pkt->hdr + pkt->offset, psn); + __bth_set_psn(pkt->hdr, psn); } static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se, int mig, int pad, u16 pkey, u32 qpn, int ack_req, u32 psn) { - struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset); + struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr); bth->opcode = opcode; bth->flags = (pad << 4) & BTH_PAD_MASK; @@ -448,14 +447,14 @@ static inline void __rdeth_set_een(void 
*arg, u32 een) static inline u8 rdeth_een(struct rxe_pkt_info *pkt) { - return __rdeth_een(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); + return __rdeth_een(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); } static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een) { - __rdeth_set_een(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); + __rdeth_set_een(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); } /****************************************************************************** @@ -499,26 +498,26 @@ static inline void __deth_set_sqp(void *arg, u32 sqp) static inline u32 deth_qkey(struct rxe_pkt_info *pkt) { - return __deth_qkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH]); + return __deth_qkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); } static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey) { - __deth_set_qkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); + __deth_set_qkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); } static inline u32 deth_sqp(struct rxe_pkt_info *pkt) { - return __deth_sqp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH]); + return __deth_sqp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); } static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp) { - __deth_set_sqp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); + __deth_set_sqp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); } /****************************************************************************** @@ -574,38 +573,38 @@ static inline void __reth_set_len(void *arg, u32 len) static inline u64 reth_va(struct rxe_pkt_info *pkt) { - return __reth_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_va(struct 
rxe_pkt_info *pkt, u64 va) { - __reth_set_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); + __reth_set_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); } static inline u32 reth_rkey(struct rxe_pkt_info *pkt) { - return __reth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __reth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); + __reth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); } static inline u32 reth_len(struct rxe_pkt_info *pkt) { - return __reth_len(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_len(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) { - __reth_set_len(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); + __reth_set_len(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); } /****************************************************************************** @@ -676,50 +675,50 @@ static inline void __atmeth_set_comp(void *arg, u64 comp) static inline u64 atmeth_va(struct rxe_pkt_info *pkt) { - return __atmeth_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va) { - __atmeth_set_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); + __atmeth_set_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); } static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt) { - return __atmeth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_rkey(pkt->hdr + + 
rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __atmeth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); + __atmeth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); } static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt) { - return __atmeth_swap_add(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_swap_add(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add) { - __atmeth_set_swap_add(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); + __atmeth_set_swap_add(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); } static inline u64 atmeth_comp(struct rxe_pkt_info *pkt) { - return __atmeth_comp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_comp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp) { - __atmeth_set_comp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); + __atmeth_set_comp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); } /****************************************************************************** @@ -780,26 +779,26 @@ static inline void __aeth_set_msn(void *arg, u32 msn) static inline u8 aeth_syn(struct rxe_pkt_info *pkt) { - return __aeth_syn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH]); + return __aeth_syn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); } static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn) { - __aeth_set_syn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); + __aeth_set_syn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); } static inline u32 aeth_msn(struct rxe_pkt_info *pkt) { 
- return __aeth_msn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH]); + return __aeth_msn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); } static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn) { - __aeth_set_msn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); + __aeth_set_msn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); } /****************************************************************************** @@ -825,14 +824,14 @@ static inline void __atmack_set_orig(void *arg, u64 orig) static inline u64 atmack_orig(struct rxe_pkt_info *pkt) { - return __atmack_orig(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); + return __atmack_orig(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); } static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig) { - __atmack_set_orig(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); + __atmack_set_orig(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); } /****************************************************************************** @@ -858,14 +857,14 @@ static inline void __immdt_set_imm(void *arg, __be32 imm) static inline __be32 immdt_imm(struct rxe_pkt_info *pkt) { - return __immdt_imm(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); + return __immdt_imm(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); } static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm) { - __immdt_set_imm(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); + __immdt_set_imm(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); } /****************************************************************************** @@ -891,14 +890,14 @@ static inline void __ieth_set_rkey(void *arg, u32 rkey) static inline u32 ieth_rkey(struct rxe_pkt_info *pkt) { - return __ieth_rkey(pkt->hdr + pkt->offset - + 
rxe_opcode[pkt->opcode].offset[RXE_IETH]); + return __ieth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IETH]); } static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __ieth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); + __ieth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); } enum rxe_hdr_length { @@ -915,13 +914,12 @@ enum rxe_hdr_length { static inline size_t header_size(struct rxe_pkt_info *pkt) { - return pkt->offset + rxe_opcode[pkt->opcode].length; + return rxe_opcode[pkt->opcode].length; } static inline void *payload_addr(struct rxe_pkt_info *pkt) { - return pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; + return pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; } static inline size_t payload_size(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index c02315aed8d1..0ea9a5aa4ec0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -7,45 +7,61 @@ #include "rxe.h" #include "rxe_loc.h" +/* caller should hold mc_grp_pool->pool_lock */ +static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe, + struct rxe_pool *pool, + union ib_gid *mgid) +{ + int err; + struct rxe_mc_grp *grp; + + grp = rxe_alloc_locked(&rxe->mc_grp_pool); + if (!grp) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&grp->qp_list); + spin_lock_init(&grp->mcg_lock); + grp->rxe = rxe; + rxe_add_key_locked(grp, mgid); + + err = rxe_mcast_add(rxe, mgid); + if (unlikely(err)) { + rxe_drop_key_locked(grp); + rxe_drop_ref(grp); + return ERR_PTR(err); + } + + return grp; +} + int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, struct rxe_mc_grp **grp_p) { int err; struct rxe_mc_grp *grp; + struct rxe_pool *pool = &rxe->mc_grp_pool; + unsigned long flags; - if (rxe->attr.max_mcast_qp_attach == 0) { - err = -EINVAL; - goto err1; - } + if 
(rxe->attr.max_mcast_qp_attach == 0) + return -EINVAL; - grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); + write_lock_irqsave(&pool->pool_lock, flags); + + grp = rxe_pool_get_key_locked(pool, mgid); if (grp) goto done; - grp = rxe_alloc(&rxe->mc_grp_pool); - if (!grp) { - err = -ENOMEM; - goto err1; + grp = create_grp(rxe, pool, mgid); + if (IS_ERR(grp)) { + write_unlock_irqrestore(&pool->pool_lock, flags); + err = PTR_ERR(grp); + return err; } - INIT_LIST_HEAD(&grp->qp_list); - spin_lock_init(&grp->mcg_lock); - grp->rxe = rxe; - - rxe_add_key(grp, mgid); - - err = rxe_mcast_add(rxe, mgid); - if (err) - goto err2; - done: + write_unlock_irqrestore(&pool->pool_lock, flags); *grp_p = grp; return 0; - -err2: - rxe_drop_ref(grp); -err1: - return err; } int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 943914c2a50c..0701bd1ffd1a 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -153,15 +153,16 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; + struct rxe_dev *rxe; struct net_device *ndev = skb->dev; - struct net_device *rdev = ndev; - struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - if (!rxe && is_vlan_dev(rdev)) { - rdev = vlan_dev_real_dev(ndev); - rxe = rxe_get_dev_from_net(rdev); - } + /* takes a reference on rxe->ib_dev + * drop when skb is freed + */ + rxe = rxe_get_dev_from_net(ndev); + if (!rxe && is_vlan_dev(ndev)) + rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); if (!rxe) goto drop; @@ -180,12 +181,6 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) rxe_rcv(skb); - /* - * FIXME: this is in the wrong place, it needs to be done when pkt is - * destroyed - */ - ib_device_put(&rxe->ib_dev); - return 0; drop: kfree_skb(skb); @@ 
-414,6 +409,11 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) void rxe_loopback(struct sk_buff *skb) { + if (skb->protocol == htons(ETH_P_IP)) + skb_pull(skb, sizeof(struct iphdr)); + else + skb_pull(skb, sizeof(struct ipv6hdr)); + rxe_rcv(skb); } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index b374eb53e2fe..307d8986e7c9 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -15,21 +15,25 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { .name = "rxe-uc", .size = sizeof(struct rxe_ucontext), + .elem_offset = offsetof(struct rxe_ucontext, pelem), .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_PD] = { .name = "rxe-pd", .size = sizeof(struct rxe_pd), + .elem_offset = offsetof(struct rxe_pd, pelem), .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_AH] = { .name = "rxe-ah", .size = sizeof(struct rxe_ah), - .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_ah, pelem), + .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_SRQ] = { .name = "rxe-srq", .size = sizeof(struct rxe_srq), + .elem_offset = offsetof(struct rxe_srq, pelem), .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, @@ -37,6 +41,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_QP] = { .name = "rxe-qp", .size = sizeof(struct rxe_qp), + .elem_offset = offsetof(struct rxe_qp, pelem), .cleanup = rxe_qp_cleanup, .flags = RXE_POOL_INDEX, .min_index = RXE_MIN_QP_INDEX, @@ -45,12 +50,14 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_CQ] = { .name = "rxe-cq", .size = sizeof(struct rxe_cq), + .elem_offset = offsetof(struct rxe_cq, pelem), .flags = RXE_POOL_NO_ALLOC, .cleanup = rxe_cq_cleanup, }, [RXE_TYPE_MR] = { .name = "rxe-mr", .size = sizeof(struct rxe_mem), + .elem_offset = offsetof(struct rxe_mem, pelem), .cleanup = rxe_mem_cleanup, .flags = RXE_POOL_INDEX, .max_index = 
RXE_MAX_MR_INDEX, @@ -59,6 +66,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MW] = { .name = "rxe-mw", .size = sizeof(struct rxe_mem), + .elem_offset = offsetof(struct rxe_mem, pelem), .flags = RXE_POOL_INDEX, .max_index = RXE_MAX_MW_INDEX, .min_index = RXE_MIN_MW_INDEX, @@ -66,6 +74,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MC_GRP] = { .name = "rxe-mc_grp", .size = sizeof(struct rxe_mc_grp), + .elem_offset = offsetof(struct rxe_mc_grp, pelem), .cleanup = rxe_mc_cleanup, .flags = RXE_POOL_KEY, .key_offset = offsetof(struct rxe_mc_grp, mgid), @@ -74,7 +83,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MC_ELEM] = { .name = "rxe-mc_elem", .size = sizeof(struct rxe_mc_elem), - .flags = RXE_POOL_ATOMIC, + .elem_offset = offsetof(struct rxe_mc_elem, pelem), }, }; @@ -94,18 +103,18 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) goto out; } - pool->max_index = max; - pool->min_index = min; + pool->index.max_index = max; + pool->index.min_index = min; size = BITS_TO_LONGS(max - min + 1) * sizeof(long); - pool->table = kmalloc(size, GFP_KERNEL); - if (!pool->table) { + pool->index.table = kmalloc(size, GFP_KERNEL); + if (!pool->index.table) { err = -ENOMEM; goto out; } - pool->table_size = size; - bitmap_zero(pool->table, max - min + 1); + pool->index.table_size = size; + bitmap_zero(pool->index.table, max - min + 1); out: return err; @@ -127,13 +136,12 @@ int rxe_pool_init( pool->max_elem = max_elem; pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); pool->flags = rxe_type_info[type].flags; - pool->tree = RB_ROOT; + pool->index.tree = RB_ROOT; + pool->key.tree = RB_ROOT; pool->cleanup = rxe_type_info[type].cleanup; atomic_set(&pool->num_elem, 0); - kref_init(&pool->ref_cnt); - rwlock_init(&pool->pool_lock); if (rxe_type_info[type].flags & RXE_POOL_INDEX) { @@ -145,67 +153,47 @@ int rxe_pool_init( } if (rxe_type_info[type].flags & RXE_POOL_KEY) { - pool->key_offset = 
rxe_type_info[type].key_offset; - pool->key_size = rxe_type_info[type].key_size; + pool->key.key_offset = rxe_type_info[type].key_offset; + pool->key.key_size = rxe_type_info[type].key_size; } - pool->state = RXE_POOL_STATE_VALID; - out: return err; } -static void rxe_pool_release(struct kref *kref) -{ - struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); - - pool->state = RXE_POOL_STATE_INVALID; - kfree(pool->table); -} - -static void rxe_pool_put(struct rxe_pool *pool) -{ - kref_put(&pool->ref_cnt, rxe_pool_release); -} - void rxe_pool_cleanup(struct rxe_pool *pool) { - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - pool->state = RXE_POOL_STATE_INVALID; if (atomic_read(&pool->num_elem) > 0) pr_warn("%s pool destroyed with unfree'd elem\n", pool_name(pool)); - write_unlock_irqrestore(&pool->pool_lock, flags); - rxe_pool_put(pool); + kfree(pool->index.table); } static u32 alloc_index(struct rxe_pool *pool) { u32 index; - u32 range = pool->max_index - pool->min_index + 1; + u32 range = pool->index.max_index - pool->index.min_index + 1; - index = find_next_zero_bit(pool->table, range, pool->last); + index = find_next_zero_bit(pool->index.table, range, pool->index.last); if (index >= range) - index = find_first_zero_bit(pool->table, range); + index = find_first_zero_bit(pool->index.table, range); WARN_ON_ONCE(index >= range); - set_bit(index, pool->table); - pool->last = index; - return index + pool->min_index; + set_bit(index, pool->index.table); + pool->index.last = index; + return index + pool->index.min_index; } static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) { - struct rb_node **link = &pool->tree.rb_node; + struct rb_node **link = &pool->index.tree.rb_node; struct rb_node *parent = NULL; struct rxe_pool_entry *elem; while (*link) { parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); + elem = rb_entry(parent, struct rxe_pool_entry, index_node); if (elem->index == 
new->index) { pr_warn("element already exists!\n"); @@ -218,25 +206,25 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) link = &(*link)->rb_right; } - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); + rb_link_node(&new->index_node, parent, link); + rb_insert_color(&new->index_node, &pool->index.tree); out: return; } static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) { - struct rb_node **link = &pool->tree.rb_node; + struct rb_node **link = &pool->key.tree.rb_node; struct rb_node *parent = NULL; struct rxe_pool_entry *elem; int cmp; while (*link) { parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); + elem = rb_entry(parent, struct rxe_pool_entry, key_node); - cmp = memcmp((u8 *)elem + pool->key_offset, - (u8 *)new + pool->key_offset, pool->key_size); + cmp = memcmp((u8 *)elem + pool->key.key_offset, + (u8 *)new + pool->key.key_offset, pool->key.key_size); if (cmp == 0) { pr_warn("key already exists!\n"); @@ -249,116 +237,135 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) link = &(*link)->rb_right; } - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); + rb_link_node(&new->key_node, parent, link); + rb_insert_color(&new->key_node, &pool->key.tree); out: return; } -void rxe_add_key(void *arg, void *key) +void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; - unsigned long flags; - write_lock_irqsave(&pool->pool_lock, flags); - memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); + memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size); insert_key(pool, elem); - write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_drop_key(void *arg) +void __rxe_add_key(struct rxe_pool_entry *elem, void *key) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long 
flags; write_lock_irqsave(&pool->pool_lock, flags); - rb_erase(&elem->node, &pool->tree); + __rxe_add_key_locked(elem, key); write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_add_index(void *arg) +void __rxe_drop_key_locked(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; + + rb_erase(&elem->key_node, &pool->key.tree); +} + +void __rxe_drop_key(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); + __rxe_drop_key_locked(elem); + write_unlock_irqrestore(&pool->pool_lock, flags); +} + +void __rxe_add_index_locked(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; + elem->index = alloc_index(pool); insert_index(pool, elem); - write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_drop_index(void *arg) +void __rxe_add_index(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - clear_bit(elem->index - pool->min_index, pool->table); - rb_erase(&elem->node, &pool->tree); + __rxe_add_index_locked(elem); write_unlock_irqrestore(&pool->pool_lock, flags); } -void *rxe_alloc(struct rxe_pool *pool) +void __rxe_drop_index_locked(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem; + struct rxe_pool *pool = elem->pool; + + clear_bit(elem->index - pool->index.min_index, pool->index.table); + rb_erase(&elem->index_node, &pool->index.tree); +} + +void __rxe_drop_index(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; unsigned long flags; - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + write_lock_irqsave(&pool->pool_lock, flags); + __rxe_drop_index_locked(elem); + write_unlock_irqrestore(&pool->pool_lock, flags); +} - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); - 
return NULL; - } - kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); - - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; +void *rxe_alloc_locked(struct rxe_pool *pool) +{ + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rxe_pool_entry *elem; + u8 *obj; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; - elem = kzalloc(rxe_type_info[pool->type].size, - (pool->flags & RXE_POOL_ATOMIC) ? - GFP_ATOMIC : GFP_KERNEL); - if (!elem) + obj = kzalloc(info->size, GFP_ATOMIC); + if (!obj) goto out_cnt; + elem = (struct rxe_pool_entry *)(obj + info->elem_offset); + elem->pool = pool; kref_init(&elem->ref_cnt); - return elem; + return obj; out_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return NULL; } -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +void *rxe_alloc(struct rxe_pool *pool) { - unsigned long flags; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rxe_pool_entry *elem; + u8 *obj; - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_cnt; - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); - return -EINVAL; - } - kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); + obj = kzalloc(info->size, GFP_KERNEL); + if (!obj) + goto out_cnt; - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; + elem = (struct rxe_pool_entry *)(obj + info->elem_offset); + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return obj; + +out_cnt: + atomic_dec(&pool->num_elem); + return NULL; +} + +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +{ if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; @@ -369,9 +376,6 @@ int rxe_add_to_pool(struct rxe_pool *pool, struct 
rxe_pool_entry *elem) out_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return -EINVAL; } @@ -380,67 +384,77 @@ void rxe_elem_release(struct kref *kref) struct rxe_pool_entry *elem = container_of(kref, struct rxe_pool_entry, ref_cnt); struct rxe_pool *pool = elem->pool; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + u8 *obj; if (pool->cleanup) pool->cleanup(elem); - if (!(pool->flags & RXE_POOL_NO_ALLOC)) - kfree(elem); + if (!(pool->flags & RXE_POOL_NO_ALLOC)) { + obj = (u8 *)elem - info->elem_offset; + kfree(obj); + } + atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); - rxe_pool_put(pool); } -void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; - unsigned long flags; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rb_node *node; + struct rxe_pool_entry *elem; + u8 *obj; - read_lock_irqsave(&pool->pool_lock, flags); - - if (pool->state != RXE_POOL_STATE_VALID) - goto out; - - node = pool->tree.rb_node; + node = pool->index.tree.rb_node; while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); + elem = rb_entry(node, struct rxe_pool_entry, index_node); if (elem->index > index) node = node->rb_left; else if (elem->index < index) node = node->rb_right; - else { - kref_get(&elem->ref_cnt); + else break; - } } -out: - read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? 
elem : NULL; + if (node) { + kref_get(&elem->ref_cnt); + obj = (u8 *)elem - info->elem_offset; + } else { + obj = NULL; + } + + return obj; } -void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; - int cmp; + u8 *obj; unsigned long flags; read_lock_irqsave(&pool->pool_lock, flags); + obj = rxe_pool_get_index_locked(pool, index); + read_unlock_irqrestore(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) - goto out; + return obj; +} - node = pool->tree.rb_node; +void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) +{ + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rb_node *node; + struct rxe_pool_entry *elem; + u8 *obj; + int cmp; + + node = pool->key.tree.rb_node; while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); + elem = rb_entry(node, struct rxe_pool_entry, key_node); - cmp = memcmp((u8 *)elem + pool->key_offset, - key, pool->key_size); + cmp = memcmp((u8 *)elem + pool->key.key_offset, + key, pool->key.key_size); if (cmp > 0) node = node->rb_left; @@ -450,10 +464,24 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) break; } - if (node) + if (node) { kref_get(&elem->ref_cnt); + obj = (u8 *)elem - info->elem_offset; + } else { + obj = NULL; + } -out: - read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? 
elem : NULL; + return obj; +} + +void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +{ + u8 *obj; + unsigned long flags; + + read_lock_irqsave(&pool->pool_lock, flags); + obj = rxe_pool_get_key_locked(pool, key); + read_unlock_irqrestore(&pool->pool_lock, flags); + + return obj; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 432745ffc8d4..61210b300a78 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -11,7 +11,6 @@ #define RXE_POOL_CACHE_FLAGS (0) enum rxe_pool_flags { - RXE_POOL_ATOMIC = BIT(0), RXE_POOL_INDEX = BIT(1), RXE_POOL_KEY = BIT(2), RXE_POOL_NO_ALLOC = BIT(4), @@ -36,6 +35,7 @@ struct rxe_pool_entry; struct rxe_type_info { const char *name; size_t size; + size_t elem_offset; void (*cleanup)(struct rxe_pool_entry *obj); enum rxe_pool_flags flags; u32 max_index; @@ -46,18 +46,16 @@ struct rxe_type_info { extern struct rxe_type_info rxe_type_info[]; -enum rxe_pool_state { - RXE_POOL_STATE_INVALID, - RXE_POOL_STATE_VALID, -}; - struct rxe_pool_entry { struct rxe_pool *pool; struct kref ref_cnt; struct list_head list; - /* only used if indexed or keyed */ - struct rb_node node; + /* only used if keyed */ + struct rb_node key_node; + + /* only used if indexed */ + struct rb_node index_node; u32 index; }; @@ -65,24 +63,29 @@ struct rxe_pool { struct rxe_dev *rxe; rwlock_t pool_lock; /* protects pool add/del/search */ size_t elem_size; - struct kref ref_cnt; void (*cleanup)(struct rxe_pool_entry *obj); - enum rxe_pool_state state; enum rxe_pool_flags flags; enum rxe_elem_type type; unsigned int max_elem; atomic_t num_elem; - /* only used if indexed or keyed */ - struct rb_root tree; - unsigned long *table; - size_t table_size; - u32 max_index; - u32 min_index; - u32 last; - size_t key_offset; - size_t key_size; + /* only used if indexed */ + struct { + struct rb_root tree; + unsigned long *table; + size_t table_size; + u32 last; + u32 max_index; + u32 min_index; 
+ } index; + + /* only used if keyed */ + struct { + struct rb_root tree; + size_t key_offset; + size_t key_size; + } key; }; /* initialize a pool of objects with given limit on @@ -95,32 +98,70 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, /* free resources from object pool */ void rxe_pool_cleanup(struct rxe_pool *pool); -/* allocate an object from pool */ +/* allocate an object from pool holding and not holding the pool lock */ +void *rxe_alloc_locked(struct rxe_pool *pool); + void *rxe_alloc(struct rxe_pool *pool); /* connect already allocated object to pool */ -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); + +#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem) /* assign an index to an indexed object and insert object into - * pool's rb tree + * pool's rb tree holding and not holding the pool_lock */ -void rxe_add_index(void *elem); +void __rxe_add_index_locked(struct rxe_pool_entry *elem); -/* drop an index and remove object from rb tree */ -void rxe_drop_index(void *elem); +#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->pelem) + +void __rxe_add_index(struct rxe_pool_entry *elem); + +#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem) + +/* drop an index and remove object from rb tree + * holding and not holding the pool_lock + */ +void __rxe_drop_index_locked(struct rxe_pool_entry *elem); + +#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->pelem) + +void __rxe_drop_index(struct rxe_pool_entry *elem); + +#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem) /* assign a key to a keyed object and insert object into - * pool's rb tree + * pool's rb tree holding and not holding pool_lock */ -void rxe_add_key(void *elem, void *key); +void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key); -/* remove elem from rb tree */ -void rxe_drop_key(void *elem); +#define 
rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->pelem, key) + +void __rxe_add_key(struct rxe_pool_entry *elem, void *key); + +#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key) + +/* remove elem from rb tree holding and not holding the pool_lock */ +void __rxe_drop_key_locked(struct rxe_pool_entry *elem); + +#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->pelem) + +void __rxe_drop_key(struct rxe_pool_entry *elem); + +#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem) + +/* lookup an indexed object from index holding and not holding the pool_lock. + * takes a reference on object + */ +void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index); -/* lookup an indexed object from index. takes a reference on object */ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); -/* lookup keyed object from key. takes a reference on the object */ +/* lookup keyed object from key holding and not holding the pool_lock. + * takes a reference on the object + */ +void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key); + void *rxe_pool_get_key(struct rxe_pool *pool, void *key); /* cleanup an object when all references are dropped */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 656a5b4be847..34ae957a315c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -62,6 +62,17 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) struct rxe_port *port; int port_num = init->port_num; + switch (init->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + break; + default: + return -EOPNOTSUPP; + } + if (!init->recv_cq || !init->send_cq) { pr_warn("missing cq\n"); goto err1; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index c9984a28eecc..45d2f711bce2 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ 
b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -9,21 +9,26 @@ #include "rxe.h" #include "rxe_loc.h" +/* check that QP matches packet opcode type and is in a valid state */ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_qp *qp) { + unsigned int pkt_type; + if (unlikely(!qp->valid)) goto err1; + pkt_type = pkt->opcode & 0xe0; + switch (qp_type(qp)) { case IB_QPT_RC: - if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) { + if (unlikely(pkt_type != IB_OPCODE_RC)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } break; case IB_QPT_UC: - if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) { + if (unlikely(pkt_type != IB_OPCODE_UC)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } @@ -31,7 +36,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) { + if (unlikely(pkt_type != IB_OPCODE_UD)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } @@ -85,8 +90,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, goto err1; } - if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && - pkt->mask) { + if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) { u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; if (unlikely(deth_qkey(pkt) != qkey)) { @@ -252,7 +256,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) list_for_each_entry(mce, &mcg->qp_list, qp_list) { qp = mce->qp; - pkt = SKB_TO_PKT(skb); /* validate qp for incoming packet */ err = check_type_state(rxe, pkt, qp); @@ -264,12 +267,22 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) continue; /* for all but the last qp create a new clone of the - * skb and pass to the qp. + * skb and pass to the qp. If an error occurs in the + * checks for the last qp in the list we need to + * free the skb since it hasn't been passed on to + * rxe_rcv_pkt() which would free it later. 
*/ - if (mce->qp_list.next != &mcg->qp_list) + if (mce->qp_list.next != &mcg->qp_list) { per_qp_skb = skb_clone(skb, GFP_ATOMIC); - else + if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) { + kfree_skb(per_qp_skb); + continue; + } + } else { per_qp_skb = skb; + /* show we have consumed the skb */ + skb = NULL; + } if (unlikely(!per_qp_skb)) continue; @@ -284,10 +297,10 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ - return; - err1: + /* free skb if not consumed */ kfree_skb(skb); + ib_device_put(&rxe->ib_dev); } /** @@ -340,9 +353,7 @@ void rxe_rcv(struct sk_buff *skb) __be32 *icrcp; u32 calc_icrc, pack_icrc; - pkt->offset = 0; - - if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) + if (unlikely(skb->len < RXE_BTH_BYTES)) goto drop; if (rxe_chk_dgid(rxe, skb) < 0) { @@ -397,4 +408,5 @@ void rxe_rcv(struct sk_buff *skb) rxe_drop_ref(pkt->qp); kfree_skb(skb); + ib_device_put(&rxe->ib_dev); } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index d4917646641a..889290793d75 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -375,7 +375,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, pkt->psn = qp->req.psn; pkt->mask = rxe_opcode[opcode].mask; pkt->paylen = paylen; - pkt->offset = 0; pkt->wqe = wqe; /* init skb */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c7e3b6a4af38..142f3d8014d8 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -99,6 +99,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp, while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } /* go drain recv wr queue */ @@ -585,11 +586,10 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, ack->qp = qp; ack->opcode = opcode; ack->mask = 
rxe_opcode[opcode].mask; - ack->offset = pkt->offset; ack->paylen = paylen; /* fill in bth using the request packet headers */ - memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES); + memcpy(ack->hdr, pkt->hdr, RXE_BTH_BYTES); bth_set_opcode(ack, opcode); bth_set_qpn(ack, qp->attr.dest_qp_num); @@ -1017,6 +1017,7 @@ static enum resp_states cleanup(struct rxe_qp *qp, skb = skb_dequeue(&qp->req_pkts); rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } if (qp->resp.mr) { @@ -1181,6 +1182,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } if (notify) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index a031514e2f41..dee5e0e919d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -106,12 +106,12 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, return IB_LINK_LAYER_ETHERNET; } -static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) +static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata) { - struct rxe_dev *rxe = to_rdev(uctx->device); - struct rxe_ucontext *uc = to_ruc(uctx); + struct rxe_dev *rxe = to_rdev(ibuc->device); + struct rxe_ucontext *uc = to_ruc(ibuc); - return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem); + return rxe_add_to_pool(&rxe->uc_pool, uc); } static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) @@ -145,7 +145,7 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); - return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); + return rxe_add_to_pool(&rxe->pd_pool, pd); } static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) @@ -169,7 +169,7 @@ static int rxe_create_ah(struct ib_ah *ibah, if (err) return err; - err = 
rxe_add_to_pool(&rxe->ah_pool, &ah->pelem); + err = rxe_add_to_pool(&rxe->ah_pool, ah); if (err) return err; @@ -273,7 +273,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, if (err) goto err1; - err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem); + err = rxe_add_to_pool(&rxe->srq_pool, srq); if (err) goto err1; @@ -555,37 +555,42 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, } } -static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, +static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, + const struct ib_send_wr *ibwr) +{ + struct ib_sge *sge = ibwr->sg_list; + u8 *p = wqe->dma.inline_data; + int i; + + for (i = 0; i < ibwr->num_sge; i++, sge++) { + memcpy(p, (void *)(uintptr_t)sge->addr, sge->length); + p += sge->length; + } +} + +static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { int num_sge = ibwr->num_sge; - struct ib_sge *sge; - int i; - u8 *p; init_send_wr(qp, &wqe->wr, ibwr); + /* local operation */ + if (unlikely(mask & WR_REG_MASK)) { + wqe->mask = mask; + wqe->state = wqe_state_posted; + return; + } + if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); - if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { - p = wqe->dma.inline_data; - - sge = ibwr->sg_list; - for (i = 0; i < num_sge; i++, sge++) { - memcpy(p, (void *)(uintptr_t)sge->addr, - sge->length); - - p += sge->length; - } - } else if (mask & WR_REG_MASK) { - wqe->mask = mask; - wqe->state = wqe_state_posted; - return 0; - } else + if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) + copy_inline_data_to_wqe(wqe, ibwr); + else memcpy(wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); @@ -599,8 +604,6 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, wqe->dma.sge_offset = 0; wqe->state 
= wqe_state_posted; wqe->ssn = atomic_add_return(1, &qp->ssn); - - return 0; } static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, @@ -623,10 +626,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, } send_wqe = producer_addr(sq->queue); - - err = init_send_wqe(qp, ibwr, mask, length, send_wqe); - if (unlikely(err)) - goto err1; + init_send_wqe(qp, ibwr, mask, length, send_wqe); advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); @@ -774,7 +774,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (err) return err; - return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem); + return rxe_add_to_pool(&rxe->cq_pool, cq); } static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) @@ -1118,7 +1118,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); + strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index adda78996219..368959ae9a8c 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -653,7 +653,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { struct siw_sqe *orq_e = orq_get_tail(qp); - if (orq_e && READ_ONCE(orq_e->flags) == 0) + if (READ_ONCE(orq_e->flags) == 0) return orq_e; return NULL; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index ee95cf29179d..cf55326f2ab4 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -135,7 +135,7 @@ static struct { static int siw_init_cpulist(void) { - int i, num_nodes = num_possible_nodes(); + int i, num_nodes = nr_node_ids; memset(siw_tx_thread, 0, sizeof(siw_tx_thread)); @@ -357,7 +357,7 @@ static 
struct siw_device *siw_device_create(struct net_device *netdev) sizeof(base_dev->iw_ifname)); /* Disable TCP port mapping */ - base_dev->iw_driver_flags = IW_F_NO_PORT_MAP, + base_dev->iw_driver_flags = IW_F_NO_PORT_MAP; sdev->attrs.max_qp = SIW_MAX_QP; sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 875d36d4b1c6..ddb2e66f9f13 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -199,26 +199,26 @@ void siw_qp_llp_write_space(struct sock *sk) static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { - irq_size = roundup_pow_of_two(irq_size); - orq_size = roundup_pow_of_two(orq_size); - + if (irq_size) { + irq_size = roundup_pow_of_two(irq_size); + qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + if (!qp->irq) { + qp->attrs.irq_size = 0; + return -ENOMEM; + } + } + if (orq_size) { + orq_size = roundup_pow_of_two(orq_size); + qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + if (!qp->orq) { + qp->attrs.orq_size = 0; + qp->attrs.irq_size = 0; + vfree(qp->irq); + return -ENOMEM; + } + } qp->attrs.irq_size = irq_size; qp->attrs.orq_size = orq_size; - - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); - if (!qp->irq) { - siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size); - qp->attrs.irq_size = 0; - return -ENOMEM; - } - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); - if (!qp->orq) { - siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size); - qp->attrs.orq_size = 0; - qp->attrs.irq_size = 0; - vfree(qp->irq); - return -ENOMEM; - } siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size); return 0; } @@ -288,13 +288,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl) if (ctrl & MPA_V2_RDMA_WRITE_RTR) wqe->sqe.opcode = SIW_OP_WRITE; else if (ctrl & MPA_V2_RDMA_READ_RTR) { - struct siw_sqe *rreq; + struct siw_sqe *rreq = NULL; wqe->sqe.opcode = SIW_OP_READ; spin_lock(&qp->orq_lock); - rreq = 
orq_get_free(qp); + if (qp->attrs.orq_size) + rreq = orq_get_free(qp); if (rreq) { siw_read_to_orq(rreq, &wqe->sqe); qp->orq_put++; @@ -877,6 +878,96 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) rreq->num_sge = 1; } +static int siw_activate_tx_from_sq(struct siw_qp *qp) +{ + struct siw_sqe *sqe; + struct siw_wqe *wqe = tx_wqe(qp); + int rv = 1; + + sqe = sq_get_next(qp); + if (!sqe) + return 0; + + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; + + /* First copy SQE to kernel private memory */ + memcpy(&wqe->sqe, sqe, sizeof(*sqe)); + + if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + rv = -EINVAL; + goto out; + } + if (wqe->sqe.flags & SIW_WQE_INLINE) { + if (wqe->sqe.opcode != SIW_OP_SEND && + wqe->sqe.opcode != SIW_OP_WRITE) { + rv = -EINVAL; + goto out; + } + if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { + rv = -EINVAL; + goto out; + } + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].lkey = 0; + wqe->sqe.num_sge = 1; + } + if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { + /* A READ cannot be fenced */ + if (unlikely(wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == + SIW_OP_READ_LOCAL_INV)) { + siw_dbg_qp(qp, "cannot fence read\n"); + rv = -EINVAL; + goto out; + } + spin_lock(&qp->orq_lock); + + if (qp->attrs.orq_size && !siw_orq_empty(qp)) { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); + + } else if (wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { + struct siw_sqe *rreq; + + if (unlikely(!qp->attrs.orq_size)) { + /* We negotiated not to send READ req's */ + rv = -EINVAL; + goto out; + } + wqe->sqe.num_sge = 1; + + spin_lock(&qp->orq_lock); + + rreq = orq_get_free(qp); + if (rreq) { + /* + * Make an immediate copy in ORQ to be ready + * to process loopback READ reply + */ + siw_read_to_orq(rreq, &wqe->sqe); + qp->orq_put++; + } else { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); + } + + /* Clear 
SQE, can be re-used by application */ + smp_store_mb(sqe->flags, 0); + qp->sq_get++; +out: + if (unlikely(rv < 0)) { + siw_dbg_qp(qp, "error %d\n", rv); + wqe->wr_status = SIW_WR_IDLE; + } + return rv; +} + /* * Must be called with SQ locked. * To avoid complete SQ starvation by constant inbound READ requests, @@ -885,133 +976,55 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) */ int siw_activate_tx(struct siw_qp *qp) { - struct siw_sqe *irqe, *sqe; + struct siw_sqe *irqe; struct siw_wqe *wqe = tx_wqe(qp); - int rv = 1; + + if (!qp->attrs.irq_size) + return siw_activate_tx_from_sq(qp); irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; - if (irqe->flags & SIW_WQE_VALID) { - sqe = sq_get_next(qp); + if (!(irqe->flags & SIW_WQE_VALID)) + return siw_activate_tx_from_sq(qp); - /* - * Avoid local WQE processing starvation in case - * of constant inbound READ request stream - */ - if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { - qp->irq_burst = 0; - goto skip_irq; - } - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* start READ RESPONSE */ - wqe->sqe.opcode = SIW_OP_READ_RESPONSE; - wqe->sqe.flags = 0; - if (irqe->num_sge) { - wqe->sqe.num_sge = 1; - wqe->sqe.sge[0].length = irqe->sge[0].length; - wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; - wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; - } else { - wqe->sqe.num_sge = 0; - } - - /* Retain original RREQ's message sequence number for - * potential error reporting cases. 
- */ - wqe->sqe.sge[1].length = irqe->sge[1].length; - - wqe->sqe.rkey = irqe->rkey; - wqe->sqe.raddr = irqe->raddr; - - wqe->processed = 0; - qp->irq_get++; - - /* mark current IRQ entry free */ - smp_store_mb(irqe->flags, 0); - - goto out; + /* + * Avoid local WQE processing starvation in case + * of constant inbound READ request stream + */ + if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { + qp->irq_burst = 0; + return siw_activate_tx_from_sq(qp); } - sqe = sq_get_next(qp); - if (sqe) { -skip_irq: - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; - /* First copy SQE to kernel private memory */ - memcpy(&wqe->sqe, sqe, sizeof(*sqe)); - - if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { - rv = -EINVAL; - goto out; - } - if (wqe->sqe.flags & SIW_WQE_INLINE) { - if (wqe->sqe.opcode != SIW_OP_SEND && - wqe->sqe.opcode != SIW_OP_WRITE) { - rv = -EINVAL; - goto out; - } - if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { - rv = -EINVAL; - goto out; - } - wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; - wqe->sqe.sge[0].lkey = 0; - wqe->sqe.num_sge = 1; - } - if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { - /* A READ cannot be fenced */ - if (unlikely(wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == - SIW_OP_READ_LOCAL_INV)) { - siw_dbg_qp(qp, "cannot fence read\n"); - rv = -EINVAL; - goto out; - } - spin_lock(&qp->orq_lock); - - if (!siw_orq_empty(qp)) { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); - - } else if (wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - struct siw_sqe *rreq; - - wqe->sqe.num_sge = 1; - - spin_lock(&qp->orq_lock); - - rreq = orq_get_free(qp); - if (rreq) { - /* - * Make an immediate copy in ORQ to be ready - * to process loopback READ reply - */ - siw_read_to_orq(rreq, &wqe->sqe); - qp->orq_put++; - } else { - qp->tx_ctx.orq_fence = 1; - 
rv = 0; - } - spin_unlock(&qp->orq_lock); - } - - /* Clear SQE, can be re-used by application */ - smp_store_mb(sqe->flags, 0); - qp->sq_get++; + /* start READ RESPONSE */ + wqe->sqe.opcode = SIW_OP_READ_RESPONSE; + wqe->sqe.flags = 0; + if (irqe->num_sge) { + wqe->sqe.num_sge = 1; + wqe->sqe.sge[0].length = irqe->sge[0].length; + wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; + wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; } else { - rv = 0; + wqe->sqe.num_sge = 0; } -out: - if (unlikely(rv < 0)) { - siw_dbg_qp(qp, "error %d\n", rv); - wqe->wr_status = SIW_WR_IDLE; - } - return rv; + + /* Retain original RREQ's message sequence number for + * potential error reporting cases. + */ + wqe->sqe.sge[1].length = irqe->sge[1].length; + + wqe->sqe.rkey = irqe->rkey; + wqe->sqe.raddr = irqe->raddr; + + wqe->processed = 0; + qp->irq_get++; + + /* mark current IRQ entry free */ + smp_store_mb(irqe->flags, 0); + + return 1; } /* diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 4bd1f1f84057..60116f20653c 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -680,6 +680,10 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) } spin_lock_irqsave(&qp->sq_lock, flags); + if (unlikely(!qp->attrs.irq_size)) { + run_sq = 0; + goto error_irq; + } if (tx_work->wr_status == SIW_WR_IDLE) { /* * immediately schedule READ response w/o @@ -712,8 +716,9 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) /* RRESP now valid as current TX wqe or placed into IRQ */ smp_store_mb(resp->flags, SIW_WQE_VALID); } else { - pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp), - qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size); +error_irq: + pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n", + qp_id(qp), qp->attrs.irq_size); siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, RDMAP_ETYPE_REMOTE_OPERATION, @@ -740,6 +745,9 @@ static int siw_orqe_start_rx(struct 
siw_qp *qp) struct siw_sqe *orqe; struct siw_wqe *wqe = NULL; + if (unlikely(!qp->attrs.orq_size)) + return -EPROTO; + /* make sure ORQ indices are current */ smp_mb(); @@ -796,8 +804,8 @@ int siw_proc_rresp(struct siw_qp *qp) */ rv = siw_orqe_start_rx(qp); if (rv) { - pr_warn("siw: [QP %u]: ORQ empty at idx %d\n", - qp_id(qp), qp->orq_get % qp->attrs.orq_size); + pr_warn("siw: [QP %u]: ORQ empty, size %d\n", + qp_id(qp), qp->attrs.orq_size); goto error_term; } rv = siw_rresp_check_ntoh(srx, frx); @@ -1290,11 +1298,13 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error) wc_status); siw_wqe_put_mem(wqe, SIW_OP_READ); - if (!error) + if (!error) { rv = siw_check_tx_fence(qp); - else - /* Disable current ORQ eleement */ - WRITE_ONCE(orq_get_current(qp)->flags, 0); + } else { + /* Disable current ORQ element */ + if (qp->attrs.orq_size) + WRITE_ONCE(orq_get_current(qp)->flags, 0); + } break; case RDMAP_RDMA_READ_REQ: diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index d19d8325588b..7989c4043db4 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1107,8 +1107,8 @@ int siw_qp_sq_process(struct siw_qp *qp) /* * RREQ may have already been completed by inbound RRESP! 
*/ - if (tx_type == SIW_OP_READ || - tx_type == SIW_OP_READ_LOCAL_INV) { + if ((tx_type == SIW_OP_READ || + tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) { /* Cleanup pending entry in ORQ */ qp->orq_put--; qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 68fd053fc774..e389d44e5591 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -365,13 +365,23 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, if (rv) goto err_out; + num_sqe = attrs->cap.max_send_wr; + num_rqe = attrs->cap.max_recv_wr; + /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. */ - num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); - num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr); + if (num_sqe) + num_sqe = roundup_pow_of_two(num_sqe); + else { + /* Zero sized SQ is not supported */ + rv = -EINVAL; + goto err_out; + } + if (num_rqe) + num_rqe = roundup_pow_of_two(num_rqe); if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); @@ -379,7 +389,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); if (qp->sendq == NULL) { - siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe); rv = -ENOMEM; goto err_out_xa; } @@ -413,7 +422,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); if (qp->recvq == NULL) { - siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe); rv = -ENOMEM; goto err_out_xa; } @@ -966,9 +974,9 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, unsigned long flags; int rv = 0; - if (qp->srq) { + if (qp->srq || qp->attrs.rq_size == 0) { *bad_wr = wr; - return -EOPNOTSUPP; /* what else from errno.h? 
*/ + return -EINVAL; } if (!rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 3440dc48d02c..179ff1d068e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -413,7 +413,6 @@ struct ipoib_dev_priv { u64 hca_caps; struct ipoib_ethtool_st ethtool; unsigned int max_send_sge; - bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a6f413491321..e16b40c09f82 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -141,8 +141,6 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - priv->sm_fullmember_sendonly_support = false; - if (ipoib_ib_dev_open(dev)) { if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 86e4ed64e4e2..5b3154503bf4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -275,7 +275,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, memset(&av, 0, sizeof(av)); av.type = rdma_ah_find_type(priv->ca, priv->port); - rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)), + rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)); rdma_ah_set_port_num(&av, priv->port); rdma_ah_set_sl(&av, mcast->mcmember.sl); rdma_ah_set_static_rate(&av, mcast->mcmember.rate); @@ -333,15 +333,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); return; } - /* - * Check if can send sendonly MCG's with sendonly-fullmember join state. 
- * It done here after the successfully join to the broadcast group, - * because the broadcast group must always be joined first and is always - * re-joined if the SM changes substantially. - */ - priv->sm_fullmember_sendonly_support = - ib_sa_sendonly_fullmem_support(&ipoib_sa_client, - priv->ca, priv->port); /* * Take rtnl_lock to avoid racing with ipoib_stop() and * turning the carrier back on while a device is being @@ -537,9 +528,7 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) * most closely emulates the behavior, from a user space * application perspective, of Ethernet multicast operation. */ - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && - priv->sm_fullmember_sendonly_support) - /* SM supports sendonly-fullmember, otherwise fallback to full-member */ + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = SENDONLY_FULLMEMBER_JOIN; } spin_unlock_irq(&priv->lock); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 4792b9bf400f..8fcaa1136f2c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -89,13 +89,20 @@ int iser_debug_level = 0; module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); +static int iscsi_iser_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops iscsi_iser_size_ops = { + .set = iscsi_iser_set, + .get = param_get_uint, +}; + static unsigned int iscsi_max_lun = 512; -module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); -MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session (default:512"); +module_param_cb(max_lun, &iscsi_iser_size_ops, &iscsi_max_lun, S_IRUGO); +MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session, should > 0 (default:512)"); unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS; -module_param_named(max_sectors, 
iser_max_sectors, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024"); +module_param_cb(max_sectors, &iscsi_iser_size_ops, &iser_max_sectors, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command, should > 0 (default:1024)"); bool iser_always_reg = true; module_param_named(always_register, iser_always_reg, bool, S_IRUGO); @@ -110,6 +117,18 @@ int iser_pi_guard; module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO); MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]"); +static int iscsi_iser_set(const char *val, const struct kernel_param *kp) +{ + int ret; + unsigned int n = 0; + + ret = kstrtouint(val, 10, &n); + if (ret != 0 || n == 0) + return -EINVAL; + + return param_set_uint(val, kp); +} + /* * iscsi_iser_recv() - Process a successful recv completion * @conn: iscsi connection @@ -571,13 +590,20 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) static inline unsigned int iser_dif_prot_caps(int prot_caps) { - return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? - SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION | - SHOST_DIX_TYPE1_PROTECTION : 0) | - ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? - SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) | - ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? 
- SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0); + int ret = 0; + + if (prot_caps & IB_PROT_T10DIF_TYPE_1) + ret |= SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIX_TYPE0_PROTECTION | + SHOST_DIX_TYPE1_PROTECTION; + if (prot_caps & IB_PROT_T10DIF_TYPE_2) + ret |= SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIX_TYPE2_PROTECTION; + if (prot_caps & IB_PROT_T10DIF_TYPE_3) + ret |= SHOST_DIF_TYPE3_PROTECTION | + SHOST_DIX_TYPE3_PROTECTION; + + return ret; } /** @@ -1009,11 +1035,6 @@ static int __init iser_init(void) iser_dbg("Starting iSER datamover...\n"); - if (iscsi_max_lun < 1) { - iser_err("Invalid max_lun value of %u\n", iscsi_max_lun); - return -EINVAL; - } - memset(&ig, 0, sizeof(struct iser_global)); ig.desc_cache = kmem_cache_create("iser_descriptors", diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d4e057fac219..afec40da9b58 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -169,7 +169,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain) domain->sig.dif.ref_escape = true; if (sc->prot_flags & SCSI_PROT_REF_INCREMENT) domain->sig.dif.ref_remap = true; -}; +} static int iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) @@ -390,4 +390,3 @@ int iser_reg_mem_fastreg(struct iscsi_iser_task *task, return err; } - diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 2bd18b006893..136f6c4492e0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -685,7 +685,7 @@ static void iser_cleanup_handler(struct rdma_cm_id *cma_id, iser_disconnected_handler(cma_id); iser_free_ib_conn_res(iser_conn, destroy); complete(&iser_conn->ib_completion); -}; +} static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { diff --git a/drivers/infiniband/ulp/isert/ib_isert.c 
b/drivers/infiniband/ulp/isert/ib_isert.c index 2ba27221ea85..7305ed8976c2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -71,7 +71,6 @@ static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp return param_set_int(val, kp); } - static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) { @@ -79,7 +78,6 @@ isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) cmd->prot_op != TARGET_PROT_NORMAL); } - static void isert_qp_event_callback(struct ib_event *e, void *context) { @@ -232,8 +230,10 @@ isert_create_device_ib_res(struct isert_device *device) } /* Check signature cap */ - device->pi_capable = ib_dev->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER ? true : false; + if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER) + device->pi_capable = true; + else + device->pi_capable = false; return 0; } @@ -1993,7 +1993,7 @@ isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_domain *domain) if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT || se_cmd->prot_type == TARGET_DIF_TYPE2_PROT) domain->sig.dif.ref_remap = true; -}; +} static int isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 4933085a864a..cecf0f7cadf9 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -233,7 +233,7 @@ static void vema_get_class_port_info(struct opa_vnic_vema_port *port, port_info = (struct opa_class_port_info *)rsp_mad->data; memcpy(port_info, &port->class_port_info, sizeof(*port_info)); - port_info->base_version = OPA_MGMT_BASE_VERSION, + port_info->base_version = OPA_MGMT_BASE_VERSION; port_info->class_version = OPA_EMA_CLASS_VERSION; /* diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index 
ba00f0de14ca..b6a0abf40589 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -408,6 +408,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) "%s", str); if (err) { pr_err("kobject_init_and_add: %d\n", err); + kobject_put(&sess->kobj); return err; } err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group); @@ -419,6 +420,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) &sess->kobj, "stats"); if (err) { pr_err("kobject_init_and_add: %d\n", err); + kobject_put(&sess->stats->kobj_stats); goto remove_group; } @@ -469,15 +471,12 @@ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt) return sysfs_create_group(&clt->dev.kobj, &rtrs_clt_attr_group); } -void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt) +void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt) { + sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group); + if (clt->kobj_paths) { kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); } } - -void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt) -{ - sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group); -} diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 67f86c405a26..0a08b4b742a3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -31,6 +31,8 @@ */ #define RTRS_RECONNECT_SEED 8 +#define FIRST_CONN 0x01 + MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -178,18 +180,18 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess, } /** - * __rtrs_clt_change_state() - change the session state through session state + * rtrs_clt_change_state() - change the session state through session state * machine. * * @sess: client session to change the state of. * @new_state: state to change to. * - * returns true if successful, false if the requested state can not be set. 
+ * returns true if sess's state is changed to new state, otherwise return false. * * Locks: * state_wq lock must be hold. */ -static bool __rtrs_clt_change_state(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, enum rtrs_clt_state new_state) { enum rtrs_clt_state old_state; @@ -286,7 +288,7 @@ static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess, spin_lock_irq(&sess->state_wq.lock); if (sess->state == old_state) - changed = __rtrs_clt_change_state(sess, new_state); + changed = rtrs_clt_change_state(sess, new_state); spin_unlock_irq(&sess->state_wq.lock); return changed; @@ -494,7 +496,7 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc) int err; struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); err = rtrs_iu_post_recv(&con->c, iu); @@ -514,7 +516,7 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) u32 buf_id; int err; - WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); @@ -621,12 +623,12 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) } else if (imm_type == RTRS_HB_MSG_IMM) { WARN_ON(con->c.cid); rtrs_send_hb_ack(&sess->s); - if (sess->flags == RTRS_MSG_NEW_RKEY_F) + if (sess->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); sess->s.hb_missed_cnt = 0; - if (sess->flags == RTRS_MSG_NEW_RKEY_F) + if (sess->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else { rtrs_wrn(con->c.sess, "Unknown IMM type %u\n", @@ -654,7 +656,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || wc->wc_flags & IB_WC_WITH_IMM)); 
WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); - if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F) { if (wc->wc_flags & IB_WC_WITH_INVALIDATE) return rtrs_clt_recv_done(con, wc); @@ -664,7 +666,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_RDMA_WRITE: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb */ break; @@ -680,7 +681,7 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); for (i = 0; i < q_size; i++) { - if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F) { struct rtrs_iu *iu = &con->rsp_ius[i]; err = rtrs_iu_post_recv(&con->c, iu); @@ -1318,6 +1319,12 @@ static int alloc_permits(struct rtrs_clt *clt) static void free_permits(struct rtrs_clt *clt) { + if (clt->permits_map) { + size_t sz = clt->queue_depth; + + wait_event(clt->permits_wait, + find_first_bit(clt->permits_map, sz) >= sz); + } kfree(clt->permits_map); clt->permits_map = NULL; kfree(clt->permits); @@ -1353,21 +1360,14 @@ static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess, bool changed; spin_lock_irq(&sess->state_wq.lock); - *old_state = sess->state; - changed = __rtrs_clt_change_state(sess, new_state); + if (old_state) + *old_state = sess->state; + changed = rtrs_clt_change_state(sess, new_state); spin_unlock_irq(&sess->state_wq.lock); return changed; } -static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, - enum rtrs_clt_state new_state) -{ - enum rtrs_clt_state old_state; - - return rtrs_clt_change_state_get_old(sess, new_state, &old_state); -} - static void rtrs_clt_hb_err_handler(struct rtrs_con *c) { struct rtrs_clt_con *con = container_of(c, typeof(*con), c); @@ -1511,7 +1511,7 @@ static void destroy_con(struct rtrs_clt_con *con) static int create_con_cq_qp(struct rtrs_clt_con *con) { struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - u16 wr_queue_size; + u32 
max_send_wr, max_recv_wr, cq_size; int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; @@ -1523,7 +1523,8 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * + 2 for drain and heartbeat * in case qp gets into error state */ - wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2; + max_send_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; + max_recv_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; /* We must be the first here */ if (WARN_ON(sess->s.dev)) return -EINVAL; @@ -1555,25 +1556,29 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) /* Shared between connections */ sess->s.dev_ref++; - wr_queue_size = + max_send_wr = min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ sess->queue_depth * 3 + 1); + max_recv_wr = + min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, + sess->queue_depth * 3 + 1); } /* alloc iu to recv new rkey reply when server reports flags set */ - if (sess->flags == RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { - con->rsp_ius = rtrs_iu_alloc(wr_queue_size, sizeof(*rsp), + if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { + con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp), GFP_KERNEL, sess->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_rdma_done); if (!con->rsp_ius) return -ENOMEM; - con->queue_size = wr_queue_size; + con->queue_size = max_recv_wr; } + cq_size = max_send_wr + max_recv_wr; cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, - cq_vector, wr_queue_size, wr_queue_size, - IB_POLL_SOFTIRQ); + cq_vector, cq_size, max_send_wr, + max_recv_wr, IB_POLL_SOFTIRQ); /* * In case of error we do not bother to clean previous allocations, * since destroy_con_cq_qp() must be called. @@ -1657,6 +1662,7 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) .cid_num = cpu_to_le16(sess->s.con_num), .recon_cnt = cpu_to_le16(sess->s.recon_cnt), }; + msg.first_conn = sess->for_new_clt ? 
FIRST_CONN : 0; uuid_copy(&msg.sess_uuid, &sess->s.uuid); uuid_copy(&msg.paths_uuid, &clt->paths_uuid); @@ -1742,6 +1748,8 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, scnprintf(sess->hca_name, sizeof(sess->hca_name), sess->s.dev->ib_dev->name); sess->s.src_addr = con->c.cm_id->route.addr.src_addr; + /* set for_new_clt, to allow future reconnect on any path */ + sess->for_new_clt = 1; } return 0; @@ -1788,7 +1796,7 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, static void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) { - if (rtrs_clt_change_state(sess, RTRS_CLT_CLOSING)) + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL)) queue_work(rtrs_wq, &sess->close_work); if (wait) flush_work(&sess->close_work); @@ -2174,7 +2182,7 @@ static void rtrs_clt_close_work(struct work_struct *work) cancel_delayed_work_sync(&sess->reconnect_dwork); rtrs_clt_stop_and_destroy_conns(sess); - rtrs_clt_change_state(sess, RTRS_CLT_CLOSED); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL); } static int init_conns(struct rtrs_clt_sess *sess) @@ -2226,7 +2234,7 @@ static int init_conns(struct rtrs_clt_sess *sess) * doing rdma_resolve_addr(), switch to CONNECTION_ERR state * manually to keep reconnecting. 
*/ - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return err; } @@ -2243,7 +2251,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(sess->clt, "Sess info request send failed: %s\n", ib_wc_status_msg(wc->status)); - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return; } @@ -2367,7 +2375,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); - rtrs_clt_change_state(sess, state); + rtrs_clt_change_state_get_old(sess, state, NULL); } static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) @@ -2423,7 +2431,6 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) err = -ECONNRESET; else err = -ETIMEDOUT; - goto out; } out: @@ -2433,7 +2440,7 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); if (unlikely(err)) /* If we've never taken async path because of malloc problems */ - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return err; } @@ -2490,7 +2497,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) /* Stop everything */ rtrs_clt_stop_and_destroy_conns(sess); msleep(RTRS_RECONNECT_BACKOFF); - if (rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING)) { + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) { err = init_sess(sess); if (err) goto reconnect_again; @@ -2499,7 +2506,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) return; reconnect_again: - if (rtrs_clt_change_state(sess, RTRS_CLT_RECONNECTING)) { + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) { sess->stats->reconnects.fail_cnt++; delay_ms = clt->reconnect_delay_sec * 
1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, @@ -2565,11 +2572,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, clt->dev.class = rtrs_clt_dev_class; clt->dev.release = rtrs_clt_dev_release; err = dev_set_name(&clt->dev, "%s", sessname); - if (err) { - free_percpu(clt->pcpu_path); - kfree(clt); - return ERR_PTR(err); - } + if (err) + goto err; /* * Suppress user space notification until * sysfs files are created @@ -2577,44 +2581,35 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, dev_set_uevent_suppress(&clt->dev, true); err = device_register(&clt->dev); if (err) { - free_percpu(clt->pcpu_path); put_device(&clt->dev); - return ERR_PTR(err); + goto err; } clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); if (!clt->kobj_paths) { - free_percpu(clt->pcpu_path); - device_unregister(&clt->dev); - return NULL; + err = -ENOMEM; + goto err_dev; } err = rtrs_clt_create_sysfs_root_files(clt); if (err) { - free_percpu(clt->pcpu_path); kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); - device_unregister(&clt->dev); - return ERR_PTR(err); + goto err_dev; } dev_set_uevent_suppress(&clt->dev, false); kobject_uevent(&clt->dev.kobj, KOBJ_ADD); return clt; -} - -static void wait_for_inflight_permits(struct rtrs_clt *clt) -{ - if (clt->permits_map) { - size_t sz = clt->queue_depth; - - wait_event(clt->permits_wait, - find_first_bit(clt->permits_map, sz) >= sz); - } +err_dev: + device_unregister(&clt->dev); +err: + free_percpu(clt->pcpu_path); + kfree(clt); + return ERR_PTR(err); } static void free_clt(struct rtrs_clt *clt) { - wait_for_inflight_permits(clt); free_permits(clt); free_percpu(clt->pcpu_path); mutex_destroy(&clt->paths_ev_mutex); @@ -2672,6 +2667,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, err = PTR_ERR(sess); goto close_all_sess; } + if (!i) + sess->for_new_clt = 1; list_add_tail_rcu(&sess->s.entry, &clt->paths_list); err = init_sess(sess); @@ -2702,8 
+2699,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, rtrs_clt_close_conns(sess, true); kobject_put(&sess->kobj); } - rtrs_clt_destroy_sysfs_root_files(clt); - rtrs_clt_destroy_sysfs_root_folders(clt); + rtrs_clt_destroy_sysfs_root(clt); free_clt(clt); out: @@ -2720,8 +2716,7 @@ void rtrs_clt_close(struct rtrs_clt *clt) struct rtrs_clt_sess *sess, *tmp; /* Firstly forbid sysfs access */ - rtrs_clt_destroy_sysfs_root_files(clt); - rtrs_clt_destroy_sysfs_root_folders(clt); + rtrs_clt_destroy_sysfs_root(clt); /* Now it is safe to iterate over all paths without locks */ list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index b8dbd701b3cb..692bc83e1f09 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -143,6 +143,7 @@ struct rtrs_clt_sess { int max_send_sge; u32 flags; struct kobject kobj; + u8 for_new_clt; struct rtrs_clt_stats *stats; /* cache hca_port and hca_name to display in sysfs */ u8 hca_port; @@ -243,8 +244,7 @@ ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats, /* rtrs-clt-sysfs.c */ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt); -void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt); -void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt); +void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt); int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess); void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 3f2918671dbe..8caad0a2322b 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -188,7 +188,9 @@ struct rtrs_msg_conn_req { __le16 recon_cnt; uuid_t sess_uuid; uuid_t paths_uuid; - u8 reserved[12]; + u8 first_conn : 1; + u8 reserved_bits : 7; + u8 reserved[11]; }; /** @@ -303,8 +305,9 @@ int 
rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, struct ib_send_wr *head); int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, u16 cq_size, - u16 wr_queue_size, enum ib_poll_context poll_ctx); + u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_wr, u32 max_recv_wr, + enum ib_poll_context poll_ctx); void rtrs_cq_qp_destroy(struct rtrs_con *con); void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index d2edff3b8f0d..126a96e75c62 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -51,6 +51,8 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str)); rtrs_info(s, "disconnect for path %s requested\n", str); + /* first remove sysfs itself to avoid deadlock */ + sysfs_remove_file_self(&sess->kobj, &attr->attr); close_sess(sess); return count; @@ -181,6 +183,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) err = -ENOMEM; pr_err("kobject_create_and_add(): %d\n", err); device_del(&srv->dev); + put_device(&srv->dev); goto unlock; } dev_set_uevent_suppress(&srv->dev, false); @@ -206,6 +209,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) kobject_put(srv->kobj_paths); mutex_unlock(&srv->paths_mutex); device_del(&srv->dev); + put_device(&srv->dev); } else { mutex_unlock(&srv->paths_mutex); } @@ -234,6 +238,7 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess) &sess->kobj, "stats"); if (err) { rtrs_err(s, "kobject_init_and_add(): %d\n", err); + kobject_put(&sess->stats->kobj_stats); return err; } err = sysfs_create_group(&sess->stats->kobj_stats, @@ -290,8 +295,8 @@ int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess) sysfs_remove_group(&sess->kobj, 
&rtrs_srv_sess_attr_group); put_kobj: kobject_del(&sess->kobj); - kobject_put(&sess->kobj); destroy_root: + kobject_put(&sess->kobj); rtrs_srv_destroy_once_sysfs_root_folders(sess); return err; @@ -302,7 +307,7 @@ void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess) if (sess->kobj.state_in_sysfs) { kobject_del(&sess->stats->kobj_stats); kobject_put(&sess->stats->kobj_stats); - kobject_del(&sess->kobj); + sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); kobject_put(&sess->kobj); rtrs_srv_destroy_once_sysfs_root_folders(sess); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index c42fd470c4eb..d071809e3ed2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -222,7 +222,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id) dma_addr_t dma_addr = sess->dma_addr[id->msg_id]; struct rtrs_srv_mr *srv_mr; struct rtrs_srv *srv = sess->srv; - struct ib_send_wr inv_wr, imm_wr; + struct ib_send_wr inv_wr; + struct ib_rdma_wr imm_wr; struct ib_rdma_wr *wr = NULL; enum ib_send_flags flags; size_t sg_cnt; @@ -267,21 +268,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id) WARN_ON_ONCE(rkey != wr->rkey); wr->wr.opcode = IB_WR_RDMA_WRITE; + wr->wr.wr_cqe = &io_comp_cqe; wr->wr.ex.imm_data = 0; wr->wr.send_flags = 0; if (need_inval && always_invalidate) { wr->wr.next = &rwr.wr; rwr.wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else if (always_invalidate) { wr->wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr->wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr->wr.next = &imm_wr; + wr->wr.next = &imm_wr.wr; } /* * From time to time we have to post signaled sends, @@ -294,16 +296,18 @@ static int rdma_write_sg(struct rtrs_srv_op *id) inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.send_flags 
= 0; inv_wr.ex.invalidate_rkey = rkey; } - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct rtrs_msg_rkey_rsp *msg; srv_mr = &sess->mrs[id->msg_id]; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.mr = srv_mr->mr; rwr.wr.send_flags = 0; @@ -318,22 +322,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id) list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, + imm_wr.wr.send_flags = flags; + imm_wr.wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, 0, need_inval)); - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.wr_cqe = &io_comp_cqe; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr, offset, DMA_BIDIRECTIONAL); @@ -360,7 +364,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, { struct rtrs_sess *s = con->c.sess; struct rtrs_srv_sess *sess = to_srv_sess(s); - struct ib_send_wr inv_wr, imm_wr, *wr = NULL; + struct ib_send_wr inv_wr, *wr = NULL; + struct ib_rdma_wr imm_wr; struct ib_reg_wr rwr; struct rtrs_srv *srv = sess->srv; struct rtrs_srv_mr *srv_mr; @@ -379,6 +384,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, if (need_inval) { if (likely(sg_cnt)) { + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; @@ -396,15 +402,15 
@@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, if (need_inval && always_invalidate) { wr = &inv_wr; inv_wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (always_invalidate) { wr = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr = &imm_wr; + wr = &imm_wr.wr; } /* * From time to time we have to post signalled sends, @@ -413,14 +419,15 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ? 0 : IB_SEND_SIGNALED; imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct ib_sge list; struct rtrs_msg_rkey_rsp *msg; srv_mr = &sess->mrs[id->msg_id]; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.wr.send_flags = 0; rwr.mr = srv_mr->mr; @@ -435,21 +442,21 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.send_flags = flags; + imm_wr.wr.wr_cqe = &io_comp_cqe; - imm_wr.ex.imm_data = cpu_to_be32(imm); + imm_wr.wr.ex.imm_data = cpu_to_be32(imm); 
err = ib_post_send(id->con->c.qp, wr, NULL); if (unlikely(err)) @@ -651,7 +658,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) if (!srv_mr->iu) { err = -ENOMEM; rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err); - goto free_iu; + goto dereg_mr; } } /* Eventually dma addr for each chunk can be cached */ @@ -667,7 +674,6 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) srv_mr = &sess->mrs[mri]; sgt = &srv_mr->sgt; mr = srv_mr->mr; -free_iu: rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); @@ -814,7 +820,7 @@ static int process_info_req(struct rtrs_srv_con *con, rwr[mri].wr.opcode = IB_WR_REG_MR; rwr[mri].wr.wr_cqe = &local_reg_cqe; rwr[mri].wr.num_sge = 0; - rwr[mri].wr.send_flags = mri ? 0 : IB_SEND_SIGNALED; + rwr[mri].wr.send_flags = 0; rwr[mri].mr = mr; rwr[mri].key = mr->rkey; rwr[mri].access = (IB_ACCESS_LOCAL_WRITE | @@ -1238,7 +1244,6 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_SEND: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb */ atomic_add(srv->queue_depth, &con->sq_wr_avail); @@ -1328,7 +1333,8 @@ static void free_srv(struct rtrs_srv *srv) } static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) + const uuid_t *paths_uuid, + bool first_conn) { struct rtrs_srv *srv; int i; @@ -1341,13 +1347,18 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, return srv; } } + mutex_unlock(&ctx->srv_mutex); + /* + * If this request is not the first connection request from the + * client for this session then fail and return error. 
+ */ + if (!first_conn) + return ERR_PTR(-ENXIO); /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) { - mutex_unlock(&ctx->srv_mutex); - return NULL; - } + if (!srv) + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); @@ -1357,8 +1368,6 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; - list_add(&srv->ctx_list, &ctx->srv_list); - mutex_unlock(&ctx->srv_mutex); srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1371,6 +1380,9 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, goto err_free_chunks; } refcount_set(&srv->refcount, 1); + mutex_lock(&ctx->srv_mutex); + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); return srv; @@ -1381,7 +1393,7 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, err_free_srv: kfree(srv); - return NULL; + return ERR_PTR(-ENOMEM); } static void put_srv(struct rtrs_srv *srv) @@ -1461,10 +1473,12 @@ static bool __is_path_w_addr_exists(struct rtrs_srv *srv, static void free_sess(struct rtrs_srv_sess *sess) { - if (sess->kobj.state_in_sysfs) + if (sess->kobj.state_in_sysfs) { + kobject_del(&sess->kobj); kobject_put(&sess->kobj); - else + } else { kfree(sess); + } } static void rtrs_srv_close_work(struct work_struct *work) @@ -1586,7 +1600,7 @@ static int create_con(struct rtrs_srv_sess *sess, struct rtrs_sess *s = &sess->s; struct rtrs_srv_con *con; - u16 cq_size, wr_queue_size; + u32 cq_size, wr_queue_size; int err, cq_vector; con = kzalloc(sizeof(*con), GFP_KERNEL); @@ -1600,7 +1614,7 @@ static int create_con(struct rtrs_srv_sess *sess, con->c.cm_id = cm_id; con->c.sess = &sess->s; con->c.cid = cid; - atomic_set(&con->wr_cnt, 0); + atomic_set(&con->wr_cnt, 1); if (con->c.cid == 0) { /* @@ -1630,7 +1644,8 @@ static int create_con(struct rtrs_srv_sess *sess, /* TODO: 
SOFTIRQ can be faster, but be careful with softirq context */ err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size, - wr_queue_size, IB_POLL_WORKQUEUE); + wr_queue_size, wr_queue_size, + IB_POLL_WORKQUEUE); if (err) { rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err); goto free_con; @@ -1781,13 +1796,9 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, goto reject_w_econnreset; } recon_cnt = le16_to_cpu(msg->recon_cnt); - srv = get_or_create_srv(ctx, &msg->paths_uuid); - /* - * "refcount == 0" happens if a previous thread calls get_or_create_srv - * allocate srv, but chunks of srv are not allocated yet. - */ - if (!srv || refcount_read(&srv->refcount) == 0) { - err = -ENOMEM; + srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); + if (IS_ERR(srv)) { + err = PTR_ERR(srv); goto reject_w_err; } mutex_lock(&srv->paths_mutex); @@ -1862,8 +1873,8 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, return rtrs_rdma_do_reject(cm_id, -ECONNRESET); close_and_return_err: - close_sess(sess); mutex_unlock(&srv->paths_mutex); + close_sess(sess); return err; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 2e3a849e0a77..d13aff0aa816 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -182,16 +182,16 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, u32 imm_data, enum ib_send_flags flags, struct ib_send_wr *head) { - struct ib_send_wr wr; + struct ib_rdma_wr wr; - wr = (struct ib_send_wr) { - .wr_cqe = cqe, - .send_flags = flags, - .opcode = IB_WR_RDMA_WRITE_WITH_IMM, - .ex.imm_data = cpu_to_be32(imm_data), + wr = (struct ib_rdma_wr) { + .wr.wr_cqe = cqe, + .wr.send_flags = flags, + .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM, + .wr.ex.imm_data = cpu_to_be32(imm_data), }; - return rtrs_post_send(con->qp, head, &wr); + return rtrs_post_send(con->qp, head, &wr.wr); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); @@ -231,14 +231,14 
@@ static int create_cq(struct rtrs_con *con, int cq_vector, u16 cq_size, } static int create_qp(struct rtrs_con *con, struct ib_pd *pd, - u16 wr_queue_size, u32 max_sge) + u32 max_send_wr, u32 max_recv_wr, u32 max_sge) { struct ib_qp_init_attr init_attr = {NULL}; struct rdma_cm_id *cm_id = con->cm_id; int ret; - init_attr.cap.max_send_wr = wr_queue_size; - init_attr.cap.max_recv_wr = wr_queue_size; + init_attr.cap.max_send_wr = max_send_wr; + init_attr.cap.max_recv_wr = max_recv_wr; init_attr.cap.max_recv_sge = 1; init_attr.event_handler = qp_event_handler; init_attr.qp_context = con; @@ -260,8 +260,9 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd, } int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, u16 cq_size, - u16 wr_queue_size, enum ib_poll_context poll_ctx) + u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_wr, u32 max_recv_wr, + enum ib_poll_context poll_ctx) { int err; @@ -269,7 +270,8 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, if (err) return err; - err = create_qp(con, sess->dev->ib_pd, wr_queue_size, max_send_sge); + err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, + max_send_sge); if (err) { ib_free_cq(con->cq); con->cq = NULL; @@ -308,7 +310,7 @@ void rtrs_send_hb_ack(struct rtrs_sess *sess) imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0); err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, - IB_SEND_SIGNALED, NULL); + 0, NULL); if (err) { sess->hb_err_handler(usr_con); return; @@ -337,7 +339,7 @@ static void hb_work(struct work_struct *work) } imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0); err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, - IB_SEND_SIGNALED, NULL); + 0, NULL); if (err) { sess->hb_err_handler(usr_con); return; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5492b66a8153..31f8aa2c40ed 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ 
b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct device *dev, struct srp_rdma_ch *ch; struct srp_device *srp_dev = host->srp_dev; struct ib_device *ibdev = srp_dev->dev; - int ret, node_idx, node, cpu, i; + int ret, i, ch_idx; unsigned int max_sectors_per_mr, mr_per_cmd = 0; bool multich = false; uint32_t max_iu_len; @@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct device *dev, goto out; ret = -ENOMEM; - if (target->ch_count == 0) + if (target->ch_count == 0) { target->ch_count = - max_t(unsigned int, num_online_nodes(), - min(ch_count ?: - min(4 * num_online_nodes(), - ibdev->num_comp_vectors), - num_online_cpus())); + min(ch_count ?: + max(4 * num_online_nodes(), + ibdev->num_comp_vectors), + num_online_cpus()); + } + target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) goto out; - node_idx = 0; - for_each_online_node(node) { - const int ch_start = (node_idx * target->ch_count / - num_online_nodes()); - const int ch_end = ((node_idx + 1) * target->ch_count / - num_online_nodes()); - const int cv_start = node_idx * ibdev->num_comp_vectors / - num_online_nodes(); - const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / - num_online_nodes(); - int cpu_idx = 0; + for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) { + ch = &target->ch[ch_idx]; + ch->target = target; + ch->comp_vector = ch_idx % ibdev->num_comp_vectors; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->free_tx); + ret = srp_new_cm_id(ch); + if (ret) + goto err_disconnect; - for_each_online_cpu(cpu) { - if (cpu_to_node(cpu) != node) - continue; - if (ch_start + cpu_idx >= ch_end) - continue; - ch = &target->ch[ch_start + cpu_idx]; - ch->target = target; - ch->comp_vector = cv_start == cv_end ? 
cv_start : - cv_start + cpu_idx % (cv_end - cv_start); - spin_lock_init(&ch->lock); - INIT_LIST_HEAD(&ch->free_tx); - ret = srp_new_cm_id(ch); - if (ret) - goto err_disconnect; + ret = srp_create_ch_ib(ch); + if (ret) + goto err_disconnect; - ret = srp_create_ch_ib(ch); - if (ret) - goto err_disconnect; + ret = srp_alloc_req_data(ch); + if (ret) + goto err_disconnect; - ret = srp_alloc_req_data(ch); - if (ret) - goto err_disconnect; + ret = srp_connect_ch(ch, max_iu_len, multich); + if (ret) { + char dst[64]; - ret = srp_connect_ch(ch, max_iu_len, multich); - if (ret) { - char dst[64]; - - if (target->using_rdma_cm) - snprintf(dst, sizeof(dst), "%pIS", - &target->rdma_cm.dst); - else - snprintf(dst, sizeof(dst), "%pI6", - target->ib_cm.orig_dgid.raw); - shost_printk(KERN_ERR, target->scsi_host, - PFX "Connection %d/%d to %s failed\n", - ch_start + cpu_idx, - target->ch_count, dst); - if (node_idx == 0 && cpu_idx == 0) { - goto free_ch; - } else { - srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); - target->ch_count = ch - target->ch; - goto connected; - } + if (target->using_rdma_cm) + snprintf(dst, sizeof(dst), "%pIS", + &target->rdma_cm.dst); + else + snprintf(dst, sizeof(dst), "%pI6", + target->ib_cm.orig_dgid.raw); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection %d/%d to %s failed\n", + ch_idx, + target->ch_count, dst); + if (ch_idx == 0) { + goto free_ch; + } else { + srp_free_ch_ib(target, ch); + srp_free_req_data(target, ch); + target->ch_count = ch - target->ch; + goto connected; } - - multich = true; - cpu_idx++; } - node_idx++; + multich = true; } connected: diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 626b97d0dd21..a3cbafb603f5 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_DMA + select IOMMU_IO_PGTABLE depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable 
support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index dc5a2fa4fd37..a935f8f4b974 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index b4adab698563..026ce7f8d993 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -36,6 +36,7 @@ extern void amd_iommu_disable(void); extern int amd_iommu_reenable(int); extern int amd_iommu_enable_faulting(void); extern int amd_iommu_guest_ir; +extern enum io_pgtable_fmt amd_iommu_pgtable; /* IOMMUv2 specific functions */ struct iommu_domain; @@ -56,6 +57,10 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); +extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); +extern void amd_iommu_domain_update(struct protection_domain *domain); +extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); +extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); @@ -99,6 +104,21 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) return phys_to_virt(__sme_clr(paddr)); } +static inline +void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) +{ + atomic64_set(&domain->iop.pt_root, root); + domain->iop.root = (u64 *)(root & PAGE_MASK); + domain->iop.mode = root & 7; /* 
lowest 3 bits encode pgtable mode */ +} + +static inline +void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) +{ + amd_iommu_domain_set_pt_root(domain, 0); +} + + extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev); @@ -111,4 +131,6 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif +extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, + u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 1a0495dd5fcb..6937e3674a16 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Maximum number of IOMMUs supported @@ -252,6 +253,19 @@ #define GA_GUEST_NR 0x1 +#define IOMMU_IN_ADDR_BIT_SIZE 52 +#define IOMMU_OUT_ADDR_BIT_SIZE 52 + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. 
+ * + * 512GB Pages are not supported due to a hardware bug + */ +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) @@ -470,6 +484,27 @@ struct amd_irte_ops; #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define io_pgtable_to_data(x) \ + container_of((x), struct amd_io_pgtable, iop) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +#define io_pgtable_ops_to_domain(x) \ + container_of(io_pgtable_ops_to_data(x), \ + struct protection_domain, iop) + +#define io_pgtable_cfg_to_data(x) \ + container_of((x), struct amd_io_pgtable, pgtbl_cfg) + +struct amd_io_pgtable { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable iop; + int mode; + u64 *root; + atomic64_t pt_root; /* pgtable root and pgtable mode */ +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -478,9 +513,9 @@ struct protection_domain { struct list_head dev_list; /* List of all devices in this domain */ struct iommu_domain domain; /* generic domain handle used by iommu core code */ + struct amd_io_pgtable iop; spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx; /* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ @@ -488,12 +523,6 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; -/* For decocded pt_root */ -struct domain_pgtable { - int mode; - u64 *root; -}; - /* * Structure where we save information about one hardware AMD IOMMU in the * system. 
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 83d8ab2aed9f..9126efcbaf2c 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -147,6 +148,8 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; +enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; + int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; @@ -254,6 +257,8 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write); static bool amd_iommu_pre_enabled = true; @@ -1712,13 +1717,11 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } -static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); - -static void init_iommu_perf_ctr(struct amd_iommu *iommu) +static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) { + int retry; struct pci_dev *pdev = iommu->dev; - u64 val = 0xabcd, val2 = 0, save_reg = 0; + u64 val = 0xabcd, val2 = 0, save_reg, save_src; if (!iommu_feature(iommu, FEATURE_PC)) return; @@ -1726,17 +1729,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* save the value to restore, if writable */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) || + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false)) + goto pc_false; + + /* + * Disable power gating by programing the performance counter + * source to 20 (i.e. 
counts the reads and writes from/to IOMMU + * Reserved Register [MMIO Offset 1FF8h] that are ignored.), + * which never get incremented during this init phase. + * (Note: The event is also deprecated.) + */ + val = 20; + if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true)) goto pc_false; /* Check if the performance counters can be written to */ - if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || - (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || - (val != val2)) - goto pc_false; + val = 0xabcd; + for (retry = 5; retry; retry--) { + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) || + iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) || + val2) + break; + + /* Wait about 20 msec for power gating to disable and retry. */ + msleep(20); + } /* restore */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) || + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true)) + goto pc_false; + + if (val != val2) goto pc_false; pci_info(pdev, "IOMMU performance counters supported\n"); @@ -1928,7 +1953,7 @@ static void print_iommu_info(void) struct pci_dev *pdev = iommu->dev; int i; - pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr); + pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { pci_info(pdev, "Extended features (%#llx):", @@ -1956,7 +1981,7 @@ static void print_iommu_info(void) static int __init amd_iommu_init_pci(void) { struct amd_iommu *iommu; - int ret = 0; + int ret; for_each_iommu(iommu) { ret = iommu_init_pci(iommu); @@ -2687,8 +2712,8 @@ static void __init ivinfo_init(void *ivrs) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; + int i, remap_cache_sz, ret; acpi_status status; - int i, remap_cache_sz, ret = 0; u32 pci_id; if (!amd_iommu_detected) @@ -2832,7 +2857,6 @@ static int __init early_amd_iommu_init(void) out: /* Don't leak any ACPI memory */ acpi_put_table(ivrs_base); - 
ivrs_base = NULL; return ret; } diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c new file mode 100644 index 000000000000..1c4961e05c12 --- /dev/null +++ b/drivers/iommu/amd/io_pgtable.c @@ -0,0 +1,558 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table allocator. + * + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +static void v1_tlb_flush_all(void *cookie) +{ +} + +static void v1_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ +} + +static const struct iommu_flush_ops v1_flush_ops = { + .tlb_flush_all = v1_tlb_flush_all, + .tlb_flush_walk = v1_tlb_flush_walk, + .tlb_add_page = v1_tlb_add_page, +}; + +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, + unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + +/**************************************************************************** + * + * The functions below are used to create the page table mappings for + * unity mapped regions.
+ * + ****************************************************************************/ + +static void free_page_list(struct page *freelist) +{ + while (freelist != NULL) { + unsigned long p = (unsigned long)page_address(freelist); + + freelist = freelist->freelist; + free_page(p); + } +} + +static struct page *free_pt_page(unsigned long pt, struct page *freelist) +{ + struct page *p = virt_to_page((void *)pt); + + p->freelist = freelist; + + return p; +} + +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ + freelist = FN(p, freelist); \ + } \ + \ + return free_pt_page((unsigned long)pt, freelist); \ +} + +DEFINE_FREE_PT_FN(l2, free_pt_page) +DEFINE_FREE_PT_FN(l3, free_pt_l2) +DEFINE_FREE_PT_FN(l4, free_pt_l3) +DEFINE_FREE_PT_FN(l5, free_pt_l4) +DEFINE_FREE_PT_FN(l6, free_pt_l5) + +static struct page *free_sub_pt(unsigned long root, int mode, + struct page *freelist) +{ + switch (mode) { + case PAGE_MODE_NONE: + case PAGE_MODE_7_LEVEL: + break; + case PAGE_MODE_1_LEVEL: + freelist = free_pt_page(root, freelist); + break; + case PAGE_MODE_2_LEVEL: + freelist = free_pt_l2(root, freelist); + break; + case PAGE_MODE_3_LEVEL: + freelist = free_pt_l3(root, freelist); + break; + case PAGE_MODE_4_LEVEL: + freelist = free_pt_l4(root, freelist); + break; + case PAGE_MODE_5_LEVEL: + freelist = free_pt_l5(root, freelist); + break; + case PAGE_MODE_6_LEVEL: + freelist = free_pt_l6(root, freelist); + break; + default: + BUG(); + } + + return freelist; +} + +void amd_iommu_domain_set_pgtable(struct protection_domain *domain, + u64 *root, int mode) +{ + u64 pt_root; + + /* lowest 3 
bits encode pgtable mode */ + pt_root = mode & 7; + pt_root |= (u64)root; + + amd_iommu_domain_set_pt_root(domain, pt_root); +} + +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + unsigned long address, + gfp_t gfp) +{ + unsigned long flags; + bool ret = true; + u64 *pte; + + spin_lock_irqsave(&domain->lock, flags); + + if (address <= PM_LEVEL_SIZE(domain->iop.mode)) + goto out; + + ret = false; + if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) + goto out; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + goto out; + + *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); + + domain->iop.root = pte; + domain->iop.mode += 1; + amd_iommu_update_and_flush_device_table(domain); + amd_iommu_domain_flush_complete(domain); + + /* + * Device Table needs to be updated and flushed before the new root can + * be published. + */ + amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); + + ret = true; + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} + +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + unsigned long page_size, + u64 **pte_page, + gfp_t gfp, + bool *updated) +{ + int level, end_lvl; + u64 *pte, *page; + + BUG_ON(!is_power_of_2(page_size)); + + while (address > PM_LEVEL_SIZE(domain->iop.mode)) { + /* + * Return an error if there is no memory to update the + * page-table. 
+ */ + if (!increase_address_space(domain, address, gfp)) + return NULL; + } + + + level = domain->iop.mode - 1; + pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)]; + address = PAGE_SIZE_ALIGN(address, page_size); + end_lvl = PAGE_SIZE_LEVEL(page_size); + + while (level > end_lvl) { + u64 __pte, __npte; + int pte_level; + + __pte = *pte; + pte_level = PM_PTE_LEVEL(__pte); + + /* + * If we replace a series of large PTEs, we need + * to tear down all of them. + */ + if (IOMMU_PTE_PRESENT(__pte) && + pte_level == PAGE_MODE_7_LEVEL) { + unsigned long count, i; + u64 *lpte; + + lpte = first_pte_l7(pte, NULL, &count); + + /* + * Unmap the replicated PTEs that still match the + * original large mapping + */ + for (i = 0; i < count; ++i) + cmpxchg64(&lpte[i], __pte, 0ULL); + + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte) || + pte_level == PAGE_MODE_NONE) { + page = (u64 *)get_zeroed_page(gfp); + + if (!page) + return NULL; + + __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); + + /* pte could have been changed somewhere. */ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_page((unsigned long)page); + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; + + continue; + } + + /* No level skipping support yet */ + if (pte_level != level) + return NULL; + + level -= 1; + + pte = IOMMU_PTE_PAGE(__pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. 
+ */ +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long address, + unsigned long *page_size) +{ + int level; + u64 *pte; + + *page_size = 0; + + if (address > PM_LEVEL_SIZE(pgtable->mode)) + return NULL; + + level = pgtable->mode - 1; + pte = &pgtable->root[PM_LEVEL_INDEX(level, address)]; + *page_size = PTE_LEVEL_PAGE_SIZE(level); + + while (level > 0) { + + /* Not Present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Large PTE */ + if (PM_PTE_LEVEL(*pte) == 7 || + PM_PTE_LEVEL(*pte) == 0) + break; + + /* No level skipping support yet */ + if (PM_PTE_LEVEL(*pte) != level) + return NULL; + + level -= 1; + + /* Walk to the next level */ + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; + *page_size = PTE_LEVEL_PAGE_SIZE(level); + } + + /* + * If we have a series of large PTEs, make + * sure to return a pointer to the first one. + */ + if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) + pte = first_pte_l7(pte, page_size, NULL); + + return pte; +} + +static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) +{ + unsigned long pt; + int mode; + + while (cmpxchg64(pte, pteval, 0) != pteval) { + pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); + pteval = *pte; + } + + if (!IOMMU_PTE_PRESENT(pteval)) + return freelist; + + pt = (unsigned long)IOMMU_PTE_PAGE(pteval); + mode = IOMMU_PTE_MODE(pteval); + + return free_sub_pt(pt, mode, freelist); +} + +/* + * Generic mapping function. It maps a physical address into a DMA + * address space. It allocates the page table pages if necessary. + * In the future it can be extended to a generic mapping function + * supporting all features of AMD IOMMU page tables like level skipping + * and full 64 bit address spaces.
+ */ +static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); + struct page *freelist = NULL; + bool updated = false; + u64 __pte, *pte; + int ret, i, count; + + BUG_ON(!IS_ALIGNED(iova, size)); + BUG_ON(!IS_ALIGNED(paddr, size)); + + ret = -EINVAL; + if (!(prot & IOMMU_PROT_MASK)) + goto out; + + count = PAGE_SIZE_PTE_COUNT(size); + pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); + + ret = -ENOMEM; + if (!pte) + goto out; + + for (i = 0; i < count; ++i) + freelist = free_clear_pte(&pte[i], pte[i], freelist); + + if (freelist != NULL) + updated = true; + + if (count > 1) { + __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; + } else + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; + + for (i = 0; i < count; ++i) + pte[i] = __pte; + + ret = 0; + +out: + if (updated) { + unsigned long flags; + + spin_lock_irqsave(&dom->lock, flags); + /* + * Flush domain TLB(s) and wait for completion. Any Device-Table + * Updates and flushing already happened in + * increase_address_space(). 
+ */ + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); + spin_unlock_irqrestore(&dom->lock, flags); + } + + /* Everything flushed out, free pages now */ + free_page_list(freelist); + + return ret; +} + +static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, + unsigned long iova, + size_t size, + struct iommu_iotlb_gather *gather) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long long unmapped; + unsigned long unmap_size; + u64 *pte; + + BUG_ON(!is_power_of_2(size)); + + unmapped = 0; + + while (unmapped < size) { + pte = fetch_pte(pgtable, iova, &unmap_size); + if (pte) { + int i, count; + + count = PAGE_SIZE_PTE_COUNT(unmap_size); + for (i = 0; i < count; i++) + pte[i] = 0ULL; + } + + iova = (iova & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + BUG_ON(unmapped && !is_power_of_2(unmapped)); + + return unmapped; +} + +static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + if (pgtable->mode == PAGE_MODE_NONE) + return iova; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + +/* + * ---------------------------------------------------- + */ +static void v1_free_pgtable(struct io_pgtable *iop) +{ + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + struct protection_domain *dom; + struct page *freelist = NULL; + unsigned long root; + + if (pgtable->mode == PAGE_MODE_NONE) + return; + + dom = container_of(pgtable, struct protection_domain, iop); + + /* Update data structure */ + amd_iommu_domain_clr_pt_root(dom); + + /* Make changes visible to IOMMUs */ + amd_iommu_domain_update(dom); + + /* 
Page-table is not visible to IOMMU anymore, so free it */ + BUG_ON(pgtable->mode < PAGE_MODE_NONE || + pgtable->mode > PAGE_MODE_6_LEVEL); + + root = (unsigned long)pgtable->root; + freelist = free_sub_pt(root, pgtable->mode, freelist); + + free_page_list(freelist); +} + +static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES, + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->tlb = &v1_flush_ops; + + pgtable->iop.ops.map = iommu_v1_map_page; + pgtable->iop.ops.unmap = iommu_v1_unmap_page; + pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; + + return &pgtable->iop; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = { + .alloc = v1_alloc_pgtable, + .free = v1_free_pgtable, +}; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f0adbc48fd17..a69a8b573e40 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -57,16 +58,6 @@ #define HT_RANGE_START (0xfd00000000ULL) #define HT_RANGE_END (0xffffffffffULL) -/* - * This bitmap is used to advertise the page sizes our hardware support - * to the IOMMU core, which will then use this information to split - * physically contiguous memory regions it is mapping into page sizes - * that we support. 
- * - * 512GB Pages are not supported due to a hardware bug - */ -#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) - #define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL static DEFINE_SPINLOCK(pd_bitmap_lock); @@ -96,10 +87,7 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; -static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable); /**************************************************************************** * @@ -151,37 +139,6 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } -static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, - struct domain_pgtable *pgtable) -{ - u64 pt_root = atomic64_read(&domain->pt_root); - - pgtable->root = (u64 *)(pt_root & PAGE_MASK); - pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ -} - -static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) -{ - atomic64_set(&domain->pt_root, root); -} - -static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) -{ - amd_iommu_domain_set_pt_root(domain, 0); -} - -static void amd_iommu_domain_set_pgtable(struct protection_domain *domain, - u64 *root, int mode) -{ - u64 pt_root; - - /* lowest 3 bits encode pgtable mode */ - pt_root = mode & 7; - pt_root |= (u64)root; - - amd_iommu_domain_set_pt_root(domain, pt_root); -} - static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -437,29 +394,6 @@ static void amd_iommu_uninit_device(struct device *dev) */ } -/* - * Helper function to get the first pte of a large mapping - */ -static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, - unsigned long *count) -{ - unsigned long pte_mask, pg_size, cnt; - u64 *fpte; - - pg_size = PTE_PAGE_SIZE(*pte); - cnt = 
PAGE_SIZE_PTE_COUNT(pg_size); - pte_mask = ~((cnt << 3) - 1); - fpte = (u64 *)(((unsigned long)pte) & pte_mask); - - if (page_size) - *page_size = pg_size; - - if (count) - *count = cnt; - - return fpte; -} - /**************************************************************************** * * Interrupt handling functions @@ -1335,12 +1269,12 @@ static void domain_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void domain_flush_tlb_pde(struct protection_domain *domain) +void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) { __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } -static void domain_flush_complete(struct protection_domain *domain) +void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1365,7 +1299,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1382,443 +1316,6 @@ static void domain_flush_devices(struct protection_domain *domain) device_flush_dte(dev_data); } -/**************************************************************************** - * - * The functions below are used the create the page table mappings for - * unity mapped regions. 
- * - ****************************************************************************/ - -static void free_page_list(struct page *freelist) -{ - while (freelist != NULL) { - unsigned long p = (unsigned long)page_address(freelist); - freelist = freelist->freelist; - free_page(p); - } -} - -static struct page *free_pt_page(unsigned long pt, struct page *freelist) -{ - struct page *p = virt_to_page((void *)pt); - - p->freelist = freelist; - - return p; -} - -#define DEFINE_FREE_PT_FN(LVL, FN) \ -static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ -{ \ - unsigned long p; \ - u64 *pt; \ - int i; \ - \ - pt = (u64 *)__pt; \ - \ - for (i = 0; i < 512; ++i) { \ - /* PTE present? */ \ - if (!IOMMU_PTE_PRESENT(pt[i])) \ - continue; \ - \ - /* Large PTE? */ \ - if (PM_PTE_LEVEL(pt[i]) == 0 || \ - PM_PTE_LEVEL(pt[i]) == 7) \ - continue; \ - \ - p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ - freelist = FN(p, freelist); \ - } \ - \ - return free_pt_page((unsigned long)pt, freelist); \ -} - -DEFINE_FREE_PT_FN(l2, free_pt_page) -DEFINE_FREE_PT_FN(l3, free_pt_l2) -DEFINE_FREE_PT_FN(l4, free_pt_l3) -DEFINE_FREE_PT_FN(l5, free_pt_l4) -DEFINE_FREE_PT_FN(l6, free_pt_l5) - -static struct page *free_sub_pt(unsigned long root, int mode, - struct page *freelist) -{ - switch (mode) { - case PAGE_MODE_NONE: - case PAGE_MODE_7_LEVEL: - break; - case PAGE_MODE_1_LEVEL: - freelist = free_pt_page(root, freelist); - break; - case PAGE_MODE_2_LEVEL: - freelist = free_pt_l2(root, freelist); - break; - case PAGE_MODE_3_LEVEL: - freelist = free_pt_l3(root, freelist); - break; - case PAGE_MODE_4_LEVEL: - freelist = free_pt_l4(root, freelist); - break; - case PAGE_MODE_5_LEVEL: - freelist = free_pt_l5(root, freelist); - break; - case PAGE_MODE_6_LEVEL: - freelist = free_pt_l6(root, freelist); - break; - default: - BUG(); - } - - return freelist; -} - -static void free_pagetable(struct domain_pgtable *pgtable) -{ - struct page *freelist = NULL; - unsigned long root; - - if 
(pgtable->mode == PAGE_MODE_NONE) - return; - - BUG_ON(pgtable->mode < PAGE_MODE_NONE || - pgtable->mode > PAGE_MODE_6_LEVEL); - - root = (unsigned long)pgtable->root; - freelist = free_sub_pt(root, pgtable->mode, freelist); - - free_page_list(freelist); -} - -/* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, - unsigned long address, - gfp_t gfp) -{ - struct domain_pgtable pgtable; - unsigned long flags; - bool ret = true; - u64 *pte; - - spin_lock_irqsave(&domain->lock, flags); - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address <= PM_LEVEL_SIZE(pgtable.mode)) - goto out; - - ret = false; - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) - goto out; - - pte = (void *)get_zeroed_page(gfp); - if (!pte) - goto out; - - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); - - pgtable.root = pte; - pgtable.mode += 1; - update_and_flush_device_table(domain, &pgtable); - domain_flush_complete(domain); - - /* - * Device Table needs to be updated and flushed before the new root can - * be published. - */ - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); - - ret = true; - -out: - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, - unsigned long page_size, - u64 **pte_page, - gfp_t gfp, - bool *updated) -{ - struct domain_pgtable pgtable; - int level, end_lvl; - u64 *pte, *page; - - BUG_ON(!is_power_of_2(page_size)); - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - while (address > PM_LEVEL_SIZE(pgtable.mode)) { - /* - * Return an error if there is no memory to update the - * page-table. 
- */ - if (!increase_address_space(domain, address, gfp)) - return NULL; - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); - } - - - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; - address = PAGE_SIZE_ALIGN(address, page_size); - end_lvl = PAGE_SIZE_LEVEL(page_size); - - while (level > end_lvl) { - u64 __pte, __npte; - int pte_level; - - __pte = *pte; - pte_level = PM_PTE_LEVEL(__pte); - - /* - * If we replace a series of large PTEs, we need - * to tear down all of them. - */ - if (IOMMU_PTE_PRESENT(__pte) && - pte_level == PAGE_MODE_7_LEVEL) { - unsigned long count, i; - u64 *lpte; - - lpte = first_pte_l7(pte, NULL, &count); - - /* - * Unmap the replicated PTEs that still match the - * original large mapping - */ - for (i = 0; i < count; ++i) - cmpxchg64(&lpte[i], __pte, 0ULL); - - *updated = true; - continue; - } - - if (!IOMMU_PTE_PRESENT(__pte) || - pte_level == PAGE_MODE_NONE) { - page = (u64 *)get_zeroed_page(gfp); - - if (!page) - return NULL; - - __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); - - /* pte could have been changed somewhere. */ - if (cmpxchg64(pte, __pte, __npte) != __pte) - free_page((unsigned long)page); - else if (IOMMU_PTE_PRESENT(__pte)) - *updated = true; - - continue; - } - - /* No level skipping support yet */ - if (pte_level != level) - return NULL; - - level -= 1; - - pte = IOMMU_PTE_PAGE(__pte); - - if (pte_page && level == end_lvl) - *pte_page = pte; - - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - -/* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. 
- */ -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, - unsigned long *page_size) -{ - struct domain_pgtable pgtable; - int level; - u64 *pte; - - *page_size = 0; - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address > PM_LEVEL_SIZE(pgtable.mode)) - return NULL; - - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; - *page_size = PTE_LEVEL_PAGE_SIZE(level); - - while (level > 0) { - - /* Not Present */ - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - /* Large PTE */ - if (PM_PTE_LEVEL(*pte) == 7 || - PM_PTE_LEVEL(*pte) == 0) - break; - - /* No level skipping support yet */ - if (PM_PTE_LEVEL(*pte) != level) - return NULL; - - level -= 1; - - /* Walk to the next level */ - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[PM_LEVEL_INDEX(level, address)]; - *page_size = PTE_LEVEL_PAGE_SIZE(level); - } - - /* - * If we have a series of large PTEs, make - * sure to return a pointer to the first one. - */ - if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) - pte = first_pte_l7(pte, page_size, NULL); - - return pte; -} - -static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) -{ - unsigned long pt; - int mode; - - while (cmpxchg64(pte, pteval, 0) != pteval) { - pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); - pteval = *pte; - } - - if (!IOMMU_PTE_PRESENT(pteval)) - return freelist; - - pt = (unsigned long)IOMMU_PTE_PAGE(pteval); - mode = IOMMU_PTE_MODE(pteval); - - return free_sub_pt(pt, mode, freelist); -} - -/* - * Generic mapping functions. It maps a physical address into a DMA - * address space. It allocates the page table pages if necessary. - * In the future it can be extended to a generic mapping function - * supporting all features of AMD IOMMU page tables like level skipping - * and full 64 bit address spaces. 
- */ -static int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, - int prot, - gfp_t gfp) -{ - struct page *freelist = NULL; - bool updated = false; - u64 __pte, *pte; - int ret, i, count; - - BUG_ON(!IS_ALIGNED(bus_addr, page_size)); - BUG_ON(!IS_ALIGNED(phys_addr, page_size)); - - ret = -EINVAL; - if (!(prot & IOMMU_PROT_MASK)) - goto out; - - count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); - - ret = -ENOMEM; - if (!pte) - goto out; - - for (i = 0; i < count; ++i) - freelist = free_clear_pte(&pte[i], pte[i], freelist); - - if (freelist != NULL) - updated = true; - - if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; - } else - __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; - - if (prot & IOMMU_PROT_IR) - __pte |= IOMMU_PTE_IR; - if (prot & IOMMU_PROT_IW) - __pte |= IOMMU_PTE_IW; - - for (i = 0; i < count; ++i) - pte[i] = __pte; - - ret = 0; - -out: - if (updated) { - unsigned long flags; - - spin_lock_irqsave(&dom->lock, flags); - /* - * Flush domain TLB(s) and wait for completion. Any Device-Table - * Updates and flushing already happened in - * increase_address_space(). 
- */ - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); - spin_unlock_irqrestore(&dom->lock, flags); - } - - /* Everything flushed out, free pages now */ - free_page_list(freelist); - - return ret; -} - -static unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size) -{ - unsigned long long unmapped; - unsigned long unmap_size; - u64 *pte; - - BUG_ON(!is_power_of_2(page_size)); - - unmapped = 0; - - while (unmapped < page_size) { - - pte = fetch_pte(dom, bus_addr, &unmap_size); - - if (pte) { - int i, count; - - count = PAGE_SIZE_PTE_COUNT(unmap_size); - for (i = 0; i < count; i++) - pte[i] = 0ULL; - } - - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; - unmapped += unmap_size; - } - - BUG_ON(unmapped && !is_power_of_2(unmapped)); - - return unmapped; -} - /**************************************************************************** * * The next functions belong to the domain allocation. A domain is @@ -1896,17 +1393,16 @@ static void free_gcr3_table(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, - struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (pgtable->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(pgtable->root); + if (domain->iop.mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(domain->iop.root); - pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1979,7 +1475,6 @@ static void clear_dte_entry(u16 devid) static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { - struct domain_pgtable pgtable; struct amd_iommu *iommu; bool ats; @@ -1995,8 +1490,7 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - 
amd_iommu_domain_get_pgtable(domain, &pgtable); - set_dte_entry(dev_data->devid, domain, &pgtable, + set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); @@ -2020,10 +1514,10 @@ static void do_detach(struct iommu_dev_data *dev_data) device_flush_dte(dev_data); /* Flush IOTLB */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); /* Wait for the flushes to finish */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -2156,9 +1650,9 @@ static int attach_device(struct device *dev, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); out: spin_unlock(&dev_data->lock); @@ -2303,36 +1797,31 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * *****************************************************************************/ -static void update_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, pgtable, + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { - update_device_table(domain, pgtable); + update_device_table(domain); domain_flush_devices(domain); } -static void update_domain(struct protection_domain *domain) +void amd_iommu_domain_update(struct protection_domain *domain) { - struct 
domain_pgtable pgtable; - /* Update device table */ - amd_iommu_domain_get_pgtable(domain, &pgtable); - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ - domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_complete(domain); } int __init amd_iommu_init_api(void) @@ -2400,22 +1889,19 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { - struct domain_pgtable pgtable; - if (!domain) return; if (domain->id) domain_id_free(domain->id); - amd_iommu_domain_get_pgtable(domain, &pgtable); - amd_iommu_domain_clr_pt_root(domain); - free_pagetable(&pgtable); + if (domain->iop.pgtbl_cfg.tlb) + free_io_pgtable_ops(&domain->iop.iop.ops); kfree(domain); } -static int protection_domain_init(struct protection_domain *domain, int mode) +static int protection_domain_init_v1(struct protection_domain *domain, int mode) { u64 *pt_root = NULL; @@ -2438,34 +1924,55 @@ static int protection_domain_init(struct protection_domain *domain, int mode) return 0; } -static struct protection_domain *protection_domain_alloc(int mode) +static struct protection_domain *protection_domain_alloc(unsigned int type) { + struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; + int pgtable = amd_iommu_pgtable; + int mode = DEFAULT_PGTABLE_LEVEL; + int ret; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; - if (protection_domain_init(domain, mode)) + /* + * Force IOMMU v1 page table when iommu=pt and + * when allocating domain for pass-through devices. 
+ */ + if (type == IOMMU_DOMAIN_IDENTITY) { + pgtable = AMD_IOMMU_V1; + mode = PAGE_MODE_NONE; + } else if (type == IOMMU_DOMAIN_UNMANAGED) { + pgtable = AMD_IOMMU_V1; + } + + switch (pgtable) { + case AMD_IOMMU_V1: + ret = protection_domain_init_v1(domain, mode); + break; + default: + ret = -EINVAL; + } + + if (ret) + goto out_err; + + pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); + if (!pgtbl_ops) goto out_err; return domain; - out_err: kfree(domain); - return NULL; } static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *domain; - int mode = DEFAULT_PGTABLE_LEVEL; - if (type == IOMMU_DOMAIN_IDENTITY) - mode = PAGE_MODE_NONE; - - domain = protection_domain_alloc(mode); + domain = protection_domain_alloc(type); if (!domain) return NULL; @@ -2580,12 +2087,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; + struct io_pgtable_ops *ops = &domain->iop.iop.ops; int prot = 0; - int ret; + int ret = -EINVAL; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + (domain->iop.mode == PAGE_MODE_NONE)) return -EINVAL; if (iommu_prot & IOMMU_READ) @@ -2593,9 +2100,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); - - domain_flush_np_cache(domain, iova, page_size); + if (ops->map) { + ret = ops->map(ops, iova, paddr, page_size, prot, gfp); + domain_flush_np_cache(domain, iova, page_size); + } return ret; } @@ -2605,36 +2113,22 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; + struct io_pgtable_ops *ops = 
&domain->iop.iop.ops; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + (domain->iop.mode == PAGE_MODE_NONE)) return 0; - return iommu_unmap_page(domain, iova, page_size); + return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct protection_domain *domain = to_pdomain(dom); - unsigned long offset_mask, pte_pgsize; - struct domain_pgtable pgtable; - u64 *pte, __pte; + struct io_pgtable_ops *ops = &domain->iop.iop.ops; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) - return iova; - - pte = fetch_pte(domain, iova, &pte_pgsize); - - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - return 0; - - offset_mask = pte_pgsize - 1; - __pte = __sme_clr(*pte & PM_ADDR_MASK); - - return (__pte & ~offset_mask) | (iova & offset_mask); + return ops->iova_to_phys(ops, iova); } static bool amd_iommu_capable(enum iommu_cap cap) @@ -2720,8 +2214,8 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -2799,22 +2293,12 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - /* First save pgtable configuration*/ - amd_iommu_domain_get_pgtable(domain, &pgtable); - - /* Remove page-table from domain */ - amd_iommu_domain_clr_pt_root(domain); - - /* Make changes visible to IOMMUs */ - update_domain(domain); - - /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(&pgtable); + if 
(domain->iop.pgtbl_cfg.tlb) + free_io_pgtable_ops(&domain->iop.iop.ops); spin_unlock_irqrestore(&domain->lock, flags); } @@ -2855,7 +2339,7 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) domain->glx = levels; domain->flags |= PD_IOMMUV2_MASK; - update_domain(domain); + amd_iommu_domain_update(domain); ret = 0; @@ -2892,7 +2376,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, } /* Wait until IOMMU TLB flushes are complete */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* Now flush device TLBs */ list_for_each_entry(dev_data, &domain->dev_list, list) { @@ -2918,7 +2402,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, } /* Wait until all device TLBs are flushed */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); ret = 0; @@ -3003,11 +2487,9 @@ static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) static int __set_gcr3(struct protection_domain *domain, u32 pasid, unsigned long cr3) { - struct domain_pgtable pgtable; u64 *pte; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode != PAGE_MODE_NONE) + if (domain->iop.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); @@ -3021,11 +2503,9 @@ static int __set_gcr3(struct protection_domain *domain, u32 pasid, static int __clear_gcr3(struct protection_domain *domain, u32 pasid) { - struct domain_pgtable pgtable; u64 *pte; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode != PAGE_MODE_NONE) + if (domain->iop.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 5ecc0bc608ec..f8d4ad421e07 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -77,7 +77,7 @@ struct fault { }; static LIST_HEAD(state_list); -static spinlock_t state_lock; 
+static DEFINE_SPINLOCK(state_lock); static struct workqueue_struct *iommu_wq; @@ -938,8 +938,6 @@ static int __init amd_iommu_v2_init(void) return 0; } - spin_lock_init(&state_lock); - ret = -ENOMEM; iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); if (iommu_wq == NULL) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index e13b092e6004..bb251cab61f3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -182,9 +182,13 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, unsigned long start, unsigned long end) { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); + struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + size_t size = end - start + 1; - arm_smmu_atc_inv_domain(smmu_mn->domain, mm->pasid, start, - end - start + 1); + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) + arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid, + PAGE_SIZE, false, smmu_domain); + arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -391,7 +395,7 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) unsigned long reg, fld; unsigned long oas; unsigned long asid_bits; - u32 feat_mask = ARM_SMMU_FEAT_BTM | ARM_SMMU_FEAT_COHERENCY; + u32 feat_mask = ARM_SMMU_FEAT_COHERENCY; if (vabits_actual == 52) feat_mask |= ARM_SMMU_FEAT_VAX; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8ca7415d785d..8594b4a83043 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -88,15 +88,6 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; -static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset, - struct arm_smmu_device *smmu) -{ - if (offset > SZ_64K) - return smmu->page1 + 
offset - SZ_64K; - - return smmu->base + offset; -} - static void parse_driver_options(struct arm_smmu_device *smmu) { int i = 0; @@ -272,9 +263,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); break; case CMDQ_OP_TLBI_NH_VA: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); + fallthrough; + case CMDQ_OP_TLBI_EL2_VA: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); @@ -296,6 +289,9 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) case CMDQ_OP_TLBI_S12_VMALL: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); break; + case CMDQ_OP_TLBI_EL2_ASID: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); + break; case CMDQ_OP_ATC_INV: cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global); @@ -886,7 +882,8 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) { struct arm_smmu_cmdq_ent cmd = { - .opcode = CMDQ_OP_TLBI_NH_ASID, + .opcode = smmu->features & ARM_SMMU_FEAT_E2H ? + CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID, .tlbi.asid = asid, }; @@ -1269,13 +1266,16 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, } if (s1_cfg) { + u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? 
+ STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; + BUG_ON(ste_live); dst[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | - FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1)); + FIELD_PREP(STRTAB_STE_1_STRW, strw)); if (smmu->features & ARM_SMMU_FEAT_STALLS && !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) @@ -1667,40 +1667,28 @@ static void arm_smmu_tlb_inv_context(void *cookie) arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); } -static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, - size_t granule, bool leaf, - struct arm_smmu_domain *smmu_domain) +static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, + unsigned long iova, size_t size, + size_t granule, + struct arm_smmu_domain *smmu_domain) { struct arm_smmu_device *smmu = smmu_domain->smmu; - unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0; + unsigned long end = iova + size, num_pages = 0, tg = 0; size_t inv_range = granule; struct arm_smmu_cmdq_batch cmds = {}; - struct arm_smmu_cmdq_ent cmd = { - .tlbi = { - .leaf = leaf, - }, - }; if (!size) return; - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - cmd.opcode = CMDQ_OP_TLBI_NH_VA; - cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; - } else { - cmd.opcode = CMDQ_OP_TLBI_S2_IPA; - cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; - } - if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { /* Get the leaf page size */ tg = __ffs(smmu_domain->domain.pgsize_bitmap); /* Convert page size of 12,14,16 (log2) to 1,2,3 */ - cmd.tlbi.tg = (tg - 10) / 2; + cmd->tlbi.tg = (tg - 10) / 2; /* Determine what level the granule is at */ - cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); + cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); num_pages = size >> tg; } @@ -1718,11 +1706,11 @@ static void arm_smmu_tlb_inv_range(unsigned 
long iova, size_t size, /* Determine the power of 2 multiple number of pages */ scale = __ffs(num_pages); - cmd.tlbi.scale = scale; + cmd->tlbi.scale = scale; /* Determine how many chunks of 2^scale size we have */ num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; - cmd.tlbi.num = num - 1; + cmd->tlbi.num = num - 1; /* range is num * 2^scale * pgsize */ inv_range = num << (scale + tg); @@ -1731,17 +1719,54 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, num_pages -= num << scale; } - cmd.tlbi.addr = iova; - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); + cmd->tlbi.addr = iova; + arm_smmu_cmdq_batch_add(smmu, &cmds, cmd); iova += inv_range; } arm_smmu_cmdq_batch_submit(smmu, &cmds); +} + +static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, + size_t granule, bool leaf, + struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_cmdq_ent cmd = { + .tlbi = { + .leaf = leaf, + }, + }; + + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? + CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; + cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; + } else { + cmd.opcode = CMDQ_OP_TLBI_S2_IPA; + cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; + } + __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); /* * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. */ - arm_smmu_atc_inv_domain(smmu_domain, 0, start, size); + arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); +} + +void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, + size_t granule, bool leaf, + struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_cmdq_ent cmd = { + .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 
+ CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA, + .tlbi = { + .asid = asid, + .leaf = leaf, + }, + }; + + __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); } static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, @@ -1757,7 +1782,7 @@ static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { - arm_smmu_tlb_inv_range(iova, size, granule, false, cookie); + arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie); } static const struct iommu_flush_ops arm_smmu_flush_ops = { @@ -2280,8 +2305,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start, - gather->pgsize, true, smmu_domain); + arm_smmu_tlb_inv_range_domain(gather->start, + gather->end - gather->start + 1, + gather->pgsize, true, smmu_domain); } static phys_addr_t @@ -2611,6 +2637,7 @@ static struct iommu_ops arm_smmu_ops = { /* Probing and initialisation functions */ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, struct arm_smmu_queue *q, + void __iomem *page, unsigned long prod_off, unsigned long cons_off, size_t dwords, const char *name) @@ -2639,8 +2666,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, 1 << q->llq.max_n_shift, name); } - q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); - q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); + q->prod_reg = page + prod_off; + q->cons_reg = page + cons_off; q->ent_dwords = dwords; q->q_base = Q_BASE_RWA; @@ -2684,9 +2711,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) int ret; /* cmdq */ - ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, - ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS, - "cmdq"); + ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base, + ARM_SMMU_CMDQ_PROD, 
ARM_SMMU_CMDQ_CONS, + CMDQ_ENT_DWORDS, "cmdq"); if (ret) return ret; @@ -2695,9 +2722,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) return ret; /* evtq */ - ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, - ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS, - "evtq"); + ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1, + ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS, + EVTQ_ENT_DWORDS, "evtq"); if (ret) return ret; @@ -2705,9 +2732,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) if (!(smmu->features & ARM_SMMU_FEAT_PRI)) return 0; - return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, - ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS, - "priq"); + return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1, + ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS, + PRIQ_ENT_DWORDS, "priq"); } static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) @@ -3060,7 +3087,11 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) writel_relaxed(reg, smmu->base + ARM_SMMU_CR1); /* CR2 (random crap) */ - reg = CR2_PTM | CR2_RECINVSID | CR2_E2H; + reg = CR2_PTM | CR2_RECINVSID; + + if (smmu->features & ARM_SMMU_FEAT_E2H) + reg |= CR2_E2H; + writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); /* Stream table */ @@ -3099,10 +3130,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); - writel_relaxed(smmu->evtq.q.llq.prod, - arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu)); - writel_relaxed(smmu->evtq.q.llq.cons, - arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu)); + writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD); + writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS); enables |= CR0_EVTQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -3117,9 +3146,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) 
writeq_relaxed(smmu->priq.q.q_base, smmu->base + ARM_SMMU_PRIQ_BASE); writel_relaxed(smmu->priq.q.llq.prod, - arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu)); + smmu->page1 + ARM_SMMU_PRIQ_PROD); writel_relaxed(smmu->priq.q.llq.cons, - arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu)); + smmu->page1 + ARM_SMMU_PRIQ_CONS); enables |= CR0_PRIQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -3221,8 +3250,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->options |= ARM_SMMU_OPT_MSIPOLL; } - if (reg & IDR0_HYP) + if (reg & IDR0_HYP) { smmu->features |= ARM_SMMU_FEAT_HYP; + if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) + smmu->features |= ARM_SMMU_FEAT_E2H; + } /* * The coherency feature as set by FW is used in preference to the ID @@ -3489,11 +3521,7 @@ err_reset_pci_ops: __maybe_unused; static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, resource_size_t size) { - struct resource res = { - .flags = IORESOURCE_MEM, - .start = start, - .end = start + size - 1, - }; + struct resource res = DEFINE_RES_MEM(start, size); return devm_ioremap_resource(dev, &res); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 96c2e9565e00..f985817c967a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -139,15 +139,15 @@ #define ARM_SMMU_CMDQ_CONS 0x9c #define ARM_SMMU_EVTQ_BASE 0xa0 -#define ARM_SMMU_EVTQ_PROD 0x100a8 -#define ARM_SMMU_EVTQ_CONS 0x100ac +#define ARM_SMMU_EVTQ_PROD 0xa8 +#define ARM_SMMU_EVTQ_CONS 0xac #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc #define ARM_SMMU_PRIQ_BASE 0xc0 -#define ARM_SMMU_PRIQ_PROD 0x100c8 -#define ARM_SMMU_PRIQ_CONS 0x100cc +#define ARM_SMMU_PRIQ_PROD 0xc8 +#define ARM_SMMU_PRIQ_CONS 0xcc #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc @@ 
-430,6 +430,8 @@ struct arm_smmu_cmdq_ent { #define CMDQ_OP_TLBI_NH_ASID 0x11 #define CMDQ_OP_TLBI_NH_VA 0x12 #define CMDQ_OP_TLBI_EL2_ALL 0x20 + #define CMDQ_OP_TLBI_EL2_ASID 0x21 + #define CMDQ_OP_TLBI_EL2_VA 0x22 #define CMDQ_OP_TLBI_S12_VMALL 0x28 #define CMDQ_OP_TLBI_S2_IPA 0x2a #define CMDQ_OP_TLBI_NSNH_ALL 0x30 @@ -604,6 +606,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_RANGE_INV (1 << 15) #define ARM_SMMU_FEAT_BTM (1 << 16) #define ARM_SMMU_FEAT_SVA (1 << 17) +#define ARM_SMMU_FEAT_E2H (1 << 18) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -694,6 +697,9 @@ extern struct arm_smmu_ctx_desc quiet_cd; int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, struct arm_smmu_ctx_desc *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); +void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, + size_t granule, bool leaf, + struct arm_smmu_domain *smmu_domain); bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index e2edbab134db..6486a7cd43f3 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -204,6 +204,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,mdss" }, { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7180-mss-pil" }, + { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, { } @@ -244,6 +245,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) { + /* Ignore valid bit for SMR mask extraction. 
*/ + smr &= ~ARM_SMMU_SMR_VALID; smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr); smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); smmu->smrs[i].valid = true; @@ -366,10 +369,12 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,sc7180-smmu-500" }, + { .compatible = "qcom,sc8180x-smmu-500" }, { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, + { .compatible = "qcom,sm8350-smmu-500" }, { } }; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4078358ed66e..f659395e7959 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -51,6 +51,8 @@ struct iommu_dma_cookie { struct iommu_domain *fq_domain; }; +static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); + void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, struct iommu_domain *domain) { @@ -378,21 +380,6 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, return iova_reserve_iommu_regions(dev, domain); } -static int iommu_dma_deferred_attach(struct device *dev, - struct iommu_domain *domain) -{ - const struct iommu_ops *ops = domain->ops; - - if (!is_kdump_kernel()) - return 0; - - if (unlikely(ops->is_attach_deferred && - ops->is_attach_deferred(domain, dev))) - return iommu_attach_device(domain, dev); - - return 0; -} - /** * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API * page flags. 
@@ -535,7 +522,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t iova_off = iova_offset(iovad, phys); dma_addr_t iova; - if (unlikely(iommu_dma_deferred_attach(dev, domain))) + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_deferred_attach(dev, domain)) return DMA_MAPPING_ERROR; size = iova_align(iovad, size + iova_off); @@ -693,7 +681,8 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, *dma_handle = DMA_MAPPING_ERROR; - if (unlikely(iommu_dma_deferred_attach(dev, domain))) + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_deferred_attach(dev, domain)) return NULL; min_size = alloc_sizes & -alloc_sizes; @@ -976,7 +965,8 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, unsigned long mask = dma_get_seg_boundary(dev); int i; - if (unlikely(iommu_dma_deferred_attach(dev, domain))) + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_deferred_attach(dev, domain)) return 0; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) @@ -1424,6 +1414,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, static int iommu_dma_init(void) { + if (is_kdump_kernel()) + static_branch_enable(&iommu_deferred_attach_enabled); + return iova_cache_get(); } arch_initcall(iommu_dma_init); diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index fb8e1e8c8029..ae236ec7d219 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o -obj-$(CONFIG_INTEL_IOMMU) += trace.o +obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c new file mode 100644 index 000000000000..b12e421a2f1a --- /dev/null +++ 
b/drivers/iommu/intel/cap_audit.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * cap_audit.c - audit iommu capabilities for boot time and hot plug + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Kyung Min Park + * Lu Baolu + */ + +#define pr_fmt(fmt) "DMAR: " fmt + +#include +#include "cap_audit.h" + +static u64 intel_iommu_cap_sanity; +static u64 intel_iommu_ecap_sanity; + +static inline void check_irq_capabilities(struct intel_iommu *a, + struct intel_iommu *b) +{ + CHECK_FEATURE_MISMATCH(a, b, cap, pi_support, CAP_PI_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, eim_support, ECAP_EIM_MASK); +} + +static inline void check_dmar_capabilities(struct intel_iommu *a, + struct intel_iommu *b) +{ + MINIMAL_FEATURE_IOMMU(b, cap, CAP_MAMV_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_NFR_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_SLLPS_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_FRO_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_MGAW_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_SAGAW_MASK); + MINIMAL_FEATURE_IOMMU(b, cap, CAP_NDOMS_MASK); + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_PSS_MASK); + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK); + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK); + + CHECK_FEATURE_MISMATCH(a, b, cap, 5lp_support, CAP_FL5LP_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, pgsel_inv, CAP_PSI_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, zlr, CAP_ZLR_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, caching_mode, CAP_CM_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, phmr, CAP_PHMR_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, plmr, CAP_PLMR_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, rwbf, CAP_RWBF_MASK); + CHECK_FEATURE_MISMATCH(a, b, cap, afl, CAP_AFL_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, rps, ECAP_RPS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, smpwc, 
ECAP_SMPWC_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, flts, ECAP_FLTS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, vcs, ECAP_VCS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, pasid, ECAP_PASID_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, eafs, ECAP_EAFS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, srs, ECAP_SRS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, ers, ECAP_ERS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, prs, ECAP_PRS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, nest, ECAP_NEST_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, mts, ECAP_MTS_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, sc_support, ECAP_SC_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, pass_through, ECAP_PT_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, dev_iotlb_support, ECAP_DT_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, qis, ECAP_QI_MASK); + CHECK_FEATURE_MISMATCH(a, b, ecap, coherent, ECAP_C_MASK); +} + +static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type) +{ + bool mismatch = false; + u64 old_cap = intel_iommu_cap_sanity; + u64 old_ecap = intel_iommu_ecap_sanity; + + if (type == CAP_AUDIT_HOTPLUG_IRQR) { + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pi_support, CAP_PI_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eim_support, ECAP_EIM_MASK); + goto out; + } + + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, 5lp_support, CAP_FL5LP_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pgsel_inv, CAP_PSI_MASK); + 
CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, zlr, CAP_ZLR_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, caching_mode, CAP_CM_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, phmr, CAP_PHMR_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, plmr, CAP_PLMR_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, rwbf, CAP_RWBF_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, afl, CAP_AFL_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, rps, ECAP_RPS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smpwc, ECAP_SMPWC_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, flts, ECAP_FLTS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, vcs, ECAP_VCS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pasid, ECAP_PASID_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eafs, ECAP_EAFS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, srs, ECAP_SRS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, ers, ECAP_ERS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, prs, ECAP_PRS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nest, ECAP_NEST_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, mts, ECAP_MTS_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, sc_support, ECAP_SC_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pass_through, ECAP_PT_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dev_iotlb_support, ECAP_DT_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, qis, ECAP_QI_MASK); + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, coherent, ECAP_C_MASK); + + /* Abort hot plug if the hot plug iommu feature is smaller than 
global */ + MINIMAL_FEATURE_HOTPLUG(iommu, cap, max_amask_val, CAP_MAMV_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, num_fault_regs, CAP_NFR_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, super_page_val, CAP_SLLPS_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, fault_reg_offset, CAP_FRO_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, mgaw, CAP_MGAW_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, sagaw, CAP_SAGAW_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, cap, ndoms, CAP_NDOMS_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, pss, ECAP_PSS_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, max_handle_mask, ECAP_MHMV_MASK, mismatch); + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, iotlb_offset, ECAP_IRO_MASK, mismatch); + +out: + if (mismatch) { + intel_iommu_cap_sanity = old_cap; + intel_iommu_ecap_sanity = old_ecap; + return -EFAULT; + } + + return 0; +} + +static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type) +{ + struct dmar_drhd_unit *d; + struct intel_iommu *i; + + rcu_read_lock(); + if (list_empty(&dmar_drhd_units)) + goto out; + + for_each_active_iommu(i, d) { + if (!iommu) { + intel_iommu_ecap_sanity = i->ecap; + intel_iommu_cap_sanity = i->cap; + iommu = i; + continue; + } + + if (type == CAP_AUDIT_STATIC_DMAR) + check_dmar_capabilities(iommu, i); + else + check_irq_capabilities(iommu, i); + } + +out: + rcu_read_unlock(); + return 0; +} + +int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu) +{ + switch (type) { + case CAP_AUDIT_STATIC_DMAR: + case CAP_AUDIT_STATIC_IRQR: + return cap_audit_static(iommu, type); + case CAP_AUDIT_HOTPLUG_DMAR: + case CAP_AUDIT_HOTPLUG_IRQR: + return cap_audit_hotplug(iommu, type); + default: + break; + } + + return -EFAULT; +} + +bool intel_cap_smts_sanity(void) +{ + return ecap_smts(intel_iommu_ecap_sanity); +} + +bool intel_cap_pasid_sanity(void) +{ + return ecap_pasid(intel_iommu_ecap_sanity); +} + +bool 
intel_cap_nest_sanity(void) +{ + return ecap_nest(intel_iommu_ecap_sanity); +} + +bool intel_cap_flts_sanity(void) +{ + return ecap_flts(intel_iommu_ecap_sanity); +} diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h new file mode 100644 index 000000000000..74cfccae0e81 --- /dev/null +++ b/drivers/iommu/intel/cap_audit.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cap_audit.h - audit iommu capabilities header + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Kyung Min Park + */ + +/* + * Capability Register Mask + */ +#define CAP_FL5LP_MASK BIT_ULL(60) +#define CAP_PI_MASK BIT_ULL(59) +#define CAP_FL1GP_MASK BIT_ULL(56) +#define CAP_RD_MASK BIT_ULL(55) +#define CAP_WD_MASK BIT_ULL(54) +#define CAP_MAMV_MASK GENMASK_ULL(53, 48) +#define CAP_NFR_MASK GENMASK_ULL(47, 40) +#define CAP_PSI_MASK BIT_ULL(39) +#define CAP_SLLPS_MASK GENMASK_ULL(37, 34) +#define CAP_FRO_MASK GENMASK_ULL(33, 24) +#define CAP_ZLR_MASK BIT_ULL(22) +#define CAP_MGAW_MASK GENMASK_ULL(21, 16) +#define CAP_SAGAW_MASK GENMASK_ULL(12, 8) +#define CAP_CM_MASK BIT_ULL(7) +#define CAP_PHMR_MASK BIT_ULL(6) +#define CAP_PLMR_MASK BIT_ULL(5) +#define CAP_RWBF_MASK BIT_ULL(4) +#define CAP_AFL_MASK BIT_ULL(3) +#define CAP_NDOMS_MASK GENMASK_ULL(2, 0) + +/* + * Extended Capability Register Mask + */ +#define ECAP_RPS_MASK BIT_ULL(49) +#define ECAP_SMPWC_MASK BIT_ULL(48) +#define ECAP_FLTS_MASK BIT_ULL(47) +#define ECAP_SLTS_MASK BIT_ULL(46) +#define ECAP_SLADS_MASK BIT_ULL(45) +#define ECAP_VCS_MASK BIT_ULL(44) +#define ECAP_SMTS_MASK BIT_ULL(43) +#define ECAP_PDS_MASK BIT_ULL(42) +#define ECAP_DIT_MASK BIT_ULL(41) +#define ECAP_PASID_MASK BIT_ULL(40) +#define ECAP_PSS_MASK GENMASK_ULL(39, 35) +#define ECAP_EAFS_MASK BIT_ULL(34) +#define ECAP_NWFS_MASK BIT_ULL(33) +#define ECAP_SRS_MASK BIT_ULL(31) +#define ECAP_ERS_MASK BIT_ULL(30) +#define ECAP_PRS_MASK BIT_ULL(29) +#define ECAP_NEST_MASK BIT_ULL(26) +#define ECAP_MTS_MASK BIT_ULL(25) +#define 
ECAP_MHMV_MASK GENMASK_ULL(23, 20) +#define ECAP_IRO_MASK GENMASK_ULL(17, 8) +#define ECAP_SC_MASK BIT_ULL(7) +#define ECAP_PT_MASK BIT_ULL(6) +#define ECAP_EIM_MASK BIT_ULL(4) +#define ECAP_DT_MASK BIT_ULL(2) +#define ECAP_QI_MASK BIT_ULL(1) +#define ECAP_C_MASK BIT_ULL(0) + +/* + * u64 intel_iommu_cap_sanity, intel_iommu_ecap_sanity will be adjusted as each + * IOMMU gets audited. + */ +#define DO_CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ +do { \ + if (cap##_##feature(a) != cap##_##feature(b)) { \ + intel_iommu_##cap##_sanity &= ~(MASK); \ + pr_info("IOMMU feature %s inconsistent", #feature); \ + } \ +} while (0) + +#define CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ + DO_CHECK_FEATURE_MISMATCH((a)->cap, (b)->cap, cap, feature, MASK) + +#define CHECK_FEATURE_MISMATCH_HOTPLUG(b, cap, feature, MASK) \ +do { \ + if (cap##_##feature(intel_iommu_##cap##_sanity)) \ + DO_CHECK_FEATURE_MISMATCH(intel_iommu_##cap##_sanity, \ + (b)->cap, cap, feature, MASK); \ +} while (0) + +#define MINIMAL_FEATURE_IOMMU(iommu, cap, MASK) \ +do { \ + u64 min_feature = intel_iommu_##cap##_sanity & (MASK); \ + min_feature = min_t(u64, min_feature, (iommu)->cap & (MASK)); \ + intel_iommu_##cap##_sanity = (intel_iommu_##cap##_sanity & ~(MASK)) | \ + min_feature; \ +} while (0) + +#define MINIMAL_FEATURE_HOTPLUG(iommu, cap, feature, MASK, mismatch) \ +do { \ + if ((intel_iommu_##cap##_sanity & (MASK)) > \ + (cap##_##feature((iommu)->cap))) \ + mismatch = true; \ + else \ + (iommu)->cap = ((iommu)->cap & ~(MASK)) | \ + (intel_iommu_##cap##_sanity & (MASK)); \ +} while (0) + +enum cap_audit_type { + CAP_AUDIT_STATIC_DMAR, + CAP_AUDIT_STATIC_IRQR, + CAP_AUDIT_HOTPLUG_DMAR, + CAP_AUDIT_HOTPLUG_IRQR, +}; + +bool intel_cap_smts_sanity(void); +bool intel_cap_pasid_sanity(void); +bool intel_cap_nest_sanity(void); +bool intel_cap_flts_sanity(void); + +static inline bool scalable_mode_support(void) +{ + return (intel_iommu_sm && intel_cap_smts_sanity()); +} + +static inline bool 
pasid_mode_support(void) +{ + return scalable_mode_support() && intel_cap_pasid_sanity(); +} + +static inline bool nested_mode_support(void) +{ + return scalable_mode_support() && intel_cap_nest_sanity(); +} + +int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 02e7c10a4224..d5c51b5c20af 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "../irq_remapping.h" @@ -525,6 +526,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) struct acpi_dmar_reserved_memory *rmrr; struct acpi_dmar_atsr *atsr; struct acpi_dmar_rhsa *rhsa; + struct acpi_dmar_satc *satc; switch (header->type) { case ACPI_DMAR_TYPE_HARDWARE_UNIT: @@ -554,6 +556,10 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) /* We don't print this here because we need to sanity-check it first. So print it in dmar_parse_one_andd() instead. 
*/ break; + case ACPI_DMAR_TYPE_SATC: + satc = container_of(header, struct acpi_dmar_satc, header); + pr_info("SATC flags: 0x%x\n", satc->flags); + break; } } @@ -641,6 +647,7 @@ parse_dmar_table(void) .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr, .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa, .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd, + .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc, }; /* @@ -1307,6 +1314,8 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, offset = ((index + i) % QI_LENGTH) << shift; memcpy(qi->desc + offset, &desc[i], 1 << shift); qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE; + trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1, + desc[i].qw2, desc[i].qw3); } qi->desc_status[wait_index] = QI_IN_USE; @@ -2074,6 +2083,7 @@ static guid_t dmar_hp_guid = #define DMAR_DSM_FUNC_DRHD 1 #define DMAR_DSM_FUNC_ATSR 2 #define DMAR_DSM_FUNC_RHSA 3 +#define DMAR_DSM_FUNC_SATC 4 static inline bool dmar_detect_dsm(acpi_handle handle, int func) { @@ -2091,6 +2101,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func, [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT, [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS, [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY, + [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC, }; if (!dmar_detect_dsm(handle, func)) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 06b00b5363d8..ee0932307d64 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -44,10 +44,10 @@ #include #include #include -#include #include "../irq_remapping.h" #include "pasid.h" +#include "cap_audit.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -316,8 +316,18 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; +struct dmar_satc_unit { + struct list_head list; /* list of SATC units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + struct dmar_dev_scope *devices; /* target devices */ 
+ struct intel_iommu *iommu; /* the corresponding iommu */ + int devices_cnt; /* target device count */ + u8 atc_required:1; /* ATS is required */ +}; + static LIST_HEAD(dmar_atsr_units); static LIST_HEAD(dmar_rmrr_units); +static LIST_HEAD(dmar_satc_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) @@ -1017,8 +1027,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; - if (domain_use_first_level(domain)) + if (domain_use_first_level(domain)) { pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US; + if (domain->domain.type == IOMMU_DOMAIN_DMA) + pteval |= DMA_FL_PTE_ACCESS; + } if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); @@ -1861,25 +1874,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) */ static bool first_level_by_default(void) { - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - static int first_level_support = -1; - - if (likely(first_level_support != -1)) - return first_level_support; - - first_level_support = 1; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) { - first_level_support = 0; - break; - } - } - rcu_read_unlock(); - - return first_level_support; + return scalable_mode_support() && intel_cap_flts_sanity(); } static struct dmar_domain *alloc_domain(int flags) @@ -2298,9 +2293,9 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - struct dma_pte *first_pte = NULL, *pte = NULL; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; + struct dma_pte *pte = NULL; phys_addr_t pteval; u64 attr; @@ -2310,9 +2305,16 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return 
-EINVAL; attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); - if (domain_use_first_level(domain)) + if (domain_use_first_level(domain)) { attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US; + if (domain->domain.type == IOMMU_DOMAIN_DMA) { + attr |= DMA_FL_PTE_ACCESS; + if (prot & DMA_PTE_WRITE) + attr |= DMA_FL_PTE_DIRTY; + } + } + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; while (nr_pages > 0) { @@ -2322,7 +2324,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, nr_pages); - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); if (!pte) return -ENOMEM; /* It is large page*/ @@ -2383,34 +2385,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, * recalculate 'pte' and switch back to smaller pages for the * end of the mapping, if the trailing size is not enough to * use another superpage (i.e. nr_pages < lvl_pages). + * + * We leave clflush for the leaf pte changes to iotlb_sync_map() + * callback. 
*/ pte++; if (!nr_pages || first_pte_in_page(pte) || - (largepage_lvl > 1 && nr_pages < lvl_pages)) { - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); + (largepage_lvl > 1 && nr_pages < lvl_pages)) pte = NULL; - } - } - - return 0; -} - -static int -domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, int prot) -{ - int iommu_id, ret; - struct intel_iommu *iommu; - - /* Do the real mapping first */ - ret = __domain_mapping(domain, iov_pfn, phys_pfn, nr_pages, prot); - if (ret) - return ret; - - for_each_domain_iommu(iommu_id, domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); } return 0; @@ -3197,6 +3179,10 @@ static int __init init_dmars(void) goto error; } + ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL); + if (ret) + goto free_iommu; + for_each_iommu(iommu, drhd) { if (drhd->ignored) { iommu_disable_translation(iommu); @@ -3740,6 +3726,57 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) return 0; } +static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc) +{ + struct dmar_satc_unit *satcu; + struct acpi_dmar_satc *tmp; + + list_for_each_entry_rcu(satcu, &dmar_satc_units, list, + dmar_rcu_check()) { + tmp = (struct acpi_dmar_satc *)satcu->hdr; + if (satc->segment != tmp->segment) + continue; + if (satc->header.length != tmp->header.length) + continue; + if (memcmp(satc, tmp, satc->header.length) == 0) + return satcu; + } + + return NULL; +} + +int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg) +{ + struct acpi_dmar_satc *satc; + struct dmar_satc_unit *satcu; + + if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) + return 0; + + satc = container_of(hdr, struct acpi_dmar_satc, header); + satcu = dmar_find_satc(satc); + if (satcu) + return 0; + + satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL); + if (!satcu) + return -ENOMEM; + + satcu->hdr = (void *)(satcu + 
1); + memcpy(satcu->hdr, hdr, hdr->length); + satcu->atc_required = satc->flags & 0x1; + satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1), + (void *)satc + satc->header.length, + &satcu->devices_cnt); + if (satcu->devices_cnt && !satcu->devices) { + kfree(satcu); + return -ENOMEM; + } + list_add_rcu(&satcu->list, &dmar_satc_units); + + return 0; +} + static int intel_iommu_add(struct dmar_drhd_unit *dmaru) { int sp, ret; @@ -3748,6 +3785,10 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (g_iommus[iommu->seq_id]) return 0; + ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu); + if (ret) + goto out; + if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { pr_warn("%s: Doesn't support hardware pass through.\n", iommu->name); @@ -3843,6 +3884,7 @@ static void intel_iommu_free_dmars(void) { struct dmar_rmrr_unit *rmrru, *rmrr_n; struct dmar_atsr_unit *atsru, *atsr_n; + struct dmar_satc_unit *satcu, *satc_n; list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { list_del(&rmrru->list); @@ -3854,6 +3896,11 @@ static void intel_iommu_free_dmars(void) list_del(&atsru->list); intel_iommu_free_atsr(atsru); } + list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) { + list_del(&satcu->list); + dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt); + kfree(satcu); + } } int dmar_find_matched_atsr_unit(struct pci_dev *dev) @@ -3905,8 +3952,10 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) int ret; struct dmar_rmrr_unit *rmrru; struct dmar_atsr_unit *atsru; + struct dmar_satc_unit *satcu; struct acpi_dmar_atsr *atsr; struct acpi_dmar_reserved_memory *rmrr; + struct acpi_dmar_satc *satc; if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) return 0; @@ -3947,6 +3996,23 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) break; } } + list_for_each_entry(satcu, &dmar_satc_units, list) { + satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); + if (info->event == 
BUS_NOTIFY_ADD_DEVICE) { + ret = dmar_insert_dev_scope(info, (void *)(satc + 1), + (void *)satc + satc->header.length, + satc->segment, satcu->devices, + satcu->devices_cnt); + if (ret > 0) + break; + else if (ret < 0) + return ret; + } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { + if (dmar_remove_dev_scope(info, satc->segment, + satcu->devices, satcu->devices_cnt)) + break; + } + } return 0; } @@ -4290,6 +4356,9 @@ int __init intel_iommu_init(void) if (list_empty(&dmar_atsr_units)) pr_info("No ATSR found\n"); + if (list_empty(&dmar_satc_units)) + pr_info("No SATC found\n"); + if (dmar_map_gfx) intel_iommu_gfx_mapped = 1; @@ -4943,7 +5012,6 @@ static int intel_iommu_map(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); u64 max_addr; int prot = 0; - int ret; if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; @@ -4969,9 +5037,8 @@ static int intel_iommu_map(struct iommu_domain *domain, /* Round up size to next multiple of PAGE_SIZE, if it and the low bits of hpa would take us onto the next page */ size = aligned_nrpages(hpa, size); - ret = domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, - hpa >> VTD_PAGE_SHIFT, size, prot); - return ret; + return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, + hpa >> VTD_PAGE_SHIFT, size, prot); } static size_t intel_iommu_unmap(struct iommu_domain *domain, @@ -5040,60 +5107,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } -static inline bool scalable_mode_support(void) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool ret = true; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!sm_supported(iommu)) { - ret = false; - break; - } - } - rcu_read_unlock(); - - return ret; -} - -static inline bool iommu_pasid_support(void) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool ret = true; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!pasid_supported(iommu)) { - ret 
= false; - break; - } - } - rcu_read_unlock(); - - return ret; -} - -static inline bool nested_mode_support(void) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool ret = true; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) { - ret = false; - break; - } - } - rcu_read_unlock(); - - return ret; -} - static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) @@ -5334,7 +5347,7 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) int ret; if (!dev_is_pci(dev) || dmar_disabled || - !scalable_mode_support() || !iommu_pasid_support()) + !scalable_mode_support() || !pasid_mode_support()) return false; ret = pci_pasid_features(to_pci_dev(dev)); @@ -5508,6 +5521,57 @@ static bool risky_device(struct pci_dev *pdev) return false; } +static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn, + unsigned long clf_pages) +{ + struct dma_pte *first_pte = NULL, *pte = NULL; + unsigned long lvl_pages = 0; + int level = 0; + + while (clf_pages > 0) { + if (!pte) { + level = 0; + pte = pfn_to_dma_pte(domain, clf_pfn, &level); + if (WARN_ON(!pte)) + return; + first_pte = pte; + lvl_pages = lvl_to_nr_pages(level); + } + + if (WARN_ON(!lvl_pages || clf_pages < lvl_pages)) + return; + + clf_pages -= lvl_pages; + clf_pfn += lvl_pages; + pte++; + + if (!clf_pages || first_pte_in_page(pte) || + (level > 1 && clf_pages < lvl_pages)) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } + } +} + +static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long pages = aligned_nrpages(iova, size); + unsigned long pfn = iova >> VTD_PAGE_SHIFT; + struct intel_iommu *iommu; + int iommu_id; + + if (!dmar_domain->iommu_coherency) + clflush_sync_map(dmar_domain, pfn, pages); + + 
for_each_domain_iommu(iommu_id, dmar_domain) { + iommu = g_iommus[iommu_id]; + __mapping_notify_one(iommu, dmar_domain, pfn, pages); + } +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5520,6 +5584,7 @@ const struct iommu_ops intel_iommu_ops = { .aux_detach_dev = intel_iommu_aux_detach_device, .aux_get_pasid = intel_iommu_aux_get_pasid, .map = intel_iommu_map, + .iotlb_sync_map = intel_iommu_iotlb_sync_map, .unmap = intel_iommu_unmap, .flush_iotlb_all = intel_flush_iotlb_all, .iotlb_sync = intel_iommu_tlb_sync, diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 685200a5cff0..611ef5243cb6 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -22,6 +22,7 @@ #include #include "../irq_remapping.h" +#include "cap_audit.h" enum irq_mode { IRQ_REMAPPING, @@ -734,6 +735,9 @@ static int __init intel_prepare_irq_remapping(void) if (dmar_table_init() < 0) return -ENODEV; + if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL)) + goto error; + if (!dmar_ir_support()) return -ENODEV; @@ -1439,6 +1443,10 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu) int ret; int eim = x2apic_enabled(); + ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu); + if (ret) + return ret; + if (eim && !ecap_eim_support(iommu->ecap)) { pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n", iommu->reg_phys, iommu->ecap); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index b92af83b79bd..f26cb6195b2c 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -456,20 +456,6 @@ pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, qi_submit_sync(iommu, &desc, 1, 0); } -static void -iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) -{ - struct qi_desc desc; - - desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | - 
QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; - desc.qw1 = 0; - desc.qw2 = 0; - desc.qw3 = 0; - - qi_submit_sync(iommu, &desc, 1, 0); -} - static void devtlb_invalidation_with_pasid(struct intel_iommu *iommu, struct device *dev, u32 pasid) @@ -514,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, clflush_cache_range(pte, sizeof(*pte)); pasid_cache_invalidation_with_pasid(iommu, did, pasid); - iotlb_invalidation_with_pasid(iommu, did, pasid); + qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); /* Device IOTLB doesn't need to be flushed in caching mode. */ if (!cap_caching_mode(iommu->cap)) @@ -530,7 +516,7 @@ static void pasid_flush_caches(struct intel_iommu *iommu, if (cap_caching_mode(iommu->cap)) { pasid_cache_invalidation_with_pasid(iommu, did, pasid); - iotlb_invalidation_with_pasid(iommu, did, pasid); + qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); } else { iommu_flush_write_buffer(iommu); } diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 18a9f05df407..574a7e657a9a 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -123,53 +123,16 @@ static void __flush_svm_range_dev(struct intel_svm *svm, unsigned long address, unsigned long pages, int ih) { - struct qi_desc desc; + struct device_domain_info *info = get_domain_info(sdev->dev); - if (pages == -1) { - desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | - QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = 0; - } else { - int mask = ilog2(__roundup_pow_of_two(pages)); + if (WARN_ON(!pages)) + return; - desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | - QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = QI_EIOTLB_ADDR(address) | - QI_EIOTLB_IH(ih) | - QI_EIOTLB_AM(mask); - } - desc.qw2 = 0; - desc.qw3 = 0; - qi_submit_sync(sdev->iommu, &desc, 1, 0); - - if (sdev->dev_iotlb) { - desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | - QI_DEV_EIOTLB_SID(sdev->sid) 
| - QI_DEV_EIOTLB_QDEP(sdev->qdep) | - QI_DEIOTLB_TYPE; - if (pages == -1) { - desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | - QI_DEV_EIOTLB_SIZE; - } else if (pages > 1) { - /* The least significant zero bit indicates the size. So, - * for example, an "address" value of 0x12345f000 will - * flush from 0x123440000 to 0x12347ffff (256KiB). */ - unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); - unsigned long mask = __rounddown_pow_of_two(address ^ last); - - desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) | - (mask - 1)) | QI_DEV_EIOTLB_SIZE; - } else { - desc.qw1 = QI_DEV_EIOTLB_ADDR(address); - } - desc.qw2 = 0; - desc.qw3 = 0; - qi_submit_sync(sdev->iommu, &desc, 1, 0); - } + qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, + svm->pasid, sdev->qdep, address, + order_base_2(pages)); } static void intel_flush_svm_range_dev(struct intel_svm *svm, @@ -948,10 +911,8 @@ static irqreturn_t prq_event_thread(int irq, void *d) u64 address; handled = 1; - req = &iommu->prq[head / sizeof(*req)]; - - result = QI_RESP_FAILURE; + result = QI_RESP_INVALID; address = (u64)req->addr << VTD_PAGE_SHIFT; if (!req->pasid_present) { pr_err("%s: Page request without PASID: %08llx %08llx\n", @@ -989,7 +950,6 @@ static irqreturn_t prq_event_thread(int irq, void *d) rcu_read_unlock(); } - result = QI_RESP_INVALID; /* Since we're using init_mm.pgd directly, we should never take * any faults on kernel addresses. */ if (!svm->mm) @@ -1079,8 +1039,17 @@ static irqreturn_t prq_event_thread(int irq, void *d) * Clear the page request overflow bit and wake up all threads that * are waiting for the completion of this handling. 
*/ - if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) - writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { + pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n", + iommu->name); + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + if (head == tail) { + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", + iommu->name); + } + } if (!completion_done(&iommu->prq_complete)) complete(&iommu->prq_complete); diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 1d92ac948db7..d4004bcf333a 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -44,26 +44,25 @@ /* * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2, - * and 12 bits in a page. With some carefully-chosen coefficients we can - * hide the ugly inconsistencies behind these macros and at least let the - * rest of the code pretend to be somewhat sane. + * and 12 bits in a page. + * MediaTek extend 2 bits to reach 34bits, 14 bits at lvl1 and 8 bits at lvl2. */ #define ARM_V7S_ADDR_BITS 32 -#define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4) -#define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl))) +#define _ARM_V7S_LVL_BITS(lvl, cfg) ((lvl) == 1 ? ((cfg)->ias - 20) : 8) +#define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 
20 : 12) #define ARM_V7S_TABLE_SHIFT 10 -#define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl)) -#define ARM_V7S_TABLE_SIZE(lvl) \ - (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte)) +#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl, cfg)) +#define ARM_V7S_TABLE_SIZE(lvl, cfg) \ + (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte)) #define ARM_V7S_BLOCK_SIZE(lvl) (1UL << ARM_V7S_LVL_SHIFT(lvl)) #define ARM_V7S_LVL_MASK(lvl) ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl))) #define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT)) -#define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1) -#define ARM_V7S_LVL_IDX(addr, lvl) ({ \ +#define _ARM_V7S_IDX_MASK(lvl, cfg) (ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1) +#define ARM_V7S_LVL_IDX(addr, lvl, cfg) ({ \ int _l = lvl; \ - ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \ + ((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ }) /* @@ -112,9 +111,10 @@ #define ARM_V7S_TEX_MASK 0x7 #define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT) -/* MediaTek extend the two bits for PA 32bit/33bit */ +/* MediaTek extend the bits below for PA 32bit/33bit/34bit */ #define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9) #define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4) +#define ARM_V7S_ATTR_MTK_PA_BIT34 BIT(5) /* *well, except for TEX on level 2 large pages, of course :( */ #define ARM_V7S_CONT_PAGE_TEX_SHIFT 6 @@ -194,6 +194,8 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl, pte |= ARM_V7S_ATTR_MTK_PA_BIT32; if (paddr & BIT_ULL(33)) pte |= ARM_V7S_ATTR_MTK_PA_BIT33; + if (paddr & BIT_ULL(34)) + pte |= ARM_V7S_ATTR_MTK_PA_BIT34; return pte; } @@ -218,6 +220,8 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl, paddr |= BIT_ULL(32); if (pte & ARM_V7S_ATTR_MTK_PA_BIT33) paddr |= BIT_ULL(33); + if (pte & ARM_V7S_ATTR_MTK_PA_BIT34) + paddr |= BIT_ULL(34); return paddr; } @@ -234,7 +238,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, struct 
device *dev = cfg->iommu_dev; phys_addr_t phys; dma_addr_t dma; - size_t size = ARM_V7S_TABLE_SIZE(lvl); + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); void *table = NULL; if (lvl == 1) @@ -280,7 +284,7 @@ static void __arm_v7s_free_table(void *table, int lvl, { struct io_pgtable_cfg *cfg = &data->iop.cfg; struct device *dev = cfg->iommu_dev; - size_t size = ARM_V7S_TABLE_SIZE(lvl); + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); if (!cfg->coherent_walk) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, @@ -424,7 +428,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, arm_v7s_iopte *tblp; size_t sz = ARM_V7S_BLOCK_SIZE(lvl); - tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl); + tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg); if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz, sz, lvl, tblp) != sz)) return -EINVAL; @@ -477,7 +481,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl); /* Find our entry at the current level */ - ptep += ARM_V7S_LVL_IDX(iova, lvl); + ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg); /* If we can install a leaf entry at this level, then do so */ if (num_entries) @@ -519,7 +523,6 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - struct io_pgtable *iop = &data->iop; int ret; if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) || @@ -535,12 +538,7 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, * Synchronise all PTE updates for the new mapping before there's * a chance for anything to kick off a table walk for the new iova. 
*/ - if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) { - io_pgtable_tlb_flush_walk(iop, iova, size, - ARM_V7S_BLOCK_SIZE(2)); - } else { - wmb(); - } + wmb(); return ret; } @@ -550,7 +548,7 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop) struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); int i; - for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) { + for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) { arm_v7s_iopte pte = data->pgd[i]; if (ARM_V7S_PTE_IS_TABLE(pte, 1)) @@ -602,9 +600,9 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, if (!tablep) return 0; /* Bytes unmapped */ - num_ptes = ARM_V7S_PTES_PER_LVL(2); + num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg); num_entries = size >> ARM_V7S_LVL_SHIFT(2); - unmap_idx = ARM_V7S_LVL_IDX(iova, 2); + unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg); pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); if (num_entries > 1) @@ -646,7 +644,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, if (WARN_ON(lvl > 2)) return 0; - idx = ARM_V7S_LVL_IDX(iova, lvl); + idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg); ptep += idx; do { pte[i] = READ_ONCE(ptep[i]); @@ -717,7 +715,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - if (WARN_ON(upper_32_bits(iova))) + if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) return 0; return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd); @@ -732,7 +730,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, u32 mask; do { - ptep += ARM_V7S_LVL_IDX(iova, ++lvl); + ptep += ARM_V7S_LVL_IDX(iova, ++lvl, &data->iop.cfg); pte = READ_ONCE(*ptep); ptep = iopte_deref(pte, lvl, data); } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); @@ -751,15 +749,14 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, { struct arm_v7s_io_pgtable *data; - if (cfg->ias > ARM_V7S_ADDR_BITS) + if (cfg->ias > 
(arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) return NULL; - if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) + if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS)) return NULL; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | - IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_EXT | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; @@ -775,8 +772,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, spin_lock_init(&data->split_lock); data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", - ARM_V7S_TABLE_SIZE(2), - ARM_V7S_TABLE_SIZE(2), + ARM_V7S_TABLE_SIZE(2, cfg), + ARM_V7S_TABLE_SIZE(2, cfg), ARM_V7S_TABLE_SLAB_FLAGS, NULL); if (!data->l2_tables) goto out_free_data; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index 94394c81468f..6e9917ce980f 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -24,6 +24,9 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S [ARM_V7S] = &io_pgtable_arm_v7s_init_fns, #endif +#ifdef CONFIG_AMD_IOMMU + [AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns, +#endif }; struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ffeebda8d6de..d0b0a15dba84 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1980,6 +1980,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) } EXPORT_SYMBOL_GPL(iommu_attach_device); +int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) +{ + const struct iommu_ops *ops = domain->ops; + + if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev)) + return __iommu_attach_device(domain, dev); + + return 0; +} + /* * Check flags and other user provided data for valid combinations. We also * make sure no reserved fields or unused flags are set. 
This is to ensure @@ -2426,9 +2436,6 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, size -= pgsize; } - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain); - /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); @@ -2438,18 +2445,31 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } +static int _iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + const struct iommu_ops *ops = domain->ops; + int ret; + + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); + if (ret == 0 && ops->iotlb_sync_map) + ops->iotlb_sync_map(domain, iova, size); + + return ret; +} + int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { might_sleep(); - return __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); + return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); } EXPORT_SYMBOL_GPL(iommu_map); int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - return __iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); + return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iommu_map_atomic); @@ -2533,6 +2553,7 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { + const struct iommu_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; @@ -2563,6 +2584,8 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, sg = sg_next(sg); } + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain, iova, mapped); return mapped; out_err: @@ -2586,7 +2609,6 @@ size_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova, { return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); } 
-EXPORT_SYMBOL_GPL(iommu_map_sg_atomic); int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot) @@ -2599,15 +2621,6 @@ int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, } EXPORT_SYMBOL_GPL(iommu_domain_window_enable); -void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) -{ - if (unlikely(domain->ops->domain_window_disable == NULL)) - return; - - return domain->ops->domain_window_disable(domain, wnd_nr); -} -EXPORT_SYMBOL_GPL(iommu_domain_window_disable); - /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework * @domain: the iommu domain where the fault has happened @@ -2863,17 +2876,6 @@ EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); /* * Per device IOMMU features. */ -bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) -{ - const struct iommu_ops *ops = dev->bus->iommu_ops; - - if (ops && ops->dev_has_feat) - return ops->dev_has_feat(dev, feat); - - return false; -} -EXPORT_SYMBOL_GPL(iommu_dev_has_feature); - int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { const struct iommu_ops *ops = dev->bus->iommu_ops; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d20b8b333d30..e6e2fa85271c 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -55,7 +55,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, } EXPORT_SYMBOL_GPL(init_iova_domain); -bool has_iova_flush_queue(struct iova_domain *iovad) +static bool has_iova_flush_queue(struct iova_domain *iovad) { return !!iovad->fq; } @@ -112,7 +112,6 @@ int init_iova_flush_queue(struct iova_domain *iovad, return 0; } -EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) @@ -451,7 +450,6 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, return new_iova->pfn_lo; } -EXPORT_SYMBOL_GPL(alloc_iova_fast); /** * 
free_iova_fast - free iova pfn range into rcache @@ -598,7 +596,6 @@ void queue_iova(struct iova_domain *iovad, mod_timer(&iovad->fq_timer, jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); } -EXPORT_SYMBOL_GPL(queue_iova); /** * put_iova_domain - destroys the iova domain @@ -710,36 +707,6 @@ reserve_iova(struct iova_domain *iovad, } EXPORT_SYMBOL_GPL(reserve_iova); -/** - * copy_reserved_iova - copies the reserved between domains - * @from: - source domain from where to copy - * @to: - destination domin where to copy - * This function copies reserved iova's from one domain to - * other. - */ -void -copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) -{ - unsigned long flags; - struct rb_node *node; - - spin_lock_irqsave(&from->iova_rbtree_lock, flags); - for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { - struct iova *iova = rb_entry(node, struct iova, node); - struct iova *new_iova; - - if (iova->pfn_lo == IOVA_ANCHOR) - continue; - - new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); - if (!new_iova) - pr_err("Reserve iova range %lx@%lx failed\n", - iova->pfn_lo, iova->pfn_lo); - } - spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); -} -EXPORT_SYMBOL_GPL(copy_reserved_iova); - /* * Magazine caches for IOVA ranges. 
For an introduction to magazines, * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index d71f10257f15..eaaec0a55cc6 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -734,54 +734,45 @@ static int ipmmu_init_platform_device(struct device *dev, return 0; } -static const struct soc_device_attribute soc_rcar_gen3[] = { +static const struct soc_device_attribute soc_needs_opt_in[] = { + { .family = "R-Car Gen3", }, + { .family = "RZ/G2", }, + { /* sentinel */ } +}; + +static const struct soc_device_attribute soc_denylist[] = { { .soc_id = "r8a774a1", }, - { .soc_id = "r8a774b1", }, - { .soc_id = "r8a774c0", }, - { .soc_id = "r8a774e1", }, - { .soc_id = "r8a7795", }, - { .soc_id = "r8a77961", }, + { .soc_id = "r8a7795", .revision = "ES1.*" }, + { .soc_id = "r8a7795", .revision = "ES2.*" }, { .soc_id = "r8a7796", }, - { .soc_id = "r8a77965", }, - { .soc_id = "r8a77970", }, - { .soc_id = "r8a77990", }, - { .soc_id = "r8a77995", }, { /* sentinel */ } }; -static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = { - { .soc_id = "r8a774b1", }, - { .soc_id = "r8a774c0", }, - { .soc_id = "r8a774e1", }, - { .soc_id = "r8a7795", .revision = "ES3.*" }, - { .soc_id = "r8a77961", }, - { .soc_id = "r8a77965", }, - { .soc_id = "r8a77990", }, - { .soc_id = "r8a77995", }, - { /* sentinel */ } +static const char * const devices_allowlist[] = { + "ee100000.mmc", + "ee120000.mmc", + "ee140000.mmc", + "ee160000.mmc" }; -static const char * const rcar_gen3_slave_whitelist[] = { -}; - -static bool ipmmu_slave_whitelist(struct device *dev) +static bool ipmmu_device_is_allowed(struct device *dev) { unsigned int i; /* - * For R-Car Gen3 use a white list to opt-in slave devices. + * R-Car Gen3 and RZ/G2 use the allow list to opt-in devices. * For Other SoCs, this returns true anyway. 
*/ - if (!soc_device_match(soc_rcar_gen3)) + if (!soc_device_match(soc_needs_opt_in)) return true; - /* Check whether this R-Car Gen3 can use the IPMMU correctly or not */ - if (!soc_device_match(soc_rcar_gen3_whitelist)) + /* Check whether this SoC can use the IPMMU correctly or not */ + if (soc_device_match(soc_denylist)) return false; - /* Check whether this slave device can work with the IPMMU */ - for (i = 0; i < ARRAY_SIZE(rcar_gen3_slave_whitelist); i++) { - if (!strcmp(dev_name(dev), rcar_gen3_slave_whitelist[i])) + /* Check whether this device can work with the IPMMU */ + for (i = 0; i < ARRAY_SIZE(devices_allowlist); i++) { + if (!strcmp(dev_name(dev), devices_allowlist[i])) return true; } @@ -792,7 +783,7 @@ static bool ipmmu_slave_whitelist(struct device *dev) static int ipmmu_of_xlate(struct device *dev, struct of_phandle_args *spec) { - if (!ipmmu_slave_whitelist(dev)) + if (!ipmmu_device_is_allowed(dev)) return -ENODEV; iommu_fwspec_add_ids(dev, spec->args, 1); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 040e85f70861..f0ba6a09b434 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -343,7 +343,6 @@ static int msm_iommu_domain_config(struct msm_priv *priv) spin_lock_init(&priv->pgtlock); priv->cfg = (struct io_pgtable_cfg) { - .quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP, .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 32, @@ -490,6 +489,14 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } +static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + struct msm_priv *priv = to_msm_priv(domain); + + __flush_iotlb_range(iova, size, SZ_4K, false, priv); +} + static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t len, struct iommu_iotlb_gather *gather) { @@ -680,6 +687,7 @@ static struct iommu_ops msm_iommu_ops = { * kick starting the other master. 
*/ .iotlb_sync = NULL, + .iotlb_sync_map = msm_iommu_sync_map, .iova_to_phys = msm_iommu_iova_to_phys, .probe_device = msm_iommu_probe_device, .release_device = msm_iommu_release_device, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 8e56cec532e7..6ecc007f07cd 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -3,10 +3,12 @@ * Copyright (c) 2015-2016 MediaTek Inc. * Author: Yong Wu */ +#include #include #include #include #include +#include #include #include #include @@ -20,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +91,9 @@ #define F_REG_MMU1_FAULT_MASK GENMASK(13, 7) #define REG_MMU0_FAULT_VA 0x13c +#define F_MMU_INVAL_VA_31_12_MASK GENMASK(31, 12) +#define F_MMU_INVAL_VA_34_32_MASK GENMASK(11, 9) +#define F_MMU_INVAL_PA_34_32_MASK GENMASK(8, 6) #define F_MMU_FAULT_VA_WRITE_BIT BIT(1) #define F_MMU_FAULT_VA_LAYER_BIT BIT(0) @@ -103,13 +109,6 @@ #define MTK_PROTECT_PA_ALIGN 256 -/* - * Get the local arbiter ID and the portid within the larb arbiter - * from mtk_m4u_id which is defined by MTK_M4U_ID. - */ -#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) -#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) - #define HAS_4GB_MODE BIT(0) /* HW will use the EMI clock if there isn't the "bclk". 
*/ #define HAS_BCLK BIT(1) @@ -119,6 +118,7 @@ #define HAS_SUB_COMM BIT(5) #define WR_THROT_EN BIT(6) #define HAS_LEGACY_IVRP_PADDR BIT(7) +#define IOVA_34_EN BIT(8) #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ ((((pdata)->flags) & (_x)) == (_x)) @@ -127,11 +127,19 @@ struct mtk_iommu_domain { struct io_pgtable_cfg cfg; struct io_pgtable_ops *iop; + struct mtk_iommu_data *data; struct iommu_domain domain; }; static const struct iommu_ops mtk_iommu_ops; +static int mtk_iommu_hw_init(const struct mtk_iommu_data *data); + +#define MTK_IOMMU_TLB_ADDR(iova) ({ \ + dma_addr_t _addr = iova; \ + ((lower_32_bits(_addr) & GENMASK(31, 12)) | upper_32_bits(_addr));\ +}) + /* * In M4U 4GB mode, the physical address is remapped as below: * @@ -160,6 +168,25 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */ #define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) +struct mtk_iommu_iova_region { + dma_addr_t iova_base; + unsigned long long size; +}; + +static const struct mtk_iommu_iova_region single_domain[] = { + {.iova_base = 0, .size = SZ_4G}, +}; + +static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { + { .iova_base = 0x0, .size = SZ_4G}, /* disp: 0 ~ 4G */ + #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) + { .iova_base = SZ_4G, .size = SZ_4G}, /* vdec: 4G ~ 8G */ + { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* CAM/MDP: 8G ~ 12G */ + { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */ + { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */ + #endif +}; + /* * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain * for the performance. 
@@ -182,33 +209,43 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) return container_of(dom, struct mtk_iommu_domain, domain); } -static void mtk_iommu_tlb_flush_all(void *cookie) +static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { - struct mtk_iommu_data *data = cookie; - for_each_m4u(data) { + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); wmb(); /* Make sure the tlb flush all done */ + + pm_runtime_put(data->dev); } } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, - size_t granule, void *cookie) + size_t granule, + struct mtk_iommu_data *data) { - struct mtk_iommu_data *data = cookie; + bool has_pm = !!data->dev->pm_domain; unsigned long flags; int ret; u32 tmp; for_each_m4u(data) { + if (has_pm) { + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; + } + spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); - writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); - writel_relaxed(iova + size - 1, + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova), + data->base + REG_MMU_INVLD_START_A); + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova + size - 1), data->base + REG_MMU_INVLD_END_A); writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); @@ -219,36 +256,24 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, if (ret) { dev_warn(data->dev, "Partial TLB flush timed out, falling back to full flush\n"); - mtk_iommu_tlb_flush_all(cookie); + mtk_iommu_tlb_flush_all(data); } /* Clear the CPE status */ writel_relaxed(0, data->base + REG_MMU_CPE_DONE); spin_unlock_irqrestore(&data->tlb_lock, flags); + + if (has_pm) + pm_runtime_put(data->dev); } } -static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather, - unsigned long 
iova, size_t granule, - void *cookie) -{ - struct mtk_iommu_data *data = cookie; - struct iommu_domain *domain = &data->m4u_dom->domain; - - iommu_iotlb_gather_add_page(domain, gather, iova, granule); -} - -static const struct iommu_flush_ops mtk_iommu_flush_ops = { - .tlb_flush_all = mtk_iommu_tlb_flush_all, - .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync, - .tlb_add_page = mtk_iommu_tlb_flush_page_nosync, -}; - static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) { struct mtk_iommu_data *data = dev_id; struct mtk_iommu_domain *dom = data->m4u_dom; - u32 int_state, regval, fault_iova, fault_pa; unsigned int fault_larb, fault_port, sub_comm = 0; + u32 int_state, regval, va34_32, pa34_32; + u64 fault_iova, fault_pa; bool layer, write; /* Read error info from registers */ @@ -264,6 +289,14 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) } layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) { + va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova); + pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); + fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK; + fault_iova |= (u64)va34_32 << 32; + fault_pa |= (u64)pa34_32 << 32; + } + fault_port = F_MMU_INT_ID_PORT_ID(regval); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) { fault_larb = F_MMU_INT_ID_COMM_ID(regval); @@ -277,7 +310,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { dev_err_ratelimited( data->dev, - "fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n", + "fault type=0x%x iova=0x%llx pa=0x%llx larb=%d port=%d layer=%d %s\n", int_state, fault_iova, fault_pa, fault_larb, fault_port, layer, write ? 
"write" : "read"); } @@ -292,21 +325,57 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static void mtk_iommu_config(struct mtk_iommu_data *data, - struct device *dev, bool enable) +static int mtk_iommu_get_domain_id(struct device *dev, + const struct mtk_iommu_plat_data *plat_data) +{ + const struct mtk_iommu_iova_region *rgn = plat_data->iova_region; + const struct bus_dma_region *dma_rgn = dev->dma_range_map; + int i, candidate = -1; + dma_addr_t dma_end; + + if (!dma_rgn || plat_data->iova_region_nr == 1) + return 0; + + dma_end = dma_rgn->dma_start + dma_rgn->size - 1; + for (i = 0; i < plat_data->iova_region_nr; i++, rgn++) { + /* Best fit. */ + if (dma_rgn->dma_start == rgn->iova_base && + dma_end == rgn->iova_base + rgn->size - 1) + return i; + /* ok if it is inside this region. */ + if (dma_rgn->dma_start >= rgn->iova_base && + dma_end < rgn->iova_base + rgn->size) + candidate = i; + } + + if (candidate >= 0) + return candidate; + dev_err(dev, "Can NOT find the iommu domain id(%pad 0x%llx).\n", + &dma_rgn->dma_start, dma_rgn->size); + return -EINVAL; +} + +static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, + bool enable, unsigned int domid) { struct mtk_smi_larb_iommu *larb_mmu; unsigned int larbid, portid; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + const struct mtk_iommu_iova_region *region; int i; for (i = 0; i < fwspec->num_ids; ++i) { larbid = MTK_M4U_TO_LARB(fwspec->ids[i]); portid = MTK_M4U_TO_PORT(fwspec->ids[i]); + larb_mmu = &data->larb_imu[larbid]; - dev_dbg(dev, "%s iommu port: %d\n", - enable ? "enable" : "disable", portid); + region = data->plat_data->iova_region + domid; + larb_mmu->bank[portid] = upper_32_bits(region->iova_base); + + dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n", + enable ? 
"enable" : "disable", dev_name(larb_mmu->dev), + portid, domid, larb_mmu->bank[portid]); if (enable) larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); @@ -315,22 +384,34 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, } } -static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) +static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, + struct mtk_iommu_data *data, + unsigned int domid) { - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + const struct mtk_iommu_iova_region *region; + + /* Use the exist domain as there is only one pgtable here. */ + if (data->m4u_dom) { + dom->iop = data->m4u_dom->iop; + dom->cfg = data->m4u_dom->cfg; + dom->domain.pgsize_bitmap = data->m4u_dom->cfg.pgsize_bitmap; + goto update_iova_region; + } dom->cfg = (struct io_pgtable_cfg) { .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | - IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_EXT, .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, - .ias = 32, - .oas = 34, - .tlb = &mtk_iommu_flush_ops, + .ias = MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN) ? 34 : 32, .iommu_dev = data->dev, }; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) + dom->cfg.oas = data->enable_4GB ? 
33 : 32; + else + dom->cfg.oas = 35; + dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data); if (!dom->iop) { dev_err(data->dev, "Failed to alloc io pgtable\n"); @@ -339,6 +420,13 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; + +update_iova_region: + /* Update the iova region for this domain */ + region = data->plat_data->iova_region + domid; + dom->domain.geometry.aperture_start = region->iova_base; + dom->domain.geometry.aperture_end = region->iova_base + region->size - 1; + dom->domain.geometry.force_aperture = true; return 0; } @@ -353,30 +441,16 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) if (!dom) return NULL; - if (iommu_get_dma_cookie(&dom->domain)) - goto free_dom; - - if (mtk_iommu_domain_finalise(dom)) - goto put_dma_cookie; - - dom->domain.geometry.aperture_start = 0; - dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); - dom->domain.geometry.force_aperture = true; + if (iommu_get_dma_cookie(&dom->domain)) { + kfree(dom); + return NULL; + } return &dom->domain; - -put_dma_cookie: - iommu_put_dma_cookie(&dom->domain); -free_dom: - kfree(dom); - return NULL; } static void mtk_iommu_domain_free(struct iommu_domain *domain) { - struct mtk_iommu_domain *dom = to_mtk_domain(domain); - - free_io_pgtable_ops(dom->iop); iommu_put_dma_cookie(domain); kfree(to_mtk_domain(domain)); } @@ -386,18 +460,37 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct device *m4udev = data->dev; + int ret, domid; - if (!data) - return -ENODEV; + domid = mtk_iommu_get_domain_id(dev, data->plat_data); + if (domid < 0) + return domid; - /* Update the pgtable base address register of the M4U HW */ - if (!data->m4u_dom) { + if (!dom->data) { + if (mtk_iommu_domain_finalise(dom, data, domid)) + return 
-ENODEV; + dom->data = data; + } + + if (!data->m4u_dom) { /* Initialize the M4U HW */ + ret = pm_runtime_resume_and_get(m4udev); + if (ret < 0) + return ret; + + ret = mtk_iommu_hw_init(data); + if (ret) { + pm_runtime_put(m4udev); + return ret; + } data->m4u_dom = dom; writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, data->base + REG_MMU_PT_BASE_ADDR); + + pm_runtime_put(m4udev); } - mtk_iommu_config(data, dev, true); + mtk_iommu_config(data, dev, true, domid); return 0; } @@ -406,20 +499,16 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - if (!data) - return; - - mtk_iommu_config(data, dev, false); + mtk_iommu_config(data, dev, false, 0); } static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); /* The "4GB mode" M4U physically can not use the lower remap of Dram. 
*/ - if (data->enable_4GB) + if (dom->data->enable_4GB) paddr |= BIT_ULL(32); /* Synchronize with the tlb_lock */ @@ -431,37 +520,48 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); + unsigned long end = iova + size - 1; + if (gather->start > iova) + gather->start = iova; + if (gather->end < end) + gather->end = end; return dom->iop->unmap(dom->iop, iova, size, gather); } static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) { - mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data()); + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + + mtk_iommu_tlb_flush_all(dom->data); } static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); - size_t length = gather->end - gather->start; - - if (gather->start == ULONG_MAX) - return; + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + size_t length = gather->end - gather->start + 1; mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize, - data); + dom->data); +} + +static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + + mtk_iommu_tlb_flush_range_sync(iova, size, size, dom->data); } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); phys_addr_t pa; pa = dom->iop->iova_to_phys(dom->iop, iova); - if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) + if (dom->data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) pa &= ~BIT_ULL(32); return pa; @@ -493,19 +593,25 @@ static void mtk_iommu_release_device(struct device *dev) static struct iommu_group *mtk_iommu_device_group(struct device *dev) { struct mtk_iommu_data *data = 
mtk_iommu_get_m4u_data(); + struct iommu_group *group; + int domid; if (!data) return ERR_PTR(-ENODEV); - /* All the client devices are in the same m4u iommu-group */ - if (!data->m4u_group) { - data->m4u_group = iommu_group_alloc(); - if (IS_ERR(data->m4u_group)) - dev_err(dev, "Failed to allocate M4U IOMMU group\n"); + domid = mtk_iommu_get_domain_id(dev, data->plat_data); + if (domid < 0) + return ERR_PTR(domid); + + group = data->m4u_group[domid]; + if (!group) { + group = iommu_group_alloc(); + if (!IS_ERR(group)) + data->m4u_group[domid] = group; } else { - iommu_group_ref_get(data->m4u_group); + iommu_group_ref_get(group); } - return data->m4u_group; + return group; } static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) @@ -530,6 +636,35 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) return iommu_fwspec_add_ids(dev, args->args, 1); } +static void mtk_iommu_get_resv_regions(struct device *dev, + struct list_head *head) +{ + struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + unsigned int domid = mtk_iommu_get_domain_id(dev, data->plat_data), i; + const struct mtk_iommu_iova_region *resv, *curdom; + struct iommu_resv_region *region; + int prot = IOMMU_WRITE | IOMMU_READ; + + if ((int)domid < 0) + return; + curdom = data->plat_data->iova_region + domid; + for (i = 0; i < data->plat_data->iova_region_nr; i++) { + resv = data->plat_data->iova_region + i; + + /* Only reserve when the region is inside the current domain */ + if (resv->iova_base <= curdom->iova_base || + resv->iova_base + resv->size >= curdom->iova_base + curdom->size) + continue; + + region = iommu_alloc_resv_region(resv->iova_base, resv->size, + prot, IOMMU_RESV_RESERVED); + if (!region) + return; + + list_add_tail(®ion->list, head); + } +} + static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, .domain_free = mtk_iommu_domain_free, @@ -539,11 +674,14 @@ static const struct iommu_ops mtk_iommu_ops 
= { .unmap = mtk_iommu_unmap, .flush_iotlb_all = mtk_iommu_flush_iotlb_all, .iotlb_sync = mtk_iommu_iotlb_sync, + .iotlb_sync_map = mtk_iommu_sync_map, .iova_to_phys = mtk_iommu_iova_to_phys, .probe_device = mtk_iommu_probe_device, .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, .of_xlate = mtk_iommu_of_xlate, + .get_resv_regions = mtk_iommu_get_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; @@ -639,6 +777,9 @@ static int mtk_iommu_probe(struct platform_device *pdev) { struct mtk_iommu_data *data; struct device *dev = &pdev->dev; + struct device_node *larbnode, *smicomm_node; + struct platform_device *plarbdev; + struct device_link *link; struct resource *res; resource_size_t ioaddr; struct component_match *match = NULL; @@ -705,8 +846,6 @@ static int mtk_iommu_probe(struct platform_device *pdev) return larb_nr; for (i = 0; i < larb_nr; i++) { - struct device_node *larbnode; - struct platform_device *plarbdev; u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); @@ -733,31 +872,65 @@ static int mtk_iommu_probe(struct platform_device *pdev) compare_of, larbnode); } - platform_set_drvdata(pdev, data); + /* Get smi-common dev from the last larb. 
*/ + smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); + if (!smicomm_node) + return -EINVAL; - ret = mtk_iommu_hw_init(data); - if (ret) - return ret; + plarbdev = of_find_device_by_node(smicomm_node); + of_node_put(smicomm_node); + data->smicomm_dev = &plarbdev->dev; + + pm_runtime_enable(dev); + + link = device_link_add(data->smicomm_dev, dev, + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); + if (!link) { + dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev)); + ret = -EINVAL; + goto out_runtime_disable; + } + + platform_set_drvdata(pdev, data); ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, "mtk-iommu.%pa", &ioaddr); if (ret) - return ret; + goto out_link_remove; iommu_device_set_ops(&data->iommu, &mtk_iommu_ops); iommu_device_set_fwnode(&data->iommu, &pdev->dev.of_node->fwnode); ret = iommu_device_register(&data->iommu); if (ret) - return ret; + goto out_sysfs_remove; spin_lock_init(&data->tlb_lock); list_add_tail(&data->list, &m4ulist); - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + if (!iommu_present(&platform_bus_type)) { + ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + if (ret) + goto out_list_del; + } - return component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + if (ret) + goto out_bus_set_null; + return ret; + +out_bus_set_null: + bus_set_iommu(&platform_bus_type, NULL); +out_list_del: + list_del(&data->list); + iommu_device_unregister(&data->iommu); +out_sysfs_remove: + iommu_device_sysfs_remove(&data->iommu); +out_link_remove: + device_link_remove(data->smicomm_dev, dev); +out_runtime_disable: + pm_runtime_disable(dev); + return ret; } static int mtk_iommu_remove(struct platform_device *pdev) @@ -771,12 +944,14 @@ static int mtk_iommu_remove(struct platform_device *pdev) bus_set_iommu(&platform_bus_type, NULL); clk_disable_unprepare(data->bclk); + 
device_link_remove(data->smicomm_dev, &pdev->dev); + pm_runtime_disable(&pdev->dev); devm_free_irq(&pdev->dev, data->irq, data); component_master_del(&pdev->dev, &mtk_iommu_com_ops); return 0; } -static int __maybe_unused mtk_iommu_suspend(struct device *dev) +static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; @@ -794,7 +969,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev) return 0; } -static int __maybe_unused mtk_iommu_resume(struct device *dev) +static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; @@ -802,6 +977,9 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) void __iomem *base = data->base; int ret; + /* Avoid first resume to affect the default value of registers below. */ + if (!m4u_dom) + return 0; ret = clk_prepare_enable(data->bclk); if (ret) { dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); @@ -815,20 +993,22 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL); writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); - if (m4u_dom) - writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, - base + REG_MMU_PT_BASE_ADDR); + writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); return 0; } static const struct dev_pm_ops mtk_iommu_pm_ops = { - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) + SET_RUNTIME_PM_OPS(mtk_iommu_runtime_suspend, mtk_iommu_runtime_resume, NULL) + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static const struct mtk_iommu_plat_data mt2712_data = { .m4u_plat = M4U_MT2712, .flags = HAS_4GB_MODE | HAS_BCLK | 
HAS_VLD_PA_RNG, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, }; @@ -836,6 +1016,8 @@ static const struct mtk_iommu_plat_data mt6779_data = { .m4u_plat = M4U_MT6779, .flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}}, }; @@ -843,6 +1025,8 @@ static const struct mtk_iommu_plat_data mt8167_data = { .m4u_plat = M4U_MT8167, .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}}, /* Linear mapping. */ }; @@ -851,6 +1035,8 @@ static const struct mtk_iommu_plat_data mt8173_data = { .flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI | HAS_LEGACY_IVRP_PADDR, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. 
*/ }; @@ -858,15 +1044,29 @@ static const struct mtk_iommu_plat_data mt8183_data = { .m4u_plat = M4U_MT8183, .flags = RESET_AXI, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, }; +static const struct mtk_iommu_plat_data mt8192_data = { + .m4u_plat = M4U_MT8192, + .flags = HAS_BCLK | HAS_SUB_COMM | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20}, + {0, 14, 16}, {0, 13, 18, 17}}, +}; + static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, { .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data}, { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, + { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, {} }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index df32b3e3408b..f81fa8862ed0 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -17,10 +17,13 @@ #include #include #include +#include #define MTK_LARB_COM_MAX 8 #define MTK_LARB_SUBCOM_MAX 4 +#define MTK_IOMMU_GROUP_MAX 8 + struct mtk_iommu_suspend_reg { union { u32 standard_axi_mode;/* v1 */ @@ -42,12 +45,18 @@ enum mtk_iommu_plat { M4U_MT8167, M4U_MT8173, M4U_MT8183, + M4U_MT8192, }; +struct mtk_iommu_iova_region; + struct mtk_iommu_plat_data { enum mtk_iommu_plat m4u_plat; u32 flags; u32 inv_sel_reg; + + unsigned int iova_region_nr; + const struct mtk_iommu_iova_region *iova_region; unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; }; @@ -61,12 +70,13 @@ struct mtk_iommu_data { phys_addr_t protect_base; /* protect memory base */ struct 
mtk_iommu_suspend_reg reg; struct mtk_iommu_domain *m4u_dom; - struct iommu_group *m4u_group; + struct iommu_group *m4u_group[MTK_IOMMU_GROUP_MAX]; bool enable_4GB; spinlock_t tlb_lock; /* lock for tlb range flush */ struct iommu_device iommu; const struct mtk_iommu_plat_data *plat_data; + struct device *smicomm_dev; struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index fac720273889..6f130e51f072 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -261,7 +261,8 @@ static int gart_iommu_of_xlate(struct device *dev, return 0; } -static void gart_iommu_sync_map(struct iommu_domain *domain) +static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { FLUSH_GART_REGS(gart_handle); } @@ -269,7 +270,9 @@ static void gart_iommu_sync_map(struct iommu_domain *domain) static void gart_iommu_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - gart_iommu_sync_map(domain); + size_t length = gather->end - gather->start + 1; + + gart_iommu_sync_map(domain, gather->start, length); } static const struct iommu_ops gart_iommu_ops = { diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index 40c02d7315f6..b396253fcf4b 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -15,6 +15,7 @@ #include #include #include +#include /* mt8173 */ #define SMI_LARB_MMU_EN 0xf00 @@ -43,6 +44,10 @@ /* mt2712 */ #define SMI_LARB_NONSEC_CON(id) (0x380 + ((id) * 4)) #define F_MMU_EN BIT(0) +#define BANK_SEL(id) ({ \ + u32 _id = (id) & 0x3; \ + (_id << 8 | _id << 10 | _id << 12 | _id << 14); \ +}) /* SMI COMMON */ #define SMI_BUS_SEL 0x220 @@ -87,6 +92,7 @@ struct mtk_smi_larb { /* larb: local arbiter */ const struct mtk_smi_larb_gen *larb_gen; int larbid; u32 *mmu; + unsigned char *bank; }; static int mtk_smi_clk_enable(const struct mtk_smi *smi) @@ -153,6 +159,7 @@ mtk_smi_larb_bind(struct device *dev, struct 
device *master, void *data) if (dev == larb_mmu[i].dev) { larb->larbid = i; larb->mmu = &larb_mmu[i].mmu; + larb->bank = larb_mmu[i].bank; return 0; } } @@ -171,6 +178,7 @@ static void mtk_smi_larb_config_port_gen2_general(struct device *dev) for_each_set_bit(i, (unsigned long *)larb->mmu, 32) { reg = readl_relaxed(larb->base + SMI_LARB_NONSEC_CON(i)); reg |= F_MMU_EN; + reg |= BANK_SEL(larb->bank[i]); writel(reg, larb->base + SMI_LARB_NONSEC_CON(i)); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9eb51f06d3ae..50af84e76fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -56,6 +56,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, mkey->size = MLX5_GET64(mkc, mkc, len); mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); + init_waitqueue_head(&mkey->wait); mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key); return 0; diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 7fec4fefe151..62f8407923d4 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -243,7 +243,7 @@ static int port_detect(struct device *dev, void *dev_drv) } /** - * parport_register_driver - register a parallel port device driver + * __parport_register_driver - register a parallel port device driver * @drv: structure describing the driver * @owner: owner module of drv * @mod_name: module name string diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 3075cf171f78..77522e5efe11 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -62,7 +62,7 @@ config ARM_PMU_ACPI config ARM_SMMU_V3_PMU tristate "ARM SMMUv3 Performance Monitors Extension" - depends on ARM64 && ACPI && ARM_SMMU_V3 + depends on ARM64 && ACPI help Provides support for the ARM SMMUv3 Performance Monitor Counter Groups (PMCG), which provide monitoring of transactions passing diff --git 
a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index c2b79736a92b..e74cf09eeff0 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -749,7 +749,7 @@ int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase, EXPORT_SYMBOL_GPL(rio_map_outb_region); /** - * rio_unmap_inb_region -- Unmap the inbound memory region + * rio_unmap_outb_region -- Unmap the inbound memory region * @mport: Master port * @destid: destination id mapping points to * @rstart: RIO base address window translates to diff --git a/fs/dcache.c b/fs/dcache.c index 97e81a844a96..c17fd15b01d4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -456,23 +456,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, list_lru_isolate_move(lru, &dentry->d_lru, list); } -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * d_drop() unhashes the entry from the parent dentry hashes, so that it won't - * be found through a VFS lookup any more. Note that this is different from - * deleting the dentry - d_delete will try to mark the dentry negative if - * possible, giving a successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. - * - * d_drop() is used mainly for stuff that wants to invalidate a dentry for some - * reason (NFS timeouts or autofs deletes). - * - * __d_drop requires dentry->d_lock - * ___d_drop doesn't mark dentry as "unhashed" - * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). - */ static void ___d_drop(struct dentry *dentry) { struct hlist_bl_head *b; @@ -501,6 +484,24 @@ void __d_drop(struct dentry *dentry) } EXPORT_SYMBOL(__d_drop); +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent dentry hashes, so that it won't + * be found through a VFS lookup any more. 
Note that this is different from + * deleting the dentry - d_delete will try to mark the dentry negative if + * possible, giving a successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants to invalidate a dentry for some + * reason (NFS timeouts or autofs deletes). + * + * __d_drop requires dentry->d_lock + * + * ___d_drop doesn't mark dentry as "unhashed" + * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). + */ void d_drop(struct dentry *dentry) { spin_lock(&dentry->d_lock); @@ -996,20 +997,6 @@ struct dentry *d_find_any_alias(struct inode *inode) } EXPORT_SYMBOL(d_find_any_alias); -/** - * d_find_alias - grab a hashed alias of inode - * @inode: inode in question - * - * If inode has a hashed alias, or is a directory and has any alias, - * acquire the reference to alias and return it. Otherwise return NULL. - * Notice that if inode is a directory there can be only one alias and - * it can be unhashed only if it has no children, or if it is the root - * of a filesystem, or if the directory was renamed and d_revalidate - * was the first vfs operation to notice. - * - * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that one. - */ static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias; @@ -1029,6 +1016,20 @@ static struct dentry *__d_find_alias(struct inode *inode) return NULL; } +/** + * d_find_alias - grab a hashed alias of inode + * @inode: inode in question + * + * If inode has a hashed alias, or is a directory and has any alias, + * acquire the reference to alias and return it. Otherwise return NULL. + * Notice that if inode is a directory there can be only one alias and + * it can be unhashed only if it has no children, or if it is the root + * of a filesystem, or if the directory was renamed and d_revalidate + * was the first vfs operation to notice. 
+ * + * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer + * any other hashed alias over that one. + */ struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; diff --git a/fs/inode.c b/fs/inode.c index 6442d97d9a4a..1dc9e032f659 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1493,7 +1493,7 @@ struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, EXPORT_SYMBOL(find_inode_rcu); /** - * find_inode_by_rcu - Find an inode in the inode cache + * find_inode_by_ino_rcu - Find an inode in the inode cache * @sb: Super block of file system to search * @ino: The inode number to match * @@ -1777,7 +1777,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags) } /** - * touch_atime - update the access time + * atime_needs_update - update the access time * @path: the &struct path to update * @inode: inode to update * diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 5266ccbec007..7c8f8feac6c3 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -23,7 +23,7 @@ #include "internal.h" /** - * struct psz_head - header of zone to flush to storage + * struct psz_buffer - header of zone to flush to storage * * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value) * @datalen: length of data in @data diff --git a/fs/seq_file.c b/fs/seq_file.c index 03a369ccd28c..cb11a34fb871 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -669,7 +669,8 @@ void seq_puts(struct seq_file *m, const char *s) EXPORT_SYMBOL(seq_puts); /** - * A helper routine for putting decimal numbers without rich format of printf(). + * seq_put_decimal_ull_width - A helper routine for putting decimal numbers + * without rich format of printf(). * only 'unsigned long long' is supported. 
* @m: seq_file identifying the buffer to which data should be written * @delimiter: a string which is printed before the number @@ -1044,7 +1045,7 @@ struct hlist_node *seq_hlist_next_rcu(void *v, EXPORT_SYMBOL(seq_hlist_next_rcu); /** - * seq_hlist_start_precpu - start an iteration of a percpu hlist array + * seq_hlist_start_percpu - start an iteration of a percpu hlist array * @head: pointer to percpu array of struct hlist_heads * @cpu: pointer to cpu "cursor" * @pos: start position of sequence diff --git a/fs/super.c b/fs/super.c index 5a1f384ffc74..8c1baca35c16 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1719,12 +1719,6 @@ int freeze_super(struct super_block *sb) } EXPORT_SYMBOL(freeze_super); -/** - * thaw_super -- unlock filesystem - * @sb: the super to thaw - * - * Unlocks the filesystem and marks it writeable again after freeze_super(). - */ static int thaw_super_locked(struct super_block *sb) { int error; @@ -1760,6 +1754,12 @@ static int thaw_super_locked(struct super_block *sb) return 0; } +/** + * thaw_super -- unlock filesystem + * @sb: the super to thaw + * + * Unlocks the filesystem and marks it writeable again after freeze_super(). 
+ */ int thaw_super(struct super_block *sb) { down_write(&sb->s_umount); diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index ea1c2998d54e..af0a8c3b87b7 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -514,7 +514,8 @@ enum acpi_dmar_type { ACPI_DMAR_TYPE_ROOT_ATS = 2, ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, ACPI_DMAR_TYPE_NAMESPACE = 4, - ACPI_DMAR_TYPE_RESERVED = 5 /* 5 and greater are reserved */ + ACPI_DMAR_TYPE_SATC = 5, + ACPI_DMAR_TYPE_RESERVED = 6 /* 6 and greater are reserved */ }; /* DMAR Device Scope structure */ @@ -607,6 +608,14 @@ struct acpi_dmar_andd { char device_name[1]; }; +/* 5: SOC Integrated Address Translation Cache Reporting Structure */ + +struct acpi_dmar_satc { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; +}; /******************************************************************************* * * DRTM - Dynamic Root of Trust for Measurement table diff --git a/include/dt-bindings/memory/mt2701-larb-port.h b/include/dt-bindings/memory/mt2701-larb-port.h index 2d85c2ec6cfd..25d03526f142 100644 --- a/include/dt-bindings/memory/mt2701-larb-port.h +++ b/include/dt-bindings/memory/mt2701-larb-port.h @@ -4,8 +4,8 @@ * Author: Honghui Zhang */ -#ifndef _MT2701_LARB_PORT_H_ -#define _MT2701_LARB_PORT_H_ +#ifndef _DT_BINDINGS_MEMORY_MT2701_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT2701_LARB_PORT_H_ /* * Mediatek m4u generation 1 such as mt2701 has flat m4u port numbers, diff --git a/include/dt-bindings/memory/mt2712-larb-port.h b/include/dt-bindings/memory/mt2712-larb-port.h index 6f9aa7349cef..e41a2841bcff 100644 --- a/include/dt-bindings/memory/mt2712-larb-port.h +++ b/include/dt-bindings/memory/mt2712-larb-port.h @@ -3,10 +3,10 @@ * Copyright (c) 2017 MediaTek Inc. 
* Author: Yong Wu */ -#ifndef __DTS_IOMMU_PORT_MT2712_H -#define __DTS_IOMMU_PORT_MT2712_H +#ifndef _DT_BINDINGS_MEMORY_MT2712_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT2712_LARB_PORT_H_ -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt6779-larb-port.h b/include/dt-bindings/memory/mt6779-larb-port.h index 2ad0899fbf2f..3fb438a96e35 100644 --- a/include/dt-bindings/memory/mt6779-larb-port.h +++ b/include/dt-bindings/memory/mt6779-larb-port.h @@ -4,10 +4,10 @@ * Author: Chao Hao */ -#ifndef _DTS_IOMMU_PORT_MT6779_H_ -#define _DTS_IOMMU_PORT_MT6779_H_ +#ifndef _DT_BINDINGS_MEMORY_MT6779_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT6779_LARB_PORT_H_ -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt8167-larb-port.h b/include/dt-bindings/memory/mt8167-larb-port.h index 000fb299a408..aae57d4824ca 100644 --- a/include/dt-bindings/memory/mt8167-larb-port.h +++ b/include/dt-bindings/memory/mt8167-larb-port.h @@ -5,10 +5,10 @@ * Author: Honghui Zhang * Author: Fabien Parent */ -#ifndef __DTS_IOMMU_PORT_MT8167_H -#define __DTS_IOMMU_PORT_MT8167_H +#ifndef _DT_BINDINGS_MEMORY_MT8167_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8167_LARB_PORT_H_ -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt8173-larb-port.h b/include/dt-bindings/memory/mt8173-larb-port.h index 9f31ccfeca21..167a7fc51868 100644 --- a/include/dt-bindings/memory/mt8173-larb-port.h +++ b/include/dt-bindings/memory/mt8173-larb-port.h @@ -3,10 +3,10 @@ * Copyright (c) 2015-2016 MediaTek Inc. 
* Author: Yong Wu */ -#ifndef __DTS_IOMMU_PORT_MT8173_H -#define __DTS_IOMMU_PORT_MT8173_H +#ifndef _DT_BINDINGS_MEMORY_MT8173_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8173_LARB_PORT_H_ -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt8183-larb-port.h b/include/dt-bindings/memory/mt8183-larb-port.h index 2c579f305162..36abdf0ce5a2 100644 --- a/include/dt-bindings/memory/mt8183-larb-port.h +++ b/include/dt-bindings/memory/mt8183-larb-port.h @@ -3,10 +3,10 @@ * Copyright (c) 2018 MediaTek Inc. * Author: Yong Wu */ -#ifndef __DTS_IOMMU_PORT_MT8183_H -#define __DTS_IOMMU_PORT_MT8183_H +#ifndef _DT_BINDINGS_MEMORY_MT8183_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8183_LARB_PORT_H_ -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt8192-larb-port.h b/include/dt-bindings/memory/mt8192-larb-port.h new file mode 100644 index 000000000000..23035a52c675 --- /dev/null +++ b/include/dt-bindings/memory/mt8192-larb-port.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 MediaTek Inc. + * + * Author: Chao Hao + * Author: Yong Wu + */ +#ifndef _DT_BINDINGS_MEMORY_MT8192_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8192_LARB_PORT_H_ + +#include + +/* + * MM IOMMU supports 16GB dma address. + * + * The address will preassign like this: + * + * modules dma-address-region larbs-ports + * disp 0 ~ 4G larb0/1 + * vcodec 4G ~ 8G larb4/5/7 + * cam/mdp 8G ~ 12G larb2/9/11/13/14/16/17/18/19/20 + * CCU0 0x4000_0000 ~ 0x43ff_ffff larb13: port 9/10 + * CCU1 0x4400_0000 ~ 0x47ff_ffff larb14: port 4/5 + * + * larb3/6/8/10/12/15 is null. 
+ */ + +/* larb0 */ +#define M4U_PORT_L0_DISP_POSTMASK0 MTK_M4U_ID(0, 0) +#define M4U_PORT_L0_OVL_RDMA0_HDR MTK_M4U_ID(0, 1) +#define M4U_PORT_L0_OVL_RDMA0 MTK_M4U_ID(0, 2) +#define M4U_PORT_L0_DISP_RDMA0 MTK_M4U_ID(0, 3) +#define M4U_PORT_L0_DISP_WDMA0 MTK_M4U_ID(0, 4) +#define M4U_PORT_L0_DISP_FAKE0 MTK_M4U_ID(0, 5) + +/* larb1 */ +#define M4U_PORT_L1_OVL_2L_RDMA0_HDR MTK_M4U_ID(1, 0) +#define M4U_PORT_L1_OVL_2L_RDMA2_HDR MTK_M4U_ID(1, 1) +#define M4U_PORT_L1_OVL_2L_RDMA0 MTK_M4U_ID(1, 2) +#define M4U_PORT_L1_OVL_2L_RDMA2 MTK_M4U_ID(1, 3) +#define M4U_PORT_L1_DISP_MDP_RDMA4 MTK_M4U_ID(1, 4) +#define M4U_PORT_L1_DISP_RDMA4 MTK_M4U_ID(1, 5) +#define M4U_PORT_L1_DISP_UFBC_WDMA0 MTK_M4U_ID(1, 6) +#define M4U_PORT_L1_DISP_FAKE1 MTK_M4U_ID(1, 7) + +/* larb2 */ +#define M4U_PORT_L2_MDP_RDMA0 MTK_M4U_ID(2, 0) +#define M4U_PORT_L2_MDP_RDMA1 MTK_M4U_ID(2, 1) +#define M4U_PORT_L2_MDP_WROT0 MTK_M4U_ID(2, 2) +#define M4U_PORT_L2_MDP_WROT1 MTK_M4U_ID(2, 3) +#define M4U_PORT_L2_MDP_DISP_FAKE0 MTK_M4U_ID(2, 4) + +/* larb3: null */ + +/* larb4 */ +#define M4U_PORT_L4_VDEC_MC_EXT MTK_M4U_ID(4, 0) +#define M4U_PORT_L4_VDEC_UFO_EXT MTK_M4U_ID(4, 1) +#define M4U_PORT_L4_VDEC_PP_EXT MTK_M4U_ID(4, 2) +#define M4U_PORT_L4_VDEC_PRED_RD_EXT MTK_M4U_ID(4, 3) +#define M4U_PORT_L4_VDEC_PRED_WR_EXT MTK_M4U_ID(4, 4) +#define M4U_PORT_L4_VDEC_PPWRAP_EXT MTK_M4U_ID(4, 5) +#define M4U_PORT_L4_VDEC_TILE_EXT MTK_M4U_ID(4, 6) +#define M4U_PORT_L4_VDEC_VLD_EXT MTK_M4U_ID(4, 7) +#define M4U_PORT_L4_VDEC_VLD2_EXT MTK_M4U_ID(4, 8) +#define M4U_PORT_L4_VDEC_AVC_MV_EXT MTK_M4U_ID(4, 9) +#define M4U_PORT_L4_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(4, 10) + +/* larb5 */ +#define M4U_PORT_L5_VDEC_LAT0_VLD_EXT MTK_M4U_ID(5, 0) +#define M4U_PORT_L5_VDEC_LAT0_VLD2_EXT MTK_M4U_ID(5, 1) +#define M4U_PORT_L5_VDEC_LAT0_AVC_MV_EXT MTK_M4U_ID(5, 2) +#define M4U_PORT_L5_VDEC_LAT0_PRED_RD_EXT MTK_M4U_ID(5, 3) +#define M4U_PORT_L5_VDEC_LAT0_TILE_EXT MTK_M4U_ID(5, 4) +#define M4U_PORT_L5_VDEC_LAT0_WDMA_EXT MTK_M4U_ID(5, 5) 
+#define M4U_PORT_L5_VDEC_LAT0_RG_CTRL_DMA_EXT MTK_M4U_ID(5, 6) +#define M4U_PORT_L5_VDEC_UFO_ENC_EXT MTK_M4U_ID(5, 7) + +/* larb6: null */ + +/* larb7 */ +#define M4U_PORT_L7_VENC_RCPU MTK_M4U_ID(7, 0) +#define M4U_PORT_L7_VENC_REC MTK_M4U_ID(7, 1) +#define M4U_PORT_L7_VENC_BSDMA MTK_M4U_ID(7, 2) +#define M4U_PORT_L7_VENC_SV_COMV MTK_M4U_ID(7, 3) +#define M4U_PORT_L7_VENC_RD_COMV MTK_M4U_ID(7, 4) +#define M4U_PORT_L7_VENC_CUR_LUMA MTK_M4U_ID(7, 5) +#define M4U_PORT_L7_VENC_CUR_CHROMA MTK_M4U_ID(7, 6) +#define M4U_PORT_L7_VENC_REF_LUMA MTK_M4U_ID(7, 7) +#define M4U_PORT_L7_VENC_REF_CHROMA MTK_M4U_ID(7, 8) +#define M4U_PORT_L7_JPGENC_Y_RDMA MTK_M4U_ID(7, 9) +#define M4U_PORT_L7_JPGENC_Q_RDMA MTK_M4U_ID(7, 10) +#define M4U_PORT_L7_JPGENC_C_TABLE MTK_M4U_ID(7, 11) +#define M4U_PORT_L7_JPGENC_BSDMA MTK_M4U_ID(7, 12) +#define M4U_PORT_L7_VENC_SUB_R_LUMA MTK_M4U_ID(7, 13) +#define M4U_PORT_L7_VENC_SUB_W_LUMA MTK_M4U_ID(7, 14) + +/* larb8: null */ + +/* larb9 */ +#define M4U_PORT_L9_IMG_IMGI_D1 MTK_M4U_ID(9, 0) +#define M4U_PORT_L9_IMG_IMGBI_D1 MTK_M4U_ID(9, 1) +#define M4U_PORT_L9_IMG_DMGI_D1 MTK_M4U_ID(9, 2) +#define M4U_PORT_L9_IMG_DEPI_D1 MTK_M4U_ID(9, 3) +#define M4U_PORT_L9_IMG_ICE_D1 MTK_M4U_ID(9, 4) +#define M4U_PORT_L9_IMG_SMTI_D1 MTK_M4U_ID(9, 5) +#define M4U_PORT_L9_IMG_SMTO_D2 MTK_M4U_ID(9, 6) +#define M4U_PORT_L9_IMG_SMTO_D1 MTK_M4U_ID(9, 7) +#define M4U_PORT_L9_IMG_CRZO_D1 MTK_M4U_ID(9, 8) +#define M4U_PORT_L9_IMG_IMG3O_D1 MTK_M4U_ID(9, 9) +#define M4U_PORT_L9_IMG_VIPI_D1 MTK_M4U_ID(9, 10) +#define M4U_PORT_L9_IMG_SMTI_D5 MTK_M4U_ID(9, 11) +#define M4U_PORT_L9_IMG_TIMGO_D1 MTK_M4U_ID(9, 12) +#define M4U_PORT_L9_IMG_UFBC_W0 MTK_M4U_ID(9, 13) +#define M4U_PORT_L9_IMG_UFBC_R0 MTK_M4U_ID(9, 14) + +/* larb10: null */ + +/* larb11 */ +#define M4U_PORT_L11_IMG_IMGI_D1 MTK_M4U_ID(11, 0) +#define M4U_PORT_L11_IMG_IMGBI_D1 MTK_M4U_ID(11, 1) +#define M4U_PORT_L11_IMG_DMGI_D1 MTK_M4U_ID(11, 2) +#define M4U_PORT_L11_IMG_DEPI_D1 MTK_M4U_ID(11, 3) +#define 
M4U_PORT_L11_IMG_ICE_D1 MTK_M4U_ID(11, 4) +#define M4U_PORT_L11_IMG_SMTI_D1 MTK_M4U_ID(11, 5) +#define M4U_PORT_L11_IMG_SMTO_D2 MTK_M4U_ID(11, 6) +#define M4U_PORT_L11_IMG_SMTO_D1 MTK_M4U_ID(11, 7) +#define M4U_PORT_L11_IMG_CRZO_D1 MTK_M4U_ID(11, 8) +#define M4U_PORT_L11_IMG_IMG3O_D1 MTK_M4U_ID(11, 9) +#define M4U_PORT_L11_IMG_VIPI_D1 MTK_M4U_ID(11, 10) +#define M4U_PORT_L11_IMG_SMTI_D5 MTK_M4U_ID(11, 11) +#define M4U_PORT_L11_IMG_TIMGO_D1 MTK_M4U_ID(11, 12) +#define M4U_PORT_L11_IMG_UFBC_W0 MTK_M4U_ID(11, 13) +#define M4U_PORT_L11_IMG_UFBC_R0 MTK_M4U_ID(11, 14) +#define M4U_PORT_L11_IMG_WPE_RDMA1 MTK_M4U_ID(11, 15) +#define M4U_PORT_L11_IMG_WPE_RDMA0 MTK_M4U_ID(11, 16) +#define M4U_PORT_L11_IMG_WPE_WDMA MTK_M4U_ID(11, 17) +#define M4U_PORT_L11_IMG_MFB_RDMA0 MTK_M4U_ID(11, 18) +#define M4U_PORT_L11_IMG_MFB_RDMA1 MTK_M4U_ID(11, 19) +#define M4U_PORT_L11_IMG_MFB_RDMA2 MTK_M4U_ID(11, 20) +#define M4U_PORT_L11_IMG_MFB_RDMA3 MTK_M4U_ID(11, 21) +#define M4U_PORT_L11_IMG_MFB_RDMA4 MTK_M4U_ID(11, 22) +#define M4U_PORT_L11_IMG_MFB_RDMA5 MTK_M4U_ID(11, 23) +#define M4U_PORT_L11_IMG_MFB_WDMA0 MTK_M4U_ID(11, 24) +#define M4U_PORT_L11_IMG_MFB_WDMA1 MTK_M4U_ID(11, 25) + +/* larb12: null */ + +/* larb13 */ +#define M4U_PORT_L13_CAM_MRAWI MTK_M4U_ID(13, 0) +#define M4U_PORT_L13_CAM_MRAWO0 MTK_M4U_ID(13, 1) +#define M4U_PORT_L13_CAM_MRAWO1 MTK_M4U_ID(13, 2) +#define M4U_PORT_L13_CAM_CAMSV1 MTK_M4U_ID(13, 3) +#define M4U_PORT_L13_CAM_CAMSV2 MTK_M4U_ID(13, 4) +#define M4U_PORT_L13_CAM_CAMSV3 MTK_M4U_ID(13, 5) +#define M4U_PORT_L13_CAM_CAMSV4 MTK_M4U_ID(13, 6) +#define M4U_PORT_L13_CAM_CAMSV5 MTK_M4U_ID(13, 7) +#define M4U_PORT_L13_CAM_CAMSV6 MTK_M4U_ID(13, 8) +#define M4U_PORT_L13_CAM_CCUI MTK_M4U_ID(13, 9) +#define M4U_PORT_L13_CAM_CCUO MTK_M4U_ID(13, 10) +#define M4U_PORT_L13_CAM_FAKE MTK_M4U_ID(13, 11) + +/* larb14 */ +#define M4U_PORT_L14_CAM_RESERVE1 MTK_M4U_ID(14, 0) +#define M4U_PORT_L14_CAM_RESERVE2 MTK_M4U_ID(14, 1) +#define M4U_PORT_L14_CAM_RESERVE3 MTK_M4U_ID(14, 2) 
+#define M4U_PORT_L14_CAM_CAMSV0 MTK_M4U_ID(14, 3) +#define M4U_PORT_L14_CAM_CCUI MTK_M4U_ID(14, 4) +#define M4U_PORT_L14_CAM_CCUO MTK_M4U_ID(14, 5) + +/* larb15: null */ + +/* larb16 */ +#define M4U_PORT_L16_CAM_IMGO_R1_A MTK_M4U_ID(16, 0) +#define M4U_PORT_L16_CAM_RRZO_R1_A MTK_M4U_ID(16, 1) +#define M4U_PORT_L16_CAM_CQI_R1_A MTK_M4U_ID(16, 2) +#define M4U_PORT_L16_CAM_BPCI_R1_A MTK_M4U_ID(16, 3) +#define M4U_PORT_L16_CAM_YUVO_R1_A MTK_M4U_ID(16, 4) +#define M4U_PORT_L16_CAM_UFDI_R2_A MTK_M4U_ID(16, 5) +#define M4U_PORT_L16_CAM_RAWI_R2_A MTK_M4U_ID(16, 6) +#define M4U_PORT_L16_CAM_RAWI_R3_A MTK_M4U_ID(16, 7) +#define M4U_PORT_L16_CAM_AAO_R1_A MTK_M4U_ID(16, 8) +#define M4U_PORT_L16_CAM_AFO_R1_A MTK_M4U_ID(16, 9) +#define M4U_PORT_L16_CAM_FLKO_R1_A MTK_M4U_ID(16, 10) +#define M4U_PORT_L16_CAM_LCESO_R1_A MTK_M4U_ID(16, 11) +#define M4U_PORT_L16_CAM_CRZO_R1_A MTK_M4U_ID(16, 12) +#define M4U_PORT_L16_CAM_LTMSO_R1_A MTK_M4U_ID(16, 13) +#define M4U_PORT_L16_CAM_RSSO_R1_A MTK_M4U_ID(16, 14) +#define M4U_PORT_L16_CAM_AAHO_R1_A MTK_M4U_ID(16, 15) +#define M4U_PORT_L16_CAM_LSCI_R1_A MTK_M4U_ID(16, 16) + +/* larb17 */ +#define M4U_PORT_L17_CAM_IMGO_R1_B MTK_M4U_ID(17, 0) +#define M4U_PORT_L17_CAM_RRZO_R1_B MTK_M4U_ID(17, 1) +#define M4U_PORT_L17_CAM_CQI_R1_B MTK_M4U_ID(17, 2) +#define M4U_PORT_L17_CAM_BPCI_R1_B MTK_M4U_ID(17, 3) +#define M4U_PORT_L17_CAM_YUVO_R1_B MTK_M4U_ID(17, 4) +#define M4U_PORT_L17_CAM_UFDI_R2_B MTK_M4U_ID(17, 5) +#define M4U_PORT_L17_CAM_RAWI_R2_B MTK_M4U_ID(17, 6) +#define M4U_PORT_L17_CAM_RAWI_R3_B MTK_M4U_ID(17, 7) +#define M4U_PORT_L17_CAM_AAO_R1_B MTK_M4U_ID(17, 8) +#define M4U_PORT_L17_CAM_AFO_R1_B MTK_M4U_ID(17, 9) +#define M4U_PORT_L17_CAM_FLKO_R1_B MTK_M4U_ID(17, 10) +#define M4U_PORT_L17_CAM_LCESO_R1_B MTK_M4U_ID(17, 11) +#define M4U_PORT_L17_CAM_CRZO_R1_B MTK_M4U_ID(17, 12) +#define M4U_PORT_L17_CAM_LTMSO_R1_B MTK_M4U_ID(17, 13) +#define M4U_PORT_L17_CAM_RSSO_R1_B MTK_M4U_ID(17, 14) +#define M4U_PORT_L17_CAM_AAHO_R1_B MTK_M4U_ID(17, 15) 
+#define M4U_PORT_L17_CAM_LSCI_R1_B MTK_M4U_ID(17, 16) + +/* larb18 */ +#define M4U_PORT_L18_CAM_IMGO_R1_C MTK_M4U_ID(18, 0) +#define M4U_PORT_L18_CAM_RRZO_R1_C MTK_M4U_ID(18, 1) +#define M4U_PORT_L18_CAM_CQI_R1_C MTK_M4U_ID(18, 2) +#define M4U_PORT_L18_CAM_BPCI_R1_C MTK_M4U_ID(18, 3) +#define M4U_PORT_L18_CAM_YUVO_R1_C MTK_M4U_ID(18, 4) +#define M4U_PORT_L18_CAM_UFDI_R2_C MTK_M4U_ID(18, 5) +#define M4U_PORT_L18_CAM_RAWI_R2_C MTK_M4U_ID(18, 6) +#define M4U_PORT_L18_CAM_RAWI_R3_C MTK_M4U_ID(18, 7) +#define M4U_PORT_L18_CAM_AAO_R1_C MTK_M4U_ID(18, 8) +#define M4U_PORT_L18_CAM_AFO_R1_C MTK_M4U_ID(18, 9) +#define M4U_PORT_L18_CAM_FLKO_R1_C MTK_M4U_ID(18, 10) +#define M4U_PORT_L18_CAM_LCESO_R1_C MTK_M4U_ID(18, 11) +#define M4U_PORT_L18_CAM_CRZO_R1_C MTK_M4U_ID(18, 12) +#define M4U_PORT_L18_CAM_LTMSO_R1_C MTK_M4U_ID(18, 13) +#define M4U_PORT_L18_CAM_RSSO_R1_C MTK_M4U_ID(18, 14) +#define M4U_PORT_L18_CAM_AAHO_R1_C MTK_M4U_ID(18, 15) +#define M4U_PORT_L18_CAM_LSCI_R1_C MTK_M4U_ID(18, 16) + +/* larb19 */ +#define M4U_PORT_L19_IPE_DVS_RDMA MTK_M4U_ID(19, 0) +#define M4U_PORT_L19_IPE_DVS_WDMA MTK_M4U_ID(19, 1) +#define M4U_PORT_L19_IPE_DVP_RDMA MTK_M4U_ID(19, 2) +#define M4U_PORT_L19_IPE_DVP_WDMA MTK_M4U_ID(19, 3) + +/* larb20 */ +#define M4U_PORT_L20_IPE_FDVT_RDA MTK_M4U_ID(20, 0) +#define M4U_PORT_L20_IPE_FDVT_RDB MTK_M4U_ID(20, 1) +#define M4U_PORT_L20_IPE_FDVT_WRA MTK_M4U_ID(20, 2) +#define M4U_PORT_L20_IPE_FDVT_WRB MTK_M4U_ID(20, 3) +#define M4U_PORT_L20_IPE_RSC_RDMA0 MTK_M4U_ID(20, 4) +#define M4U_PORT_L20_IPE_RSC_WDMA MTK_M4U_ID(20, 5) + +#endif diff --git a/include/dt-bindings/memory/mtk-memory-port.h b/include/dt-bindings/memory/mtk-memory-port.h new file mode 100644 index 000000000000..7d64103209af --- /dev/null +++ b/include/dt-bindings/memory/mtk-memory-port.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 MediaTek Inc. 
+ * Author: Yong Wu + */ +#ifndef __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_ +#define __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_ + +#define MTK_LARB_NR_MAX 32 + +#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0x1f) +#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) + +#endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f2e0697258b8..c032cfe133c7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -153,7 +153,7 @@ struct request { */ union { struct hlist_node hash; /* merge hash */ - struct list_head ipi_list; + struct llist_node ipi_list; }; /* diff --git a/include/linux/connector.h b/include/linux/connector.h index 8ea860efea37..487350bb19c3 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -99,7 +99,7 @@ void cn_del_callback(const struct cb_id *id); int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask); /** - * cn_netlink_send_mult - Sends message to the specified groups. + * cn_netlink_send - Sends message to the specified groups. * * @msg: message header(with attached data). * @portid: destination port. 
diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 65565820328a..e04436a7ff27 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -138,6 +138,7 @@ extern void intel_iommu_shutdown(void); extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg); extern int dmar_parse_one_atsr(struct acpi_dmar_header *header, void *arg); extern int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg); +extern int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg); extern int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg); extern int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert); extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); @@ -149,6 +150,7 @@ static inline void intel_iommu_shutdown(void) { } #define dmar_parse_one_atsr dmar_res_noop #define dmar_check_one_atsr dmar_res_noop #define dmar_release_one_atsr dmar_res_noop +#define dmar_parse_one_satc dmar_res_noop static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index a93d85932eb9..ebc295647581 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -6,7 +6,7 @@ #ifndef __STRATIX10_SVC_CLIENT_H #define __STRATIX10_SVC_CLIENT_H -/** +/* * Service layer driver supports client names * * fpga: for FPGA configuration @@ -15,7 +15,7 @@ #define SVC_CLIENT_FPGA "fpga" #define SVC_CLIENT_RSU "rsu" -/** +/* * Status of the sent command, in bit number * * SVC_STATUS_OK: @@ -50,7 +50,7 @@ #define SVC_STATUS_ERROR 5 #define SVC_STATUS_NO_SUPPORT 6 -/** +/* * Flag bit for COMMAND_RECONFIG * * COMMAND_RECONFIG_FLAG_PARTIAL: @@ -58,7 +58,7 @@ */ #define COMMAND_RECONFIG_FLAG_PARTIAL 1 -/** +/* * Timeout settings for service clients: * timeout value used in Stratix10 FPGA manager driver. 
* timeout value used in RSU driver @@ -218,7 +218,7 @@ void stratix10_svc_free_memory(struct stratix10_svc_chan *chan, void *kaddr); int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg); /** - * intel_svc_done() - complete service request + * stratix10_svc_done() - complete service request * @chan: service channel assigned to the client * * This function is used by service client to inform service layer that diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 09c6a0bf3892..1bc46b88711a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -42,6 +42,8 @@ #define DMA_FL_PTE_PRESENT BIT_ULL(0) #define DMA_FL_PTE_US BIT_ULL(2) +#define DMA_FL_PTE_ACCESS BIT_ULL(5) +#define DMA_FL_PTE_DIRTY BIT_ULL(6) #define DMA_FL_PTE_XD BIT_ULL(63) #define ADDR_WIDTH_5LEVEL (57) @@ -168,34 +170,37 @@ * Extended Capability Register */ +#define ecap_rps(e) (((e) >> 49) & 0x1) #define ecap_smpwc(e) (((e) >> 48) & 0x1) #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) +#define ecap_slads(e) (((e) >> 45) & 0x1) #define ecap_vcs(e) (((e) >> 44) & 0x1) #define ecap_smts(e) (((e) >> 43) & 0x1) -#define ecap_dit(e) ((e >> 41) & 0x1) -#define ecap_pasid(e) ((e >> 40) & 0x1) -#define ecap_pss(e) ((e >> 35) & 0x1f) -#define ecap_eafs(e) ((e >> 34) & 0x1) -#define ecap_nwfs(e) ((e >> 33) & 0x1) -#define ecap_srs(e) ((e >> 31) & 0x1) -#define ecap_ers(e) ((e >> 30) & 0x1) -#define ecap_prs(e) ((e >> 29) & 0x1) -#define ecap_broken_pasid(e) ((e >> 28) & 0x1) -#define ecap_dis(e) ((e >> 27) & 0x1) -#define ecap_nest(e) ((e >> 26) & 0x1) -#define ecap_mts(e) ((e >> 25) & 0x1) -#define ecap_ecs(e) ((e >> 24) & 0x1) +#define ecap_dit(e) (((e) >> 41) & 0x1) +#define ecap_pds(e) (((e) >> 42) & 0x1) +#define ecap_pasid(e) (((e) >> 40) & 0x1) +#define ecap_pss(e) (((e) >> 35) & 0x1f) +#define ecap_eafs(e) (((e) >> 34) & 0x1) +#define ecap_nwfs(e) (((e) >> 33) & 0x1) +#define ecap_srs(e) (((e) >> 31) & 0x1) 
+#define ecap_ers(e) (((e) >> 30) & 0x1) +#define ecap_prs(e) (((e) >> 29) & 0x1) +#define ecap_broken_pasid(e) (((e) >> 28) & 0x1) +#define ecap_dis(e) (((e) >> 27) & 0x1) +#define ecap_nest(e) (((e) >> 26) & 0x1) +#define ecap_mts(e) (((e) >> 25) & 0x1) +#define ecap_ecs(e) (((e) >> 24) & 0x1) #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) #define ecap_coherent(e) ((e) & 0x1) #define ecap_qis(e) ((e) & 0x2) -#define ecap_pass_through(e) ((e >> 6) & 0x1) -#define ecap_eim_support(e) ((e >> 4) & 0x1) -#define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_pass_through(e) (((e) >> 6) & 0x1) +#define ecap_eim_support(e) (((e) >> 4) & 0x1) +#define ecap_ir_support(e) (((e) >> 3) & 0x1) #define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) -#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) -#define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */ +#define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) +#define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ /* Virtual command interface capability */ #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ @@ -662,7 +667,7 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) * 7: super page * 8-10: available * 11: snoop behavior - * 12-63: Host physcial address + * 12-63: Host physical address */ struct dma_pte { u64 val; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ea727eb1a1a9..a4c9ca2c31f1 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -15,6 +15,7 @@ enum io_pgtable_fmt { ARM_64_LPAE_S2, ARM_V7S, ARM_MALI_LPAE, + AMD_IOMMU_V1, IO_PGTABLE_NUM_FMTS, }; @@ -68,13 +69,9 @@ struct io_pgtable_cfg { * hardware which does not implement the permissions of a given * format, and/or requires some format-specific default value. 
* - * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid - * (unmapped) entries but the hardware might do so anyway, perform - * TLB maintenance when mapping as well as when unmapping. - * * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend - * to support up to 34 bits PA where the bit32 and bit33 are - * encoded in the bit9 and bit4 of the PTE respectively. + * to support up to 35 bits PA where the bit32, bit33 and bit34 are + * encoded in the bit9, bit4 and bit5 of the PTE respectively. * * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for @@ -88,7 +85,6 @@ struct io_pgtable_cfg { */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) - #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) @@ -214,14 +210,16 @@ struct io_pgtable_domain_attr { static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) { - iop->cfg.tlb->tlb_flush_all(iop->cookie); + if (iop->cfg.tlb && iop->cfg.tlb->tlb_flush_all) + iop->cfg.tlb->tlb_flush_all(iop->cookie); } static inline void io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova, size_t size, size_t granule) { - iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); + if (iop->cfg.tlb && iop->cfg.tlb->tlb_flush_walk) + iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); } static inline void @@ -229,7 +227,7 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, struct iommu_iotlb_gather * gather, unsigned long iova, size_t granule) { - if (iop->cfg.tlb->tlb_add_page) + if (iop->cfg.tlb && iop->cfg.tlb->tlb_add_page) iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie); } @@ -251,5 +249,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns; extern struct io_pgtable_init_fns 
io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; +extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; #endif /* __IO_PGTABLE_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index efa96263b81b..5e7fe519430a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -170,7 +170,7 @@ enum iommu_dev_features { * struct iommu_iotlb_gather - Range information for a pending IOTLB flush * * @start: IOVA representing the start of the range to be flushed - * @end: IOVA representing the end of the range to be flushed (exclusive) + * @end: IOVA representing the end of the range to be flushed (inclusive) * @pgsize: The interval at which to perform the flush * * This structure is intended to be updated by multiple calls to the @@ -246,7 +246,8 @@ struct iommu_ops { size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain); + void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); @@ -376,6 +377,7 @@ int iommu_device_sysfs_add(struct iommu_device *iommu, void iommu_device_sysfs_remove(struct iommu_device *iommu); int iommu_device_link(struct iommu_device *iommu, struct device *link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link); +int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain); static inline void __iommu_device_set_ops(struct iommu_device *iommu, const struct iommu_ops *ops) @@ -514,7 +516,6 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, extern int 
iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t offset, u64 size, int prot); -extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); @@ -538,7 +539,7 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, struct iommu_iotlb_gather *gather, unsigned long iova, size_t size) { - unsigned long start = iova, end = start + size; + unsigned long start = iova, end = start + size - 1; /* * If the new page is disjoint from the current range or is mapped at @@ -630,7 +631,6 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv) int iommu_probe_device(struct device *dev); void iommu_release_device(struct device *dev); -bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); @@ -749,11 +749,6 @@ static inline int iommu_domain_window_enable(struct iommu_domain *domain, return -ENODEV; } -static inline void iommu_domain_window_disable(struct iommu_domain *domain, - u32 wnd_nr) -{ -} - static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { return 0; @@ -984,12 +979,6 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) return NULL; } -static inline bool -iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) -{ - return false; -} - static inline bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) { diff --git a/include/linux/iova.h b/include/linux/iova.h index 76e16ae20729..c834c01c0a5b 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -150,10 +150,8 @@ unsigned long alloc_iova_fast(struct iova_domain 
*iovad, unsigned long size, unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); -void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); -bool has_iova_flush_queue(struct iova_domain *iovad); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -212,22 +210,12 @@ static inline struct iova *reserve_iova(struct iova_domain *iovad, return NULL; } -static inline void copy_reserved_iova(struct iova_domain *from, - struct iova_domain *to) -{ -} - static inline void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn) { } -static inline bool has_iova_flush_queue(struct iova_domain *iovad) -{ - return false; -} - static inline int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 0d6cf64c8bb1..0444b44bd156 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -325,7 +325,6 @@ extern char *kgdb_mem2hex(char *mem, char *buf, int count); extern int kgdb_hex2mem(char *buf, char *mem, int count); extern int kgdb_isremovedbreak(unsigned long addr); -extern void kgdb_schedule_breakpoint(void); extern int kgdb_has_hit_break(unsigned long addr); extern int diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b93c44b9121e..c88bc24e31aa 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -117,7 +117,7 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); -unsigned long memblock_free_all(void); +void memblock_free_all(void); 
void reset_node_managed_pages(pg_data_t *pgdat); void reset_all_zones_managed_pages(void); @@ -272,7 +272,7 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, unsigned long *out_spfn, unsigned long *out_epfn); /** - * for_each_free_mem_range_in_zone - iterate through zone specific free + * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free * memblock areas * @i: u64 used as loop variable * @zone: zone in which all of the memory blocks reside @@ -292,7 +292,7 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) /** - * for_each_free_mem_range_in_zone_from - iterate through zone specific + * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific * free memblock areas from a given point * @i: u64 used as loop variable * @zone: zone in which all of the memory blocks reside diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6ea8d67e3cb8..53b89631a1d9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -307,13 +307,6 @@ struct mlx5_cmd { struct mlx5_cmd_stats *stats; }; -struct mlx5_port_caps { - int gid_table_len; - int pkey_table_len; - u8 ext_port_cap; - bool has_smi; -}; - struct mlx5_cmd_mailbox { void *buf; dma_addr_t dma; @@ -375,6 +368,8 @@ struct mlx5_core_mkey { u32 key; u32 pd; u32 type; + struct wait_queue_head wait; + refcount_t usecount; }; #define MLX5_24BIT_MASK ((1 << 24) - 1) @@ -713,7 +708,6 @@ struct mlx5_core_dev { u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; - struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; struct { u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f0b866fb495..df5d91c8b2d4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1661,7 +1661,8 @@ struct 
mlx5_ifc_cmd_hca_cap_bits { u8 sf_set_partition[0x1]; u8 reserved_at_682[0x1]; u8 log_max_sf[0x5]; - u8 reserved_at_688[0x8]; + u8 apu[0x1]; + u8 reserved_at_689[0x7]; u8 log_min_sf_size[0x8]; u8 max_num_sf_partitions[0x8]; @@ -3868,7 +3869,7 @@ struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x2]; u8 dbr_umem_valid[0x1]; - u8 reserved_at_7[0x1]; + u8 apu_thread_cq[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; diff --git a/include/linux/parport.h b/include/linux/parport.h index 1fb508c19e83..f981f794c850 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -297,6 +297,37 @@ int __must_check __parport_register_driver(struct parport_driver *, * parport_register_driver must be a macro so that KBUILD_MODNAME can * be expanded */ + +/** + * parport_register_driver - register a parallel port device driver + * @driver: structure describing the driver + * + * This can be called by a parallel port device driver in order + * to receive notifications about ports being found in the + * system, as well as ports no longer available. + * + * If devmodel is true then the new device model is used + * for registration. + * + * The @driver structure is allocated by the caller and must not be + * deallocated until after calling parport_unregister_driver(). + * + * If using the non device model: + * The driver's attach() function may block. The port that + * attach() is given will be valid for the duration of the + * callback, but if the driver wants to take a copy of the + * pointer it must call parport_get_port() to do so. Calling + * parport_register_device() on that port will do this for you. + * + * The driver's detach() function may block. The port that + * detach() is given will be valid for the duration of the + * callback, but if the driver wants to take a copy of the + * pointer it must call parport_get_port() to do so. + * + * + * Returns 0 on success. The non device model will always succeeds. 
+ * but the new device model can fail and will return the error code. + **/ #define parport_register_driver(driver) \ __parport_register_driver(driver, THIS_MODULE, KBUILD_MODNAME) diff --git a/include/linux/w1.h b/include/linux/w1.h index 949d3b10e531..9a2a0ef39018 100644 --- a/include/linux/w1.h +++ b/include/linux/w1.h @@ -280,7 +280,7 @@ int w1_register_family(struct w1_family *family); void w1_unregister_family(struct w1_family *family); /** - * module_w1_driver() - Helper macro for registering a 1-Wire families + * module_w1_family() - Helper macro for registering a 1-Wire families * @__w1_family: w1_family struct * * Helper macro for 1-Wire families which do not do anything special in module diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 693285e76f13..4c52c2fd22a1 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -547,10 +547,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context), void *context, struct ib_sa_query **sa_query); -bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, - struct ib_device *device, - u8 port_num); - static inline bool sa_path_is_roce(struct sa_path_rec *rec) { return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) || diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 7752211c9638..676c57f5ca80 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. 
*/ #ifndef IB_UMEM_H @@ -13,6 +14,7 @@ struct ib_ucontext; struct ib_umem_odp; +struct dma_buf_attach_ops; struct ib_umem { struct ib_device *ibdev; @@ -22,12 +24,29 @@ struct ib_umem { unsigned long address; u32 writable : 1; u32 is_odp : 1; + u32 is_dmabuf : 1; struct work_struct work; struct sg_table sg_head; int nmap; unsigned int sg_nents; }; +struct ib_umem_dmabuf { + struct ib_umem umem; + struct dma_buf_attachment *attach; + struct sg_table *sgt; + struct scatterlist *first_sg; + struct scatterlist *last_sg; + unsigned long first_sg_offset; + unsigned long last_sg_trim; + void *private; +}; + +static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) +{ + return container_of(umem, struct ib_umem_dmabuf, umem); +} + /* Returns the offset of the umem start relative to the first page. */ static inline int ib_umem_offset(struct ib_umem *umem) { @@ -86,6 +105,7 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); + /** * ib_umem_find_best_pgoff - Find best HW page size * @@ -116,6 +136,14 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, dma_addr & pgoff_bitmask); } +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops); +int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); + #else /* CONFIG_INFINIBAND_USER_MEM */ #include @@ -124,12 +152,12 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, int access) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } static inline void ib_umem_release(struct ib_umem *umem) { } static inline int ib_umem_copy_from(void *dst, struct 
ib_umem *umem, size_t offset, size_t length) { - return -EINVAL; + return -EOPNOTSUPP; } static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, @@ -143,7 +171,21 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, { return 0; } +static inline +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access, + struct dma_buf_attach_ops *ops) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + return -EOPNOTSUPP; +} +static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { } +static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { } #endif /* CONFIG_INFINIBAND_USER_MEM */ - #endif /* IB_UMEM_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9fed65bf9279..ca28fca5736b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2020 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 
@@ -2434,6 +2434,10 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); + struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, int fd, + int mr_access_flags, + struct ib_udata *udata); struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, @@ -4670,4 +4674,7 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) return (u32)(v & IB_GRH_FLOWLABEL_MASK); } + +const struct ib_port_immutable* +ib_port_immutable_read(struct ib_device *dev, unsigned int port); #endif /* IB_VERBS_H */ diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index eb99856e8b30..e75cf9742e04 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -46,7 +46,8 @@ struct rdma_counter { void rdma_counter_init(struct ib_device *dev); void rdma_counter_release(struct ib_device *dev); int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, - bool on, enum rdma_nl_counter_mask mask); + enum rdma_nl_counter_mask mask, + struct netlink_ext_ack *extack); int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port); int rdma_counter_unbind_qp(struct ib_qp *qp, bool force); diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 29e2fb8f33d6..15e3397cec58 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -11,13 +11,12 @@ #if IS_ENABLED(CONFIG_MTK_SMI) -#define MTK_LARB_NR_MAX 16 - #define MTK_SMI_MMU_EN(port) BIT(port) struct mtk_smi_larb_iommu { struct device *dev; unsigned int mmu; + unsigned char bank[32]; }; /* diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h index 112bd06487bf..e801f4910522 100644 --- a/include/trace/events/intel_iommu.h +++ b/include/trace/events/intel_iommu.h @@ -6,7 +6,6 @@ * * Author: Lu Baolu */ -#ifdef CONFIG_INTEL_IOMMU 
#undef TRACE_SYSTEM #define TRACE_SYSTEM intel_iommu @@ -135,8 +134,44 @@ DEFINE_EVENT(dma_map_sg, bounce_map_sg, struct scatterlist *sg), TP_ARGS(dev, index, total, sg) ); + +TRACE_EVENT(qi_submit, + TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), + + TP_ARGS(iommu, qw0, qw1, qw2, qw3), + + TP_STRUCT__entry( + __field(u64, qw0) + __field(u64, qw1) + __field(u64, qw2) + __field(u64, qw3) + __string(iommu, iommu->name) + ), + + TP_fast_assign( + __assign_str(iommu, iommu->name); + __entry->qw0 = qw0; + __entry->qw1 = qw1; + __entry->qw2 = qw2; + __entry->qw3 = qw3; + ), + + TP_printk("%s %s: 0x%llx 0x%llx 0x%llx 0x%llx", + __print_symbolic(__entry->qw0 & 0xf, + { QI_CC_TYPE, "cc_inv" }, + { QI_IOTLB_TYPE, "iotlb_inv" }, + { QI_DIOTLB_TYPE, "dev_tlb_inv" }, + { QI_IEC_TYPE, "iec_inv" }, + { QI_IWD_TYPE, "inv_wait" }, + { QI_EIOTLB_TYPE, "p_iotlb_inv" }, + { QI_PC_TYPE, "pc_inv" }, + { QI_DEIOTLB_TYPE, "p_dev_tlb_inv" }, + { QI_PGRP_RESP_TYPE, "page_grp_resp" }), + __get_str(iommu), + __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3 + ) +); #endif /* _TRACE_INTEL_IOMMU_H */ /* This part must be outside protection */ #include -#endif /* CONFIG_INTEL_IOMMU */ diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 7968a1845355..dafc7ebe545b 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -251,6 +252,7 @@ enum uverbs_methods_mr { UVERBS_METHOD_MR_DESTROY, UVERBS_METHOD_ADVISE_MR, UVERBS_METHOD_QUERY_MR, + UVERBS_METHOD_REG_DMABUF_MR, }; enum uverbs_attrs_mr_destroy_ids { @@ -272,6 +274,18 @@ enum uverbs_attrs_query_mr_cmd_attr_ids { UVERBS_ATTR_QUERY_MR_RESP_IOVA, }; +enum uverbs_attrs_reg_dmabuf_mr_cmd_attr_ids { + UVERBS_ATTR_REG_DMABUF_MR_HANDLE, + UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, + UVERBS_ATTR_REG_DMABUF_MR_OFFSET, + UVERBS_ATTR_REG_DMABUF_MR_LENGTH, + UVERBS_ATTR_REG_DMABUF_MR_IOVA, + UVERBS_ATTR_REG_DMABUF_MR_FD, + UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, +}; + enum uverbs_attrs_create_counters_cmd_attr_ids { UVERBS_ATTR_CREATE_COUNTERS_HANDLE, }; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index af6e8b4fb359..b636d517c02c 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -119,7 +119,6 @@ static DEFINE_RAW_SPINLOCK(dbg_slave_lock); */ static atomic_t masters_in_kgdb; static atomic_t slaves_in_kgdb; -static atomic_t kgdb_break_tasklet_var; atomic_t kgdb_setting_breakpoint; struct task_struct *kgdb_usethread; @@ -1084,31 +1083,6 @@ static void kgdb_unregister_callbacks(void) } } -/* - * There are times a tasklet needs to be used vs a compiled in - * break point so as to cause an exception outside a kgdb I/O module, - * such as is the case with kgdboe, where calling a breakpoint in the - * I/O driver itself would be fatal. 
- */ -static void kgdb_tasklet_bpt(unsigned long ing) -{ - kgdb_breakpoint(); - atomic_set(&kgdb_break_tasklet_var, 0); -} - -static DECLARE_TASKLET_OLD(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt); - -void kgdb_schedule_breakpoint(void) -{ - if (atomic_read(&kgdb_break_tasklet_var) || - atomic_read(&kgdb_active) != -1 || - atomic_read(&kgdb_setting_breakpoint)) - return; - atomic_inc(&kgdb_break_tasklet_var); - tasklet_schedule(&kgdb_tasklet_breakpoint); -} -EXPORT_SYMBOL_GPL(kgdb_schedule_breakpoint); - /** * kgdb_register_io_module - register KGDB IO module * @new_dbg_io_ops: the io ops vector @@ -1166,7 +1140,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops) EXPORT_SYMBOL_GPL(kgdb_register_io_module); /** - * kkgdb_unregister_io_module - unregister KGDB IO module + * kgdb_unregister_io_module - unregister KGDB IO module * @old_dbg_io_ops: the io ops vector * * Unregister it with the KGDB core. diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index a77df59d9ca5..e149a0ac9e9e 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -595,7 +595,7 @@ static char *gdb_hex_reg_helper(int regnum, char *out) dbg_reg_def[i].size); } -/* Handle the 'p' individual regster get */ +/* Handle the 'p' individual register get */ static void gdb_cmd_reg_get(struct kgdb_state *ks) { unsigned long regnum; @@ -610,7 +610,7 @@ static void gdb_cmd_reg_get(struct kgdb_state *ks) gdb_hex_reg_helper(regnum, remcom_out_buffer); } -/* Handle the 'P' individual regster set */ +/* Handle the 'P' individual register set */ static void gdb_cmd_reg_set(struct kgdb_state *ks) { unsigned long regnum; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index a4281fb99299..6cb92f7bbbd0 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -230,7 +230,7 @@ extern struct task_struct *kdb_curr_task(int); #define kdb_task_has_cpu(p) (task_curr(p)) -#define GFP_KDB (in_interrupt() ? 
GFP_ATOMIC : GFP_KERNEL) +#define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL) extern void *debug_kmalloc(size_t size, gfp_t flags); extern void debug_kfree(void *); @@ -254,4 +254,14 @@ extern char kdb_prompt_str[]; #define KDB_WORD_SIZE ((int)sizeof(unsigned long)) #endif /* CONFIG_KGDB_KDB */ + +#define kdb_func_printf(format, args...) \ + kdb_printf("%s: " format, __func__, ## args) + +#define kdb_dbg_printf(mask, format, args...) \ + do { \ + if (KDB_DEBUG(mask)) \ + kdb_func_printf(format, ## args); \ + } while (0) + #endif /* !_KDBPRIVATE_H */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 6226502ce049..f7c1885abeb6 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -39,20 +39,15 @@ */ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab) { - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: symname=%s, symtab=%px\n", symname, - symtab); + kdb_dbg_printf(AR, "symname=%s, symtab=%px\n", symname, symtab); memset(symtab, 0, sizeof(*symtab)); symtab->sym_start = kallsyms_lookup_name(symname); if (symtab->sym_start) { - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: returns 1, " - "symtab->sym_start=0x%lx\n", - symtab->sym_start); + kdb_dbg_printf(AR, "returns 1, symtab->sym_start=0x%lx\n", + symtab->sym_start); return 1; } - if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: returns 0\n"); + kdb_dbg_printf(AR, "returns 0\n"); return 0; } EXPORT_SYMBOL(kdbgetsymval); @@ -87,16 +82,14 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) #define knt1_size 128 /* must be >= kallsyms table size */ char *knt1 = NULL; - if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: addr=0x%lx, symtab=%px\n", addr, symtab); + kdb_dbg_printf(AR, "addr=0x%lx, symtab=%px\n", addr, symtab); memset(symtab, 0, sizeof(*symtab)); if (addr < 4096) goto out; knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC); if (!knt1) { - kdb_printf("kdbnearsym: addr=0x%lx cannot kmalloc knt1\n", - addr); + kdb_func_printf("addr=0x%lx 
cannot kmalloc knt1\n", addr); goto out; } symtab->sym_name = kallsyms_lookup(addr, &symbolsize , &offset, @@ -147,11 +140,8 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) if (symtab->mod_name == NULL) symtab->mod_name = "kernel"; - if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, " - "symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", ret, - symtab->sym_start, symtab->mod_name, symtab->sym_name, - symtab->sym_name); + kdb_dbg_printf(AR, "returns %d symtab->sym_start=0x%lx, symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", + ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name); out: debug_kfree(knt1); @@ -328,7 +318,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { - kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr); + kdb_func_printf("Bad address 0x%lx\n", addr); KDB_STATE_SET(SUPPRESS); } ret = KDB_BADADDR; @@ -353,7 +343,7 @@ int kdb_putarea_size(unsigned long addr, void *res, size_t size) int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { - kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr); + kdb_func_printf("Bad address 0x%lx\n", addr); KDB_STATE_SET(SUPPRESS); } ret = KDB_BADADDR; @@ -435,7 +425,7 @@ int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size) fallthrough; default: diag = KDB_BADWIDTH; - kdb_printf("kdb_getphysword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -484,7 +474,7 @@ int kdb_getword(unsigned long *word, unsigned long addr, size_t size) fallthrough; default: diag = KDB_BADWIDTH; - kdb_printf("kdb_getword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -528,7 +518,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) fallthrough; default: diag = 
KDB_BADWIDTH; - kdb_printf("kdb_putword: bad width %ld\n", (long) size); + kdb_func_printf("bad width %zu\n", size); } return diag; } @@ -602,8 +592,7 @@ unsigned long kdb_task_state_string(const char *s) res = ~0UL; break; default: - kdb_printf("%s: unknown flag '%c' ignored\n", - __func__, *s); + kdb_func_printf("unknown flag '%c' ignored\n", *s); break; } ++s; @@ -884,18 +873,16 @@ void debug_kusage(void) if (!debug_kusage_one_time) goto out; debug_kusage_one_time = 0; - kdb_printf("%s: debug_kmalloc memory leak dah_first %d\n", - __func__, dah_first); + kdb_func_printf("debug_kmalloc memory leak dah_first %d\n", dah_first); if (dah_first) { h_used = (struct debug_alloc_header *)debug_alloc_pool; - kdb_printf("%s: h_used %px size %d\n", __func__, h_used, - h_used->size); + kdb_func_printf("h_used %px size %d\n", h_used, h_used->size); } do { h_used = (struct debug_alloc_header *) ((char *)h_free + dah_overhead + h_free->size); - kdb_printf("%s: h_used %px size %d caller %px\n", - __func__, h_used, h_used->size, h_used->caller); + kdb_func_printf("h_used %px size %d caller %px\n", + h_used, h_used->size, h_used->caller); h_free = (struct debug_alloc_header *) (debug_alloc_pool + h_free->next); } while (h_free->next); @@ -903,8 +890,8 @@ void debug_kusage(void) ((char *)h_free + dah_overhead + h_free->size); if ((char *)h_used - debug_alloc_pool != sizeof(debug_alloc_pool_aligned)) - kdb_printf("%s: h_used %px size %d caller %px\n", - __func__, h_used, h_used->size, h_used->caller); + kdb_func_printf("h_used %px size %d caller %px\n", + h_used, h_used->size, h_used->caller); out: spin_unlock(&dap_lock); } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fe1a4bad3bd8..9c8a7b24e75e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -737,9 +737,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, logbuf_lock_irq(); } - if (user->seq < prb_first_valid_seq(prb)) { + if (r->info->seq != user->seq) { /* our last seen 
message is gone, return error and reset */ - user->seq = prb_first_valid_seq(prb); + user->seq = r->info->seq; ret = -EPIPE; logbuf_unlock_irq(); goto out; @@ -814,6 +814,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) static __poll_t devkmsg_poll(struct file *file, poll_table *wait) { struct devkmsg_user *user = file->private_data; + struct printk_info info; __poll_t ret = 0; if (!user) @@ -822,9 +823,9 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); logbuf_lock_irq(); - if (prb_read_valid(prb, user->seq, NULL)) { + if (prb_read_valid_info(prb, user->seq, &info, NULL)) { /* return error when data has vanished underneath us */ - if (user->seq < prb_first_valid_seq(prb)) + if (info.seq != user->seq) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; @@ -1561,6 +1562,7 @@ static void syslog_clear(void) int do_syslog(int type, char __user *buf, int len, int source) { + struct printk_info info; bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; @@ -1631,9 +1633,14 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: logbuf_lock_irq(); - if (syslog_seq < prb_first_valid_seq(prb)) { + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ + logbuf_unlock_irq(); + return 0; + } + if (info.seq != syslog_seq) { /* messages are gone, move to first one */ - syslog_seq = prb_first_valid_seq(prb); + syslog_seq = info.seq; syslog_partial = 0; } if (source == SYSLOG_FROM_PROC) { @@ -1645,7 +1652,6 @@ int do_syslog(int type, char __user *buf, int len, int source) error = prb_next_seq(prb) - syslog_seq; } else { bool time = syslog_partial ? 
syslog_time : printk_time; - struct printk_info info; unsigned int line_count; u64 seq; @@ -3438,9 +3444,11 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; logbuf_lock_irqsave(flags); - if (dumper->cur_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first available one */ - dumper->cur_seq = prb_first_valid_seq(prb); + if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { + if (info.seq != dumper->cur_seq) { + /* messages are gone, move to first available one */ + dumper->cur_seq = info.seq; + } } /* last entry */ diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 5dc9d022db07..73cc80e01cef 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -287,7 +287,7 @@ _DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) /* Writer Interface */ /** - * prb_rec_init_wd() - Initialize a buffer for writing records. + * prb_rec_init_wr() - Initialize a buffer for writing records. * * @r: The record to initialize. * @text_buf_size: The needed text buffer size. 
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index a0e6f746de6c..2e9e3ed7d63e 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -45,6 +45,8 @@ struct printk_safe_seq_buf { static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); static DEFINE_PER_CPU(int, printk_context); +static DEFINE_RAW_SPINLOCK(safe_read_lock); + #ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); #endif @@ -180,8 +182,6 @@ static void report_message_lost(struct printk_safe_seq_buf *s) */ static void __printk_safe_flush(struct irq_work *work) { - static raw_spinlock_t read_lock = - __RAW_SPIN_LOCK_INITIALIZER(read_lock); struct printk_safe_seq_buf *s = container_of(work, struct printk_safe_seq_buf, work); unsigned long flags; @@ -195,7 +195,7 @@ static void __printk_safe_flush(struct irq_work *work) * different CPUs. This is especially important when printing * a backtrace. */ - raw_spin_lock_irqsave(&read_lock, flags); + raw_spin_lock_irqsave(&safe_read_lock, flags); i = 0; more: @@ -232,7 +232,7 @@ static void __printk_safe_flush(struct irq_work *work) out: report_message_lost(s); - raw_spin_unlock_irqrestore(&read_lock, flags); + raw_spin_unlock_irqrestore(&safe_read_lock, flags); } /** @@ -278,6 +278,14 @@ void printk_safe_flush_on_panic(void) raw_spin_lock_init(&logbuf_lock); } + if (raw_spin_is_locked(&safe_read_lock)) { + if (num_online_cpus() > 1) + return; + + debug_locks_off(); + raw_spin_lock_init(&safe_read_lock); + } + printk_safe_flush(); } diff --git a/lib/crc7.c b/lib/crc7.c index 6a848d73e804..3848e313b722 100644 --- a/lib/crc7.c +++ b/lib/crc7.c @@ -51,7 +51,7 @@ const u8 crc7_be_syndrome_table[256] = { EXPORT_SYMBOL(crc7_be_syndrome_table); /** - * crc7 - update the CRC7 for the data buffer + * crc7_be - update the CRC7 for the data buffer * @crc: previous CRC7 value * @buffer: data pointer * @len: number of bytes in the buffer diff --git a/lib/kunit/Kconfig 
b/lib/kunit/Kconfig index 00909e6a2443..0b5dfb001bac 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -4,6 +4,7 @@ menuconfig KUNIT tristate "KUnit - Enable support for unit tests" + select GLOB if KUNIT=y help Enables support for kernel unit tests (KUnit), a lightweight unit testing and mocking framework for the Linux kernel. These tests are diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c index 33acdaa28a7d..e0ec7d6fed6f 100644 --- a/lib/kunit/assert.c +++ b/lib/kunit/assert.c @@ -85,6 +85,29 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, } EXPORT_SYMBOL_GPL(kunit_ptr_not_err_assert_format); +/* Checks if `text` is a literal representing `value`, e.g. "5" and 5 */ +static bool is_literal(struct kunit *test, const char *text, long long value, + gfp_t gfp) +{ + char *buffer; + int len; + bool ret; + + len = snprintf(NULL, 0, "%lld", value); + if (strlen(text) != len) + return false; + + buffer = kunit_kmalloc(test, len+1, gfp); + if (!buffer) + return false; + + snprintf(buffer, len+1, "%lld", value); + ret = strncmp(buffer, text, len) == 0; + + kunit_kfree(test, buffer); + return ret; +} + void kunit_binary_assert_format(const struct kunit_assert *assert, struct string_stream *stream) { @@ -97,12 +120,16 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, binary_assert->left_text, binary_assert->operation, binary_assert->right_text); - string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld\n", - binary_assert->left_text, - binary_assert->left_value); - string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld", - binary_assert->right_text, - binary_assert->right_value); + if (!is_literal(stream->test, binary_assert->left_text, + binary_assert->left_value, stream->gfp)) + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld\n", + binary_assert->left_text, + binary_assert->left_value); + if (!is_literal(stream->test, binary_assert->right_text, + binary_assert->right_value, stream->gfp)) + 
string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld", + binary_assert->right_text, + binary_assert->right_value); kunit_assert_print_msg(assert, stream); } EXPORT_SYMBOL_GPL(kunit_binary_assert_format); diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index a95742a4ece7..15832ed44668 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include /* * These symbols point to the .kunit_test_suites section and are defined in @@ -11,14 +13,81 @@ extern struct kunit_suite * const * const __kunit_suites_end[]; #if IS_BUILTIN(CONFIG_KUNIT) -static void kunit_print_tap_header(void) +static char *filter_glob; +module_param(filter_glob, charp, 0); +MODULE_PARM_DESC(filter_glob, + "Filter which KUnit test suites run at boot-time, e.g. list*"); + +static struct kunit_suite * const * +kunit_filter_subsuite(struct kunit_suite * const * const subsuite) +{ + int i, n = 0; + struct kunit_suite **filtered; + + n = 0; + for (i = 0; subsuite[i] != NULL; ++i) { + if (glob_match(filter_glob, subsuite[i]->name)) + ++n; + } + + if (n == 0) + return NULL; + + filtered = kmalloc_array(n + 1, sizeof(*filtered), GFP_KERNEL); + if (!filtered) + return NULL; + + n = 0; + for (i = 0; subsuite[i] != NULL; ++i) { + if (glob_match(filter_glob, subsuite[i]->name)) + filtered[n++] = subsuite[i]; + } + filtered[n] = NULL; + + return filtered; +} + +struct suite_set { + struct kunit_suite * const * const *start; + struct kunit_suite * const * const *end; +}; + +static struct suite_set kunit_filter_suites(void) +{ + int i; + struct kunit_suite * const **copy, * const *filtered_subsuite; + struct suite_set filtered; + + const size_t max = __kunit_suites_end - __kunit_suites_start; + + if (!filter_glob) { + filtered.start = __kunit_suites_start; + filtered.end = __kunit_suites_end; + return filtered; + } + + copy = kmalloc_array(max, sizeof(*filtered.start), GFP_KERNEL); + filtered.start = copy; + if (!copy) { 
/* won't be able to run anything, return an empty set */ + filtered.end = copy; + return filtered; + } + + for (i = 0; i < max; ++i) { + filtered_subsuite = kunit_filter_subsuite(__kunit_suites_start[i]); + if (filtered_subsuite) + *copy++ = filtered_subsuite; + } + filtered.end = copy; + return filtered; +} + +static void kunit_print_tap_header(struct suite_set *suite_set) { struct kunit_suite * const * const *suites, * const *subsuite; int num_of_suites = 0; - for (suites = __kunit_suites_start; - suites < __kunit_suites_end; - suites++) + for (suites = suite_set->start; suites < suite_set->end; suites++) for (subsuite = *suites; *subsuite != NULL; subsuite++) num_of_suites++; @@ -30,12 +99,18 @@ int kunit_run_all_tests(void) { struct kunit_suite * const * const *suites; - kunit_print_tap_header(); + struct suite_set suite_set = kunit_filter_suites(); - for (suites = __kunit_suites_start; - suites < __kunit_suites_end; - suites++) - __kunit_test_suites_init(*suites); + kunit_print_tap_header(&suite_set); + + for (suites = suite_set.start; suites < suite_set.end; suites++) + __kunit_test_suites_init(*suites); + + if (filter_glob) { /* a copy was made of each array */ + for (suites = suite_set.start; suites < suite_set.end; suites++) + kfree(*suites); + kfree(suite_set.start); + } return 0; } diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 4425a1dd4ef1..0ea0e8258f14 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -16,8 +16,7 @@ #include "../tools/testing/selftests/kselftest_module.h" -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; +KSTM_MODULE_GLOBALS(); static char pbl_buffer[PAGE_SIZE] __initdata; diff --git a/lib/test_printf.c b/lib/test_printf.c index 7d60f24240a4..95a2f82427c7 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -30,11 +30,13 @@ #define PAD_SIZE 16 #define FILL_CHAR '$' -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; +KSTM_MODULE_GLOBALS(); + static 
char *test_buffer __initdata; static char *alloced_buffer __initdata; +extern bool no_hash_pointers; + static int __printf(4, 0) __init do_test(int bufsize, const char *expect, int elen, const char *fmt, va_list ap) @@ -301,6 +303,12 @@ plain(void) { int err; + if (no_hash_pointers) { + pr_warn("skipping plain 'p' tests"); + skipped_tests += 2; + return; + } + err = plain_hash(); if (err) { pr_warn("plain 'p' does not appear to be hashed\n"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3b53c73580c5..41ddc353ebb8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2090,6 +2090,32 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +/* Disable pointer hashing if requested */ +bool no_hash_pointers __ro_after_init; +EXPORT_SYMBOL_GPL(no_hash_pointers); + +static int __init no_hash_pointers_enable(char *str) +{ + no_hash_pointers = true; + + pr_warn("**********************************************************\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("** **\n"); + pr_warn("** This system shows unhashed kernel memory addresses **\n"); + pr_warn("** via the console, logs, and other interfaces. This **\n"); + pr_warn("** might reduce the security of your system. **\n"); + pr_warn("** **\n"); + pr_warn("** If you see this message and you are not debugging **\n"); + pr_warn("** the kernel, report this immediately to your system **\n"); + pr_warn("** administrator! **\n"); + pr_warn("** **\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("**********************************************************\n"); + + return 0; +} +early_param("no_hash_pointers", no_hash_pointers_enable); + /* * Show a '%p' thing. 
A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -2297,8 +2323,14 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } } - /* default is to _not_ leak addresses, hash before printing */ - return ptr_to_id(buf, end, ptr, spec); + /* + * default is to _not_ leak addresses, so hash before printing, + * unless no_hash_pointers is specified on the command line. + */ + if (unlikely(no_hash_pointers)) + return pointer_string(buf, end, ptr, spec); + else + return ptr_to_id(buf, end, ptr, spec); } /* diff --git a/mm/memblock.c b/mm/memblock.c index 4594307cf987..ab267a3316ac 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2051,10 +2051,8 @@ void __init reset_all_zones_managed_pages(void) /** * memblock_free_all - release free pages to the buddy allocator - * - * Return: the number of pages actually released. */ -unsigned long __init memblock_free_all(void) +void __init memblock_free_all(void) { unsigned long pages; @@ -2063,8 +2061,6 @@ unsigned long __init memblock_free_all(void) pages = free_low_memory_core_early(); totalram_pages_add(pages); - - return pages; } #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK) diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index 365905cb24b1..331dcf151532 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * NOTE: This example is works on x86 and powerpc. * Here's a sample kernel module showing the use of kprobes to dump a * stack trace and selected registers when kernel_clone() is called. 
* @@ -44,6 +43,10 @@ static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs) " pstate = 0x%lx\n", p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate); #endif +#ifdef CONFIG_ARM + pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, cpsr = 0x%lx\n", + p->symbol_name, p->addr, (long)regs->ARM_pc, (long)regs->ARM_cpsr); +#endif #ifdef CONFIG_S390 pr_info("<%s> pre_handler: p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n", p->symbol_name, p->addr, regs->psw.addr, regs->flags); @@ -73,6 +76,10 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs, pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n", p->symbol_name, p->addr, (long)regs->pstate); #endif +#ifdef CONFIG_ARM + pr_info("<%s> post_handler: p->addr = 0x%p, cpsr = 0x%lx\n", + p->symbol_name, p->addr, (long)regs->ARM_cpsr); +#endif #ifdef CONFIG_S390 pr_info("<%s> pre_handler: p->addr, 0x%p, flags = 0x%lx\n", p->symbol_name, p->addr, regs->flags); diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 6325bec3f66f..e046e16e4411 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -382,6 +382,9 @@ my $inline_doc_state; # 'function', 'struct', 'union', 'enum', 'typedef' my $decl_type; +# Name of the kernel-doc identifier for non-DOC markups +my $identifier; + my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start. my $doc_end = '\*/'; my $doc_com = '\s*\*\s*'; @@ -833,6 +836,7 @@ sub output_blockhead_rst(%) { next if (defined($nosymbol_table{$section})); if ($output_selection != OUTPUT_INCLUDE) { + print ".. _$section:\n\n"; print "**$section**\n\n"; } print_lineno($section_start_lines{$section}); @@ -1203,6 +1207,11 @@ sub dump_struct($$) { $declaration_name = $2; my $members = $3; + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for $decl_type $identifier. 
Prototype was for $decl_type $declaration_name instead\n"; + return; + } + # ignore members marked private: $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; $members =~ s/\/\*\s*private:.*//gosi; @@ -1391,6 +1400,11 @@ sub dump_enum($$) { } if ($members) { + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for enum $identifier. Prototype was for enum $declaration_name instead\n"; + return; + } + my %_members; $members =~ s/\s+$//; @@ -1451,6 +1465,11 @@ sub dump_typedef($$) { my $args = $3; $return_type =~ s/^\s+//; + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"; + return; + } + create_parameterlist($args, ',', $file, $declaration_name); output_declaration($declaration_name, @@ -1477,6 +1496,11 @@ sub dump_typedef($$) { if ($x =~ /typedef.*\s+(\w+)\s*;/) { $declaration_name = $1; + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"; + return; + } + output_declaration($declaration_name, 'typedef', {'typedef' => $declaration_name, @@ -1796,6 +1820,11 @@ sub dump_function($$) { return; } + if ($identifier ne $declaration_name) { + print STDERR "${file}:$.: warning: expecting prototype for $identifier(). 
Prototype was for $declaration_name() instead\n"; + return; + } + my $prms = join " ", @parameterlist; check_sections($file, $declaration_name, "function", $sectcheck, $prms); @@ -1878,6 +1907,7 @@ sub tracepoint_munge($) { "$prototype\n"; } else { $prototype = "static inline void trace_$tracepointname($tracepointargs)"; + $identifier = "trace_$identifier"; } } @@ -2041,7 +2071,6 @@ sub process_normal() { # sub process_name($$) { my $file = shift; - my $identifier; my $descr; if (/$doc_block/o) { @@ -2054,12 +2083,19 @@ sub process_name($$) { } else { $section = $1; } - } - elsif (/$doc_decl/o) { + } elsif (/$doc_decl/o) { $identifier = $1; - if (/\s*([\w\s]+?)(\(\))?\s*-/) { + if (/\s*([\w\s]+?)(\(\))?\s*([-:].*)?$/) { $identifier = $1; } + if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { + $decl_type = $1; + $identifier = $2; + } else { + $decl_type = 'function'; + $identifier =~ s/^define\s+//; + } + $identifier =~ s/\s+$//; $state = STATE_BODY; # if there's no @param blocks need to set up default section @@ -2067,7 +2103,7 @@ sub process_name($$) { $contents = ""; $section = $section_default; $new_start_line = $. 
+ 1; - if (/-(.*)/) { + if (/[-:](.*)/) { # strip leading/trailing/multiple spaces $descr= $1; $descr =~ s/^\s*//; @@ -2085,20 +2121,15 @@ sub process_name($$) { ++$warnings; } - if ($identifier =~ m/^struct\b/) { - $decl_type = 'struct'; - } elsif ($identifier =~ m/^union\b/) { - $decl_type = 'union'; - } elsif ($identifier =~ m/^enum\b/) { - $decl_type = 'enum'; - } elsif ($identifier =~ m/^typedef\b/) { - $decl_type = 'typedef'; - } else { - $decl_type = 'function'; + if ($identifier eq "") { + print STDERR "${file}:$.: warning: wrong kernel-doc identifier on line:\n"; + print STDERR $_; + ++$warnings; + $state = STATE_NORMAL; } if ($verbose) { - print STDERR "${file}:$.: info: Scanning doc for $identifier\n"; + print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n"; } } else { print STDERR "${file}:$.: warning: Cannot understand $_ on line $.", diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 40fa6923e80a..6632da76fbb0 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -728,8 +728,8 @@ sub check_needs() $need_virtualenv = 1; } if ($1 < 3) { - # Complain if it finds python2 (or worse) - printf "Warning: python$1 support is deprecated. 
Use it with caution!\n"; + # Fail if it finds python2 (or worse) + die "Python 3 is required to build the kernel docs\n"; } } else { die "Warning: couldn't identify $python_cmd version!"; diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index b58fb3733cfa..21d234d5a15e 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -28,12 +28,12 @@ KunitBuildRequest = namedtuple('KunitBuildRequest', ['jobs', 'build_dir', 'alltests', 'make_options']) KunitExecRequest = namedtuple('KunitExecRequest', - ['timeout', 'build_dir', 'alltests']) + ['timeout', 'build_dir', 'alltests', 'filter_glob']) KunitParseRequest = namedtuple('KunitParseRequest', ['raw_output', 'input_data', 'build_dir', 'json']) KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', - 'build_dir', 'alltests', 'json', - 'make_options']) + 'build_dir', 'alltests', 'filter_glob', + 'json', 'make_options']) KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] @@ -93,6 +93,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, test_start = time.time() result = linux.run_kernel( timeout=None if request.alltests else request.timeout, + filter_glob=request.filter_glob, build_dir=request.build_dir) test_end = time.time() @@ -149,7 +150,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, return build_result exec_request = KunitExecRequest(request.timeout, request.build_dir, - request.alltests) + request.alltests, request.filter_glob) exec_result = exec_tests(linux, exec_request) if exec_result.status != KunitStatus.SUCCESS: return exec_result @@ -182,6 +183,9 @@ def add_common_opts(parser) -> None: parser.add_argument('--alltests', help='Run all KUnit tests through allyesconfig', action='store_true') + parser.add_argument('--kunitconfig', + help='Path to Kconfig fragment that enables KUnit tests', + metavar='kunitconfig') def add_build_opts(parser) -> None: parser.add_argument('--jobs', @@ -197,6 +201,14 @@ def add_exec_opts(parser) 
-> None: type=int, default=300, metavar='timeout') + parser.add_argument('filter_glob', + help='maximum number of seconds to allow for all tests ' + 'to run. This does not include time taken to build the ' + 'tests.', + type=str, + nargs='?', + default='', + metavar='filter_glob') def add_parse_opts(parser) -> None: parser.add_argument('--raw_output', help='don\'t format output from kernel', @@ -256,13 +268,14 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitRequest(cli_args.raw_output, cli_args.timeout, cli_args.jobs, cli_args.build_dir, cli_args.alltests, + cli_args.filter_glob, cli_args.json, cli_args.make_options) result = run_tests(linux, request) @@ -274,7 +287,7 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitConfigRequest(cli_args.build_dir, cli_args.make_options) @@ -286,7 +299,7 @@ def main(argv, linux=None): sys.exit(1) elif cli_args.subcommand == 'build': if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig) request = KunitBuildRequest(cli_args.jobs, cli_args.build_dir, @@ -304,7 +317,8 @@ def main(argv, linux=None): exec_request = KunitExecRequest(cli_args.timeout, cli_args.build_dir, - cli_args.alltests) + cli_args.alltests, + cli_args.filter_glob) exec_result = exec_tests(linux, exec_request) parse_request = KunitParseRequest(cli_args.raw_output, exec_result.result, diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index bdd60230764b..0b550cbd667d 100644 --- a/tools/testing/kunit/kunit_config.py +++ 
b/tools/testing/kunit/kunit_config.py @@ -41,15 +41,14 @@ class Kconfig(object): self._entries.append(entry) def is_subset_of(self, other: 'Kconfig') -> bool: + other_dict = {e.name: e.value for e in other.entries()} for a in self.entries(): - found = False - for b in other.entries(): - if a.name != b.name: + b = other_dict.get(a.name) + if b is None: + if a.value == 'n': continue - if a.value != b.value: - return False - found = True - if a.value != 'n' and found == False: + return False + elif a.value != b: return False return True diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 2076a5a2d060..f309a33256cd 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -123,7 +123,7 @@ def get_outfile_path(build_dir) -> str: class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" - def __init__(self, build_dir: str, load_config=True, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None: + def __init__(self, build_dir: str, load_config=True, kunitconfig_path='') -> None: signal.signal(signal.SIGINT, self.signal_handler) self._ops = LinuxSourceTreeOperations() @@ -131,9 +131,13 @@ class LinuxSourceTree(object): if not load_config: return - kunitconfig_path = get_kunitconfig_path(build_dir) - if not os.path.exists(kunitconfig_path): - shutil.copyfile(defconfig, kunitconfig_path) + if kunitconfig_path: + if not os.path.exists(kunitconfig_path): + raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist') + else: + kunitconfig_path = get_kunitconfig_path(build_dir) + if not os.path.exists(kunitconfig_path): + shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path) self._kconfig = kunit_config.Kconfig() self._kconfig.read_from_file(kunitconfig_path) @@ -199,8 +203,12 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def run_kernel(self, args=[], build_dir='', timeout=None) -> Iterator[str]: + def 
run_kernel(self, args=None, build_dir='', filter_glob='', timeout=None) -> Iterator[str]: + if not args: + args = [] args.extend(['mem=1G', 'console=tty']) + if filter_glob: + args.append('kunit.filter_glob='+filter_glob) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 497ab51bc170..d04906d6ac40 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -12,6 +12,7 @@ from unittest import mock import tempfile, shutil # Handling test_tmpdir import json +import signal import os import kunit_config @@ -21,16 +22,18 @@ import kunit_json import kunit test_tmpdir = '' +abs_test_data_dir = '' def setUpModule(): - global test_tmpdir + global test_tmpdir, abs_test_data_dir test_tmpdir = tempfile.mkdtemp() + abs_test_data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'test_data')) def tearDownModule(): shutil.rmtree(test_tmpdir) -def get_absolute_path(path): - return os.path.join(os.path.dirname(__file__), path) +def test_data_path(path): + return os.path.join(abs_test_data_dir, path) class KconfigTest(unittest.TestCase): @@ -46,8 +49,7 @@ class KconfigTest(unittest.TestCase): def test_read_from_file(self): kconfig = kunit_config.Kconfig() - kconfig_path = get_absolute_path( - 'test_data/test_read_from_file.kconfig') + kconfig_path = test_data_path('test_read_from_file.kconfig') kconfig.read_from_file(kconfig_path) @@ -98,21 +100,18 @@ class KUnitParserTest(unittest.TestCase): str(needle) + '" not found in "' + str(haystack) + '"!') def test_output_isolated_correctly(self): - log_path = get_absolute_path( - 'test_data/test_output_isolated_correctly.log') - file = open(log_path) - result = kunit_parser.isolate_kunit_output(file.readlines()) + log_path = test_data_path('test_output_isolated_correctly.log') + with open(log_path) as file: + result = 
kunit_parser.isolate_kunit_output(file.readlines()) self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: example', result) self.assertContains(' 1..2', result) self.assertContains(' ok 1 - example_simple_test', result) self.assertContains(' ok 2 - example_mock_test', result) self.assertContains('ok 1 - example', result) - file.close() def test_output_with_prefix_isolated_correctly(self): - log_path = get_absolute_path( - 'test_data/test_pound_sign.log') + log_path = test_data_path('test_pound_sign.log') with open(log_path) as file: result = kunit_parser.isolate_kunit_output(file.readlines()) self.assertContains('TAP version 14', result) @@ -141,61 +140,51 @@ class KUnitParserTest(unittest.TestCase): self.assertContains('ok 3 - string-stream-test', result) def test_parse_successful_test_log(self): - all_passed_log = get_absolute_path( - 'test_data/test_is_test_passed-all_passed.log') - file = open(all_passed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + all_passed_log = test_data_path('test_is_test_passed-all_passed.log') + with open(all_passed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - file.close() def test_parse_failed_test_log(self): - failed_log = get_absolute_path( - 'test_data/test_is_test_passed-failure.log') - file = open(failed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + failed_log = test_data_path('test_is_test_passed-failure.log') + with open(failed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.FAILURE, result.status) - file.close() def test_no_tests(self): - empty_log = get_absolute_path( - 'test_data/test_is_test_passed-no_tests_run.log') - file = open(empty_log) - result = kunit_parser.parse_run_tests( - kunit_parser.isolate_kunit_output(file.readlines())) + empty_log = test_data_path('test_is_test_passed-no_tests_run.log') + 
with open(empty_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.isolate_kunit_output(file.readlines())) self.assertEqual(0, len(result.suites)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.status) - file.close() def test_no_kunit_output(self): - crash_log = get_absolute_path( - 'test_data/test_insufficient_memory.log') - file = open(crash_log) + crash_log = test_data_path('test_insufficient_memory.log') print_mock = mock.patch('builtins.print').start() - result = kunit_parser.parse_run_tests( - kunit_parser.isolate_kunit_output(file.readlines())) + with open(crash_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.isolate_kunit_output(file.readlines())) print_mock.assert_any_call(StrContains('no tests run!')) print_mock.stop() file.close() def test_crashed_test(self): - crashed_log = get_absolute_path( - 'test_data/test_is_test_passed-crash.log') - file = open(crashed_log) - result = kunit_parser.parse_run_tests(file.readlines()) + crashed_log = test_data_path('test_is_test_passed-crash.log') + with open(crashed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) - file.close() def test_ignores_prefix_printk_time(self): - prefix_log = get_absolute_path( - 'test_data/test_config_printk_time.log') + prefix_log = test_data_path('test_config_printk_time.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -204,8 +193,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_ignores_multiple_prefixes(self): - prefix_log = get_absolute_path( - 'test_data/test_multiple_prefixes.log') + prefix_log = test_data_path('test_multiple_prefixes.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -214,8 +202,7 @@ class KUnitParserTest(unittest.TestCase): 
self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_mixed_kernel_output(self): - mixed_prefix_log = get_absolute_path( - 'test_data/test_interrupted_tap_output.log') + mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -224,7 +211,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_poundsign(self): - pound_log = get_absolute_path('test_data/test_pound_sign.log') + pound_log = test_data_path('test_pound_sign.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -233,7 +220,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_kernel_panic_end(self): - panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log') + panic_log = test_data_path('test_kernel_panic_interrupt.log') with open(panic_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -242,7 +229,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual('kunit-resource-test', result.suites[0].name) def test_pound_no_prefix(self): - pound_log = get_absolute_path('test_data/test_pound_no_prefix.log') + pound_log = test_data_path('test_pound_no_prefix.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( @@ -250,10 +237,27 @@ class KUnitParserTest(unittest.TestCase): result.status) self.assertEqual('kunit-resource-test', result.suites[0].name) +class LinuxSourceTreeTest(unittest.TestCase): + + def setUp(self): + mock.patch.object(signal, 'signal').start() + self.addCleanup(mock.patch.stopall) + + def test_invalid_kunitconfig(self): + with self.assertRaisesRegex(kunit_kernel.ConfigError, 'nonexistent.* does not exist'): + 
kunit_kernel.LinuxSourceTree('', kunitconfig_path='/nonexistent_file') + + def test_valid_kunitconfig(self): + with tempfile.NamedTemporaryFile('wt') as kunitconfig: + tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) + + # TODO: add more test cases. + + class KUnitJsonTest(unittest.TestCase): def _json_for(self, log_file): - with(open(get_absolute_path(log_file))) as file: + with open(test_data_path(log_file)) as file: test_result = kunit_parser.parse_run_tests(file) json_obj = kunit_json.get_json_result( test_result=test_result, @@ -263,22 +267,19 @@ class KUnitJsonTest(unittest.TestCase): return json.loads(json_obj) def test_failed_test_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-failure.log') + result = self._json_for('test_is_test_passed-failure.log') self.assertEqual( {'name': 'example_simple_test', 'status': 'FAIL'}, result["sub_groups"][1]["test_cases"][0]) def test_crashed_test_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-crash.log') + result = self._json_for('test_is_test_passed-crash.log') self.assertEqual( {'name': 'example_simple_test', 'status': 'ERROR'}, result["sub_groups"][1]["test_cases"][0]) def test_no_tests_json(self): - result = self._json_for( - 'test_data/test_is_test_passed-no_tests_run.log') + result = self._json_for('test_is_test_passed-no_tests_run.log') self.assertEqual(0, len(result['sub_groups'])) class StrContains(str): @@ -287,106 +288,104 @@ class StrContains(str): class KUnitMainTest(unittest.TestCase): def setUp(self): - path = get_absolute_path('test_data/test_is_test_passed-all_passed.log') - file = open(path) - all_passed_log = file.readlines() - self.print_patch = mock.patch('builtins.print') - self.print_mock = self.print_patch.start() + path = test_data_path('test_is_test_passed-all_passed.log') + with open(path) as file: + all_passed_log = file.readlines() + + self.print_mock = mock.patch('builtins.print').start() + 
self.addCleanup(mock.patch.stopall) + self.linux_source_mock = mock.Mock() self.linux_source_mock.build_reconfig = mock.Mock(return_value=True) self.linux_source_mock.build_um_kernel = mock.Mock(return_value=True) self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log) - def tearDown(self): - self.print_patch.stop() - pass - def test_config_passes_args_pass(self): kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 0 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_build_passes_args_pass(self): kunit.main(['build'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 0 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '.kunit', None) - assert self.linux_source_mock.run_kernel.call_count == 0 + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_exec_passes_args_pass(self): kunit.main(['exec'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 0 - assert self.linux_source_mock.run_kernel.call_count == 1 - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=300) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_passes_args_pass(self): kunit.main(['run'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 + 
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir='.kunit', timeout=300) + build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_exec_passes_args_fail(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: kunit.main(['exec'], self.linux_source_mock) - assert type(e.exception) == SystemExit - assert e.exception.code == 1 + self.assertEqual(e.exception.code, 1) def test_run_passes_args_fail(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: kunit.main(['run'], self.linux_source_mock) - assert type(e.exception) == SystemExit - assert e.exception.code == 1 - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 + self.assertEqual(e.exception.code, 1) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.print_mock.assert_any_call(StrContains(' 0 tests run')) def test_exec_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) kunit.main(['exec', '--raw_output'], self.linux_source_mock) - assert self.linux_source_mock.run_kernel.call_count == 1 - for kall in self.print_mock.call_args_list: - assert kall != mock.call(StrContains('Testing complete.')) - assert kall != mock.call(StrContains(' 0 tests run')) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) + for call in self.print_mock.call_args_list: + self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) def test_run_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) 
kunit.main(['run', '--raw_output'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 - assert self.linux_source_mock.run_kernel.call_count == 1 - for kall in self.print_mock.call_args_list: - assert kall != mock.call(StrContains('Testing complete.')) - assert kall != mock.call(StrContains(' 0 tests run')) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) + for call in self.print_mock.call_args_list: + self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) def test_exec_timeout(self): timeout = 3453 kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock) - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=timeout) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir='.kunit', filter_glob='', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_timeout(self): timeout = 3453 kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir='.kunit', timeout=timeout) + build_dir='.kunit', filter_glob='', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_builddir(self): build_dir = '.kunit' kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - build_dir=build_dir, timeout=300) + build_dir=build_dir, filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) 
def test_config_builddir(self): build_dir = '.kunit' kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock) - assert self.linux_source_mock.build_reconfig.call_count == 1 + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) def test_build_builddir(self): build_dir = '.kunit' @@ -396,8 +395,23 @@ class KUnitMainTest(unittest.TestCase): def test_exec_builddir(self): build_dir = '.kunit' kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock) - self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300) + self.linux_source_mock.run_kernel.assert_called_once_with( + build_dir=build_dir, filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) + @mock.patch.object(kunit_kernel, 'LinuxSourceTree') + def test_run_kunitconfig(self, mock_linux_init): + mock_linux_init.return_value = self.linux_source_mock + kunit.main(['run', '--kunitconfig=mykunitconfig']) + # Just verify that we parsed and initialized it correctly here. + mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig') + + @mock.patch.object(kunit_kernel, 'LinuxSourceTree') + def test_config_kunitconfig(self, mock_linux_init): + mock_linux_init.return_value = self.linux_source_mock + kunit.main(['config', '--kunitconfig=mykunitconfig']) + # Just verify that we parsed and initialized it correctly here. 
+ mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig') + if __name__ == '__main__': unittest.main() diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 71c960dcd8a4..652254754b4c 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -55,7 +55,6 @@ int main(void) struct test *test, tests[] = { { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c index ad41ea69001b..e7041816085a 100644 --- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c +++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c @@ -145,7 +145,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp) if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) { ksft_print_msg( - "ptrace(PTRACE_SINGLESTEP) failed: %s\n", + "ptrace(PTRACE_CONT) failed: %s\n", strerror(errno)); return false; } @@ -159,7 +159,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp) } alarm(0); if (WIFEXITED(status)) { - ksft_print_msg("child did not single-step\n"); + ksft_print_msg("child exited prematurely\n"); return false; } if (!WIFSTOPPED(status)) { diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile index 607c2acd2082..604b43ece15f 100644 --- a/tools/testing/selftests/dmabuf-heaps/Makefile +++ b/tools/testing/selftests/dmabuf-heaps/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include +CFLAGS += -static -O3 -Wl,-no-as-needed -Wall TEST_GEN_PROGS = dmabuf-heap diff --git 
a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 909da9cdda97..29af27acd40e 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -130,16 +130,13 @@ static int dmabuf_heap_alloc(int fd, size_t len, unsigned int flags, dmabuf_fd); } -static void dmabuf_sync(int fd, int start_stop) +static int dmabuf_sync(int fd, int start_stop) { struct dma_buf_sync sync = { .flags = start_stop | DMA_BUF_SYNC_RW, }; - int ret; - ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); - if (ret) - printf("sync failed %d\n", errno); + return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); } #define ONE_MEG (1024 * 1024) @@ -151,16 +148,14 @@ static int test_alloc_and_import(char *heap_name) void *p = NULL; int ret; - printf("Testing heap: %s\n", heap_name); - heap_fd = dmabuf_heap_open(heap_name); if (heap_fd < 0) return -1; - printf("Allocating 1 MEG\n"); + printf(" Testing allocation and importing: "); ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Allocation Failed!\n"); + printf("FAIL (Allocation Failed!)\n"); ret = -1; goto out; } @@ -172,11 +167,10 @@ static int test_alloc_and_import(char *heap_name) dmabuf_fd, 0); if (p == MAP_FAILED) { - printf("mmap() failed: %m\n"); + printf("FAIL (mmap() failed)\n"); ret = -1; goto out; } - printf("mmap passed\n"); dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); memset(p, 1, ONE_MEG / 2); @@ -186,25 +180,31 @@ static int test_alloc_and_import(char *heap_name) importer_fd = open_vgem(); if (importer_fd < 0) { ret = importer_fd; - printf("Failed to open vgem\n"); - goto out; + printf("(Could not open vgem - skipping): "); + } else { + ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); + if (ret < 0) { + printf("FAIL (Failed to import buffer)\n"); + goto out; + } } - ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); + ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); if (ret < 0) { - printf("Failed 
to import buffer\n"); + printf("FAIL (DMA_BUF_SYNC_START failed!)\n"); goto out; } - printf("import passed\n"); - dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); memset(p, 0xff, ONE_MEG); - dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); - printf("syncs passed\n"); + ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); + if (ret < 0) { + printf("FAIL (DMA_BUF_SYNC_END failed!)\n"); + goto out; + } close_handle(importer_fd, handle); ret = 0; - + printf(" OK\n"); out: if (p) munmap(p, ONE_MEG); @@ -218,6 +218,84 @@ static int test_alloc_and_import(char *heap_name) return ret; } +static int test_alloc_zeroed(char *heap_name, size_t size) +{ + int heap_fd = -1, dmabuf_fd[32]; + int i, j, ret; + void *p = NULL; + char *c; + + printf(" Testing alloced %ldk buffers are zeroed: ", size / 1024); + heap_fd = dmabuf_heap_open(heap_name); + if (heap_fd < 0) + return -1; + + /* Allocate and fill a bunch of buffers */ + for (i = 0; i < 32; i++) { + ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]); + if (ret < 0) { + printf("FAIL (Allocation (%i) failed)\n", i); + goto out; + } + /* mmap and fill with simple pattern */ + p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0); + if (p == MAP_FAILED) { + printf("FAIL (mmap() failed!)\n"); + ret = -1; + goto out; + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START); + memset(p, 0xff, size); + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); + munmap(p, size); + } + /* close them all */ + for (i = 0; i < 32; i++) + close(dmabuf_fd[i]); + + /* Allocate and validate all buffers are zeroed */ + for (i = 0; i < 32; i++) { + ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]); + if (ret < 0) { + printf("FAIL (Allocation (%i) failed)\n", i); + goto out; + } + + /* mmap and validate everything is zero */ + p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0); + if (p == MAP_FAILED) { + printf("FAIL (mmap() failed!)\n"); + ret = -1; + goto out; + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START); + c = (char 
*)p; + for (j = 0; j < size; j++) { + if (c[j] != 0) { + printf("FAIL (Allocated buffer not zeroed @ %i)\n", j); + break; + } + } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); + munmap(p, size); + } + /* close them all */ + for (i = 0; i < 32; i++) + close(dmabuf_fd[i]); + + close(heap_fd); + printf("OK\n"); + return 0; + +out: + while (i > 0) { + close(dmabuf_fd[i]); + i--; + } + close(heap_fd); + return ret; +} + /* Test the ioctl version compatibility w/ a smaller structure then expected */ static int dmabuf_heap_alloc_older(int fd, size_t len, unsigned int flags, int *dmabuf_fd) @@ -292,23 +370,24 @@ static int test_alloc_compat(char *heap_name) if (heap_fd < 0) return -1; - printf("Testing (theoretical)older alloc compat\n"); + printf(" Testing (theoretical)older alloc compat: "); ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Older compat allocation failed!\n"); + printf("FAIL (Older compat allocation failed!)\n"); ret = -1; goto out; } close(dmabuf_fd); + printf("OK\n"); - printf("Testing (theoretical)newer alloc compat\n"); + printf(" Testing (theoretical)newer alloc compat: "); ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("Newer compat allocation failed!\n"); + printf("FAIL (Newer compat allocation failed!)\n"); ret = -1; goto out; } - printf("Ioctl compatibility tests passed\n"); + printf("OK\n"); out: if (dmabuf_fd >= 0) close(dmabuf_fd); @@ -327,17 +406,17 @@ static int test_alloc_errors(char *heap_name) if (heap_fd < 0) return -1; - printf("Testing expected error cases\n"); + printf(" Testing expected error cases: "); ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid fd)!\n"); + printf("FAIL (Did not see expected error (invalid fd)!)\n"); ret = -1; goto out; } ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid heap flags)!\n"); + 
printf("FAIL (Did not see expected error (invalid heap flags)!)\n"); ret = -1; goto out; } @@ -345,12 +424,12 @@ static int test_alloc_errors(char *heap_name) ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG, ~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd); if (!ret) { - printf("Did not see expected error (invalid fd flags)!\n"); + printf("FAIL (Did not see expected error (invalid fd flags)!)\n"); ret = -1; goto out; } - printf("Expected error checking passed\n"); + printf("OK\n"); ret = 0; out: if (dmabuf_fd >= 0) @@ -379,10 +458,20 @@ int main(void) if (!strncmp(dir->d_name, "..", 3)) continue; + printf("Testing heap: %s\n", dir->d_name); + printf("=======================================\n"); ret = test_alloc_and_import(dir->d_name); if (ret) break; + ret = test_alloc_zeroed(dir->d_name, 4 * 1024); + if (ret) + break; + + ret = test_alloc_zeroed(dir->d_name, ONE_MEG); + if (ret) + break; + ret = test_alloc_compat(dir->d_name); if (ret) break; diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index 5ec4d9e18806..656c43c24044 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -69,7 +69,7 @@ int restore_queue(struct msgque_data *msgque) printf("msgsnd failed (%m)\n"); ret = -errno; goto destroy; - }; + } } return 0; @@ -180,7 +180,7 @@ int fill_msgque(struct msgque_data *msgque) IPC_NOWAIT) != 0) { printf("First message send failed (%m)\n"); return -errno; - }; + } msgbuf.mtype = ANOTHER_MSG_TYPE; memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING)); @@ -188,7 +188,7 @@ int fill_msgque(struct msgque_data *msgque) IPC_NOWAIT) != 0) { printf("Second message send failed (%m)\n"); return -errno; - }; + } return 0; } diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index edce85420d19..ae0f0f33b2a6 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -79,7 +79,7 @@ #endif 
/** - * TH_LOG(fmt, ...) + * TH_LOG() * * @fmt: format string * @...: optional arguments @@ -113,12 +113,16 @@ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) /** - * SKIP(statement, fmt, ...) + * SKIP() * * @statement: statement to run after reporting SKIP * @fmt: format string * @...: optional arguments * + * .. code-block:: c + * + * SKIP(statement, fmt, ...); + * * This forces a "pass" after reporting why something is being skipped * and runs "statement", which is usually "return" or "goto skip". */ @@ -136,7 +140,7 @@ } while (0) /** - * TEST(test_name) - Defines the test function and creates the registration + * TEST() - Defines the test function and creates the registration * stub * * @test_name: test name @@ -155,7 +159,7 @@ #define TEST(test_name) __TEST_IMPL(test_name, -1) /** - * TEST_SIGNAL(test_name, signal) + * TEST_SIGNAL() * * @test_name: test name * @signal: signal number @@ -195,7 +199,7 @@ struct __test_metadata __attribute__((unused)) *_metadata) /** - * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less + * FIXTURE_DATA() - Wraps the struct name so we have one less * argument to pass around * * @datatype_name: datatype name @@ -212,7 +216,7 @@ #define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name /** - * FIXTURE(fixture_name) - Called once per fixture to setup the data and + * FIXTURE() - Called once per fixture to setup the data and * register * * @fixture_name: fixture name @@ -239,7 +243,7 @@ FIXTURE_DATA(fixture_name) /** - * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture. + * FIXTURE_SETUP() - Prepares the setup function for the fixture. * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name @@ -265,7 +269,7 @@ __attribute__((unused)) *variant) /** - * FIXTURE_TEARDOWN(fixture_name) + * FIXTURE_TEARDOWN() * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. 
* * @fixture_name: fixture name @@ -286,7 +290,7 @@ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) /** - * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture + * FIXTURE_VARIANT() - Optionally called once per fixture * to declare fixture variant * * @fixture_name: fixture name @@ -305,7 +309,7 @@ #define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name /** - * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture + * FIXTURE_VARIANT_ADD() - Called once per fixture * variant to setup and register the data * * @fixture_name: fixture name @@ -339,7 +343,7 @@ _##fixture_name##_##variant_name##_variant = /** - * TEST_F(fixture_name, test_name) - Emits test registration and helpers for + * TEST_F() - Emits test registration and helpers for * fixture-based test cases * * @fixture_name: fixture name diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h index e8eafaf0941a..e2ea41de3f35 100644 --- a/tools/testing/selftests/kselftest_module.h +++ b/tools/testing/selftests/kselftest_module.h @@ -11,7 +11,8 @@ #define KSTM_MODULE_GLOBALS() \ static unsigned int total_tests __initdata; \ -static unsigned int failed_tests __initdata +static unsigned int failed_tests __initdata; \ +static unsigned int skipped_tests __initdata #define KSTM_CHECK_ZERO(x) do { \ total_tests++; \ @@ -21,11 +22,16 @@ static unsigned int failed_tests __initdata } \ } while (0) -static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests) +static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests, + unsigned int skipped_tests) { - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else + if (failed_tests == 0) { + if (skipped_tests) { + pr_info("skipped %u tests\n", skipped_tests); + pr_info("remaining %u tests passed\n", total_tests); + } else + pr_info("all %u tests passed\n", total_tests); + } else pr_warn("failed %u out 
of %u tests\n", failed_tests, total_tests); return failed_tests ? -EINVAL : 0; @@ -36,7 +42,7 @@ static int __init __module##_init(void) \ { \ pr_info("loaded.\n"); \ selftest(); \ - return kstm_report(total_tests, failed_tests); \ + return kstm_report(total_tests, failed_tests, skipped_tests); \ } \ static void __exit __module##_exit(void) \ { \ diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 26c72f2b61b1..9338df6f4ca8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -4019,18 +4019,14 @@ TEST(user_notification_addfd) /* Verify we can set an arbitrary remote fd */ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); - /* - * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), - * 4(listener), so the newly allocated fd should be 5. - */ - EXPECT_EQ(fd, 5); + EXPECT_GE(fd, 0); EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* Verify we can set an arbitrary remote fd with large size */ memset(&big, 0x0, sizeof(big)); big.addfd = addfd; fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); - EXPECT_EQ(fd, 6); + EXPECT_GE(fd, 0); /* Verify we can set a specific remote fd */ addfd.newfd = 42; diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore index 2e43851b47c1..fe1eb8271b35 100644 --- a/tools/testing/selftests/timens/.gitignore +++ b/tools/testing/selftests/timens/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only clock_nanosleep exec +futex gettime_perf gettime_perf_cold procfs diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h index 6a6fe8d4ff55..6188b16827d1 100644 --- a/tools/testing/selftests/vDSO/vdso_config.h +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -47,10 +47,12 @@ #elif defined(__x86_64__) #define VDSO_VERSION 0 #define VDSO_NAMES 1 -#elif defined(__riscv__) +#elif defined(__riscv__) || 
defined(__riscv) #define VDSO_VERSION 5 #define VDSO_NAMES 1 +#if __riscv_xlen == 32 #define VDSO_32BIT 1 +#endif #else /* nds32 */ #define VDSO_VERSION 4 #define VDSO_NAMES 1 diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 1aef72df20a1..3a29346e1452 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -607,7 +607,7 @@ static void do_multicpu_tests(void) failures++; asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); - }; + } ftx = 100; /* Kill the thread. */ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);