From e9eadc28271940266ace3f748a1c6278bd886ea8 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:18 -0600 Subject: [PATCH 01/10] opp: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index e55c6095adf0..bed2b651deb0 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -224,7 +224,7 @@ void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, of_property_read_u32(np, "voltage-tolerance", &opp_table->voltage_tolerance_v1); - if (of_find_property(np, "#power-domain-cells", NULL)) + if (of_property_present(np, "#power-domain-cells")) opp_table->is_genpd = true; /* Get OPP table node */ @@ -536,7 +536,7 @@ static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, * an OPP then the OPP should not be enabled as there is * no way to see if the hardware supports it. */ - if (of_find_property(np, "opp-supported-hw", NULL)) + if (of_property_present(np, "opp-supported-hw")) return false; else return true; From 0417552730d0b1c30268fd32546914290b7c7ca0 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 14 Mar 2023 10:39:36 -0700 Subject: [PATCH 02/10] pm-graph: Update to v5.11 install_latest_from_github.sh: - Added a new script which allows users to install the latest pm-graph from the upstream github repo. This is useful if the kernel source version has issues that have already been fixed in github. sleepgraph.py: - Updated all the dmesg suspend/resume PM print formats to be able to process recent timelines using dmesg only. - Added ethtool output to the log for the system's ethernet device id the ethtool exists. This helps in debugging network issues. - Made the tool more robustly handle events where mangled dmesg or ftrace outputs do not include all the requisite data. The tool fails gracefully instead of creating a garbled timeline. Signed-off-by: Todd Brandt [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/README | 2 +- .../pm-graph/install_latest_from_github.sh | 38 +++++++++++++ tools/power/pm-graph/sleepgraph.py | 53 +++++++++++++------ 3 files changed, 75 insertions(+), 18 deletions(-) create mode 100755 tools/power/pm-graph/install_latest_from_github.sh diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README index 3213dbe63b74..047ce1d76467 100644 --- a/tools/power/pm-graph/README +++ b/tools/power/pm-graph/README @@ -6,7 +6,7 @@ |_| |___/ |_| pm-graph: suspend/resume/boot timing analysis tools - Version: 5.10 + Version: 5.11 Author: Todd Brandt Home Page: https://www.intel.com/content/www/us/en/developer/topic-technology/open/pm-graph/overview.html diff --git a/tools/power/pm-graph/install_latest_from_github.sh b/tools/power/pm-graph/install_latest_from_github.sh new file mode 100755 index 000000000000..eaa332399d36 --- /dev/null +++ b/tools/power/pm-graph/install_latest_from_github.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Script which clones and installs the latest pm-graph +# from http://github.com/intel/pm-graph.git + +OUT=`mktemp -d 2>/dev/null` +if [ -z "$OUT" -o ! -e $OUT ]; then + echo "ERROR: mktemp failed to create folder" + exit +fi + +cleanup() { + if [ -e "$OUT" ]; then + cd $OUT + rm -rf pm-graph + cd /tmp + rmdir $OUT + fi +} + +git clone http://github.com/intel/pm-graph.git $OUT/pm-graph +if [ ! -e "$OUT/pm-graph/sleepgraph.py" ]; then + echo "ERROR: pm-graph github repo failed to clone" + cleanup + exit +fi + +cd $OUT/pm-graph +echo "INSTALLING PM-GRAPH" +sudo make install +if [ $? -eq 0 ]; then + echo "INSTALL SUCCESS" + sleepgraph -v +else + echo "INSTALL FAILED" +fi +cleanup diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index bf4ac24a1c7a..ab703c9227d5 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -86,7 +86,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.10' + version = '5.11' ansi = False rs = 0 display = '' @@ -300,6 +300,7 @@ class SystemValues: [0, 'acpidevices', 'sh', '-c', 'ls -l /sys/bus/acpi/devices/*/physical_node'], [0, 's0ix_require', 'cat', '/sys/kernel/debug/pmc_core/substate_requirements'], [0, 's0ix_debug', 'cat', '/sys/kernel/debug/pmc_core/slp_s0_debug_status'], + [0, 'ethtool', 'ethtool', '{ethdev}'], [1, 's0ix_residency', 'cat', '/sys/kernel/debug/pmc_core/slp_s0_residency_usec'], [1, 'interrupts', 'cat', '/proc/interrupts'], [1, 'wakeups', 'cat', '/sys/kernel/debug/wakeup_sources'], @@ -1078,18 +1079,35 @@ class SystemValues: else: out[data[0].strip()] = data[1] return out + def cmdinfovar(self, arg): + if arg == 'ethdev': + try: + cmd = [self.getExec('ip'), '-4', '-o', '-br', 'addr'] + fp = Popen(cmd, stdout=PIPE, stderr=PIPE).stdout + info = ascii(fp.read()).strip() + fp.close() + except: + return 'iptoolcrash' + for line in info.split('\n'): + if line[0] == 'e' and 'UP' in line: + return line.split()[0] + return 'nodevicefound' + return 'unknown' def cmdinfo(self, begin, debug=False): out = [] if begin: self.cmd1 = dict() for cargs in self.infocmds: - delta, name = cargs[0], cargs[1] - cmdline, cmdpath = ' '.join(cargs[2:]), self.getExec(cargs[2]) + delta, name, args = cargs[0], cargs[1], cargs[2:] + for i in range(len(args)): + if args[i][0] == '{' and args[i][-1] == '}': + args[i] = self.cmdinfovar(args[i][1:-1]) + cmdline, cmdpath = ' '.join(args[0:]), self.getExec(args[0]) if not cmdpath or (begin and not delta): continue self.dlog('[%s]' % cmdline) try: - fp = Popen([cmdpath]+cargs[3:], stdout=PIPE, stderr=PIPE).stdout + fp = Popen([cmdpath]+args[1:], stdout=PIPE, stderr=PIPE).stdout info = ascii(fp.read()).strip() fp.close() except: @@ -1452,6 +1470,7 @@ class Data: errlist = { 'HWERROR' : r'.*\[ *Hardware Error *\].*', 'FWBUG' : r'.*\[ *Firmware Bug *\].*', + 'TASKFAIL': r'.*Freezing .*after *.*', 'BUG' : r'(?i).*\bBUG\b.*', 'ERROR' : r'(?i).*\bERROR\b.*', 'WARNING' : r'(?i).*\bWARNING\b.*', @@ -1462,7 +1481,6 @@ class Data: 'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*', 'ABORT' : r'(?i).*\bABORT\b.*', 'IRQ' : r'.*\bgenirq: .*', - 'TASKFAIL': r'.*Freezing .*after *.*', 'ACPI' : r'.*\bACPI *(?P[A-Za-z]*) *Error[: ].*', 'DISKFULL': r'.*\bNo space left on device.*', 'USBERR' : r'.*usb .*device .*, error [0-9-]*', @@ -1602,7 +1620,7 @@ class Data: pend = self.dmesg[phase]['end'] if start <= pend: return phase - return 'resume_complete' + return 'resume_complete' if 'resume_complete' in self.dmesg else '' def sourceDevice(self, phaselist, start, end, pid, type): tgtdev = '' for phase in phaselist: @@ -1645,6 +1663,8 @@ class Data: else: threadname = '%s-%d' % (proc, pid) tgtphase = self.sourcePhase(start) + if not tgtphase: + return False self.newAction(tgtphase, threadname, pid, '', start, end, '', ' kth', '') return self.addDeviceFunctionCall(displayname, kprobename, proc, pid, start, end, cdata, rdata) # this should not happen @@ -1835,9 +1855,9 @@ class Data: hwr = self.hwend - timedelta(microseconds=rtime) self.tLow.append('%.0f'%((hwr - hws).total_seconds() * 1000)) def getTimeValues(self): - sktime = (self.tSuspended - self.tKernSus) * 1000 - rktime = (self.tKernRes - self.tResumed) * 1000 - return (sktime, rktime) + s = (self.tSuspended - self.tKernSus) * 1000 + r = (self.tKernRes - self.tResumed) * 1000 + return (max(s, 0), max(r, 0)) def setPhase(self, phase, ktime, isbegin, order=-1): if(isbegin): # phase start over current phase @@ -3961,7 +3981,7 @@ def parseKernelLog(data): 'suspend_machine': ['PM: suspend-to-idle', 'PM: noirq suspend of devices complete after.*', 'PM: noirq freeze of devices complete after.*'], - 'resume_machine': ['PM: Timekeeping suspended for.*', + 'resume_machine': ['[PM: ]*Timekeeping suspended for.*', 'ACPI: Low-level resume complete.*', 'ACPI: resume from mwait', 'Suspended for [0-9\.]* seconds'], @@ -3979,14 +3999,14 @@ def parseKernelLog(data): # action table (expected events that occur and show up in dmesg) at = { 'sync_filesystems': { - 'smsg': 'PM: Syncing filesystems.*', - 'emsg': 'PM: Preparing system for mem sleep.*' }, + 'smsg': '.*[Ff]+ilesystems.*', + 'emsg': 'PM: Preparing system for[a-z]* sleep.*' }, 'freeze_user_processes': { - 'smsg': 'Freezing user space processes .*', + 'smsg': 'Freezing user space processes.*', 'emsg': 'Freezing remaining freezable tasks.*' }, 'freeze_tasks': { 'smsg': 'Freezing remaining freezable tasks.*', - 'emsg': 'PM: Entering (?P[a-z,A-Z]*) sleep.*' }, + 'emsg': 'PM: Suspending system.*' }, 'ACPI prepare': { 'smsg': 'ACPI: Preparing to enter system sleep state.*', 'emsg': 'PM: Saving platform NVS memory.*' }, @@ -4120,10 +4140,9 @@ def parseKernelLog(data): for a in sorted(at): if(re.match(at[a]['smsg'], msg)): if(a not in actions): - actions[a] = [] - actions[a].append({'begin': ktime, 'end': ktime}) + actions[a] = [{'begin': ktime, 'end': ktime}] if(re.match(at[a]['emsg'], msg)): - if(a in actions): + if(a in actions and actions[a][-1]['begin'] == actions[a][-1]['end']): actions[a][-1]['end'] = ktime # now look for CPU on/off events if(re.match('Disabling non-boot CPUs .*', msg)): From 34ea427e01ea60d9a9328d2d5a72d43740be25bc Mon Sep 17 00:00:00 2001 From: Xueqin Luo Date: Thu, 16 Mar 2023 09:33:07 +0800 Subject: [PATCH 03/10] PM: tools: sleepgraph: Recognize "CPU killed" messages On the arm64 platform with PSCI, the core log of CPU offline is as follows: [ 100.431501] CPU1: shutdown [ 100.454820] psci: CPU1 killed (polled 20 ms) [ 100.459266] CPU2: shutdown [ 100.482575] psci: CPU2 killed (polled 20 ms) [ 100.486057] CPU3: shutdown [ 100.513974] psci: CPU3 killed (polled 28 ms) [ 100.518068] CPU4: shutdown [ 100.541481] psci: CPU4 killed (polled 24 ms) Prevent sleepgraph from mistakenly treating the "CPU up" message as part of the suspend flow (because it should be regarded as part of the resume flow) by making it recognize the "CPU* killed" messages above. Signed-off-by: Xueqin Luo [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/sleepgraph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index ab703c9227d5..4a356a706785 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -4151,9 +4151,12 @@ def parseKernelLog(data): elif(re.match('Enabling non-boot CPUs .*', msg)): # start of first cpu resume cpu_start = ktime - elif(re.match('smpboot: CPU (?P[0-9]*) is now offline', msg)): + elif(re.match('smpboot: CPU (?P[0-9]*) is now offline', msg)) \ + or re.match('psci: CPU(?P[0-9]*) killed.*', msg)): # end of a cpu suspend, start of the next m = re.match('smpboot: CPU (?P[0-9]*) is now offline', msg) + if(not m): + m = re.match('psci: CPU(?P[0-9]*) killed.*', msg) cpu = 'CPU'+m.group('cpu') if(cpu not in actions): actions[cpu] = [] From 29b1a92e5e953214388ac1f3656cb3af266ca74f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 22 Feb 2023 16:36:37 +0530 Subject: [PATCH 04/10] OPP: Handle all genpd cases together in _set_required_opps() There is no real need of keeping separate code for single genpd case, it can be made to work with a simple change. Signed-off-by: Viresh Kumar Reviewed-by: Ulf Hansson --- drivers/opp/core.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e87567dbe99f..6d7016ce8c53 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -959,7 +959,8 @@ static int _set_required_opps(struct device *dev, struct dev_pm_opp *opp, bool up) { struct opp_table **required_opp_tables = opp_table->required_opp_tables; - struct device **genpd_virt_devs = opp_table->genpd_virt_devs; + struct device **genpd_virt_devs = + opp_table->genpd_virt_devs ? opp_table->genpd_virt_devs : &dev; int i, ret = 0; if (!required_opp_tables) @@ -979,12 +980,6 @@ static int _set_required_opps(struct device *dev, return -ENOENT; } - /* Single genpd case */ - if (!genpd_virt_devs) - return _set_required_opp(dev, dev, opp, 0); - - /* Multiple genpd case */ - /* * Acquire genpd_virt_dev_lock to make sure we don't use a genpd_dev * after it is freed from another thread. From 528f2d8d540a3c374d6b765c769620d91536b176 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 22 Feb 2023 16:36:38 +0530 Subject: [PATCH 05/10] OPP: Move required opps configuration to specialized callback The required-opps configuration is closely tied to genpd and performance states at the moment and it is not very obvious that required-opps can live without genpds. Though we don't support configuring required-opps for non-genpd cases currently. This commit aims at separating these parts, where configuring genpds would be a special case of configuring the required-opps. Add a specialized callback, set_required_opps(), to the opp table and set it to different callbacks accordingly. This shouldn't result in any functional changes for now. Signed-off-by: Viresh Kumar Reviewed-by: Ulf Hansson --- drivers/opp/core.c | 69 ++++++++++++++++++++++++++++------------------ drivers/opp/of.c | 3 ++ drivers/opp/opp.h | 4 +++ 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 6d7016ce8c53..954c94865cf5 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -935,8 +935,8 @@ static int _set_opp_bw(const struct opp_table *opp_table, return 0; } -static int _set_required_opp(struct device *dev, struct device *pd_dev, - struct dev_pm_opp *opp, int i) +static int _set_performance_state(struct device *dev, struct device *pd_dev, + struct dev_pm_opp *opp, int i) { unsigned int pstate = likely(opp) ? opp->required_opps[i]->pstate : 0; int ret; @@ -953,33 +953,20 @@ static int _set_required_opp(struct device *dev, struct device *pd_dev, return ret; } -/* This is only called for PM domain for now */ -static int _set_required_opps(struct device *dev, - struct opp_table *opp_table, - struct dev_pm_opp *opp, bool up) +static int _opp_set_required_opps_generic(struct device *dev, + struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) +{ + dev_err(dev, "setting required-opps isn't supported for non-genpd devices\n"); + return -ENOENT; +} + +static int _opp_set_required_opps_genpd(struct device *dev, + struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) { - struct opp_table **required_opp_tables = opp_table->required_opp_tables; struct device **genpd_virt_devs = opp_table->genpd_virt_devs ? opp_table->genpd_virt_devs : &dev; int i, ret = 0; - if (!required_opp_tables) - return 0; - - /* required-opps not fully initialized yet */ - if (lazy_linking_pending(opp_table)) - return -EBUSY; - - /* - * We only support genpd's OPPs in the "required-opps" for now, as we - * don't know much about other use cases. Error out if the required OPP - * doesn't belong to a genpd. - */ - if (unlikely(!required_opp_tables[0]->is_genpd)) { - dev_err(dev, "required-opps don't belong to a genpd\n"); - return -ENOENT; - } - /* * Acquire genpd_virt_dev_lock to make sure we don't use a genpd_dev * after it is freed from another thread. @@ -987,15 +974,15 @@ static int _set_required_opps(struct device *dev, mutex_lock(&opp_table->genpd_virt_dev_lock); /* Scaling up? Set required OPPs in normal order, else reverse */ - if (up) { + if (!scaling_down) { for (i = 0; i < opp_table->required_opp_count; i++) { - ret = _set_required_opp(dev, genpd_virt_devs[i], opp, i); + ret = _set_performance_state(dev, genpd_virt_devs[i], opp, i); if (ret) break; } } else { for (i = opp_table->required_opp_count - 1; i >= 0; i--) { - ret = _set_required_opp(dev, genpd_virt_devs[i], opp, i); + ret = _set_performance_state(dev, genpd_virt_devs[i], opp, i); if (ret) break; } @@ -1006,6 +993,34 @@ static int _set_required_opps(struct device *dev, return ret; } +/* This is only called for PM domain for now */ +static int _set_required_opps(struct device *dev, struct opp_table *opp_table, + struct dev_pm_opp *opp, bool up) +{ + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(opp_table)) + return -EBUSY; + + if (opp_table->set_required_opps) + return opp_table->set_required_opps(dev, opp_table, opp, up); + + return 0; +} + +/* Update set_required_opps handler */ +void _update_set_required_opps(struct opp_table *opp_table) +{ + /* Already set */ + if (opp_table->set_required_opps) + return; + + /* All required OPPs will belong to genpd or none */ + if (opp_table->required_opp_tables[0]->is_genpd) + opp_table->set_required_opps = _opp_set_required_opps_genpd; + else + opp_table->set_required_opps = _opp_set_required_opps_generic; +} + static void _find_current_opp(struct device *dev, struct opp_table *opp_table) { struct dev_pm_opp *opp = ERR_PTR(-ENODEV); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index bed2b651deb0..233eab70e15e 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -196,6 +196,8 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, /* Let's do the linking later on */ if (lazy) list_add(&opp_table->lazy, &lazy_opp_tables); + else + _update_set_required_opps(opp_table); goto put_np; @@ -411,6 +413,7 @@ static void lazy_link_required_opp_table(struct opp_table *new_table) /* All required opp-tables found, remove from lazy list */ if (!lazy) { + _update_set_required_opps(opp_table); list_del_init(&opp_table->lazy); list_for_each_entry(opp, &opp_table->opp_list, node) diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 3a6e077df386..2a057c42ddf4 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -184,6 +184,7 @@ enum opp_table_access { * @enabled: Set to true if the device's resources are enabled/configured. * @genpd_performance_state: Device's power domain support performance state. * @is_genpd: Marks if the OPP table belongs to a genpd. + * @set_required_opps: Helper responsible to set required OPPs. * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -234,6 +235,8 @@ struct opp_table { bool enabled; bool genpd_performance_state; bool is_genpd; + int (*set_required_opps)(struct device *dev, + struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down); #ifdef CONFIG_DEBUG_FS struct dentry *dentry; @@ -257,6 +260,7 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, int last_cp struct opp_table *_add_opp_table_indexed(struct device *dev, int index, bool getclk); void _put_opp_list_kref(struct opp_table *opp_table); void _required_opps_available(struct dev_pm_opp *opp, int count); +void _update_set_required_opps(struct opp_table *opp_table); static inline bool lazy_linking_pending(struct opp_table *opp_table) { From 73d73f5ee7fb0c42ff87091d105bee720a9565f1 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Sun, 26 Mar 2023 06:19:35 +0800 Subject: [PATCH 06/10] PM: core: Remove unnecessary (void *) conversions Assignments from pointer variables of type (void *) do not require explicit type casts, so remove such type cases from the code in drivers/base/power/main.c where applicable. Signed-off-by: Li zeming [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c50139207794..f85f3515c258 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -679,7 +679,7 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) static void async_resume_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume_noirq(dev, pm_transition, true); @@ -816,7 +816,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn static void async_resume_early(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume_early(dev, pm_transition, true); @@ -980,7 +980,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) static void async_resume(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume(dev, pm_transition, true); @@ -1269,7 +1269,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a static void async_suspend_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend_noirq(dev, pm_transition, true); @@ -1450,7 +1450,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as static void async_suspend_late(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend_late(dev, pm_transition, true); @@ -1727,7 +1727,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) static void async_suspend(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend(dev, pm_transition, true); From b52124a78ab34eb0754e32edc0c9996937779176 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Apr 2023 10:27:05 -0500 Subject: [PATCH 07/10] PM: Add sysfs files to represent time spent in hardware sleep state Userspace can't easily discover how much of a sleep cycle was spent in a hardware sleep state without using kernel tracing and vendor specific sysfs or debugfs files. To make this information more discoverable, introduce 3 new sysfs files: 1) The time spent in a hw sleep state for last cycle. 2) The time spent in a hw sleep state since the kernel booted 3) The maximum time that the hardware can report for a sleep cycle. All of these files will be present only if the system supports s2idle. Reviewed-by: Hans de Goede Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-power | 29 +++++++++++++ include/linux/suspend.h | 8 ++++ kernel/power/main.c | 59 +++++++++++++++++++++------ 3 files changed, 84 insertions(+), 12 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index f99d433ff311..a3942b1036e2 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -413,6 +413,35 @@ Description: The /sys/power/suspend_stats/last_failed_step file contains the last failed step in the suspend/resume path. +What: /sys/power/suspend_stats/last_hw_sleep +Date: June 2023 +Contact: Mario Limonciello +Description: + The /sys/power/suspend_stats/last_hw_sleep file + contains the duration of time spent in a hardware sleep + state in the most recent system suspend-resume cycle. + This number is measured in microseconds. + +What: /sys/power/suspend_stats/total_hw_sleep +Date: June 2023 +Contact: Mario Limonciello +Description: + The /sys/power/suspend_stats/total_hw_sleep file + contains the aggregate of time spent in a hardware sleep + state since the kernel was booted. This number + is measured in microseconds. + +What: /sys/power/suspend_stats/max_hw_sleep +Date: June 2023 +Contact: Mario Limonciello +Description: + The /sys/power/suspend_stats/max_hw_sleep file + contains the maximum amount of time that the hardware can + report for time spent in a hardware sleep state. When sleep + cycles are longer than this time, the values for + 'total_hw_sleep' and 'last_hw_sleep' may not be accurate. + This number is measured in microseconds. + What: /sys/power/sync_on_suspend Date: October 2019 Contact: Jonas Meurer diff --git a/include/linux/suspend.h b/include/linux/suspend.h index cfe19a028918..d0d4598a7b3f 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -68,6 +68,9 @@ struct suspend_stats { int last_failed_errno; int errno[REC_FAILED_NUM]; int last_failed_step; + u64 last_hw_sleep; + u64 total_hw_sleep; + u64 max_hw_sleep; enum suspend_stat_step failed_steps[REC_FAILED_NUM]; }; @@ -489,6 +492,8 @@ void restore_processor_state(void); extern int register_pm_notifier(struct notifier_block *nb); extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); +extern void pm_report_hw_sleep_time(u64 t); +extern void pm_report_max_hw_sleep(u64 t); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -526,6 +531,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) return 0; } +static inline void pm_report_hw_sleep_time(u64 t) {}; +static inline void pm_report_max_hw_sleep(u64 t) {}; + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) diff --git a/kernel/power/main.c b/kernel/power/main.c index 31ec4a9b9d70..3113ec2f1db4 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -6,6 +6,7 @@ * Copyright (c) 2003 Open Source Development Lab */ +#include #include #include #include @@ -83,6 +84,19 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); +void pm_report_hw_sleep_time(u64 t) +{ + suspend_stats.last_hw_sleep = t; + suspend_stats.total_hw_sleep += t; +} +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time); + +void pm_report_max_hw_sleep(u64 t) +{ + suspend_stats.max_hw_sleep = t; +} +EXPORT_SYMBOL_GPL(pm_report_max_hw_sleep); + int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down) { int ret; @@ -314,24 +328,27 @@ static char *suspend_step_name(enum suspend_stat_step step) } } -#define suspend_attr(_name) \ +#define suspend_attr(_name, format_str) \ static ssize_t _name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ - return sprintf(buf, "%d\n", suspend_stats._name); \ + return sprintf(buf, format_str, suspend_stats._name); \ } \ static struct kobj_attribute _name = __ATTR_RO(_name) -suspend_attr(success); -suspend_attr(fail); -suspend_attr(failed_freeze); -suspend_attr(failed_prepare); -suspend_attr(failed_suspend); -suspend_attr(failed_suspend_late); -suspend_attr(failed_suspend_noirq); -suspend_attr(failed_resume); -suspend_attr(failed_resume_early); -suspend_attr(failed_resume_noirq); +suspend_attr(success, "%d\n"); +suspend_attr(fail, "%d\n"); +suspend_attr(failed_freeze, "%d\n"); +suspend_attr(failed_prepare, "%d\n"); +suspend_attr(failed_suspend, "%d\n"); +suspend_attr(failed_suspend_late, "%d\n"); +suspend_attr(failed_suspend_noirq, "%d\n"); +suspend_attr(failed_resume, "%d\n"); +suspend_attr(failed_resume_early, "%d\n"); +suspend_attr(failed_resume_noirq, "%d\n"); +suspend_attr(last_hw_sleep, "%llu\n"); +suspend_attr(total_hw_sleep, "%llu\n"); +suspend_attr(max_hw_sleep, "%llu\n"); static ssize_t last_failed_dev_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -391,12 +408,30 @@ static struct attribute *suspend_attrs[] = { &last_failed_dev.attr, &last_failed_errno.attr, &last_failed_step.attr, + &last_hw_sleep.attr, + &total_hw_sleep.attr, + &max_hw_sleep.attr, NULL, }; +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + if (attr != &last_hw_sleep.attr && + attr != &total_hw_sleep.attr && + attr != &max_hw_sleep.attr) + return 0444; + +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) + return 0444; +#endif + return 0; +} + static const struct attribute_group suspend_attr_group = { .name = "suspend_stats", .attrs = suspend_attrs, + .is_visible = suspend_attr_is_visible, }; #ifdef CONFIG_DEBUG_FS From 09f5df3fb82fd444296e248f90dd29288e82d3a3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Apr 2023 10:27:06 -0500 Subject: [PATCH 08/10] platform/x86/amd: pmc: Report duration of time in hw sleep state amd_pmc displays a warning when a suspend didn't get to the deepest state and a dynamic debugging message with the duration if it did. Rather than logging to dynamic debugging the duration spent in the deepest state, report this to the standard kernel reporting infrastructure so that userspace software can query after the suspend cycle is done. Reviewed-by: Hans de Goede Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/amd/pmc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 2edaae04a691..e610457136e6 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -393,9 +393,8 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) if (!table.s0i3_last_entry_status) dev_warn(pdev->dev, "Last suspend didn't reach deepest state\n"); - else - dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n", - table.timein_s0i3_lastcapture); + pm_report_hw_sleep_time(table.s0i3_last_entry_status ? + table.timein_s0i3_lastcapture : 0); } static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev) @@ -1015,6 +1014,7 @@ static int amd_pmc_probe(struct platform_device *pdev) } amd_pmc_dbgfs_register(dev); + pm_report_max_hw_sleep(U64_MAX); return 0; err_pci_dev_put: From e2348afe702e9431f428e230be7f4ef8a1778b19 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Apr 2023 10:27:07 -0500 Subject: [PATCH 09/10] platform/x86/intel/pmc: core: Always capture counters on suspend Currently counters are only captured during suspend when the warn_on_s0ix_failures module parameter is set. In order to relay this counter information to the kernel reporting infrastructure adjust it so that the counters are always captured. warn_on_s0ix_failures will be utilized solely for messaging by the driver instead. Reviewed-by: Hans de Goede Reviewed-by: David E. Box Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/intel/pmc/core.c | 13 +++++-------- drivers/platform/x86/intel/pmc/core.h | 2 -- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index b9591969e0fa..925c5d676a43 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1179,12 +1179,6 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) { struct pmc_dev *pmcdev = dev_get_drvdata(dev); - pmcdev->check_counters = false; - - /* No warnings on S0ix failures */ - if (!warn_on_s0ix_failures) - return 0; - /* Check if the syspend will actually use S0ix */ if (pm_suspend_via_firmware()) return 0; @@ -1197,7 +1191,6 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) if (pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter)) return -EIO; - pmcdev->check_counters = true; return 0; } @@ -1233,12 +1226,16 @@ static __maybe_unused int pmc_core_resume(struct device *dev) const struct pmc_bit_map **maps = pmcdev->map->lpm_sts; int offset = pmcdev->map->lpm_status_offset; - if (!pmcdev->check_counters) + /* Check if the syspend used S0ix */ + if (pm_suspend_via_firmware()) return 0; if (!pmc_core_is_s0ix_failed(pmcdev)) return 0; + if (!warn_on_s0ix_failures) + return 0; + if (pmc_core_is_pc10_failed(pmcdev)) { /* S0ix failed because of PC10 entry failure */ dev_info(dev, "CPU did not enter PC10!!! (PC10 cnt=0x%llx)\n", diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index 810204d758ab..51d73efceaf3 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -319,7 +319,6 @@ struct pmc_reg_map { * @pmc_xram_read_bit: flag to indicate whether PMC XRAM shadow registers * used to read MPHY PG and PLL status are available * @mutex_lock: mutex to complete one transcation - * @check_counters: On resume, check if counters are getting incremented * @pc10_counter: PC10 residency counter * @s0ix_counter: S0ix residency (step adjusted) * @num_lpm_modes: Count of enabled modes @@ -338,7 +337,6 @@ struct pmc_dev { int pmc_xram_read_bit; struct mutex lock; /* generic mutex lock for PMC Core */ - bool check_counters; /* Check for counter increments on resume */ u64 pc10_counter; u64 s0ix_counter; int num_lpm_modes; From ddd66d63473513bb013928245b6482969b97de05 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Apr 2023 10:27:08 -0500 Subject: [PATCH 10/10] platform/x86/intel/pmc: core: Report duration of time in HW sleep state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_pmc_core displays a warning when the module parameter `warn_on_s0ix_failures` is set and a suspend didn't get to a HW sleep state. Report this to the standard kernel reporting infrastructure so that userspace software can query after the suspend cycle is done. Reviewed-by: Hans de Goede Suggested-by: Ilpo Järvinen Reviewed-by: David E. Box Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/intel/pmc/core.c | 4 ++++ drivers/platform/x86/intel/pmc/core.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 925c5d676a43..298f27ba1e10 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1153,6 +1153,8 @@ static int pmc_core_probe(struct platform_device *pdev) pmc_core_do_dmi_quirks(pmcdev); pmc_core_dbgfs_register(pmcdev); + pm_report_max_hw_sleep(FIELD_MAX(SLP_S0_RES_COUNTER_MASK) * + pmc_core_adjust_slp_s0_step(pmcdev, 1)); device_initialized = true; dev_info(&pdev->dev, " initialized\n"); @@ -1214,6 +1216,8 @@ static inline bool pmc_core_is_s0ix_failed(struct pmc_dev *pmcdev) if (pmc_core_dev_state_get(pmcdev, &s0ix_counter)) return false; + pm_report_hw_sleep_time((u32)(s0ix_counter - pmcdev->s0ix_counter)); + if (s0ix_counter == pmcdev->s0ix_counter) return true; diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index 51d73efceaf3..9ca9b9746719 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -16,6 +16,8 @@ #include #include +#define SLP_S0_RES_COUNTER_MASK GENMASK(31, 0) + #define PMC_BASE_ADDR_DEFAULT 0xFE000000 /* Sunrise Point Power Management Controller PCI Device ID */