A revert to fix a adjtimex() regression:

The recent change to prevent that time goes backwards for the coarse time
 getters due to immediate multiplier adjustments via adjtimex(), changed the
 way how the timekeeping core treats that.
 
 That change result in a regression on the adjtimex() side, which is user
 space visible:
 
  1) The forwarding of the base time moves the update out of the original
     period and establishes a new one. That's changing the behaviour of the
     [PF]LL control, which user space expects to be applied periodically.
 
  2) The clearing of the accumulated NTP error due to #1, changes the
     behaviour as well.
 
 It was tried to delay the multiplier/frequency update to the next tick, but
 that did not solve the problem as userspace expects that the multiplier or
 frequency updates are in effect, when the syscall returns.
 
 There is a different solution for the coarse time problem available, so
 revert the offending commit to restore the existing adjtimex() behaviour.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmfyVtsTHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoaeBEACIjssZjasdb/6sGgpp+6jdp3yCuUff
 XUB30O54u+54NBtIoxq8a8w74sI06Y1xmmRIRgmchBrylUooZyglaE1BfKZpzt5h
 FWszpSp3pfFOZ+A2rpNGWhskGxNVCDnGsPAsQSmPgCY17ZU5j+BkTSE4fcDZqftC
 E/Ojr67KD24kXGXDeQp08fSdXCfyd85PnFmpZmyqnDePuAA2JF6uAfqJE+QoeuUh
 KkQdARi+xAvXdzIRCLw5cQ/tlhxwPYrHOiMt/VRg/A44Nowl/+IEo83QjXRn7cz9
 sq1X2tAY42D/VSG01ZS8cpErWQuSYlI+hilFJ13POVZP+2xhZQUI3QzmrjG4+jqr
 s6I5g6RQyasG8tgkVTTR9+rIvSOAVkp0j0Y2tZ14e/9gi+/0+f5DYhxRc7MFPLW0
 ssS6oPIO1lsnU5KcaZ88SdDZ1OYmAj+L3R3dKM8PoggK8igZkaqezKwiH3RorKQJ
 8yZ5yfGYRNInzLHq7MUkai0xnLGbbx/hHCPZt+V7rNWP34eD+xykSKestC3wFscm
 jWAwP/CERz6mYR5mqicWkP52o39fIjbFixq+epAzBabmBJnPNBaUyb9V3MEf6ycq
 yWscFVjPu6koeX4MNUtDpcFdtb1QZJMJAtBxxnysFy03eNaryYRvta1t8EP/WgMz
 Zu71G7I8SvWrUw==
 =0MEH
 -----END PGP SIGNATURE-----

Merge tag 'timers-urgent-2025-04-06' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fix from Thomas Gleixner:
 "A revert to fix a adjtimex() regression:

  The recent change to prevent that time goes backwards for the coarse
  time getters due to immediate multiplier adjustments via adjtimex(),
  changed the way how the timekeeping core treats that.

  That change result in a regression on the adjtimex() side, which is
  user space visible:

   1) The forwarding of the base time moves the update out of the
      original period and establishes a new one. That's changing the
      behaviour of the [PF]LL control, which user space expects to be
      applied periodically.

   2) The clearing of the accumulated NTP error due to #1, changes the
      behaviour as well.

  An attempt to delay the multiplier/frequency update to the next tick
  did not solve the problem as userspace expects that the multiplier or
  frequency updates are in effect, when the syscall returns.

  There is a different solution for the coarse time problem available,
  so revert the offending commit to restore the existing adjtimex()
  behaviour"

* tag 'timers-urgent-2025-04-06' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Revert "timekeeping: Fix possible inconsistencies in _COARSE clockids"
This commit is contained in:
Linus Torvalds 2025-04-06 08:13:16 -07:00
commit a91c49517d

View File

@ -682,19 +682,20 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act
}
/**
* timekeeping_forward - update clock to given cycle now value
* timekeeping_forward_now - update clock to the current time
* @tk: Pointer to the timekeeper to update
* @cycle_now: Current clocksource read value
*
* Forward the current clock to update its state since the last call to
* update_wall_time(). This is useful before significant clock changes,
* as it avoids having to deal with this time offset explicitly.
*/
static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
static void timekeeping_forward_now(struct timekeeper *tk)
{
u64 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
tk->tkr_mono.clock->max_raw_delta);
u64 cycle_now, delta;
cycle_now = tk_clock_read(&tk->tkr_mono);
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
tk->tkr_mono.clock->max_raw_delta);
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
@ -709,21 +710,6 @@ static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
}
}
/**
* timekeeping_forward_now - update clock to the current time
* @tk: Pointer to the timekeeper to update
*
* Forward the current clock to update its state since the last call to
* update_wall_time(). This is useful before significant clock changes,
* as it avoids having to deal with this time offset explicitly.
*/
static void timekeeping_forward_now(struct timekeeper *tk)
{
u64 cycle_now = tk_clock_read(&tk->tkr_mono);
timekeeping_forward(tk, cycle_now);
}
/**
* ktime_get_real_ts64 - Returns the time of day in a timespec64.
* @ts: pointer to the timespec to be set
@ -2165,54 +2151,6 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
return offset;
}
static u64 timekeeping_accumulate(struct timekeeper *tk, u64 offset,
enum timekeeping_adv_mode mode,
unsigned int *clock_set)
{
int shift = 0, maxshift;
/*
* TK_ADV_FREQ indicates that adjtimex(2) directly set the
* frequency or the tick length.
*
* Accumulate the offset, so that the new multiplier starts from
* now. This is required as otherwise for offsets, which are
* smaller than tk::cycle_interval, timekeeping_adjust() could set
* xtime_nsec backwards, which subsequently causes time going
* backwards in the coarse time getters. But even for the case
* where offset is greater than tk::cycle_interval the periodic
* accumulation does not have much value.
*
* Also reset tk::ntp_error as it does not make sense to keep the
* old accumulated error around in this case.
*/
if (mode == TK_ADV_FREQ) {
timekeeping_forward(tk, tk->tkr_mono.cycle_last + offset);
tk->ntp_error = 0;
return 0;
}
/*
* With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently,
* we calculate the largest doubling multiple of cycle_intervals
* that is smaller than the offset. We then accumulate that
* chunk in one go, and then try to consume the next smaller
* doubled multiple.
*/
shift = ilog2(offset) - ilog2(tk->cycle_interval);
shift = max(0, shift);
/* Bound shift to one less than what overflows tick_length */
maxshift = (64 - (ilog2(ntp_tick_length()) + 1)) - 1;
shift = min(shift, maxshift);
while (offset >= tk->cycle_interval) {
offset = logarithmic_accumulation(tk, offset, shift, clock_set);
if (offset < tk->cycle_interval << shift)
shift--;
}
return offset;
}
/*
* timekeeping_advance - Updates the timekeeper to the current time and
* current NTP tick length
@ -2222,6 +2160,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
struct timekeeper *tk = &tk_core.shadow_timekeeper;
struct timekeeper *real_tk = &tk_core.timekeeper;
unsigned int clock_set = 0;
int shift = 0, maxshift;
u64 offset;
guard(raw_spinlock_irqsave)(&tk_core.lock);
@ -2238,7 +2177,24 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
return false;
offset = timekeeping_accumulate(tk, offset, mode, &clock_set);
/*
* With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently,
* we calculate the largest doubling multiple of cycle_intervals
* that is smaller than the offset. We then accumulate that
* chunk in one go, and then try to consume the next smaller
* doubled multiple.
*/
shift = ilog2(offset) - ilog2(tk->cycle_interval);
shift = max(0, shift);
/* Bound shift to one less than what overflows tick_length */
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
shift = min(shift, maxshift);
while (offset >= tk->cycle_interval) {
offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
if (offset < tk->cycle_interval<<shift)
shift--;
}
/* Adjust the multiplier to correct NTP error */
timekeeping_adjust(tk, offset);