Merge remote-tracking branch 'upstream/decouple_scheduler_loop_cadence' into integration

# Conflicts: # esphome/core/application.h
2026-05-23 11:16:52 +08:00 · 2026-04-18 07:12:36 -05:00
parent e347d65b64 4e05d39f4b
commit 6abb9fd815
6 changed files with 234 additions and 57 deletions
@@ -137,11 +137,12 @@ bool RuntimeStatsCollector::compare_total_time(Component *a, Component *b) {
  return a->runtime_stats_.total_time_us > b->runtime_stats_.total_time_us;
 }

-void RuntimeStatsCollector::process_pending_stats(uint32_t current_time) {
-  if ((int32_t) (current_time - this->next_log_time_) >= 0) {
-    this->log_stats_();
-    this->next_log_time_ = current_time + this->log_interval_;
-  }
+// Slow path for process_pending_stats — gate already checked by the inline
+// wrapper in runtime_stats.h. Out-of-line keeps the log_stats_ machinery out
+// of Application::loop().
+void RuntimeStatsCollector::process_pending_stats_slow_(uint32_t current_time) {
+  this->log_stats_();
+  this->next_log_time_ = current_time + this->log_interval_;
 }

 }  // namespace runtime_stats
@@ -6,6 +6,7 @@

 #include <cstdint>
 #include "esphome/core/hal.h"
+#include "esphome/core/helpers.h"
 #include "esphome/core/log.h"

 namespace esphome {
@@ -26,14 +27,22 @@ class RuntimeStatsCollector {
  }
  uint32_t get_log_interval() const { return this->log_interval_; }

-  // Process any pending stats printing (should be called after component loop)
-  void process_pending_stats(uint32_t current_time);
+  // Process any pending stats printing. Called on every Application::loop()
+  // tick, so the common "not yet time to log" path must be cheap — inline
+  // the gate check and keep the actual logging work out-of-line.
+  void ESPHOME_ALWAYS_INLINE process_pending_stats(uint32_t current_time) {
+    if ((int32_t) (current_time - this->next_log_time_) >= 0) [[unlikely]] {
+      this->process_pending_stats_slow_(current_time);
+    }
+  }

  // Record the wall time of one main loop iteration excluding the yield/sleep.
  // Called once per loop from Application::loop().
  //   active_us = total time between loop start and just before yield.
-  //   before_us = time spent in before_loop_tasks_ (scheduler + ISR enable_loop).
-  //   tail_us   = time spent in after_loop_tasks_ + the trailing record/stats prefix.
+  //   before_us = time spent in Phase A (scheduler tick) excluding time
+  //               already attributed to per-component stats.
+  //   tail_us   = time spent in after_loop_tasks_ + the trailing record/stats
+  //               prefix. Zero on Phase A-only ticks (component phase gated).
  // Residual overhead at log time = active − Σ(component) − before − tail,
  // which captures per-iteration inter-component bookkeeping (set_current_component,
  // WarnIfComponentBlockingGuard construction/destruction, feed_wdt_with_time calls,
@@ -55,6 +64,7 @@ class RuntimeStatsCollector {
  }

 protected:
+  void process_pending_stats_slow_(uint32_t current_time);
  void log_stats_();
  // Static comparators — member functions have friend access, lambdas do not
  static bool compare_period_time(Component *a, Component *b);
@@ -93,8 +93,11 @@ void Application::setup() {
    do {
      uint32_t now = MillisInternal::get();

-      // Process pending loop enables to handle GPIO interrupts during setup
-      this->before_loop_tasks_(now);
+      // Service scheduler and process pending loop enables to handle GPIO
+      // interrupts during setup. During setup we always run the component
+      // phase (no loop_interval_ gate), so call both helpers unconditionally.
+      this->scheduler_tick_(now);
+      this->before_component_phase_();

      for (uint32_t j = 0; j <= i; j++) {
        // Update loop_component_start_time_ right before calling each component
@@ -399,7 +399,8 @@ class Application {
  void enable_component_loop_(Component *component);
  void enable_pending_loops_();
  void activate_looping_component_(uint16_t index);
-  inline uint32_t ESPHOME_ALWAYS_INLINE before_loop_tasks_(uint32_t loop_start_time);
+  inline uint32_t ESPHOME_ALWAYS_INLINE scheduler_tick_(uint32_t now);
+  inline void ESPHOME_ALWAYS_INLINE before_component_phase_();
  inline void ESPHOME_ALWAYS_INLINE after_loop_tasks_() { this->in_loop_ = false; }

  /// Process dump_config output one component per loop iteration.
@@ -541,16 +542,25 @@ inline void Application::drain_wake_notifications_() {
 }
 #endif  // USE_HOST

-inline uint32_t ESPHOME_ALWAYS_INLINE Application::before_loop_tasks_(uint32_t loop_start_time) {
+// Phase A: drain wake notifications and run the scheduler. Invoked on every
+// Application::loop() tick regardless of whether a component phase runs, so
+// scheduler items fire at their requested cadence even when the caller has
+// raised loop_interval_ for power savings (see Application::loop()).
+// Returns the timestamp of the last scheduler item that ran (or `now`
+// unchanged if none ran), so the caller's WDT feed stays monotonic with the
+// per-item feeds inside scheduler.call() without an extra millis().
+inline uint32_t ESPHOME_ALWAYS_INLINE Application::scheduler_tick_(uint32_t now) {
 #ifdef USE_HOST
  // Drain wake notifications first to clear socket for next wake
  this->drain_wake_notifications_();
 #endif
+  return this->scheduler.call(now);
+}

-  // Scheduler::call feeds the WDT per item and returns the timestamp of the
-  // last fired item, or the input unchanged when nothing ran.
-  uint32_t last_op_end_time = this->scheduler.call(loop_start_time);
-
+// Phase B entry: only invoked when a component loop phase is about to run.
+// Processes pending enable_loop requests from ISRs and marks in_loop_ so
+// reentrant modifications during component.loop() are safe.
+inline void ESPHOME_ALWAYS_INLINE Application::before_component_phase_() {
  // Process any pending enable_loop requests from ISRs
  // This must be done before marking in_loop_ = true to avoid race conditions
  if (this->has_pending_enable_loop_requests_) {
@@ -567,7 +577,6 @@ inline uint32_t ESPHOME_ALWAYS_INLINE Application::before_loop_tasks_(uint32_t l

  // Mark that we're in the loop for safe reentrant modifications
  this->in_loop_ = true;
-  return last_op_end_time;
 }

 // NOLINTNEXTLINE(clang-diagnostic-unknown-attributes)
@@ -583,46 +592,68 @@ inline void ESPHOME_ALWAYS_INLINE __attribute__((optimize("O2"))) Application::l
  // so charging it again to "before" would double-count.
  uint64_t loop_recorded_snap = ComponentRuntimeStats::global_recorded_us;
 #endif
-  // Get the initial loop time at the start
-  uint32_t last_op_end_time = MillisInternal::get();
+  // Phase A: always service the scheduler. Decouples scheduler cadence from
+  // loop_interval_ so raised intervals (for power savings) don't drag scheduled
+  // items forward. A tick that only runs the scheduler is cheap.
+  // scheduler_tick_ returns the timestamp of the last scheduler item that ran
+  // (advanced by its per-item feeds) or `now` unchanged. We adopt it as `now`
+  // so the gate check and WDT feed both reflect actual elapsed time after
+  // scheduler dispatch, without an extra millis() call.
+  uint32_t now = this->scheduler_tick_(MillisInternal::get());
+  // Guarantee one WDT feed per tick even when the scheduler had nothing to
+  // dispatch and the component phase is gated out — covers configs with no
+  // looping components and no scheduler work (setup() has its own
+  // per-component feed_wdt calls, so only do this here, not in scheduler_tick_).
+  this->feed_wdt_with_time(now);

-  // Returned timestamp keeps us monotonic with last_wdt_feed_ (advanced by
-  // the scheduler's per-item feeds) without an extra millis() call.
-  last_op_end_time = this->before_loop_tasks_(last_op_end_time);
-  // Guarantee a WDT touch every tick — covers configs with no looping
-  // components and no scheduler work, where the per-item / per-component
-  // feeds never fire. Rate-limited inline fast path, ~free when unneeded.
-  this->feed_wdt_with_time(last_op_end_time);
 #ifdef USE_RUNTIME_STATS
  uint32_t loop_before_end_us = micros();
  uint64_t loop_before_scheduled_us = ComponentRuntimeStats::global_recorded_us - loop_recorded_snap;
+  // Default tail_start to end-of-before so tail_us == 0 on Phase A-only ticks.
+  uint32_t loop_tail_start_us = loop_before_end_us;
 #endif

-  for (this->current_loop_index_ = 0; this->current_loop_index_ < this->looping_components_active_end_;
-       this->current_loop_index_++) {
-    Component *component = this->looping_components_[this->current_loop_index_];
+  // Gate the component phase on loop_interval_ or an active high-frequency
+  // request. When a scheduler wake preempts sleep early, this gate keeps the
+  // component phase from running more often than loop_interval_.
+  const bool high_frequency = HighFrequencyLoopRequester::is_high_frequency();
+  const uint32_t elapsed = now - this->last_loop_;
+  const bool do_component_phase = high_frequency || (elapsed >= this->loop_interval_);

-    // Update the cached time before each component runs
-    this->loop_component_start_time_ = last_op_end_time;
+  if (do_component_phase) {
+    this->before_component_phase_();

-    {
-      this->set_current_component(component);
-      WarnIfComponentBlockingGuard guard{component, last_op_end_time};
-      component->loop();
-      // Use the finish method to get the current time as the end time
-      last_op_end_time = guard.finish();
+    uint32_t last_op_end_time = now;
+    for (this->current_loop_index_ = 0; this->current_loop_index_ < this->looping_components_active_end_;
+         this->current_loop_index_++) {
+      Component *component = this->looping_components_[this->current_loop_index_];
+
+      // Update the cached time before each component runs
+      this->loop_component_start_time_ = last_op_end_time;
+
+      {
+        this->set_current_component(component);
+        WarnIfComponentBlockingGuard guard{component, last_op_end_time};
+        component->loop();
+        // Use the finish method to get the current time as the end time
+        last_op_end_time = guard.finish();
+      }
+      this->feed_wdt_with_time(last_op_end_time);
    }
-    this->feed_wdt_with_time(last_op_end_time);
+
+#ifdef USE_RUNTIME_STATS
+    loop_tail_start_us = micros();
+#endif
+    this->after_loop_tasks_();
+
+    this->last_loop_ = last_op_end_time;
+    now = last_op_end_time;
  }

 #ifdef USE_RUNTIME_STATS
-  uint32_t loop_tail_start_us = micros();
-#endif
-  this->after_loop_tasks_();
-
-#ifdef USE_RUNTIME_STATS
-  // Process any pending runtime stats printing after all components have run
-  // This ensures stats printing doesn't affect component timing measurements
+  // Record per-tick timing on every loop, not just component-phase ticks.
+  // record_loop_active is a small accumulator; process_pending_stats is an
+  // inline gate check that early-outs unless now >= next_log_time_.
  if (global_runtime_stats != nullptr) {
    uint32_t loop_now_us = micros();
    // Subtract scheduled-component time from the "before" bucket so it is
@@ -633,23 +664,22 @@ inline void ESPHOME_ALWAYS_INLINE __attribute__((optimize("O2"))) Application::l
                                           : 0;
    global_runtime_stats->record_loop_active(loop_now_us - loop_active_start_us, loop_before_overhead_us,
                                             loop_now_us - loop_tail_start_us);
-    global_runtime_stats->process_pending_stats(last_op_end_time);
+    global_runtime_stats->process_pending_stats(now);
  }
 #endif

-  // Use the last component's end time instead of calling millis() again
+  // Compute sleep: bounded by time-until-next-component-phase and the
+  // scheduler's next deadline. When a scheduler event wakes us early we
+  // re-enter loop(), Phase A services it, and the component phase stays gated.
  uint32_t delay_time = 0;
-  auto elapsed = last_op_end_time - this->last_loop_;
-  if (elapsed < this->loop_interval_ && !HighFrequencyLoopRequester::is_high_frequency()) {
-    delay_time = this->loop_interval_ - elapsed;
-    uint32_t next_schedule = this->scheduler.next_schedule_in(last_op_end_time).value_or(delay_time);
-    // next_schedule is max 0.5*delay_time
-    // otherwise interval=0 schedules result in constant looping with almost no sleep
-    next_schedule = std::max(next_schedule, delay_time / 2);
-    delay_time = std::min(next_schedule, delay_time);
+  if (!high_frequency) {
+    const uint32_t elapsed_since_phase = now - this->last_loop_;
+    const uint32_t until_phase =
+        (elapsed_since_phase >= this->loop_interval_) ? 0 : (this->loop_interval_ - elapsed_since_phase);
+    const uint32_t until_sched = this->scheduler.next_schedule_in(now).value_or(until_phase);
+    delay_time = std::min(until_phase, until_sched);
  }
  this->yield_with_select_(delay_time);
-  this->last_loop_ = last_op_end_time;

  if (this->dump_config_at_ < this->components_.size()) {
    this->process_dump_config_();
@@ -0,0 +1,60 @@
+esphome:
+  name: loop-interval-decouple
+  on_boot:
+    priority: -100
+    then:
+      - lambda: |-
+          // Raise loop_interval_ to 500ms. With the decoupling fix the
+          // component phase should run ~twice per second while the 50ms
+          // scheduler interval below still fires at its requested cadence.
+          App.set_loop_interval(500);
+      # Start measurement after 1s so boot transients settle.
+      - delay: 1000ms
+      - lambda: |-
+          id(loop_at_start) = id(loop_counter)->get_loop_count();
+          id(sched_at_start) = id(sched_count);
+          ESP_LOGI("test", "MEASUREMENT_STARTED loop=%d sched=%d",
+                   id(loop_at_start), id(sched_at_start));
+      # Observe for 2s.
+      - delay: 2000ms
+      - lambda: |-
+          int loop_delta = id(loop_counter)->get_loop_count() - id(loop_at_start);
+          int sched_delta = id(sched_count) - id(sched_at_start);
+          ESP_LOGI("test", "MEASUREMENT_DONE loop_delta=%d sched_delta=%d",
+                   loop_delta, sched_delta);
+
+host:
+api:
+logger:
+  level: INFO
+  logs:
+    loop_test_component: WARN   # Silence per-loop log spam
+
+external_components:
+  - source:
+      type: local
+      path: EXTERNAL_COMPONENT_PATH
+
+globals:
+  - id: sched_count
+    type: int
+    initial_value: "0"
+  - id: loop_at_start
+    type: int
+    initial_value: "0"
+  - id: sched_at_start
+    type: int
+    initial_value: "0"
+
+loop_test_component:
+  components:
+    - id: loop_counter
+      name: loop_counter
+
+interval:
+  # Fast scheduler interval — with the decoupling fix this should fire at
+  # its requested 50ms cadence regardless of loop_interval_.
+  - interval: 50ms
+    then:
+      - lambda: |-
+          id(sched_count) += 1;
@@ -0,0 +1,73 @@
+"""Test that loop_interval_ no longer clamps scheduler cadence.
+
+Regression test for the decoupling of Application::loop() component-phase
+cadence from scheduler wake timing.
+
+Setup:
+- App.set_loop_interval(500) — raised for power-savings style cadence
+- Scheduler interval at 50ms — should fire at 50ms regardless of loop_interval_
+- Component loop (LoopTestComponent) — should run at 500ms cadence
+
+Before the decoupling fix the old `std::max(next_schedule, delay_time / 2)`
+floor clamped the sleep to ~250ms, so the 50ms scheduler only fired ~8 times
+per 2s (vs the ~40 expected). After the fix the scheduler fires close to its
+requested cadence while the component phase stays gated at loop_interval_.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import re
+
+import pytest
+
+from .types import APIClientConnectedFactory, RunCompiledFunction
+
+
+@pytest.mark.asyncio
+async def test_loop_interval_decoupling(
+    yaml_config: str,
+    run_compiled: RunCompiledFunction,
+    api_client_connected: APIClientConnectedFactory,
+) -> None:
+    """Raised loop_interval_ must not clamp scheduler item cadence."""
+    loop = asyncio.get_running_loop()
+    measurement_done: asyncio.Future[tuple[int, int]] = loop.create_future()
+
+    def on_log_line(line: str) -> None:
+        match = re.search(r"MEASUREMENT_DONE loop_delta=(\d+) sched_delta=(\d+)", line)
+        if match and not measurement_done.done():
+            measurement_done.set_result((int(match.group(1)), int(match.group(2))))
+
+    async with (
+        run_compiled(yaml_config, line_callback=on_log_line),
+        api_client_connected() as client,
+    ):
+        device_info = await client.device_info()
+        assert device_info is not None
+        assert device_info.name == "loop-interval-decouple"
+
+        try:
+            loop_delta, sched_delta = await asyncio.wait_for(
+                measurement_done, timeout=10.0
+            )
+        except TimeoutError:
+            pytest.fail("MEASUREMENT_DONE marker never appeared")
+
+        # Observation window = 2s, loop_interval_ = 500ms.
+        # Component phase should fire ~4 times in 2s. The upper bound must be
+        # less than 8: the pre-decoupling behavior clamped to ~250ms cadence
+        # giving ~8 loops/2s, so allowing 8 would let the old behavior pass.
+        assert 2 <= loop_delta <= 6, (
+            f"Component loop should fire ~4 times in 2s at loop_interval=500ms, "
+            f"got {loop_delta}"
+        )
+
+        # Scheduler interval = 50ms → ~40 fires in 2s. Before the decoupling
+        # fix this clamped to ~8 fires. Assert >= 20 to catch the old clamped
+        # behavior with comfortable jitter headroom for slow CI hosts.
+        assert sched_delta >= 20, (
+            f"50ms scheduler interval should fire ~40 times in 2s but only "
+            f"fired {sched_delta}. This indicates loop_interval_ is still "
+            f"clamping scheduler cadence."
+        )