[core] Fix app_state_ status bits clobbered for non-looping components (#15658)

This commit is contained in:
J. Nick Koston
2026-04-14 07:50:11 -10:00
committed by Jesse Hills
parent 82c0cb8929
commit cb90ac45c3
6 changed files with 416 additions and 10 deletions
+30 -4
View File
@@ -85,8 +85,12 @@ void Application::setup() {
if (component->can_proceed())
continue;
// Force the status LED to blink WARNING while we wait for a slow
// component to come up. Cleared after setup() finishes if no real
// component has warning set.
this->app_state_ |= STATUS_LED_WARNING;
do {
uint8_t new_app_state = STATUS_LED_WARNING;
uint32_t now = millis();
// Process pending loop enables to handle GPIO interrupts during setup
@@ -96,17 +100,26 @@ void Application::setup() {
// Update loop_component_start_time_ right before calling each component
this->loop_component_start_time_ = millis();
this->components_[j]->call();
new_app_state |= this->components_[j]->get_component_state();
this->app_state_ |= new_app_state;
this->feed_wdt();
}
this->after_loop_tasks_();
this->app_state_ = new_app_state;
yield();
} while (!component->can_proceed() && !component->is_failed());
}
// Setup is complete. Reconcile STATUS_LED_WARNING: the slow-setup path
// above may have forced it on, and any status_clear_warning() calls
// from components during setup were intentional no-ops (gated by
// APP_STATE_SETUP_COMPLETE). Walk components once here to pick up the
// real state. STATUS_LED_ERROR is never artificially forced, so its
// clear path always works and needs no reconciliation. Finally, set
// APP_STATE_SETUP_COMPLETE so subsequent warning clears go through
// the normal walk-and-clear path.
if (!this->any_component_has_status_flag_(STATUS_LED_WARNING))
this->app_state_ &= ~STATUS_LED_WARNING;
this->app_state_ |= APP_STATE_SETUP_COMPLETE;
ESP_LOGI(TAG, "setup() finished successfully!");
#ifdef USE_SETUP_PRIORITY_OVERRIDE
@@ -211,6 +224,19 @@ void HOT Application::feed_wdt(uint32_t time) {
#endif
}
}
bool Application::any_component_has_status_flag_(uint8_t flag) const {
// Walk all components (not just looping ones) so non-looping components'
// status bits are respected. Only called from the slow-path clear helpers
// (status_clear_warning_slow_path_ / status_clear_error_slow_path_) on an
// actual set→clear transition, so walking O(N) here is paid once per
// transition — not once per loop iteration.
for (auto *component : this->components_) {
if ((component->get_component_state() & flag) != 0)
return true;
}
return false;
}
void Application::reboot() {
ESP_LOGI(TAG, "Forcing a reboot");
for (auto &component : std::ranges::reverse_view(this->components_)) {
+18 -6
View File
@@ -401,7 +401,18 @@ class Application {
*/
void teardown_components(uint32_t timeout_ms);
uint8_t get_app_state() const { return this->app_state_; }
/// Return the public app state status bits (STATUS_LED_* only).
/// Internal bookkeeping bits like APP_STATE_SETUP_COMPLETE are masked
/// out so external readers (status_led components, etc.) never see them.
uint8_t get_app_state() const { return this->app_state_ & ~APP_STATE_SETUP_COMPLETE; }
/// True once Application::setup() has finished walking all components
/// and finalized the initial status flags. Before this point, the
/// slow-setup busy-wait may be forcing STATUS_LED_WARNING on, and
/// status_clear_* intentionally skips its walk-and-clear step so the
/// forced bit doesn't get wiped. Stored as a free bit on app_state_
/// (bit 6) to avoid costing additional RAM.
bool is_setup_complete() const { return (this->app_state_ & APP_STATE_SETUP_COMPLETE) != 0; }
// Helper macro for entity getter method declarations
#ifdef USE_DEVICES
@@ -577,6 +588,12 @@ class Application {
bool is_socket_ready_(int fd) const { return FD_ISSET(fd, &this->read_fds_); }
#endif
/// Walk all registered components looking for any whose component_state_
/// has the given flag set. Used by Component::status_clear_*_slow_path_()
/// (which is a friend) to decide whether to clear the corresponding bit on
/// this->app_state_ (the app-wide "any component has this status" indicator).
bool any_component_has_status_flag_(uint8_t flag) const;
/// Register a component, detecting loop() override at compile time.
/// Uses HasLoopOverride<T> which handles ambiguous &T::loop from multiple inheritance.
template<typename T> void register_component_(T *comp) {
@@ -838,8 +855,6 @@ inline void ESPHOME_ALWAYS_INLINE Application::before_loop_tasks_(uint32_t loop_
}
inline void ESPHOME_ALWAYS_INLINE Application::loop() {
uint8_t new_app_state = 0;
// Get the initial loop time at the start
uint32_t last_op_end_time = millis();
@@ -859,13 +874,10 @@ inline void ESPHOME_ALWAYS_INLINE Application::loop() {
// Use the finish method to get the current time as the end time
last_op_end_time = guard.finish();
}
new_app_state |= component->get_component_state();
this->app_state_ |= new_app_state;
this->feed_wdt(last_op_end_time);
}
this->after_loop_tasks_();
this->app_state_ = new_app_state;
#ifdef USE_RUNTIME_STATS
// Process any pending runtime stats printing after all components have run
+13
View File
@@ -411,10 +411,23 @@ void Component::status_set_error(const LogString *message) {
}
void Component::status_clear_warning_slow_path_() {
this->component_state_ &= ~STATUS_LED_WARNING;
// Clear the app-wide STATUS_LED_WARNING bit only if setup has finished
// AND no other component still has it set. During setup the forced
// STATUS_LED_WARNING (from the slow-setup busy-wait) must not be wiped
// by a transient component clear — Application::setup() reconciles
// the warning bit once at the end before setting APP_STATE_SETUP_COMPLETE.
// The set path is unchanged (set_status_flag_ still writes directly).
if (App.is_setup_complete() && !App.any_component_has_status_flag_(STATUS_LED_WARNING))
App.app_state_ &= ~STATUS_LED_WARNING;
ESP_LOGW(TAG, "%s cleared Warning flag", LOG_STR_ARG(this->get_component_log_str()));
}
void Component::status_clear_error_slow_path_() {
this->component_state_ &= ~STATUS_LED_ERROR;
// STATUS_LED_ERROR is never artificially forced — it only ever lands
// in app_state_ via a real set_status_flag_ call. So the walk-and-clear
// path is always safe, including during setup.
if (!App.any_component_has_status_flag_(STATUS_LED_ERROR))
App.app_state_ &= ~STATUS_LED_ERROR;
ESP_LOGE(TAG, "%s cleared Error flag", LOG_STR_ARG(this->get_component_log_str()));
}
void Component::status_momentary_warning(const char *name, uint32_t length) {
+5
View File
@@ -89,6 +89,11 @@ inline constexpr uint8_t STATUS_LED_WARNING = 0x08;
inline constexpr uint8_t STATUS_LED_ERROR = 0x10;
// Component loop override flag uses bit 5 (set at registration time)
inline constexpr uint8_t COMPONENT_HAS_LOOP = 0x20;
// Bit 6 on Application::app_state_ (ONLY) — set at the end of
// Application::setup(). Component::status_clear_*_slow_path_() uses this to
// decide whether to propagate clears to App.app_state_. Never set on a
// Component's component_state_.
inline constexpr uint8_t APP_STATE_SETUP_COMPLETE = 0x40;
// Remove before 2026.8.0
enum class RetryResult { DONE, RETRY };
@@ -0,0 +1,141 @@
esphome:
name: status-flags-test
host:
api:
actions:
# Warning flag services for sensor_a
- action: set_warning_a
then:
- lambda: "id(sensor_a)->status_set_warning();"
- component.update: app_warning_bit
- component.update: app_error_bit
- action: clear_warning_a
then:
- lambda: "id(sensor_a)->status_clear_warning();"
- component.update: app_warning_bit
- component.update: app_error_bit
# Warning flag services for sensor_b
- action: set_warning_b
then:
- lambda: "id(sensor_b)->status_set_warning();"
- component.update: app_warning_bit
- component.update: app_error_bit
- action: clear_warning_b
then:
- lambda: "id(sensor_b)->status_clear_warning();"
- component.update: app_warning_bit
- component.update: app_error_bit
# Error flag services for sensor_a
- action: set_error_a
then:
- lambda: "id(sensor_a)->status_set_error();"
- component.update: app_warning_bit
- component.update: app_error_bit
- action: clear_error_a
then:
- lambda: "id(sensor_a)->status_clear_error();"
- component.update: app_warning_bit
- component.update: app_error_bit
# Error flag services for sensor_b
- action: set_error_b
then:
- lambda: "id(sensor_b)->status_set_error();"
- component.update: app_warning_bit
- component.update: app_error_bit
- action: clear_error_b
then:
- lambda: "id(sensor_b)->status_clear_error();"
- component.update: app_warning_bit
- component.update: app_error_bit
# Snapshot of the status_led_light's output state for observation.
- action: snapshot_led
then:
- component.update: status_led_writes
- component.update: status_led_last_state
logger:
# Tracks each write to the fake status_led output.
globals:
- id: status_led_write_count
type: uint32_t
restore_value: no
initial_value: "0"
- id: status_led_last_write
type: bool
restore_value: no
initial_value: "false"
# Fake binary output — status_led_light writes to this instead of a pin.
# Every write bumps a counter and records the last value, both of which
# are exposed below so the test can verify status_led_light's loop is
# actually reading App.get_app_state() and responding.
output:
- platform: template
id: fake_status_led
type: binary
write_action:
- globals.set:
id: status_led_write_count
value: !lambda "return id(status_led_write_count) + 1;"
- globals.set:
id: status_led_last_write
value: !lambda "return state;"
# Actual status_led_light component under test.
light:
- platform: status_led
name: Status LED
id: status_led_light_id
output: fake_status_led
sensor:
# Two components that the test will toggle warning/error flags on.
- platform: template
name: Sensor A
id: sensor_a
update_interval: 24h
lambda: return 1.0;
- platform: template
name: Sensor B
id: sensor_b
update_interval: 24h
lambda: return 2.0;
# Expose App.app_state_'s STATUS_LED_WARNING / STATUS_LED_ERROR bits
# as 0.0 / 1.0. force_update ensures every manual component.update
# publishes even if the value is unchanged.
- platform: template
name: App Warning Bit
id: app_warning_bit
update_interval: 24h
force_update: true
lambda: |-
return (App.get_app_state() & STATUS_LED_WARNING) != 0 ? 1.0 : 0.0;
- platform: template
name: App Error Bit
id: app_error_bit
update_interval: 24h
force_update: true
lambda: |-
return (App.get_app_state() & STATUS_LED_ERROR) != 0 ? 1.0 : 0.0;
# Observables for the fake status_led output.
- platform: template
name: Status LED Writes
id: status_led_writes
update_interval: 24h
force_update: true
lambda: return id(status_led_write_count);
- platform: template
name: Status LED Last State
id: status_led_last_state
update_interval: 24h
force_update: true
lambda: |-
return id(status_led_last_write) ? 1.0 : 0.0;
+209
View File
@@ -0,0 +1,209 @@
"""Integration tests for Component::status_set/clear_warning/error propagation.
Verifies that toggling STATUS_LED_WARNING / STATUS_LED_ERROR on individual
components correctly updates the app-wide bits on Application::app_state_,
AND that the status_led_light component actually responds to those bits
by writing to its output (the full chain from component.status_set_warning
→ App.app_state_ → status_led_light.loop() reading get_app_state()).
Exercises the multi-component OR semantics (the app bit stays set while
any component still has the flag, and only clears when the last component
clears its bit), the independence of warning and error, and the actual
status_led_light read of the bits via a fake template output that counts
writes.
"""
from __future__ import annotations
import asyncio
import pytest
from .state_utils import InitialStateHelper, SensorTracker, build_key_to_entity_mapping
from .types import APIClientConnectedFactory, RunCompiledFunction
# Time to let the host-mode main loop run so status_led_light.loop() can
# execute enough iterations to produce measurable write-count changes on
# the fake template output. 300 ms is well above the minimum needed.
STATUS_LED_SETTLE_S = 0.3
@pytest.mark.asyncio
async def test_status_flags(
yaml_config: str,
run_compiled: RunCompiledFunction,
api_client_connected: APIClientConnectedFactory,
) -> None:
async with run_compiled(yaml_config), api_client_connected() as client:
entities, services = await client.list_entities_services()
# Map every custom API service by name for the test to execute.
svc = {s.name: s for s in services}
for name in (
"set_warning_a",
"clear_warning_a",
"set_warning_b",
"clear_warning_b",
"set_error_a",
"clear_error_a",
"set_error_b",
"clear_error_b",
"snapshot_led",
):
assert name in svc, f"service {name} not registered"
# Track every sensor we care about. SensorTracker gives us
# expect(value) / expect_any() futures that resolve when a
# matching state arrives; much simpler than manual bookkeeping.
tracker = SensorTracker(
[
"app_warning_bit",
"app_error_bit",
"status_led_writes",
"status_led_last_state",
]
)
tracker.key_to_sensor.update(
build_key_to_entity_mapping(entities, list(tracker.sensor_states.keys()))
)
# Swallow initial state broadcasts so the test only reacts to
# state changes triggered by our service calls.
initial_state_helper = InitialStateHelper(entities)
client.subscribe_states(initial_state_helper.on_state_wrapper(tracker.on_state))
try:
await initial_state_helper.wait_for_initial_states()
except TimeoutError:
pytest.fail("Timeout waiting for initial states")
async def call(name: str) -> None:
await client.execute_service(svc[name], {})
async def call_and_expect_bits(
service_name: str, *, warning: float, error: float
) -> None:
"""Execute a service and wait for both app bit sensors to match.
Each bit-toggling service calls component.update on both
app_warning_bit and app_error_bit, so both sensors publish.
"""
futures = tracker.expect_all(
{"app_warning_bit": warning, "app_error_bit": error}
)
await call(service_name)
await tracker.await_all(futures)
async def snapshot_led_writes() -> int:
"""Trigger a publish of the fake status_led output counter and return it."""
future = tracker.expect_any("status_led_writes")
await call("snapshot_led")
await tracker.await_change(future, "status_led_writes")
return int(tracker.sensor_states["status_led_writes"][-1])
# ---- Baseline: everything clean ----
await call_and_expect_bits("clear_warning_a", warning=0.0, error=0.0)
# ================================================================
# Part 1 — STATUS_LED_WARNING propagation to App.app_state_
# ================================================================
# Single component set/clear
await call_and_expect_bits("set_warning_a", warning=1.0, error=0.0)
await call_and_expect_bits("clear_warning_a", warning=0.0, error=0.0)
# Multi-component OR: both set, clear A, bit stays (B still has it), clear B, gone
await call_and_expect_bits("set_warning_a", warning=1.0, error=0.0)
await call_and_expect_bits("set_warning_b", warning=1.0, error=0.0)
await call_and_expect_bits("clear_warning_a", warning=1.0, error=0.0)
await call_and_expect_bits("clear_warning_b", warning=0.0, error=0.0)
# Opposite clear order
await call_and_expect_bits("set_warning_a", warning=1.0, error=0.0)
await call_and_expect_bits("set_warning_b", warning=1.0, error=0.0)
await call_and_expect_bits("clear_warning_b", warning=1.0, error=0.0)
await call_and_expect_bits("clear_warning_a", warning=0.0, error=0.0)
# ================================================================
# Part 2 — STATUS_LED_ERROR propagation (same scenarios)
# ================================================================
await call_and_expect_bits("set_error_a", warning=0.0, error=1.0)
await call_and_expect_bits("clear_error_a", warning=0.0, error=0.0)
await call_and_expect_bits("set_error_a", warning=0.0, error=1.0)
await call_and_expect_bits("set_error_b", warning=0.0, error=1.0)
await call_and_expect_bits("clear_error_a", warning=0.0, error=1.0)
await call_and_expect_bits("clear_error_b", warning=0.0, error=0.0)
# ================================================================
# Part 3 — warning and error are independent
# ================================================================
await call_and_expect_bits("set_warning_a", warning=1.0, error=0.0)
await call_and_expect_bits("set_error_b", warning=1.0, error=1.0)
await call_and_expect_bits("clear_warning_a", warning=0.0, error=1.0)
await call_and_expect_bits("clear_error_b", warning=0.0, error=0.0)
# ================================================================
# Part 4 — status_led_light actually reads App.app_state_
# ================================================================
# The fake status_led_light output increments status_led_write_count
# on every write. status_led_light::loop() writes its output on every
# iteration while an error/warning bit is set, so after holding a
# warning for ~300 ms we should see the counter move significantly.
# This is the end-to-end proof that the bits we set above actually
# reach status_led_light and drive its behavior.
count_before_warning = await snapshot_led_writes()
await call_and_expect_bits("set_warning_a", warning=1.0, error=0.0)
# Let status_led_light's loop run long enough to toggle the pin
# several times (it reads get_app_state() every main loop iteration).
await asyncio.sleep(STATUS_LED_SETTLE_S)
count_after_warning = await snapshot_led_writes()
assert count_after_warning > count_before_warning, (
"status_led_light did not respond to STATUS_LED_WARNING being set: "
f"write count stayed at {count_before_warning}{count_after_warning}. "
"The full chain Component::status_set_warning → App.app_state_ → "
"status_led_light::loop reading get_app_state() is broken."
)
await call_and_expect_bits("clear_warning_a", warning=0.0, error=0.0)
# Same check for ERROR
count_before_error = await snapshot_led_writes()
await call_and_expect_bits("set_error_a", warning=0.0, error=1.0)
await asyncio.sleep(STATUS_LED_SETTLE_S)
count_after_error = await snapshot_led_writes()
assert count_after_error > count_before_error, (
"status_led_light did not respond to STATUS_LED_ERROR being set: "
f"write count stayed at {count_before_error}{count_after_error}. "
)
await call_and_expect_bits("clear_error_a", warning=0.0, error=0.0)
# ---- Set → clear → re-set round-trip ----
# After clearing, status_led_light stops writing (steady state).
# Re-setting the flag must make it resume. This guards against a
# future idle optimization (e.g. #15642) where status_led disables
# its own loop when idle: if the re-enable path were broken, the
# second set would not produce writes.
#
# Snapshot AFTER the clear to avoid counting writes that were still
# in-flight from the error-set phase.
count_after_clear = await snapshot_led_writes()
await asyncio.sleep(STATUS_LED_SETTLE_S)
count_after_idle = await snapshot_led_writes()
assert count_after_idle - count_after_clear <= 5, (
"status_led_light kept writing after warning/error was cleared: "
f"count grew from {count_after_clear} to {count_after_idle}. "
"Expected it to stop writing once all status bits were clear."
)
# Re-set warning — writes must resume.
await call_and_expect_bits("set_warning_a", warning=1.0, error=0.0)
await asyncio.sleep(STATUS_LED_SETTLE_S)
count_after_reset = await snapshot_led_writes()
assert count_after_reset > count_after_idle + 5, (
"status_led_light did not resume writing after re-setting "
f"STATUS_LED_WARNING: count went from {count_after_idle} to "
f"{count_after_reset}. If an idle optimization disabled the "
"loop, the re-enable path may be broken."
)
await call_and_expect_bits("clear_warning_a", warning=0.0, error=0.0)