diff --git a/esphome/core/application.cpp b/esphome/core/application.cpp index d03696fbb6..38d3503c2c 100644 --- a/esphome/core/application.cpp +++ b/esphome/core/application.cpp @@ -25,6 +25,10 @@ namespace esphome { static const char *const TAG = "app"; +// Delay after setup() finishes before trimming the scheduler freelist of its post-boot peak. +// 10 s is well past the bulk of post-setup async work (Wi-Fi/MQTT connects, first-read latency). +static constexpr uint32_t SCHEDULER_FREELIST_TRIM_DELAY_MS = 10000; + // Helper function for insertion sort of components by priority // Using insertion sort instead of std::stable_sort saves ~1.3KB of flash // by avoiding template instantiations (std::rotate, std::stable_sort, lambdas) @@ -112,6 +116,9 @@ void Application::setup() { ESP_LOGI(TAG, "setup() finished successfully!"); + // Trim the scheduler freelist of its post-boot peak once startup churn settles. + this->scheduler.set_timeout(this, SCHEDULER_FREELIST_TRIM_DELAY_MS, [this]() { this->scheduler.trim_freelist(); }); + #ifdef USE_SETUP_PRIORITY_OVERRIDE // Clear setup priority overrides to free memory clear_setup_priority_overrides(); diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 57deeab0da..a7c624486d 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -14,18 +14,8 @@ namespace esphome { static const char *const TAG = "scheduler"; -// Memory pool configuration constants -// Pool size of 5 matches typical usage patterns (2-4 active timers) -// - Minimal memory overhead (~250 bytes on ESP32) -// - Sufficient for most configs with a couple sensors/components -// - Still prevents heap fragmentation and allocation stalls -// - Complex setups with many timers will just allocate beyond the pool -// See https://github.com/esphome/backlog/issues/52 -static constexpr size_t MAX_POOL_SIZE = 5; - // Maximum number of logically deleted (cancelled) items before forcing cleanup. -// Set to 5 to match the pool size - when we have as many cancelled items as our -// pool can hold, it's time to clean up and recycle them. +// Empirically chosen to balance cleanup overhead against tombstone accumulation in items_. static constexpr uint32_t MAX_LOGICALLY_DELETED_ITEMS = 5; // max delay to start an interval sequence static constexpr uint32_t MAX_INTERVAL_DELAY = 5000; @@ -165,7 +155,7 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type delay = 1; } - // Take lock early to protect scheduler_item_pool_ access and retry-cancelled check + // Take lock early to protect scheduler_item_pool_head_ access and retry-cancelled check LockGuard guard{this->lock_}; // For retries, check if there's a cancelled timeout first - before allocating an item. @@ -599,7 +589,7 @@ uint32_t HOT Scheduler::call(uint32_t now) { if (now_64 - last_print > 2000) { last_print = now_64; std::vector old_items; - ESP_LOGD(TAG, "Items: count=%zu, pool=%zu, now=%" PRIu64, this->items_.size(), this->scheduler_item_pool_.size(), + ESP_LOGD(TAG, "Items: count=%zu, pool=%zu, now=%" PRIu64, this->items_.size(), this->scheduler_item_pool_size_, now_64); // Cleanup before debug output this->cleanup_(); @@ -894,30 +884,68 @@ bool HOT Scheduler::SchedulerItem::cmp(SchedulerItem *a, SchedulerItem *b) { : (a->next_execution_high_ > b->next_execution_high_); } -// Recycle a SchedulerItem back to the pool for reuse. -// IMPORTANT: Caller must hold the scheduler lock before calling this function. -// This protects scheduler_item_pool_ from concurrent access by other threads -// that may be acquiring items from the pool in set_timer_common_(). +// Recycle a SchedulerItem back to the freelist for reuse. +// IMPORTANT: Caller must hold the scheduler lock. void Scheduler::recycle_item_main_loop_(SchedulerItem *item) { if (item == nullptr) return; - if (this->scheduler_item_pool_.size() < MAX_POOL_SIZE) { - // Clear callback to release captured resources - item->callback = nullptr; - this->scheduler_item_pool_.push_back(item); + item->callback = nullptr; // release captured resources + item->next_free = this->scheduler_item_pool_head_; + this->scheduler_item_pool_head_ = item; + this->scheduler_item_pool_size_++; #ifdef ESPHOME_DEBUG_SCHEDULER - ESP_LOGD(TAG, "Recycled item to pool (pool size now: %zu)", this->scheduler_item_pool_.size()); -#endif - } else { -#ifdef ESPHOME_DEBUG_SCHEDULER - ESP_LOGD(TAG, "Pool full (size: %zu), deleting item", this->scheduler_item_pool_.size()); + ESP_LOGD(TAG, "Recycled item to pool (pool size now: %zu)", this->scheduler_item_pool_size_); #endif +} + +// Shrink a SchedulerItem* vector's capacity to its current size. +// std::vector::shrink_to_fit() is non-binding and our toolchain ignores it; the classic +// swap-with-copy idiom (std::vector(other).swap(other)) instantiates the iterator-range +// constructor which pulls in std::__throw_bad_array_new_length and ~120 B of related +// stdlib RTTI/typeinfo. Build into a temp via reserve + push_back instead, then move-assign: +// reserve uses operator new (throws bad_alloc, already linked) and push_back without growth +// is the noexcept tail path. Move-assign just swaps pointers. +// Out-of-line + noinline so the callers in trim_freelist() share one body. +void __attribute__((noinline)) Scheduler::shrink_scheduler_vector_(std::vector *v) { + if (v->capacity() == v->size()) + return; // already exact, common after a quiet period + std::vector tmp; + tmp.reserve(v->size()); + for (SchedulerItem *p : *v) + tmp.push_back(p); + *v = std::move(tmp); +} + +void Scheduler::trim_freelist() { + LockGuard guard{this->lock_}; + SchedulerItem *item = this->scheduler_item_pool_head_; + size_t freed = 0; + while (item != nullptr) { + SchedulerItem *next = item->next_free; delete item; #ifdef ESPHOME_DEBUG_SCHEDULER this->debug_live_items_--; #endif + item = next; + freed++; } + this->scheduler_item_pool_head_ = nullptr; + this->scheduler_item_pool_size_ = 0; + + // items_/to_add_/defer_queue_ retain their boot-peak vector capacity (vector grows + // by doubling and otherwise keeps the peak). Reclaim that slack as well. + shrink_scheduler_vector_(&this->items_); + shrink_scheduler_vector_(&this->to_add_); +#ifndef ESPHOME_THREAD_SINGLE + shrink_scheduler_vector_(&this->defer_queue_); +#endif + +#ifdef ESPHOME_DEBUG_SCHEDULER + ESP_LOGD(TAG, "Freelist trimmed (%zu items freed)", freed); +#else + (void) freed; +#endif } #ifdef ESPHOME_DEBUG_SCHEDULER @@ -942,14 +970,15 @@ void Scheduler::debug_log_timer_(const SchedulerItem *item, NameType name_type, } #endif /* ESPHOME_DEBUG_SCHEDULER */ -// Helper to get or create a scheduler item from the pool -// IMPORTANT: Caller must hold the scheduler lock before calling this function. +// Pop from freelist or allocate. IMPORTANT: caller must hold the lock and must overwrite +// `item->component` before releasing it -- the popped slot still holds the freelist link. Scheduler::SchedulerItem *Scheduler::get_item_from_pool_locked_() { - if (!this->scheduler_item_pool_.empty()) { - SchedulerItem *item = this->scheduler_item_pool_.back(); - this->scheduler_item_pool_.pop_back(); + if (this->scheduler_item_pool_head_ != nullptr) { + SchedulerItem *item = this->scheduler_item_pool_head_; + this->scheduler_item_pool_head_ = item->next_free; + this->scheduler_item_pool_size_--; #ifdef ESPHOME_DEBUG_SCHEDULER - ESP_LOGD(TAG, "Reused item from pool (pool size now: %zu)", this->scheduler_item_pool_.size()); + ESP_LOGD(TAG, "Reused item from pool (pool size now: %zu)", this->scheduler_item_pool_size_); #endif return item; } @@ -967,7 +996,7 @@ Scheduler::SchedulerItem *Scheduler::get_item_from_pool_locked_() { bool Scheduler::debug_verify_no_leak_() const { // Invariant: every live SchedulerItem must be in exactly one container. // debug_live_items_ tracks allocations minus deletions. - size_t accounted = this->items_.size() + this->to_add_.size() + this->scheduler_item_pool_.size(); + size_t accounted = this->items_.size() + this->to_add_.size() + this->scheduler_item_pool_size_; #ifndef ESPHOME_THREAD_SINGLE accounted += this->defer_queue_.size(); #endif @@ -981,7 +1010,7 @@ bool Scheduler::debug_verify_no_leak_() const { ")", static_cast(this->debug_live_items_), static_cast(accounted), static_cast(this->items_.size()), static_cast(this->to_add_.size()), - static_cast(this->scheduler_item_pool_.size()) + static_cast(this->scheduler_item_pool_size_) #ifndef ESPHOME_THREAD_SINGLE , static_cast(this->defer_queue_.size()) diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index 7a6be6bea9..b640aa86fe 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -132,6 +132,12 @@ class Scheduler { // @return Timestamp of the last item that ran, or `now` unchanged if none ran. uint32_t call(uint32_t now); + // Reclaim memory held by the post-boot peak. Frees every SchedulerItem in the + // recycle freelist and shrinks items_/to_add_/defer_queue_ vector capacity to + // their current sizes (std::vector grows by doubling and otherwise retains the + // peak). Live items in those vectors are preserved. + void trim_freelist(); + // Move items from to_add_ into the main heap. // IMPORTANT: This method should only be called from the main thread (loop task). // Inlined: the fast path (nothing to add) is just an atomic load / empty check. @@ -177,8 +183,12 @@ class Scheduler { protected: struct SchedulerItem { - // Ordered by size to minimize padding - Component *component; + // Ordered by size to minimize padding. + // `component` while live; `next_free` while in scheduler_item_pool_head_ (mutually exclusive). + union { + Component *component; + SchedulerItem *next_free; + }; // Optimized name storage using tagged union - zero heap allocation union { const char *static_name; // For STATIC_STRING (string literals) and SELF_POINTER (caller's `this`) @@ -355,6 +365,10 @@ class Scheduler { SchedulerItem *get_item_from_pool_locked_(); private: + // Out-of-line helper that shrinks a SchedulerItem* vector's capacity to its current + // size. Centralised so trim_freelist() doesn't pay flash cost per call site. + void shrink_scheduler_vector_(std::vector *v); + // Helper to cancel matching items - must be called with lock held. // When find_first=true, stops after the first match (used by set_timer_common_ where // the cancel-before-add invariant guarantees at most one match). @@ -713,19 +727,15 @@ class Scheduler { #endif } - // Memory pool for recycling SchedulerItem objects to reduce heap churn. - // Design decisions: - // - std::vector is used instead of a fixed array because many systems only need 1-2 scheduler items - // - The vector grows dynamically up to MAX_POOL_SIZE (5) only when needed, saving memory on simple setups - // - Pool size of 5 matches typical usage (2-4 timers) while keeping memory overhead low (~250 bytes on ESP32) - // - The pool significantly reduces heap fragmentation which is critical because heap allocation/deallocation - // can stall the entire system, causing timing issues and dropped events for any components that need - // to synchronize between tasks (see https://github.com/esphome/backlog/issues/52) - std::vector scheduler_item_pool_; + // Intrusive freelist threaded through SchedulerItem::next_free. Unbounded so it quiesces at the + // app's concurrent-timer high-water mark; the previous fixed cap caused steady-state new/delete + // churn on devices with many timers (see https://github.com/esphome/backlog/issues/52). + SchedulerItem *scheduler_item_pool_head_{nullptr}; + size_t scheduler_item_pool_size_{0}; #ifdef ESPHOME_DEBUG_SCHEDULER // Leak detection: tracks total live SchedulerItem allocations. - // Invariant: debug_live_items_ == items_.size() + to_add_.size() + defer_queue_.size() + scheduler_item_pool_.size() + // Invariant: debug_live_items_ == items_.size() + to_add_.size() + defer_queue_.size() + scheduler_item_pool_size_ // Verified periodically in call() to catch leaks early. size_t debug_live_items_{0}; diff --git a/tests/benchmarks/core/bench_scheduler.cpp b/tests/benchmarks/core/bench_scheduler.cpp index 214fe0e4b8..32bbc2de88 100644 --- a/tests/benchmarks/core/bench_scheduler.cpp +++ b/tests/benchmarks/core/bench_scheduler.cpp @@ -101,8 +101,8 @@ static void Scheduler_SetTimeout(benchmark::State &state) { Component dummy_component; // Register 3 timeouts then call() — realistic worst case where multiple - // components schedule in the same loop iteration. Keeps item count within - // the recycling pool (MAX_POOL_SIZE=5) to avoid spurious malloc/free. + // components schedule in the same loop iteration. warm_pool fills the + // freelist so acquire/recycle never falls back to malloc. static constexpr int kBatchSize = 3; static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize"); warm_pool(scheduler, &dummy_component, kBatchSize, 1000); @@ -209,9 +209,9 @@ static void Scheduler_SetTimeout_ExceedPool(benchmark::State &state) { Scheduler scheduler; Component dummy_component; - // Register 10 timeouts then call() — exceeds MAX_POOL_SIZE=5 to measure - // the performance cliff when the recycling pool is exhausted and items - // must be malloc'd/freed. + // Register 10 timeouts then call() — larger working set than the 3-item + // batches above. With the unbounded freelist, warm_pool preallocates 10 + // items so this measures steady-state, not malloc cliff. static constexpr int kBatchSize = 10; static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize"); warm_pool(scheduler, &dummy_component, kBatchSize, 1000); diff --git a/tests/integration/fixtures/scheduler_pool.yaml b/tests/integration/fixtures/scheduler_pool.yaml index 5389125188..989c1535b0 100644 --- a/tests/integration/fixtures/scheduler_pool.yaml +++ b/tests/integration/fixtures/scheduler_pool.yaml @@ -221,14 +221,10 @@ script: - id: test_full_pool_reuse then: - lambda: |- - ESP_LOGI("test", "Phase 6: Testing pool size limits after Phase 5 items complete"); + ESP_LOGI("test", "Phase 6: Testing pool reuse after Phase 5 items complete"); - // At this point, all Phase 5 timeouts should have completed and been recycled. - // The pool should be at its maximum size (5). - // Creating 10 new items tests that: - // - First 5 items reuse from the pool - // - Remaining 5 items allocate new (pool empty) - // - Pool doesn't grow beyond MAX_POOL_SIZE of 5 + // Phase 5 timeouts have completed and been recycled. The freelist is unbounded; + // creating 10 new items reuses from it and only allocates fresh when empty. auto *component = id(test_sensor); int full_reuse_count = 10; diff --git a/tests/integration/test_scheduler_pool.py b/tests/integration/test_scheduler_pool.py index 021917cc25..cc25190e30 100644 --- a/tests/integration/test_scheduler_pool.py +++ b/tests/integration/test_scheduler_pool.py @@ -180,16 +180,22 @@ async def test_scheduler_pool( # Verify pool behavior assert pool_recycle_count > 0, "Should have recycled items to pool" - # Check pool metrics - if pool_recycle_count > 0: - max_pool_size = 0 - for line in log_lines: - if match := recycle_pattern.search(line): - size = int(match.group(1)) - max_pool_size = max(max_pool_size, size) + # Pool is unbounded; the cap was the source of the churn it was meant to prevent. + assert pool_full_count == 0, ( + f"Pool should never report full (got {pool_full_count})" + ) - # Pool can grow up to its maximum of 5 - assert max_pool_size <= 5, f"Pool grew beyond maximum ({max_pool_size})" + # Verify the pool actually grew past the old MAX_POOL_SIZE=5 cap. + # Phase 5 + Phase 6 schedule 8 + 10 same-component timeouts respectively, so the + # observed peak should comfortably exceed 5. Without this lower-bound check, a + # silent regression that re-introduced a small cap could pass the test above. + max_pool_size = 0 + for line in log_lines: + if match := recycle_pattern.search(line): + max_pool_size = max(max_pool_size, int(match.group(1))) + assert max_pool_size > 5, ( + f"Pool should grow past the old cap of 5; observed peak {max_pool_size}" + ) # Log summary for debugging print("\nScheduler Pool Test Summary (Python Orchestrated):")