[core] Replace scheduler pool vector with unbounded intrusive freelist (#16172)

This commit is contained in:
J. Nick Koston
2026-05-05 18:26:19 -05:00
committed by GitHub
parent f30ad588ea
commit 39b2b901f7
6 changed files with 115 additions and 67 deletions
+7
View File
@@ -25,6 +25,10 @@ namespace esphome {
static const char *const TAG = "app";
// Delay after setup() finishes before trimming the scheduler freelist of its post-boot peak.
// 10 s is well past the bulk of post-setup async work (Wi-Fi/MQTT connects, first-read latency).
static constexpr uint32_t SCHEDULER_FREELIST_TRIM_DELAY_MS = 10000;
// Helper function for insertion sort of components by priority
// Using insertion sort instead of std::stable_sort saves ~1.3KB of flash
// by avoiding template instantiations (std::rotate, std::stable_sort, lambdas)
@@ -112,6 +116,9 @@ void Application::setup() {
ESP_LOGI(TAG, "setup() finished successfully!");
// Trim the scheduler freelist of its post-boot peak once startup churn settles.
this->scheduler.set_timeout(this, SCHEDULER_FREELIST_TRIM_DELAY_MS, [this]() { this->scheduler.trim_freelist(); });
#ifdef USE_SETUP_PRIORITY_OVERRIDE
// Clear setup priority overrides to free memory
clear_setup_priority_overrides();
+63 -34
View File
@@ -14,18 +14,8 @@ namespace esphome {
static const char *const TAG = "scheduler";
// Memory pool configuration constants
// Pool size of 5 matches typical usage patterns (2-4 active timers)
// - Minimal memory overhead (~250 bytes on ESP32)
// - Sufficient for most configs with a couple sensors/components
// - Still prevents heap fragmentation and allocation stalls
// - Complex setups with many timers will just allocate beyond the pool
// See https://github.com/esphome/backlog/issues/52
static constexpr size_t MAX_POOL_SIZE = 5;
// Maximum number of logically deleted (cancelled) items before forcing cleanup.
// Set to 5 to match the pool size - when we have as many cancelled items as our
// pool can hold, it's time to clean up and recycle them.
// Empirically chosen to balance cleanup overhead against tombstone accumulation in items_.
static constexpr uint32_t MAX_LOGICALLY_DELETED_ITEMS = 5;
// Maximum initial delay before the first execution of an interval sequence.
static constexpr uint32_t MAX_INTERVAL_DELAY = 5000;
@@ -165,7 +155,7 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type
delay = 1;
}
// Take lock early to protect scheduler_item_pool_ access and retry-cancelled check
// Take lock early to protect scheduler_item_pool_head_ access and retry-cancelled check
LockGuard guard{this->lock_};
// For retries, check if there's a cancelled timeout first - before allocating an item.
@@ -599,7 +589,7 @@ uint32_t HOT Scheduler::call(uint32_t now) {
if (now_64 - last_print > 2000) {
last_print = now_64;
std::vector<SchedulerItem *> old_items;
ESP_LOGD(TAG, "Items: count=%zu, pool=%zu, now=%" PRIu64, this->items_.size(), this->scheduler_item_pool_.size(),
ESP_LOGD(TAG, "Items: count=%zu, pool=%zu, now=%" PRIu64, this->items_.size(), this->scheduler_item_pool_size_,
now_64);
// Cleanup before debug output
this->cleanup_();
@@ -894,30 +884,68 @@ bool HOT Scheduler::SchedulerItem::cmp(SchedulerItem *a, SchedulerItem *b) {
: (a->next_execution_high_ > b->next_execution_high_);
}
// Recycle a SchedulerItem back to the pool for reuse.
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
// This protects scheduler_item_pool_ from concurrent access by other threads
// that may be acquiring items from the pool in set_timer_common_().
// Recycle a SchedulerItem back to the freelist for reuse.
// IMPORTANT: Caller must hold the scheduler lock.
void Scheduler::recycle_item_main_loop_(SchedulerItem *item) {
if (item == nullptr)
return;
if (this->scheduler_item_pool_.size() < MAX_POOL_SIZE) {
// Clear callback to release captured resources
item->callback = nullptr;
this->scheduler_item_pool_.push_back(item);
item->callback = nullptr; // release captured resources
item->next_free = this->scheduler_item_pool_head_;
this->scheduler_item_pool_head_ = item;
this->scheduler_item_pool_size_++;
#ifdef ESPHOME_DEBUG_SCHEDULER
ESP_LOGD(TAG, "Recycled item to pool (pool size now: %zu)", this->scheduler_item_pool_.size());
#endif
} else {
#ifdef ESPHOME_DEBUG_SCHEDULER
ESP_LOGD(TAG, "Pool full (size: %zu), deleting item", this->scheduler_item_pool_.size());
ESP_LOGD(TAG, "Recycled item to pool (pool size now: %zu)", this->scheduler_item_pool_size_);
#endif
}
// Shrink a SchedulerItem* vector's capacity to its current size.
// std::vector::shrink_to_fit() is non-binding and our toolchain ignores it; the classic
// swap-with-copy idiom (std::vector<T>(other).swap(other)) instantiates the iterator-range
// constructor which pulls in std::__throw_bad_array_new_length and ~120 B of related
// stdlib RTTI/typeinfo. Build into a temp via reserve + push_back instead, then move-assign:
// reserve uses operator new (throws bad_alloc, already linked) and push_back without growth
// is the noexcept tail path. Move-assign just swaps pointers.
// Out-of-line + noinline so the callers in trim_freelist() share one body.
// Compact a SchedulerItem* vector so capacity == size, reclaiming slack left by
// vector's doubling growth. Deliberately built via reserve() + push_back() into a
// temporary rather than shrink_to_fit() (non-binding; ignored by this toolchain)
// or the swap-with-copy idiom (its iterator-range constructor drags in
// std::__throw_bad_array_new_length plus ~120 B of RTTI/typeinfo). reserve() only
// needs operator new (bad_alloc, already linked) and push_back without growth is
// the noexcept tail path; the final move-assign is a pointer swap.
// Kept out-of-line and noinline so all trim_freelist() call sites share one body.
void __attribute__((noinline)) Scheduler::shrink_scheduler_vector_(std::vector<SchedulerItem *> *v) {
  // Nothing to reclaim — typical after a quiet period with no growth since the last trim.
  if (v->size() == v->capacity())
    return;
  std::vector<SchedulerItem *> compact;
  compact.reserve(v->size());
  for (SchedulerItem *entry : *v) {
    compact.push_back(entry);
  }
  *v = std::move(compact);
}
// Reclaim the post-boot memory peak: free every SchedulerItem parked on the
// recycle freelist and shrink the scheduler's vectors back to their live size.
// Thread-safe: takes the scheduler lock for the whole operation, so it may race
// neither recycle_item_main_loop_() nor get_item_from_pool_locked_().
// NOTE(review): this is scheduled as a set_timeout callback (Application::setup),
// so it executes from inside Scheduler::call(). Shrinking items_ reallocates its
// storage — confirm call() holds no iterators/pointers into items_ across
// callback execution. TODO confirm against call()'s implementation.
void Scheduler::trim_freelist() {
  LockGuard guard{this->lock_};
  // Walk the intrusive freelist, deleting every node. Grab next_free before
  // delete — the link lives inside the item being freed.
  SchedulerItem *item = this->scheduler_item_pool_head_;
  size_t freed = 0;
  while (item != nullptr) {
    SchedulerItem *next = item->next_free;
    delete item;
#ifdef ESPHOME_DEBUG_SCHEDULER
    // Leak accounting: freelist nodes count as live allocations until deleted.
    this->debug_live_items_--;
#endif
    item = next;
    freed++;
  }
  // Reset head and size together so the freelist invariant
  // (size_ == number of nodes reachable from head_) holds under the lock.
  this->scheduler_item_pool_head_ = nullptr;
  this->scheduler_item_pool_size_ = 0;
  // items_/to_add_/defer_queue_ retain their boot-peak vector capacity (vector grows
  // by doubling and otherwise keeps the peak). Reclaim that slack as well.
  shrink_scheduler_vector_(&this->items_);
  shrink_scheduler_vector_(&this->to_add_);
#ifndef ESPHOME_THREAD_SINGLE
  // defer_queue_ only exists on multi-threaded platforms.
  shrink_scheduler_vector_(&this->defer_queue_);
#endif
#ifdef ESPHOME_DEBUG_SCHEDULER
  ESP_LOGD(TAG, "Freelist trimmed (%zu items freed)", freed);
#else
  // Silence -Wunused-variable when debug logging is compiled out.
  (void) freed;
#endif
}
#ifdef ESPHOME_DEBUG_SCHEDULER
@@ -942,14 +970,15 @@ void Scheduler::debug_log_timer_(const SchedulerItem *item, NameType name_type,
}
#endif /* ESPHOME_DEBUG_SCHEDULER */
// Helper to get or create a scheduler item from the pool
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
// Pop from freelist or allocate. IMPORTANT: caller must hold the lock and must overwrite
// `item->component` before releasing it -- the popped slot still holds the freelist link.
Scheduler::SchedulerItem *Scheduler::get_item_from_pool_locked_() {
if (!this->scheduler_item_pool_.empty()) {
SchedulerItem *item = this->scheduler_item_pool_.back();
this->scheduler_item_pool_.pop_back();
if (this->scheduler_item_pool_head_ != nullptr) {
SchedulerItem *item = this->scheduler_item_pool_head_;
this->scheduler_item_pool_head_ = item->next_free;
this->scheduler_item_pool_size_--;
#ifdef ESPHOME_DEBUG_SCHEDULER
ESP_LOGD(TAG, "Reused item from pool (pool size now: %zu)", this->scheduler_item_pool_.size());
ESP_LOGD(TAG, "Reused item from pool (pool size now: %zu)", this->scheduler_item_pool_size_);
#endif
return item;
}
@@ -967,7 +996,7 @@ Scheduler::SchedulerItem *Scheduler::get_item_from_pool_locked_() {
bool Scheduler::debug_verify_no_leak_() const {
// Invariant: every live SchedulerItem must be in exactly one container.
// debug_live_items_ tracks allocations minus deletions.
size_t accounted = this->items_.size() + this->to_add_.size() + this->scheduler_item_pool_.size();
size_t accounted = this->items_.size() + this->to_add_.size() + this->scheduler_item_pool_size_;
#ifndef ESPHOME_THREAD_SINGLE
accounted += this->defer_queue_.size();
#endif
@@ -981,7 +1010,7 @@ bool Scheduler::debug_verify_no_leak_() const {
")",
static_cast<uint32_t>(this->debug_live_items_), static_cast<uint32_t>(accounted),
static_cast<uint32_t>(this->items_.size()), static_cast<uint32_t>(this->to_add_.size()),
static_cast<uint32_t>(this->scheduler_item_pool_.size())
static_cast<uint32_t>(this->scheduler_item_pool_size_)
#ifndef ESPHOME_THREAD_SINGLE
,
static_cast<uint32_t>(this->defer_queue_.size())
+22 -12
View File
@@ -132,6 +132,12 @@ class Scheduler {
// @return Timestamp of the last item that ran, or `now` unchanged if none ran.
uint32_t call(uint32_t now);
// Reclaim memory held by the post-boot peak. Frees every SchedulerItem in the
// recycle freelist and shrinks items_/to_add_/defer_queue_ vector capacity to
// their current sizes (std::vector grows by doubling and otherwise retains the
// peak). Live items in those vectors are preserved.
void trim_freelist();
// Move items from to_add_ into the main heap.
// IMPORTANT: This method should only be called from the main thread (loop task).
// Inlined: the fast path (nothing to add) is just an atomic load / empty check.
@@ -177,8 +183,12 @@ class Scheduler {
protected:
struct SchedulerItem {
// Ordered by size to minimize padding
Component *component;
// Ordered by size to minimize padding.
// `component` while live; `next_free` while in scheduler_item_pool_head_ (mutually exclusive).
union {
Component *component;
SchedulerItem *next_free;
};
// Optimized name storage using tagged union - zero heap allocation
union {
const char *static_name; // For STATIC_STRING (string literals) and SELF_POINTER (caller's `this`)
@@ -355,6 +365,10 @@ class Scheduler {
SchedulerItem *get_item_from_pool_locked_();
private:
// Out-of-line helper that shrinks a SchedulerItem* vector's capacity to its current
// size. Centralised so trim_freelist() doesn't pay flash cost per call site.
void shrink_scheduler_vector_(std::vector<SchedulerItem *> *v);
// Helper to cancel matching items - must be called with lock held.
// When find_first=true, stops after the first match (used by set_timer_common_ where
// the cancel-before-add invariant guarantees at most one match).
@@ -713,19 +727,15 @@ class Scheduler {
#endif
}
// Memory pool for recycling SchedulerItem objects to reduce heap churn.
// Design decisions:
// - std::vector is used instead of a fixed array because many systems only need 1-2 scheduler items
// - The vector grows dynamically up to MAX_POOL_SIZE (5) only when needed, saving memory on simple setups
// - Pool size of 5 matches typical usage (2-4 timers) while keeping memory overhead low (~250 bytes on ESP32)
// - The pool significantly reduces heap fragmentation which is critical because heap allocation/deallocation
// can stall the entire system, causing timing issues and dropped events for any components that need
// to synchronize between tasks (see https://github.com/esphome/backlog/issues/52)
std::vector<SchedulerItem *> scheduler_item_pool_;
// Intrusive freelist threaded through SchedulerItem::next_free. Unbounded so it quiesces at the
// app's concurrent-timer high-water mark; the previous fixed cap caused steady-state new/delete
// churn on devices with many timers (see https://github.com/esphome/backlog/issues/52).
SchedulerItem *scheduler_item_pool_head_{nullptr};
size_t scheduler_item_pool_size_{0};
#ifdef ESPHOME_DEBUG_SCHEDULER
// Leak detection: tracks total live SchedulerItem allocations.
// Invariant: debug_live_items_ == items_.size() + to_add_.size() + defer_queue_.size() + scheduler_item_pool_.size()
// Invariant: debug_live_items_ == items_.size() + to_add_.size() + defer_queue_.size() + scheduler_item_pool_size_
// Verified periodically in call() to catch leaks early.
size_t debug_live_items_{0};
+5 -5
View File
@@ -101,8 +101,8 @@ static void Scheduler_SetTimeout(benchmark::State &state) {
Component dummy_component;
// Register 3 timeouts then call() — realistic worst case where multiple
// components schedule in the same loop iteration. Keeps item count within
// the recycling pool (MAX_POOL_SIZE=5) to avoid spurious malloc/free.
// components schedule in the same loop iteration. warm_pool fills the
// freelist so acquire/recycle never falls back to malloc.
static constexpr int kBatchSize = 3;
static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize");
warm_pool(scheduler, &dummy_component, kBatchSize, 1000);
@@ -209,9 +209,9 @@ static void Scheduler_SetTimeout_ExceedPool(benchmark::State &state) {
Scheduler scheduler;
Component dummy_component;
// Register 10 timeouts then call() — exceeds MAX_POOL_SIZE=5 to measure
// the performance cliff when the recycling pool is exhausted and items
// must be malloc'd/freed.
// Register 10 timeouts then call() — larger working set than the 3-item
// batches above. With the unbounded freelist, warm_pool preallocates 10
// items so this measures steady-state, not malloc cliff.
static constexpr int kBatchSize = 10;
static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize");
warm_pool(scheduler, &dummy_component, kBatchSize, 1000);
@@ -221,14 +221,10 @@ script:
- id: test_full_pool_reuse
then:
- lambda: |-
ESP_LOGI("test", "Phase 6: Testing pool size limits after Phase 5 items complete");
ESP_LOGI("test", "Phase 6: Testing pool reuse after Phase 5 items complete");
// At this point, all Phase 5 timeouts should have completed and been recycled.
// The pool should be at its maximum size (5).
// Creating 10 new items tests that:
// - First 5 items reuse from the pool
// - Remaining 5 items allocate new (pool empty)
// - Pool doesn't grow beyond MAX_POOL_SIZE of 5
// Phase 5 timeouts have completed and been recycled. The freelist is unbounded;
// creating 10 new items reuses from it and only allocates fresh when empty.
auto *component = id(test_sensor);
int full_reuse_count = 10;
+15 -9
View File
@@ -180,16 +180,22 @@ async def test_scheduler_pool(
# Verify pool behavior
assert pool_recycle_count > 0, "Should have recycled items to pool"
# Check pool metrics
if pool_recycle_count > 0:
max_pool_size = 0
for line in log_lines:
if match := recycle_pattern.search(line):
size = int(match.group(1))
max_pool_size = max(max_pool_size, size)
# Pool is unbounded; the cap was the source of the churn it was meant to prevent.
assert pool_full_count == 0, (
f"Pool should never report full (got {pool_full_count})"
)
# Pool can grow up to its maximum of 5
assert max_pool_size <= 5, f"Pool grew beyond maximum ({max_pool_size})"
# Verify the pool actually grew past the old MAX_POOL_SIZE=5 cap.
# Phase 5 + Phase 6 schedule 8 + 10 same-component timeouts respectively, so the
# observed peak should comfortably exceed 5. Without this lower-bound check, a
# silent regression that re-introduced a small cap could pass the test above.
max_pool_size = 0
for line in log_lines:
if match := recycle_pattern.search(line):
max_pool_size = max(max_pool_size, int(match.group(1)))
assert max_pool_size > 5, (
f"Pool should grow past the old cap of 5; observed peak {max_pool_size}"
)
# Log summary for debugging
print("\nScheduler Pool Test Summary (Python Orchestrated):")