mirror of
https://github.com/esphome/esphome.git
synced 2026-05-19 03:01:49 +08:00
[core] Replace scheduler pool vector with unbounded intrusive freelist
The fixed MAX_POOL_SIZE=5 cap was the source of the heap churn the pool was meant to prevent: any device with more than 5 concurrent timers (e.g. a board with 30+ LD2450 sensors) hit a steady-state oscillation of recycle->delete and acquire->new on every loop iteration. Replace std::vector<SchedulerItem*> with a singly-linked freelist threaded through SchedulerItem::next_free, which shares storage with `component` via an anonymous union (zero per-item overhead -- the component pointer is dead while pooled). Drop the cap entirely: the freelist quiesces at the application's natural concurrent-timer high-water mark, which is the working set the device already needs while those timers are active. No std::vector means no growth-doubling slack and no realloc copies during warm-up. The caller of get_item_from_pool_locked_() must overwrite item->component before unlocking (already true at the sole call site), because the popped slot still holds the freelist link; nullptr remains a valid live `component` value for SELF_POINTER items, so we cannot pre-clear it.
This commit is contained in:
+19
-39
@@ -14,18 +14,8 @@ namespace esphome {
|
||||
|
||||
static const char *const TAG = "scheduler";
|
||||
|
||||
// Memory pool configuration constants
|
||||
// Pool size of 5 matches typical usage patterns (2-4 active timers)
|
||||
// - Minimal memory overhead (~250 bytes on ESP32)
|
||||
// - Sufficient for most configs with a couple sensors/components
|
||||
// - Still prevents heap fragmentation and allocation stalls
|
||||
// - Complex setups with many timers will just allocate beyond the pool
|
||||
// See https://github.com/esphome/backlog/issues/52
|
||||
static constexpr size_t MAX_POOL_SIZE = 5;
|
||||
|
||||
// Maximum number of logically deleted (cancelled) items before forcing cleanup.
|
||||
// Set to 5 to match the pool size - when we have as many cancelled items as our
|
||||
// pool can hold, it's time to clean up and recycle them.
|
||||
// Empirically chosen to balance cleanup overhead against tombstone accumulation in items_.
|
||||
static constexpr uint32_t MAX_LOGICALLY_DELETED_ITEMS = 5;
|
||||
// max delay to start an interval sequence
|
||||
static constexpr uint32_t MAX_INTERVAL_DELAY = 5000;
|
||||
@@ -165,7 +155,7 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type
|
||||
delay = 1;
|
||||
}
|
||||
|
||||
// Take lock early to protect scheduler_item_pool_ access and retry-cancelled check
|
||||
// Take lock early to protect scheduler_item_pool_head_ access and retry-cancelled check
|
||||
LockGuard guard{this->lock_};
|
||||
|
||||
// For retries, check if there's a cancelled timeout first - before allocating an item.
|
||||
@@ -599,7 +589,7 @@ uint32_t HOT Scheduler::call(uint32_t now) {
|
||||
if (now_64 - last_print > 2000) {
|
||||
last_print = now_64;
|
||||
std::vector<SchedulerItem *> old_items;
|
||||
ESP_LOGD(TAG, "Items: count=%zu, pool=%zu, now=%" PRIu64, this->items_.size(), this->scheduler_item_pool_.size(),
|
||||
ESP_LOGD(TAG, "Items: count=%zu, pool=%zu, now=%" PRIu64, this->items_.size(), this->scheduler_item_pool_size_,
|
||||
now_64);
|
||||
// Cleanup before debug output
|
||||
this->cleanup_();
|
||||
@@ -894,30 +884,19 @@ bool HOT Scheduler::SchedulerItem::cmp(SchedulerItem *a, SchedulerItem *b) {
|
||||
: (a->next_execution_high_ > b->next_execution_high_);
|
||||
}
|
||||
|
||||
// Recycle a SchedulerItem back to the pool for reuse.
|
||||
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
|
||||
// This protects scheduler_item_pool_ from concurrent access by other threads
|
||||
// that may be acquiring items from the pool in set_timer_common_().
|
||||
// Recycle a SchedulerItem back to the freelist for reuse.
|
||||
// IMPORTANT: Caller must hold the scheduler lock.
|
||||
void Scheduler::recycle_item_main_loop_(SchedulerItem *item) {
|
||||
if (item == nullptr)
|
||||
return;
|
||||
|
||||
if (this->scheduler_item_pool_.size() < MAX_POOL_SIZE) {
|
||||
// Clear callback to release captured resources
|
||||
item->callback = nullptr;
|
||||
this->scheduler_item_pool_.push_back(item);
|
||||
item->callback = nullptr; // release captured resources
|
||||
item->next_free = this->scheduler_item_pool_head_;
|
||||
this->scheduler_item_pool_head_ = item;
|
||||
this->scheduler_item_pool_size_++;
|
||||
#ifdef ESPHOME_DEBUG_SCHEDULER
|
||||
ESP_LOGD(TAG, "Recycled item to pool (pool size now: %zu)", this->scheduler_item_pool_.size());
|
||||
ESP_LOGD(TAG, "Recycled item to pool (pool size now: %zu)", this->scheduler_item_pool_size_);
|
||||
#endif
|
||||
} else {
|
||||
#ifdef ESPHOME_DEBUG_SCHEDULER
|
||||
ESP_LOGD(TAG, "Pool full (size: %zu), deleting item", this->scheduler_item_pool_.size());
|
||||
#endif
|
||||
delete item;
|
||||
#ifdef ESPHOME_DEBUG_SCHEDULER
|
||||
this->debug_live_items_--;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ESPHOME_DEBUG_SCHEDULER
|
||||
@@ -942,14 +921,15 @@ void Scheduler::debug_log_timer_(const SchedulerItem *item, NameType name_type,
|
||||
}
|
||||
#endif /* ESPHOME_DEBUG_SCHEDULER */
|
||||
|
||||
// Helper to get or create a scheduler item from the pool
|
||||
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
|
||||
// Pop from freelist or allocate. IMPORTANT: caller must hold the lock and must overwrite
|
||||
// `item->component` before releasing the lock -- the popped slot still holds the freelist link.
|
||||
Scheduler::SchedulerItem *Scheduler::get_item_from_pool_locked_() {
|
||||
if (!this->scheduler_item_pool_.empty()) {
|
||||
SchedulerItem *item = this->scheduler_item_pool_.back();
|
||||
this->scheduler_item_pool_.pop_back();
|
||||
if (this->scheduler_item_pool_head_ != nullptr) {
|
||||
SchedulerItem *item = this->scheduler_item_pool_head_;
|
||||
this->scheduler_item_pool_head_ = item->next_free;
|
||||
this->scheduler_item_pool_size_--;
|
||||
#ifdef ESPHOME_DEBUG_SCHEDULER
|
||||
ESP_LOGD(TAG, "Reused item from pool (pool size now: %zu)", this->scheduler_item_pool_.size());
|
||||
ESP_LOGD(TAG, "Reused item from pool (pool size now: %zu)", this->scheduler_item_pool_size_);
|
||||
#endif
|
||||
return item;
|
||||
}
|
||||
@@ -967,7 +947,7 @@ Scheduler::SchedulerItem *Scheduler::get_item_from_pool_locked_() {
|
||||
bool Scheduler::debug_verify_no_leak_() const {
|
||||
// Invariant: every live SchedulerItem must be in exactly one container.
|
||||
// debug_live_items_ tracks allocations minus deletions.
|
||||
size_t accounted = this->items_.size() + this->to_add_.size() + this->scheduler_item_pool_.size();
|
||||
size_t accounted = this->items_.size() + this->to_add_.size() + this->scheduler_item_pool_size_;
|
||||
#ifndef ESPHOME_THREAD_SINGLE
|
||||
accounted += this->defer_queue_.size();
|
||||
#endif
|
||||
@@ -981,7 +961,7 @@ bool Scheduler::debug_verify_no_leak_() const {
|
||||
")",
|
||||
static_cast<uint32_t>(this->debug_live_items_), static_cast<uint32_t>(accounted),
|
||||
static_cast<uint32_t>(this->items_.size()), static_cast<uint32_t>(this->to_add_.size()),
|
||||
static_cast<uint32_t>(this->scheduler_item_pool_.size())
|
||||
static_cast<uint32_t>(this->scheduler_item_pool_size_)
|
||||
#ifndef ESPHOME_THREAD_SINGLE
|
||||
,
|
||||
static_cast<uint32_t>(this->defer_queue_.size())
|
||||
|
||||
+12
-12
@@ -177,8 +177,12 @@ class Scheduler {
|
||||
|
||||
protected:
|
||||
struct SchedulerItem {
|
||||
// Ordered by size to minimize padding
|
||||
Component *component;
|
||||
// Ordered by size to minimize padding.
|
||||
// `component` while live; `next_free` while on the freelist rooted at scheduler_item_pool_head_ (mutually exclusive).
|
||||
union {
|
||||
Component *component;
|
||||
SchedulerItem *next_free;
|
||||
};
|
||||
// Optimized name storage using tagged union - zero heap allocation
|
||||
union {
|
||||
const char *static_name; // For STATIC_STRING (string literals) and SELF_POINTER (caller's `this`)
|
||||
@@ -713,19 +717,15 @@ class Scheduler {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Memory pool for recycling SchedulerItem objects to reduce heap churn.
|
||||
// Design decisions:
|
||||
// - std::vector is used instead of a fixed array because many systems only need 1-2 scheduler items
|
||||
// - The vector grows dynamically up to MAX_POOL_SIZE (5) only when needed, saving memory on simple setups
|
||||
// - Pool size of 5 matches typical usage (2-4 timers) while keeping memory overhead low (~250 bytes on ESP32)
|
||||
// - The pool significantly reduces heap fragmentation which is critical because heap allocation/deallocation
|
||||
// can stall the entire system, causing timing issues and dropped events for any components that need
|
||||
// to synchronize between tasks (see https://github.com/esphome/backlog/issues/52)
|
||||
std::vector<SchedulerItem *> scheduler_item_pool_;
|
||||
// Intrusive freelist threaded through SchedulerItem::next_free. Unbounded so it quiesces at the
|
||||
// app's concurrent-timer high-water mark; the previous fixed cap caused steady-state new/delete
|
||||
// churn on devices with many timers (see https://github.com/esphome/backlog/issues/52).
|
||||
SchedulerItem *scheduler_item_pool_head_{nullptr};
|
||||
size_t scheduler_item_pool_size_{0};
|
||||
|
||||
#ifdef ESPHOME_DEBUG_SCHEDULER
|
||||
// Leak detection: tracks total live SchedulerItem allocations.
|
||||
// Invariant: debug_live_items_ == items_.size() + to_add_.size() + defer_queue_.size() + scheduler_item_pool_.size()
|
||||
// Invariant: debug_live_items_ == items_.size() + to_add_.size() + scheduler_item_pool_size_
// (+ defer_queue_.size() when ESPHOME_THREAD_SINGLE is not defined)
|
||||
// Verified periodically in call() to catch leaks early.
|
||||
size_t debug_live_items_{0};
|
||||
|
||||
|
||||
@@ -101,8 +101,8 @@ static void Scheduler_SetTimeout(benchmark::State &state) {
|
||||
Component dummy_component;
|
||||
|
||||
// Register 3 timeouts then call() — realistic worst case where multiple
|
||||
// components schedule in the same loop iteration. Keeps item count within
|
||||
// the recycling pool (MAX_POOL_SIZE=5) to avoid spurious malloc/free.
|
||||
// components schedule in the same loop iteration. warm_pool fills the
|
||||
// freelist so acquire/recycle never falls back to malloc.
|
||||
static constexpr int kBatchSize = 3;
|
||||
static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize");
|
||||
warm_pool(scheduler, &dummy_component, kBatchSize, 1000);
|
||||
@@ -209,9 +209,9 @@ static void Scheduler_SetTimeout_ExceedPool(benchmark::State &state) {
|
||||
Scheduler scheduler;
|
||||
Component dummy_component;
|
||||
|
||||
// Register 10 timeouts then call() — exceeds MAX_POOL_SIZE=5 to measure
|
||||
// the performance cliff when the recycling pool is exhausted and items
|
||||
// must be malloc'd/freed.
|
||||
// Register 10 timeouts then call() — larger working set than the 3-item
|
||||
// batches above. With the unbounded freelist, warm_pool preallocates 10
|
||||
// items so this measures steady-state, not malloc cliff.
|
||||
static constexpr int kBatchSize = 10;
|
||||
static_assert(kInnerIterations % kBatchSize == 0, "kInnerIterations must be divisible by kBatchSize");
|
||||
warm_pool(scheduler, &dummy_component, kBatchSize, 1000);
|
||||
|
||||
@@ -221,14 +221,10 @@ script:
|
||||
- id: test_full_pool_reuse
|
||||
then:
|
||||
- lambda: |-
|
||||
ESP_LOGI("test", "Phase 6: Testing pool size limits after Phase 5 items complete");
|
||||
ESP_LOGI("test", "Phase 6: Testing pool reuse after Phase 5 items complete");
|
||||
|
||||
// At this point, all Phase 5 timeouts should have completed and been recycled.
|
||||
// The pool should be at its maximum size (5).
|
||||
// Creating 10 new items tests that:
|
||||
// - First 5 items reuse from the pool
|
||||
// - Remaining 5 items allocate new (pool empty)
|
||||
// - Pool doesn't grow beyond MAX_POOL_SIZE of 5
|
||||
// Phase 5 timeouts have completed and been recycled. The freelist is unbounded;
|
||||
// creating 10 new items reuses from it and only allocates fresh when empty.
|
||||
|
||||
auto *component = id(test_sensor);
|
||||
int full_reuse_count = 10;
|
||||
|
||||
@@ -180,16 +180,10 @@ async def test_scheduler_pool(
|
||||
# Verify pool behavior
|
||||
assert pool_recycle_count > 0, "Should have recycled items to pool"
|
||||
|
||||
# Check pool metrics
|
||||
if pool_recycle_count > 0:
|
||||
max_pool_size = 0
|
||||
for line in log_lines:
|
||||
if match := recycle_pattern.search(line):
|
||||
size = int(match.group(1))
|
||||
max_pool_size = max(max_pool_size, size)
|
||||
|
||||
# Pool can grow up to its maximum of 5
|
||||
assert max_pool_size <= 5, f"Pool grew beyond maximum ({max_pool_size})"
|
||||
# Pool is unbounded; the cap was the source of the churn it was meant to prevent.
|
||||
assert pool_full_count == 0, (
|
||||
f"Pool should never report full (got {pool_full_count})"
|
||||
)
|
||||
|
||||
# Log summary for debugging
|
||||
print("\nScheduler Pool Test Summary (Python Orchestrated):")
|
||||
|
||||
Reference in New Issue
Block a user