[core] Drop per-component begin/end labels from generated main.cpp

The labels were there to help humans scanning the generated main.cpp
find component boundaries, but they were:

- Unreliable: CORE.flush_tasks can interleave coroutines on each
  await, so a component's later statements can land in another
  component's begin/end block.
- Load-bearing for a pile of complexity: a tuple return from
  _wrap_in_iifes, a has_iife flag, a comment-only detector to
  suppress trailing end-markers for comment-only components, and
  a brittle `"[]()" in line` check that could false-positive on
  YAML dumps containing lambda syntax.
- Not actually needed — generated main.cpp is a build artifact
  rarely read by anyone, and cg.LineComment("name:") already puts
  the component name at the start of its block.

ComponentMarker stays as a pure chunking sentinel — it tells
cpp_main_section where component boundaries are (for grouping) but
produces no C++ output. _wrap_in_iifes returns a plain list again.
Added a regression test for the now-defused case of a comment
containing "[]()" that was previously flagged by review.
This commit is contained in:
J. Nick Koston
2026-04-17 15:19:48 -05:00
parent f82401a504
commit 00f08ba6ed
3 changed files with 73 additions and 64 deletions
+31 -39
View File
@@ -538,6 +538,8 @@ def _wrap_in_iifes(lines: list[str], max_statements: int) -> list[str]:
inside a brace-balanced block (e.g. the ``{`` / ``}`` pair that
``cg.with_local_variable()`` emits around a scoped local), so an IIFE
may exceed ``max_statements`` when a block straddles the boundary.
A comment-only chunk is emitted verbatim with no IIFE, since wrapping
pure comments in a no-op lambda is clutter.
The IIFEs intentionally have no ``noinline`` attribute: GCC's ``-Os``
inliner makes good decisions about which chunks to keep as functions
@@ -552,9 +554,6 @@ def _wrap_in_iifes(lines: list[str], max_statements: int) -> list[str]:
def flush() -> None:
if not chunk:
return
# If the chunk is comments-only (e.g. a component that emits a
# header marker and config dump but no C++ statements), emit them
# verbatim without wrapping — an empty IIFE is pure clutter.
if all(line.lstrip().startswith("//") for line in chunk):
out.extend(chunk)
else:
@@ -566,16 +565,22 @@ def _wrap_in_iifes(lines: list[str], max_statements: int) -> list[str]:
for line in lines:
chunk.append(line)
# Track brace depth by counting ``{`` and ``}`` characters per line
# rather than matching whole-line tokens. Today the only codegen
# that emits scope braces as separate statements is
# ``cg.with_local_variable()`` (standalone ``{`` / ``}`` lines),
# but counting is robust against future codegen that emits inline
# control-flow like ``if (cond) {`` or ``} else {`` on a single
# line. Multi-line statements (inline lambdas) carry balanced
# braces within one list entry so they contribute no net depth.
# Braces inside string literals would throw the count off, but
# esphome's generated main.cpp does not currently emit strings
# that contain unbalanced braces in main_statements.
# rather than matching whole-line tokens. The current codegen only
# emits scope braces via ``cg.with_local_variable()`` (standalone
# ``{`` / ``}`` lines), but counting is robust against future
# codegen emitting inline control flow like ``if (cond) {`` or
# ``} else {``. Multi-line statements (e.g. inline lambdas) carry
# balanced braces within one list entry and contribute no net
# depth. Braces inside string literals would throw the count off;
# esphome's generated main.cpp does not currently emit such
# strings in main_statements.
#
# If depth ever goes negative (unmatched ``}`` before ``{``), we
# never return to depth 0 for the remainder of the input, so no
# further flushes fire and the rest of the chunk falls through
# into a single IIFE at the final ``flush()``. This is the
# intended safe-harbor behavior — negative depth signals a
# violated assumption about the input, not a normal branch.
depth += line.count("{") - line.count("}")
if depth == 0 and len(chunk) >= max_statements:
flush()
@@ -1058,43 +1063,30 @@ class EsphomeCore:
from esphome.cpp_generator import ComponentMarker, statement
# Split main_statements at ComponentMarker sentinels into a prefix
# (statements emitted before any component) plus per-component groups.
# Each group carries its component name so cpp_main_section can emit
# begin/end marker comments bracketing the IIFE.
# (statements emitted before any component) plus per-component
# groups. Each group is wrapped in an IIFE lambda so GCC can
# shorten temporary lifetimes and bound peak setup-time stack;
# large groups are sub-split to cap single heavy components
# (e.g. sensor platforms with many filter registrations). The
# IIFEs have no noinline attribute, so the compiler is free to
# inline the block when that produces smaller code without
# regressing peak stack.
prefix: list[str] = []
components: list[tuple[str, list[str]]] = []
components: list[list[str]] = []
current = prefix
for exp in self.main_statements:
if isinstance(exp, ComponentMarker):
body: list[str] = []
components.append((exp.name, body))
current = body
current = []
components.append(current)
continue
current.append(str(statement(exp)).rstrip())
# No components → flat output (host build, tests).
if not components:
return "\n".join(prefix) + "\n\n"
# Each component's block is wrapped in an IIFE lambda that
# introduces a nested scope, shortening the lifetimes of
# temporaries so GCC can bound peak setup-time stack usage.
# The IIFE has no noinline attribute, so the compiler is free
# to inline the block when that produces smaller code without
# regressing peak stack. Large blocks are sub-split to cap
# single heavy components (e.g. sensor platforms with many
# filter registrations). "begin X" and "end X" marker comments
# bracket the IIFE so the generated main.cpp is easy to scan by
# component; a comment-only component gets a single "begin X"
# marker (no IIFE, no end marker).
pieces = list(prefix)
for name, body in components:
wrapped = _wrap_in_iifes(body, max_statements=50)
has_iife = any("[]()" in line for line in wrapped)
pieces.append(f"// === begin {name} ===")
pieces.extend(wrapped)
if has_iife:
pieces.append(f"// === end {name} ===")
for body in components:
pieces.extend(_wrap_in_iifes(body, max_statements=50))
return "\n".join(pieces) + "\n\n"
@property
+17 -8
View File
@@ -436,13 +436,22 @@ class LineComment(Statement):
class ComponentMarker(Statement):
"""Sentinel marker recorded in main_statements when a component's
to_code begins emitting code. ``cpp_main_section`` consumes these
to bracket each component's generated block with begin/end comment
markers and to wrap it in an IIFE scope. The IIFE introduces a
nested scope so GCC can shorten temporary lifetimes and help reduce
peak setup-time stack usage; the lambda has no ``noinline``
attribute, so the compiler may still inline the block when that
produces smaller code without regressing peak stack."""
``to_code`` begins emitting code. ``cpp_main_section`` consumes
these as chunking boundaries: the statements between two markers
form a group that gets wrapped in an IIFE so GCC can shorten
temporary lifetimes and bound peak setup-time stack usage.
The marker produces no C++ output of its own; its ``__str__`` is
only used for debugging (e.g. ``repr`` of ``main_statements``).
The component name is retained so tooling can inspect grouping if
needed, but the generated ``main.cpp`` carries no per-component
labels — partitioning is best-effort anyway because
``CORE.flush_tasks`` can interleave coroutines on each ``await``
(e.g. ``cg.get_variable``) and re-schedule by priority, which
means a component's later statements can land between a different
component's earlier statements. This is semantically safe because
every statement placement-news into static storage or mutates a
global already declared at file scope."""
__slots__ = ("name",)
@@ -450,7 +459,7 @@ class ComponentMarker(Statement):
self.name = name
def __str__(self):
return f"// === begin {self.name} ==="
return f"// component-marker: {self.name}"
class ProgmemAssignmentExpression(AssignmentExpression):
+25 -17
View File
@@ -871,7 +871,7 @@ class TestEsphomeCore:
def test_wrap_in_iifes_empty_input() -> None:
assert not core._wrap_in_iifes([], max_statements=10)
assert core._wrap_in_iifes([], max_statements=10) == []
def test_wrap_in_iifes_fewer_lines_than_limit() -> None:
@@ -969,9 +969,20 @@ def test_wrap_in_iifes_never_splits_inline_brace_lines() -> None:
def test_wrap_in_iifes_skips_comment_only_chunks() -> None:
# Components that emit only a ComponentMarker + config dump (no C++
# statements) should not be wrapped in an empty IIFE.
lines = ["// === begin sha256 ===", "// sha256:", "// {}"]
# A chunk with no C++ statements (only comments, e.g. a component's
# config dump) should be emitted verbatim without a no-op IIFE.
lines = ["// sha256:", "// {}"]
assert core._wrap_in_iifes(lines, max_statements=50) == lines
def test_wrap_in_iifes_ignores_iife_pattern_in_comment() -> None:
# A comment whose text mentions "[]()" (e.g. a YAML dump of a
# lambda) must not fool the comment-only detector into wrapping.
lines = [
"// on_value:",
"// - !lambda |-",
"// return []() { return 5; };",
]
assert core._wrap_in_iifes(lines, max_statements=50) == lines
@@ -979,7 +990,7 @@ def test_cpp_main_section_no_components_emits_flat() -> None:
target = core.EsphomeCore()
target.main_statements = [RawStatement("a();"), RawStatement("b();")]
out = target.cpp_main_section
assert "[[gnu::noinline]]" not in out
assert "[]() {" not in out
assert "a();" in out
assert "b();" in out
@@ -993,18 +1004,17 @@ def test_cpp_main_section_component_marker_wraps_in_iife() -> None:
RawStatement("new_wifi();"),
]
out = target.cpp_main_section
# One IIFE per component that emits C++ statements.
assert out.count("[]() {") == 2
assert out.count("}();") == 2
# Each component's IIFE is bracketed by a begin/end marker pair.
assert "// === begin logger ===" in out
assert "// === end logger ===" in out
assert "// === begin wifi ===" in out
assert "// === end wifi ===" in out
# ComponentMarker produces no output of its own.
assert "component-marker" not in out
def test_cpp_main_section_comment_only_component_emits_single_marker() -> None:
# A component that emits no C++ statements (only a ComponentMarker)
# gets only a begin marker — no IIFE, so no end marker.
def test_cpp_main_section_comment_only_component_omits_iife() -> None:
# A component that emits only a ComponentMarker (no statements) adds
# nothing to the generated output. A neighboring component with
# actual code still gets its own IIFE.
target = core.EsphomeCore()
target.main_statements = [
ComponentMarker("sha256"),
@@ -1012,10 +1022,8 @@ def test_cpp_main_section_comment_only_component_emits_single_marker() -> None:
RawStatement("new_wifi();"),
]
out = target.cpp_main_section
assert "// === begin sha256 ===" in out
assert "// === end sha256 ===" not in out
assert "// === begin wifi ===" in out
assert "// === end wifi ===" in out
assert out.count("[]() {") == 1
assert "new_wifi();" in out
def test_cpp_main_section_prefix_statements_stay_outside_iife() -> None: