diff --git a/script/check_import_time.py b/script/check_import_time.py index b170806fc8..7b405475c3 100755 --- a/script/check_import_time.py +++ b/script/check_import_time.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 """Regression check for `import esphome.__main__` cost. -Runs `python -X importtime -c "import esphome.__main__"` in fresh subprocesses -and compares the root cumulative import time against a checked-in budget +Runs `python -m importtime_waterfall --har esphome.__main__` (which invokes +`-X importtime` in fresh subprocesses, best-of-N) and compares the root +cumulative import time against a checked-in budget (`script/import_time_budget.json`). The CLI pays this cost on every invocation before the requested command even -runs, so a regression here hurts every user. Pair this with -`python -m importtime_waterfall --har` for human-readable waterfalls. +runs, so a regression here hurts every user. """ from __future__ import annotations @@ -17,93 +17,72 @@ import json from pathlib import Path import subprocess import sys -import time from typing import Any, TextIO SCRIPT_DIR = Path(__file__).parent BUDGET_PATH = SCRIPT_DIR / "import_time_budget.json" TARGET_MODULE = "esphome.__main__" -DEFAULT_RUNS = 3 DEFAULT_MARGIN_PCT = 15 OFFENDERS_TOP_N = 15 -IMPORT_TIME_PREFIX = "import time:" -def _run_importtime(module: str) -> tuple[float, str]: - """Run `python -X importtime -c 'import '` once. +def run_waterfall(module: str) -> str: + """Run `importtime_waterfall --har ` and return the HAR JSON text. - Returns (wall_seconds, stderr_text). + `importtime_waterfall` itself runs the target in 6 fresh subprocesses + under `-X importtime` and emits the HAR of the fastest run. """ - before = time.monotonic() result = subprocess.run( - [sys.executable, "-X", "importtime", "-c", f"import {module}"], + [sys.executable, "-m", "importtime_waterfall", "--har", module], check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.PIPE, + stdout=subprocess.PIPE, text=True, ) - return time.monotonic() - before, result.stderr + return result.stdout -def measure(module: str, runs: int) -> str: - """Run `module` import `runs` times; return the fastest run's stderr.""" - best_wall, best_stderr = _run_importtime(module) - for _ in range(runs - 1): - wall, stderr = _run_importtime(module) - if wall < best_wall: - best_wall, best_stderr = wall, stderr - return best_stderr +def measure(module: str) -> dict[str, Any]: + """Return the parsed HAR for importing `module`.""" + return json.loads(run_waterfall(module)) -def parse_trace(stderr: str) -> list[tuple[int, int, int, str]]: - """Parse `-X importtime` stderr into (depth, self_us, cumulative_us, name).""" - entries: list[tuple[int, int, int, str]] = [] - for line in stderr.splitlines(): - if not line.startswith(IMPORT_TIME_PREFIX): - continue - body = line[len(IMPORT_TIME_PREFIX) :] - parts = body.split("|") - if len(parts) != 3: - continue - try: - self_us = int(parts[0].strip()) - cumulative_us = int(parts[1].strip()) - except ValueError: - continue # header row - name_field = parts[2].rstrip("\n") - name = name_field.lstrip(" ") - depth = (len(name_field) - len(name)) // 2 - entries.append((depth, self_us, cumulative_us, name)) - return entries +def _entries(har: dict[str, Any]) -> list[dict[str, Any]]: + return har["log"]["entries"] -def root_cumulative_us(entries: list[tuple[int, int, int, str]], module: str) -> int: - """Return the cumulative import time of `module` (the root probe).""" - for depth, _self_us, cumulative_us, name in reversed(entries): - if depth == 0 and name == module: - return cumulative_us +def root_cumulative_us(har: dict[str, Any], module: str) -> int: + """Return the cumulative import time (µs) of `module` from a HAR. + + The HAR `time` field is authored by importtime_waterfall using µs values + fed through `timedelta(milliseconds=...)`, so the number read back is the + original self/cumulative time in microseconds (labelled "ms" in HAR). + """ + for entry in _entries(har): + if entry["request"]["url"] == module: + return entry["time"] raise RuntimeError( - f"Did not find a root-level trace entry for {module!r}. Is it importable?" + f"No HAR entry for {module!r}. Is it importable with " + f"`python -c 'import {module}'`?" ) -def top_offenders( - entries: list[tuple[int, int, int, str]], n: int -) -> list[tuple[str, int, int]]: - """Return up to `n` modules with the highest self-time, deduped by name. +def top_offenders(har: dict[str, Any], n: int) -> list[tuple[str, int, int]]: + """Return up to `n` (name, self_us, cumulative_us), ranked by self_us desc. - Returns list of (name, self_us, cumulative_us). A module imported from - multiple places is counted once, at its deepest-stacktrace occurrence - (same as what the user sees on the `--graph` output). + A module imported from multiple places is counted once (first entry wins, + matching importtime's own de-duplication). """ seen: dict[str, tuple[int, int]] = {} - for _depth, self_us, cumulative_us, name in entries: + for entry in _entries(har): + name = entry["request"]["url"] if name in seen: continue + self_us = entry["timings"]["receive"] + cumulative_us = entry["time"] seen[name] = (self_us, cumulative_us) ranked = sorted( - ((name, self_us, cum_us) for name, (self_us, cum_us) in seen.items()), + ((name, s, c) for name, (s, c) in seen.items()), key=lambda row: row[1], reverse=True, ) @@ -156,9 +135,8 @@ def cmd_check(args: argparse.Namespace) -> int: ) return 2 - stderr = measure(TARGET_MODULE, args.runs) - entries = parse_trace(stderr) - measured = root_cumulative_us(entries, TARGET_MODULE) + har = measure(TARGET_MODULE) + measured = root_cumulative_us(har, TARGET_MODULE) baseline = budget["cumulative_us"] margin_pct = budget.get("margin_pct", DEFAULT_MARGIN_PCT) @@ -180,7 +158,7 @@ def cmd_check(args: argparse.Namespace) -> int: f"Top import-time offenders (by self time):", file=sys.stderr, ) - _print_offenders_table(top_offenders(entries, OFFENDERS_TOP_N), sys.stderr) + _print_offenders_table(top_offenders(har, OFFENDERS_TOP_N), sys.stderr) print( "\nIf this regression is intentional, regenerate the budget with:\n" " script/check_import_time.py --update\n" @@ -192,9 +170,8 @@ def cmd_check(args: argparse.Namespace) -> int: def cmd_update(args: argparse.Namespace) -> int: - stderr = measure(TARGET_MODULE, args.runs) - entries = parse_trace(stderr) - measured = root_cumulative_us(entries, TARGET_MODULE) + har = measure(TARGET_MODULE) + measured = root_cumulative_us(har, TARGET_MODULE) write_budget(measured, args.margin_pct) print( f"Wrote {BUDGET_PATH.name}: " @@ -205,26 +182,13 @@ def cmd_update(args: argparse.Namespace) -> int: def cmd_har(args: argparse.Namespace) -> int: - out_path = Path(args.har) - result = subprocess.run( - [sys.executable, "-m", "importtime_waterfall", "--har", TARGET_MODULE], - check=True, - stdout=subprocess.PIPE, - text=True, - ) - out_path.write_text(result.stdout) - print(f"Wrote waterfall HAR to {out_path}") + Path(args.har).write_text(run_waterfall(TARGET_MODULE)) + print(f"Wrote waterfall HAR to {args.har}") return 0 def main() -> int: parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--runs", - type=int, - default=DEFAULT_RUNS, - help=f"Number of measurement runs (default: {DEFAULT_RUNS}, best-of-N).", - ) parser.add_argument( "--margin-pct", type=int, diff --git a/script/import_time_budget.json b/script/import_time_budget.json index 6b084e2620..97b4f7456c 100644 --- a/script/import_time_budget.json +++ b/script/import_time_budget.json @@ -1,5 +1,5 @@ { "target_module": "esphome.__main__", - "margin_pct": 15, - "cumulative_us": 75199 + "margin_pct": 25, + "cumulative_us": 123000 } diff --git a/tests/script/test_determine_jobs.py b/tests/script/test_determine_jobs.py index de239ee0b5..bef6625456 100644 --- a/tests/script/test_determine_jobs.py +++ b/tests/script/test_determine_jobs.py @@ -56,6 +56,13 @@ def mock_should_run_python_linters() -> Generator[Mock, None, None]: yield mock +@pytest.fixture +def mock_should_run_import_time() -> Generator[Mock, None, None]: + """Mock should_run_import_time from determine_jobs.""" + with patch.object(determine_jobs, "should_run_import_time") as mock: + yield mock + + @pytest.fixture def mock_determine_cpp_unit_tests() -> Generator[Mock, None, None]: """Mock determine_cpp_unit_tests from helpers.""" @@ -91,6 +98,7 @@ def test_main_all_tests_should_run( mock_should_run_clang_tidy: Mock, mock_should_run_clang_format: Mock, mock_should_run_python_linters: Mock, + mock_should_run_import_time: Mock, mock_changed_files: Mock, mock_determine_cpp_unit_tests: Mock, capsys: pytest.CaptureFixture[str], @@ -104,6 +112,7 @@ def test_main_all_tests_should_run( mock_should_run_clang_tidy.return_value = True mock_should_run_clang_format.return_value = True mock_should_run_python_linters.return_value = True + mock_should_run_import_time.return_value = True mock_determine_cpp_unit_tests.return_value = (False, ["wifi", "api", "sensor"]) # Mock changed_files to return non-component files (to avoid memory impact) @@ -158,6 +167,7 @@ def test_main_all_tests_should_run( assert output["clang_tidy_mode"] in ["nosplit", "split"] assert output["clang_format"] is True assert output["python_linters"] is True + assert output["import_time"] is True assert output["changed_components"] == ["wifi", "api", "sensor"] # changed_components_with_tests will only include components that actually have test files assert "changed_components_with_tests" in output @@ -189,6 +199,7 @@ def test_main_no_tests_should_run( mock_should_run_clang_tidy: Mock, mock_should_run_clang_format: Mock, mock_should_run_python_linters: Mock, + mock_should_run_import_time: Mock, mock_changed_files: Mock, mock_determine_cpp_unit_tests: Mock, capsys: pytest.CaptureFixture[str], @@ -202,6 +213,7 @@ def test_main_no_tests_should_run( mock_should_run_clang_tidy.return_value = False mock_should_run_clang_format.return_value = False mock_should_run_python_linters.return_value = False + mock_should_run_import_time.return_value = False mock_determine_cpp_unit_tests.return_value = (False, []) # Mock changed_files to return no component files @@ -241,6 +253,7 @@ def test_main_no_tests_should_run( assert output["clang_tidy_mode"] == "disabled" assert output["clang_format"] is False assert output["python_linters"] is False + assert output["import_time"] is False assert output["changed_components"] == [] assert output["changed_components_with_tests"] == [] assert output["component_test_count"] == 0 @@ -261,6 +274,7 @@ def test_main_with_branch_argument( mock_should_run_clang_tidy: Mock, mock_should_run_clang_format: Mock, mock_should_run_python_linters: Mock, + mock_should_run_import_time: Mock, mock_changed_files: Mock, mock_determine_cpp_unit_tests: Mock, capsys: pytest.CaptureFixture[str], @@ -274,6 +288,7 @@ def test_main_with_branch_argument( mock_should_run_clang_tidy.return_value = True mock_should_run_clang_format.return_value = False mock_should_run_python_linters.return_value = True + mock_should_run_import_time.return_value = True mock_determine_cpp_unit_tests.return_value = (False, ["mqtt"]) # Mock changed_files to return non-component files (to avoid memory impact) @@ -310,6 +325,7 @@ def test_main_with_branch_argument( mock_should_run_clang_tidy.assert_called_once_with("main") mock_should_run_clang_format.assert_called_once_with("main") mock_should_run_python_linters.assert_called_once_with("main") + mock_should_run_import_time.assert_called_once_with("main") # Check output captured = capsys.readouterr() @@ -322,6 +338,7 @@ def test_main_with_branch_argument( assert output["clang_tidy_mode"] in ["nosplit", "split"] assert output["clang_format"] is False assert output["python_linters"] is True + assert output["import_time"] is True assert output["changed_components"] == ["mqtt"] # changed_components_with_tests will only include components that actually have test files assert "changed_components_with_tests" in output @@ -597,6 +614,49 @@ def test_should_run_python_linters_with_branch() -> None: mock_changed.assert_called_once_with("release") +@pytest.mark.parametrize( + ("changed_files", "expected_result"), + [ + # esphome Python files trigger the check + (["esphome/__main__.py"], True), + (["esphome/components/wifi/__init__.py"], True), + (["esphome/core/config.py"], True), + (["esphome/types.pyi"], True), + # Dependency declarations and the check's own files trigger + (["requirements.txt"], True), + (["requirements_dev.txt"], True), + (["pyproject.toml"], True), + (["script/check_import_time.py"], True), + (["script/import_time_budget.json"], True), + # Mixed: any triggering file is enough + (["docs/README.md", "esphome/config.py"], True), + # Python files outside esphome/ don't trigger + (["script/some_other_script.py"], False), + (["tests/script/test_determine_jobs.py"], False), + # Non-Python changes don't trigger + (["esphome/core/component.cpp"], False), + (["tests/components/wifi/test.esp32-idf.yaml"], False), + (["README.md"], False), + ([], False), + ], +) +def test_should_run_import_time( + changed_files: list[str], expected_result: bool +) -> None: + """Test should_run_import_time function.""" + with patch.object(determine_jobs, "changed_files", return_value=changed_files): + result = determine_jobs.should_run_import_time() + assert result == expected_result + + +def test_should_run_import_time_with_branch() -> None: + """Test should_run_import_time with branch argument.""" + with patch.object(determine_jobs, "changed_files") as mock_changed: + mock_changed.return_value = [] + determine_jobs.should_run_import_time("release") + mock_changed.assert_called_once_with("release") + + @pytest.mark.parametrize( ("changed_files", "expected_result"), [