[ci] Run downstream device-builder tests against PR Python code

Add a CI job that checks out esphome/device-builder@main, installs it,
overlays the PR's esphome via `pip install -e ./esphome`, and runs its
pytest suite. Gated by determine-jobs.py: only runs when Python files
under esphome/ change or runtime deps (requirements.txt, pyproject.toml)
change -- the surface device-builder consumes when it imports esphome.

This catches downstream breakage at PR time instead of after release,
mirroring the yarl -> aiohttp pattern.
This commit is contained in:
J. Nick Koston
2026-05-03 08:49:59 -05:00
parent 8046ff7e1e
commit 365d93f01b
3 changed files with 139 additions and 0 deletions
+41
View File
@@ -136,6 +136,44 @@ jobs:
if-no-files-found: ignore
retention-days: 14
device-builder:
name: Test downstream esphome/device-builder
runs-on: ubuntu-24.04
needs:
- common
- determine-jobs
if: needs.determine-jobs.outputs.device-builder == 'true'
steps:
- name: Check out esphome (this PR)
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
path: esphome
- name: Check out esphome/device-builder
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
repository: esphome/device-builder
ref: main
path: device-builder
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.13"
cache: pip
cache-dependency-path: |
device-builder/pyproject.toml
esphome/requirements.txt
- name: Install device-builder + esphome from PR
run: |
python -m pip install --upgrade pip
# Install device-builder with its esphome + test extras first so its
# pinned versions of pytest/etc. land, then overlay the PR's esphome
# so the downstream tests run against this PR's Python code.
pip install -e './device-builder[esphome]' -e './device-builder[test]'
pip install -e ./esphome
- name: Run device-builder pytest
working-directory: device-builder
run: pytest -q --maxfail=5 --durations=10 --no-cov --ignore=tests/benchmarks
pytest:
name: Run pytest
strategy:
@@ -204,6 +242,7 @@ jobs:
clang-tidy-mode: ${{ steps.determine.outputs.clang-tidy-mode }}
python-linters: ${{ steps.determine.outputs.python-linters }}
import-time: ${{ steps.determine.outputs.import-time }}
device-builder: ${{ steps.determine.outputs.device-builder }}
changed-components: ${{ steps.determine.outputs.changed-components }}
changed-components-with-tests: ${{ steps.determine.outputs.changed-components-with-tests }}
directly-changed-components-with-tests: ${{ steps.determine.outputs.directly-changed-components-with-tests }}
@@ -247,6 +286,7 @@ jobs:
echo "clang-tidy-mode=$(echo "$output" | jq -r '.clang_tidy_mode')" >> $GITHUB_OUTPUT
echo "python-linters=$(echo "$output" | jq -r '.python_linters')" >> $GITHUB_OUTPUT
echo "import-time=$(echo "$output" | jq -r '.import_time')" >> $GITHUB_OUTPUT
echo "device-builder=$(echo "$output" | jq -r '.device_builder')" >> $GITHUB_OUTPUT
echo "changed-components=$(echo "$output" | jq -c '.changed_components')" >> $GITHUB_OUTPUT
echo "changed-components-with-tests=$(echo "$output" | jq -c '.changed_components_with_tests')" >> $GITHUB_OUTPUT
echo "directly-changed-components-with-tests=$(echo "$output" | jq -c '.directly_changed_components_with_tests')" >> $GITHUB_OUTPUT
@@ -1063,6 +1103,7 @@ jobs:
- clang-tidy-nosplit
- clang-tidy-split
- determine-jobs
- device-builder
- test-build-components-split
- pre-commit-ci-lite
- memory-impact-target-branch
+37
View File
@@ -10,6 +10,7 @@ what files have changed. It outputs JSON with the following structure:
"clang_tidy": true/false,
"clang_format": true/false,
"python_linters": true/false,
"device_builder": true/false,
"changed_components": ["component1", "component2", ...],
"component_test_count": 5,
"memory_impact": {
@@ -25,6 +26,7 @@ The CI workflow uses this information to:
- Skip or run clang-tidy (and whether to do a full scan)
- Skip or run clang-format
- Skip or run Python linters (ruff, flake8, pylint, pyupgrade)
- Skip or run downstream esphome/device-builder tests against the PR's Python code
- Determine which components to test individually
- Decide how to split component tests (if there are many)
- Run memory impact analysis whenever there are changed components (merged config), and also for core-only changes
@@ -440,6 +442,39 @@ def should_run_import_time(branch: str | None = None) -> bool:
return False
# Files outside esphome/**/*.py whose changes can affect the downstream
# device-builder build. requirements.txt / pyproject.toml change the runtime
# dependency graph that device-builder picks up when it installs esphome.
DEVICE_BUILDER_TRIGGER_FILES = frozenset(
{
"requirements.txt",
"pyproject.toml",
}
)
def should_run_device_builder(branch: str | None = None) -> bool:
"""Determine if downstream esphome/device-builder tests should run.
device-builder imports esphome as a library, so whenever the importable
Python surface or the runtime dependencies change we re-run its test
suite against the PR's code to catch breakage we'd otherwise only see
after a release.
Args:
branch: Branch to compare against. If None, uses default.
Returns:
True if the device-builder downstream tests should run, False otherwise.
"""
for file in changed_files(branch):
if file.startswith("esphome/") and file.endswith(PYTHON_FILE_EXTENSIONS):
return True
if file in DEVICE_BUILDER_TRIGGER_FILES:
return True
return False
def determine_cpp_unit_tests(
branch: str | None = None,
) -> tuple[bool, list[str]]:
@@ -874,6 +909,7 @@ def main() -> None:
run_clang_format = should_run_clang_format(args.branch)
run_python_linters = should_run_python_linters(args.branch)
run_import_time = should_run_import_time(args.branch)
run_device_builder = should_run_device_builder(args.branch)
changed_cpp_file_count = count_changed_cpp_files(args.branch)
# Get changed components
@@ -1007,6 +1043,7 @@ def main() -> None:
"clang_format": run_clang_format,
"python_linters": run_python_linters,
"import_time": run_import_time,
"device_builder": run_device_builder,
"changed_components": changed_components,
"changed_components_with_tests": changed_components_with_tests,
"directly_changed_components_with_tests": list(directly_changed_with_tests),
+61
View File
@@ -63,6 +63,13 @@ def mock_should_run_import_time() -> Generator[Mock, None, None]:
yield mock
@pytest.fixture
def mock_should_run_device_builder() -> Generator[Mock, None, None]:
"""Mock should_run_device_builder from determine_jobs."""
with patch.object(determine_jobs, "should_run_device_builder") as mock:
yield mock
@pytest.fixture
def mock_determine_cpp_unit_tests() -> Generator[Mock, None, None]:
"""Mock determine_cpp_unit_tests from helpers."""
@@ -99,6 +106,7 @@ def test_main_all_tests_should_run(
mock_should_run_clang_format: Mock,
mock_should_run_python_linters: Mock,
mock_should_run_import_time: Mock,
mock_should_run_device_builder: Mock,
mock_changed_files: Mock,
mock_determine_cpp_unit_tests: Mock,
capsys: pytest.CaptureFixture[str],
@@ -113,6 +121,7 @@ def test_main_all_tests_should_run(
mock_should_run_clang_format.return_value = True
mock_should_run_python_linters.return_value = True
mock_should_run_import_time.return_value = True
mock_should_run_device_builder.return_value = True
mock_determine_cpp_unit_tests.return_value = (False, ["wifi", "api", "sensor"])
# Mock changed_files to return non-component files (to avoid memory impact)
@@ -193,6 +202,7 @@ def test_main_all_tests_should_run(
assert output["clang_format"] is True
assert output["python_linters"] is True
assert output["import_time"] is True
assert output["device_builder"] is True
assert output["changed_components"] == ["wifi", "api", "sensor"]
# changed_components_with_tests will only include components that actually have test files
assert "changed_components_with_tests" in output
@@ -225,6 +235,7 @@ def test_main_no_tests_should_run(
mock_should_run_clang_format: Mock,
mock_should_run_python_linters: Mock,
mock_should_run_import_time: Mock,
mock_should_run_device_builder: Mock,
mock_changed_files: Mock,
mock_determine_cpp_unit_tests: Mock,
capsys: pytest.CaptureFixture[str],
@@ -239,6 +250,7 @@ def test_main_no_tests_should_run(
mock_should_run_clang_format.return_value = False
mock_should_run_python_linters.return_value = False
mock_should_run_import_time.return_value = False
mock_should_run_device_builder.return_value = False
mock_determine_cpp_unit_tests.return_value = (False, [])
# Mock changed_files to return no component files
@@ -278,6 +290,7 @@ def test_main_no_tests_should_run(
assert output["clang_format"] is False
assert output["python_linters"] is False
assert output["import_time"] is False
assert output["device_builder"] is False
assert output["changed_components"] == []
assert output["changed_components_with_tests"] == []
assert output["component_test_count"] == 0
@@ -299,6 +312,7 @@ def test_main_with_branch_argument(
mock_should_run_clang_format: Mock,
mock_should_run_python_linters: Mock,
mock_should_run_import_time: Mock,
mock_should_run_device_builder: Mock,
mock_changed_files: Mock,
mock_determine_cpp_unit_tests: Mock,
capsys: pytest.CaptureFixture[str],
@@ -313,6 +327,7 @@ def test_main_with_branch_argument(
mock_should_run_clang_format.return_value = False
mock_should_run_python_linters.return_value = True
mock_should_run_import_time.return_value = True
mock_should_run_device_builder.return_value = True
mock_determine_cpp_unit_tests.return_value = (False, ["mqtt"])
# Mock changed_files to return non-component files (to avoid memory impact)
@@ -350,6 +365,7 @@ def test_main_with_branch_argument(
mock_should_run_clang_format.assert_called_once_with("main")
mock_should_run_python_linters.assert_called_once_with("main")
mock_should_run_import_time.assert_called_once_with("main")
mock_should_run_device_builder.assert_called_once_with("main")
# Check output
captured = capsys.readouterr()
@@ -362,6 +378,7 @@ def test_main_with_branch_argument(
assert output["clang_format"] is False
assert output["python_linters"] is True
assert output["import_time"] is True
assert output["device_builder"] is True
assert output["changed_components"] == ["mqtt"]
# changed_components_with_tests will only include components that actually have test files
assert "changed_components_with_tests" in output
@@ -734,6 +751,50 @@ def test_should_run_import_time_with_branch() -> None:
mock_changed.assert_called_once_with("release")
@pytest.mark.parametrize(
("changed_files", "expected_result"),
[
# esphome Python files trigger downstream device-builder tests
(["esphome/__main__.py"], True),
(["esphome/components/wifi/__init__.py"], True),
(["esphome/core/config.py"], True),
(["esphome/types.pyi"], True),
# Runtime dependency changes trigger
(["requirements.txt"], True),
(["pyproject.toml"], True),
# Mixed: any triggering file is enough
(["docs/README.md", "esphome/config.py"], True),
# Dev/test-only dependency changes don't trigger device-builder
# (they don't affect the importable surface device-builder uses)
(["requirements_dev.txt"], False),
(["requirements_test.txt"], False),
# Python files outside esphome/ don't trigger
(["script/some_other_script.py"], False),
(["tests/script/test_determine_jobs.py"], False),
# Non-Python changes don't trigger
(["esphome/core/component.cpp"], False),
(["tests/components/wifi/test.esp32-idf.yaml"], False),
(["README.md"], False),
([], False),
],
)
def test_should_run_device_builder(
changed_files: list[str], expected_result: bool
) -> None:
"""Test should_run_device_builder function."""
with patch.object(determine_jobs, "changed_files", return_value=changed_files):
result = determine_jobs.should_run_device_builder()
assert result == expected_result
def test_should_run_device_builder_with_branch() -> None:
"""Test should_run_device_builder with branch argument."""
with patch.object(determine_jobs, "changed_files") as mock_changed:
mock_changed.return_value = []
determine_jobs.should_run_device_builder("release")
mock_changed.assert_called_once_with("release")
@pytest.mark.parametrize(
("changed_files", "expected_result"),
[