diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 57053c36457..3af1709774d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -199,8 +199,7 @@ jobs: - common outputs: integration-tests: ${{ steps.determine.outputs.integration-tests }} - integration-tests-run-all: ${{ steps.determine.outputs.integration-tests-run-all }} - integration-test-files: ${{ steps.determine.outputs.integration-test-files }} + integration-test-buckets: ${{ steps.determine.outputs.integration-test-buckets }} clang-tidy: ${{ steps.determine.outputs.clang-tidy }} clang-tidy-mode: ${{ steps.determine.outputs.clang-tidy-mode }} python-linters: ${{ steps.determine.outputs.python-linters }} @@ -243,8 +242,7 @@ jobs: # Extract individual fields echo "integration-tests=$(echo "$output" | jq -r '.integration_tests')" >> $GITHUB_OUTPUT - echo "integration-tests-run-all=$(echo "$output" | jq -r '.integration_tests_run_all')" >> $GITHUB_OUTPUT - echo "integration-test-files=$(echo "$output" | jq -c '.integration_test_files')" >> $GITHUB_OUTPUT + echo "integration-test-buckets=$(echo "$output" | jq -c '.integration_test_buckets')" >> $GITHUB_OUTPUT echo "clang-tidy=$(echo "$output" | jq -r '.clang_tidy')" >> $GITHUB_OUTPUT echo "clang-tidy-mode=$(echo "$output" | jq -r '.clang_tidy_mode')" >> $GITHUB_OUTPUT echo "python-linters=$(echo "$output" | jq -r '.python_linters')" >> $GITHUB_OUTPUT @@ -267,12 +265,16 @@ jobs: key: components-graph-${{ hashFiles('esphome/components/**/*.py') }} integration-tests: - name: Run integration tests + name: Run integration tests (${{ matrix.bucket.name }}) runs-on: ubuntu-latest needs: - common - determine-jobs if: needs.determine-jobs.outputs.integration-tests == 'true' + strategy: + fail-fast: false + matrix: + bucket: ${{ fromJson(needs.determine-jobs.outputs.integration-test-buckets) }} steps: - name: Check out code from GitHub uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -299,19 +301,14 @@ jobs: run: echo "::add-matcher::.github/workflows/matchers/pytest.json" - name: Run integration tests env: - INTEGRATION_TEST_FILES: ${{ needs.determine-jobs.outputs.integration-test-files }} - INTEGRATION_TESTS_RUN_ALL: ${{ needs.determine-jobs.outputs.integration-tests-run-all }} + # JSON array of test paths; parsed into a bash array below to avoid + # shell word-splitting / glob hazards. + BUCKET_TESTS: ${{ toJson(matrix.bucket.tests) }} run: | . venv/bin/activate - if [[ "$INTEGRATION_TESTS_RUN_ALL" == "true" ]]; then - echo "Running all integration tests" - pytest -vv --no-cov --tb=native -n auto tests/integration/ - else - # Parse JSON array into bash array to avoid shell expansion issues - mapfile -t test_files < <(echo "$INTEGRATION_TEST_FILES" | jq -r '.[]') - echo "Running ${#test_files[@]} specific integration tests" - pytest -vv --no-cov --tb=native -n auto "${test_files[@]}" - fi + mapfile -t test_files < <(echo "$BUCKET_TESTS" | jq -r '.[]') + echo "Bucket ${{ matrix.bucket.name }}: running ${#test_files[@]} integration tests" + pytest -vv --no-cov --tb=native -n auto "${test_files[@]}" cpp-unit-tests: name: Run C++ unit tests diff --git a/script/determine-jobs.py b/script/determine-jobs.py index 6fd7ab297c8..c0cf8ecbdc8 100755 --- a/script/determine-jobs.py +++ b/script/determine-jobs.py @@ -6,8 +6,7 @@ what files have changed. It outputs JSON with the following structure: { "integration_tests": true/false, - "integration_tests_run_all": true/false, - "integration_test_files": ["tests/integration/test_foo.py", ...], + "integration_test_buckets": [{"name": "1/3", "tests": ["tests/integration/test_foo.py", ...]}, ...], "clang_tidy": true/false, "clang_format": true/false, "python_linters": true/false, @@ -81,6 +80,62 @@ CLANG_TIDY_SPLIT_THRESHOLD = 65 # Isolated components count as 10x, groupable components count as 1x COMPONENT_TEST_BATCH_SIZE = 40 +# Integration test bucketing: when more than the threshold tests are scheduled, +# fan out across this many parallel jobs. Below the threshold, a single job runs. +INTEGRATION_TESTS_SPLIT_THRESHOLD = 10 +INTEGRATION_TESTS_SPLIT_BUCKETS = 3 + + +def _split_list(items: list[str], n: int) -> list[list[str]]: + """Split a list into n roughly-equal contiguous parts (matches script/clang-tidy).""" + k, m = divmod(len(items), n) + return [items[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)] + + +def _all_integration_test_files() -> list[str]: + """Return all integration test file paths, sorted, relative to repo root.""" + return sorted( + str(p.relative_to(root_path)) + for p in (Path(root_path) / "tests" / "integration").glob("test_*.py") + ) + + +def _compute_integration_test_buckets( + integration_run_all: bool, + integration_test_files: list[str], +) -> tuple[bool, list[dict[str, Any]]]: + """Compute (run_integration, buckets) from the determine_integration_tests result. + + Pure function for unit testing — no I/O beyond `_all_integration_test_files` + when `integration_run_all` is set. + + `buckets` is a list of `{name, tests}` dicts where `tests` is a JSON-friendly + list of file paths so the workflow can build a bash array via jq, avoiding + shell word-splitting / glob hazards. + """ + if integration_run_all: + files = _all_integration_test_files() + else: + files = sorted(integration_test_files) + + # Empty list (e.g. run_all expansion with no files on disk) would otherwise + # cause the workflow to invoke pytest with no path argument and collect + # tests outside tests/integration/. Suppress the run instead. + if not files: + return False, [] + + if len(files) > INTEGRATION_TESTS_SPLIT_THRESHOLD: + parts = [ + part for part in _split_list(files, INTEGRATION_TESTS_SPLIT_BUCKETS) if part + ] + buckets = [ + {"name": f"{i + 1}/{len(parts)}", "tests": part} + for i, part in enumerate(parts) + ] + else: + buckets = [{"name": "1/1", "tests": files}] + return True, buckets + class Platform(StrEnum): """Platform identifiers for memory impact analysis.""" @@ -812,7 +867,9 @@ def main() -> None: integration_run_all, integration_test_files = determine_integration_tests( args.branch ) - run_integration = integration_run_all or bool(integration_test_files) + run_integration, integration_test_buckets = _compute_integration_test_buckets( + integration_run_all, integration_test_files + ) run_clang_tidy = should_run_clang_tidy(args.branch) run_clang_format = should_run_clang_format(args.branch) run_python_linters = should_run_python_linters(args.branch) @@ -944,8 +1001,7 @@ def main() -> None: output: dict[str, Any] = { "integration_tests": run_integration, - "integration_tests_run_all": integration_run_all, - "integration_test_files": integration_test_files, + "integration_test_buckets": integration_test_buckets, "clang_tidy": run_clang_tidy, "clang_tidy_mode": clang_tidy_mode, "clang_format": run_clang_format, diff --git a/tests/script/test_determine_jobs.py b/tests/script/test_determine_jobs.py index 44c110b6892..e85f1757b03 100644 --- a/tests/script/test_determine_jobs.py +++ b/tests/script/test_determine_jobs.py @@ -122,10 +122,19 @@ def test_main_all_tests_should_run( "esphome/helpers.py", ] + # Stable, deterministic stand-in for the tests/integration/ glob so the + # bucket assertions don't drift with the real test count. + fake_test_files = [f"tests/integration/test_{i:03d}.py" for i in range(15)] + # Run main function with mocked argv with ( patch("sys.argv", ["determine-jobs.py"]), patch.object(determine_jobs, "_is_clang_tidy_full_scan", return_value=False), + patch.object( + determine_jobs, + "_all_integration_test_files", + return_value=fake_test_files, + ), patch.object( determine_jobs, "get_changed_components", @@ -161,8 +170,24 @@ def test_main_all_tests_should_run( output = json.loads(captured.out) assert output["integration_tests"] is True - assert output["integration_tests_run_all"] is True - assert output["integration_test_files"] == [] + # run_all=True expands to the full glob and pre-buckets into 3 parts. + # Each bucket's `tests` is a JSON list of file paths. + assert isinstance(output["integration_test_buckets"], list) + assert len(output["integration_test_buckets"]) == 3 + assert [b["name"] for b in output["integration_test_buckets"]] == [ + "1/3", + "2/3", + "3/3", + ] + for bucket in output["integration_test_buckets"]: + assert isinstance(bucket["tests"], list) + for path in bucket["tests"]: + assert isinstance(path, str) + bucket_files = [f for b in output["integration_test_buckets"] for f in b["tests"]] + assert bucket_files == fake_test_files + # Bucket sizes are balanced (max-min difference at most 1). + sizes = [len(b["tests"]) for b in output["integration_test_buckets"]] + assert max(sizes) - min(sizes) <= 1 assert output["clang_tidy"] is True assert output["clang_tidy_mode"] in ["nosplit", "split"] assert output["clang_format"] is True @@ -247,8 +272,7 @@ def test_main_no_tests_should_run( output = json.loads(captured.out) assert output["integration_tests"] is False - assert output["integration_tests_run_all"] is False - assert output["integration_test_files"] == [] + assert output["integration_test_buckets"] == [] assert output["clang_tidy"] is False assert output["clang_tidy_mode"] == "disabled" assert output["clang_format"] is False @@ -332,8 +356,7 @@ def test_main_with_branch_argument( output = json.loads(captured.out) assert output["integration_tests"] is False - assert output["integration_tests_run_all"] is False - assert output["integration_test_files"] == [] + assert output["integration_test_buckets"] == [] assert output["clang_tidy"] is True assert output["clang_tidy_mode"] in ["nosplit", "split"] assert output["clang_format"] is False @@ -357,6 +380,59 @@ def test_main_with_branch_argument( assert output["cpp_unit_tests_components"] == ["mqtt"] +def test_compute_integration_test_buckets_empty() -> None: + """No integration tests scheduled => (False, []).""" + run, buckets = determine_jobs._compute_integration_test_buckets(False, []) + assert run is False + assert buckets == [] + + +def test_compute_integration_test_buckets_below_threshold() -> None: + """A small explicit list (<= threshold) => single 1/1 bucket with that list.""" + files = [f"tests/integration/test_{name}.py" for name in ("c", "a", "b")] + run, buckets = determine_jobs._compute_integration_test_buckets(False, files) + assert run is True + assert buckets == [{"name": "1/1", "tests": sorted(files)}] + + +def test_compute_integration_test_buckets_at_threshold_stays_single() -> None: + """Exactly INTEGRATION_TESTS_SPLIT_THRESHOLD files => still one bucket + (the split kicks in only when count is strictly greater than threshold).""" + files = [ + f"tests/integration/test_{i:02d}.py" + for i in range(determine_jobs.INTEGRATION_TESTS_SPLIT_THRESHOLD) + ] + run, buckets = determine_jobs._compute_integration_test_buckets(False, files) + assert run is True + assert len(buckets) == 1 + assert buckets[0]["name"] == "1/1" + assert buckets[0]["tests"] == sorted(files) + + +def test_compute_integration_test_buckets_just_over_threshold_splits() -> None: + """One file over the threshold triggers the 3-bucket fan-out, balanced.""" + n = determine_jobs.INTEGRATION_TESTS_SPLIT_THRESHOLD + 1 + files = [f"tests/integration/test_{i:02d}.py" for i in range(n)] + run, buckets = determine_jobs._compute_integration_test_buckets(False, files) + assert run is True + assert [b["name"] for b in buckets] == ["1/3", "2/3", "3/3"] + union = [path for b in buckets for path in b["tests"]] + assert union == sorted(files) + sizes = [len(b["tests"]) for b in buckets] + assert max(sizes) - min(sizes) <= 1 + + +def test_compute_integration_test_buckets_run_all_with_empty_glob_disables_run() -> ( + None +): + """run_all=True but glob returns no files => run suppressed (otherwise + pytest would collect tests outside tests/integration/).""" + with patch.object(determine_jobs, "_all_integration_test_files", return_value=[]): + run, buckets = determine_jobs._compute_integration_test_buckets(True, []) + assert run is False + assert buckets == [] + + def test_determine_integration_tests( monkeypatch: pytest.MonkeyPatch, ) -> None: