[ci] Merge components with different buses to reduce CI time (#11251)
CI / Create common environment (push) Has been cancelled
CI / Check pylint (push) Has been cancelled
CI / Run script/ci-custom (push) Has been cancelled
CI / Run pytest (macOS-latest, 3.11) (push) Has been cancelled
CI / Run pytest (ubuntu-latest, 3.11) (push) Has been cancelled
CI / Run pytest (ubuntu-latest, 3.14) (push) Has been cancelled
CI / Run pytest (windows-latest, 3.11) (push) Has been cancelled
CI / Determine which jobs to run (push) Has been cancelled
CI / Run integration tests (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino 1/4 (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino 2/4 (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino 3/4 (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino 4/4 (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 IDF (push) Has been cancelled
CI / Run script/clang-tidy for ESP8266 (push) Has been cancelled
CI / Run script/clang-tidy for ZEPHYR (push) Has been cancelled
CI / Split components for intelligent grouping (40 weighted per batch) (push) Has been cancelled
CI / Test components batch (${{ matrix.components }}) (push) Has been cancelled
CI / pre-commit.ci lite (push) Has been cancelled
CI / CI Status (push) Has been cancelled
CI for docker images / Build docker containers (docker, ubuntu-24.04) (push) Has been cancelled
CI for docker images / Build docker containers (docker, ubuntu-24.04-arm) (push) Has been cancelled
CI for docker images / Build docker containers (ha-addon, ubuntu-24.04) (push) Has been cancelled
CI for docker images / Build docker containers (ha-addon, ubuntu-24.04-arm) (push) Has been cancelled
Synchronise Device Classes from Home Assistant / Sync Device Classes (push) Has been cancelled

Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
J. Nick Koston
2025-10-15 17:36:03 -10:00
committed by GitHub
parent f2e0a412db
commit 14d76e9e4e
78 changed files with 954 additions and 266 deletions
+142
View File
@@ -56,6 +56,10 @@ DIRECT_BUS_TYPES = ("i2c", "spi", "uart", "modbus")
# These components can be merged with any other group
NO_BUSES_SIGNATURE = "no_buses"
# Prefix for isolated component signatures
# Isolated components have unique signatures and cannot be merged with others
ISOLATED_SIGNATURE_PREFIX = "isolated_"
# Base bus components - these ARE the bus implementations and should not
# be flagged as needing migration since they are the platform/base components
BASE_BUS_COMPONENTS = {
@@ -75,6 +79,7 @@ ISOLATED_COMPONENTS = {
"ethernet": "Defines ethernet: which conflicts with wifi: used by most components",
"ethernet_info": "Related to ethernet component which conflicts with wifi",
"lvgl": "Defines multiple SDL displays on host platform that conflict when merged with other display configs",
"mapping": "Uses dict format for image/display sections incompatible with standard list format - ESPHome merge_config cannot handle",
"openthread": "Conflicts with wifi: used by most components",
"openthread_info": "Conflicts with wifi: used by most components",
"matrix_keypad": "Needs isolation due to keypad",
@@ -368,6 +373,143 @@ def analyze_all_components(
return components, non_groupable, direct_bus_components
@lru_cache(maxsize=256)
def _get_bus_configs(buses: tuple[str, ...]) -> frozenset[tuple[str, str]]:
"""Map bus type to set of configs for that type.
Args:
buses: Tuple of bus package names (e.g., ("uart_9600", "i2c"))
Returns:
Frozenset of (base_type, full_config) tuples
Example: frozenset({("uart", "uart_9600"), ("i2c", "i2c")})
"""
# Split on underscore to get base type: "uart_9600" -> "uart", "i2c" -> "i2c"
return frozenset((bus.split("_", 1)[0], bus) for bus in buses)
@lru_cache(maxsize=1024)
def are_buses_compatible(buses1: tuple[str, ...], buses2: tuple[str, ...]) -> bool:
    """Decide whether two bus tuples can be merged without conflicts.

    A conflict exists only when both sides use the same base bus type but
    with a different set of configurations for it. For example:
    - ("ble", "uart") and ("i2c",) are compatible (different buses)
    - ("uart_9600",) and ("uart_19200",) are NOT compatible (same bus, different configs)
    - ("uart_9600",) and ("uart_9600",) are compatible (same bus, same config)

    Args:
        buses1: First tuple of bus package names
        buses2: Second tuple of bus package names

    Returns:
        True if buses can be merged without conflicts
    """

    def configs_by_type(buses: tuple[str, ...]) -> dict[str, set[str]]:
        # Bucket every full package name under its base bus type
        buckets: dict[str, set[str]] = {}
        for base_type, full_config in _get_bus_configs(buses):
            buckets.setdefault(base_type, set()).add(full_config)
        return buckets

    first = configs_by_type(buses1)
    second = configs_by_type(buses2)

    # Bus types used by only one side can never conflict, so only the types
    # present on both sides are compared; each must carry identical configs.
    return all(
        first[bus_type] == second[bus_type]
        for bus_type in first.keys() & second.keys()
    )
def merge_compatible_bus_groups(
    grouped_components: dict[tuple[str, str], list[str]],
) -> dict[tuple[str, str], list[str]]:
    """Merge groups with compatible (non-conflicting) buses.

    This function takes groups keyed by (platform, bus_signature) and merges
    groups that share the same platform and have compatible bus configurations.
    Two groups can be merged if their buses don't conflict - meaning they don't
    have different configurations for the same bus type.

    For example:
    - ["ble"] + ["uart"] = compatible (different buses)
    - ["uart_9600"] + ["uart_19200"] = incompatible (same bus, different configs)
    - ["uart_9600"] + ["uart_9600"] = compatible (same bus, same config)

    Groups whose signature is NO_BUSES_SIGNATURE or starts with
    ISOLATED_SIGNATURE_PREFIX are never merged; they are carried over under
    their original key.

    Merging is greedy: groups are visited in sorted key order and each
    unprocessed group absorbs every later compatible group on its platform,
    with the accumulated bus set growing after each absorption.

    Args:
        grouped_components: Dictionary mapping (platform, signature) to list of
            component names. It is not modified.

    Returns:
        Dictionary with same structure but with compatible groups merged.
        Every value is a new list, so mutating the result never affects the
        lists held by ``grouped_components``.
    """
    # The input is never mutated here, so sort it a single time and reuse the
    # same ordered snapshot for both the outer and the inner pass.
    ordered_groups = sorted(grouped_components.items())

    def is_unmergeable(signature: str) -> bool:
        # NO_BUSES groups stay separate so they can be used as "fillers" for
        # load balancing across CI runners; isolated groups cannot be merged.
        return signature == NO_BUSES_SIGNATURE or signature.startswith(
            ISOLATED_SIGNATURE_PREFIX
        )

    merged_groups: dict[tuple[str, str], list[str]] = {}
    processed_keys: set[tuple[str, str]] = set()

    for (platform1, sig1), comps1 in ordered_groups:
        key1 = (platform1, sig1)
        if key1 in processed_keys:
            continue  # Already absorbed into an earlier group
        processed_keys.add(key1)

        if is_unmergeable(sig1):
            merged_groups[key1] = list(comps1)
            continue

        # Start with this group's components and signature
        merged_comps: list[str] = list(comps1)
        merged_sig: str = sig1

        # Buses as a sorted tuple so are_buses_compatible() can cache on it
        buses1: tuple[str, ...] = tuple(sorted(sig1.split("+")))

        # Try to merge with other groups on same platform
        for (platform2, sig2), comps2 in ordered_groups:
            key2 = (platform2, sig2)
            if (
                key2 in processed_keys
                or platform2 != platform1  # Different platforms can't be merged
                or is_unmergeable(sig2)
            ):
                continue

            buses2: tuple[str, ...] = tuple(sorted(sig2.split("+")))
            if not are_buses_compatible(buses1, buses2):
                continue

            # Compatible - absorb this group
            merged_comps.extend(comps2)
            processed_keys.add(key2)

            # Grow the signature to cover all unique buses; later candidates
            # are checked against this accumulated set.
            all_buses: set[str] = set(buses1) | set(buses2)
            buses1 = tuple(sorted(all_buses))
            merged_sig = "+".join(buses1)

        # Store merged group
        merged_groups[(platform1, merged_sig)] = merged_comps

    return merged_groups
def create_grouping_signature(
platform_buses: dict[str, list[str]], platform: str
) -> str:
+32 -7
View File
@@ -185,17 +185,20 @@ def main():
"-c",
"--changed",
action="store_true",
help="List all components required for testing based on changes (includes dependencies)",
help="List all components with dependencies (used by clang-tidy). "
"When base test infrastructure changes, returns ALL components.",
)
parser.add_argument(
"--changed-direct",
action="store_true",
help="List only directly changed components (without dependencies)",
help="List only directly changed components, ignoring infrastructure changes "
"(used by CI for isolation decisions)",
)
parser.add_argument(
"--changed-with-deps",
action="store_true",
help="Output JSON with both directly changed and all changed components",
help="Output JSON with both directly changed and all changed components "
"(with dependencies), ignoring infrastructure changes (used by CI for test determination)",
)
parser.add_argument(
"-b", "--branch", help="Branch to compare changed files against"
@@ -213,12 +216,34 @@ def main():
# When --changed* is passed, only get the changed files
changed = changed_files(args.branch)
# If any base test file(s) changed, there's no need to filter out components
if any("tests/test_build_components" in file for file in changed):
# Need to get all component files
# If any base test file(s) changed, we need to check all components
# BUT only for --changed (used by clang-tidy for comprehensive checking)
# NOT for --changed-direct or --changed-with-deps (used by CI for targeted testing)
#
# Flag usage:
# - --changed: Used by clang-tidy (script/helpers.py get_changed_components)
# Returns: All components with dependencies when base test files change
# Reason: Test infrastructure changes may affect any component
#
# - --changed-direct: Used by CI isolation (script/determine-jobs.py)
# Returns: Only components with actual code changes (not infrastructure)
# Reason: Only directly changed components need isolated testing
#
# - --changed-with-deps: Used by CI test determination (script/determine-jobs.py)
# Returns: Components with code changes + their dependencies (not infrastructure)
# Reason: CI needs to test changed components and their dependents
base_test_changed = any(
"tests/test_build_components" in file for file in changed
)
if base_test_changed and not args.changed_direct and not args.changed_with_deps:
# Base test infrastructure changed - load all component files
# This is for --changed (clang-tidy) which needs comprehensive checking
files = get_all_component_files()
else:
# Only look at changed component files
# Only look at changed component files (ignore infrastructure changes)
# For --changed-direct: only actual component code changes matter (for isolation)
# For --changed-with-deps: only actual component code changes matter (for testing)
files = [f for f in changed if filter_component_files(f)]
else:
# Get all component files
+100 -36
View File
@@ -16,6 +16,7 @@ The merger handles:
from __future__ import annotations
import argparse
from functools import lru_cache
from pathlib import Path
import re
import sys
@@ -28,6 +29,10 @@ from esphome import yaml_util
from esphome.config_helpers import merge_config
from script.analyze_component_buses import PACKAGE_DEPENDENCIES, get_common_bus_packages
# Prefix for dependency markers in package tracking
# Used to mark packages that are included transitively (e.g., uart via modbus)
DEPENDENCY_MARKER_PREFIX = "_dep_"
def load_yaml_file(yaml_file: Path) -> dict:
"""Load YAML file using ESPHome's YAML loader.
@@ -44,6 +49,34 @@ def load_yaml_file(yaml_file: Path) -> dict:
return yaml_util.load_yaml(yaml_file)
@lru_cache(maxsize=256)
def get_component_packages(
    component_name: str, platform: str, tests_dir_str: str
) -> dict:
    """Get the raw packages dict from a component's test file with caching.

    This function is cached to avoid re-loading and re-parsing the same file
    multiple times when extracting packages during cross-bus merging.

    Because of the cache, repeated calls with the same arguments return the
    very same dict object; callers should treat the result as read-only.

    Args:
        component_name: Name of the component
        platform: Platform name (e.g., "esp32-idf")
        tests_dir_str: String path to tests/components directory (must be string for cache hashability)

    Returns:
        The value of the top-level ``packages`` key of
        ``<tests_dir>/<component_name>/test.<platform>.yaml`` when that value
        is a dict (package name -> package value), otherwise an empty dict
        (no ``packages`` section, or a non-dict ``packages`` section).
    """
    test_file = Path(tests_dir_str) / component_name / f"test.{platform}.yaml"
    comp_data = load_yaml_file(test_file)

    # A missing key yields None here, which falls through to the empty dict
    # exactly like a present-but-non-dict packages section does.
    packages = comp_data.get("packages")
    return packages if isinstance(packages, dict) else {}
def extract_packages_from_yaml(data: dict) -> dict[str, str]:
"""Extract COMMON BUS package includes from parsed YAML.
@@ -82,7 +115,7 @@ def extract_packages_from_yaml(data: dict) -> dict[str, str]:
if dep not in common_bus_packages:
continue
# Mark as included via dependency
packages[f"_dep_{dep}"] = f"(included via {name})"
packages[f"{DEPENDENCY_MARKER_PREFIX}{dep}"] = f"(included via {name})"
return packages
@@ -195,6 +228,9 @@ def merge_component_configs(
# Start with empty config
merged_config_data = {}
# Convert tests_dir to string for caching
tests_dir_str = str(tests_dir)
# Process each component
for comp_name in component_names:
comp_dir = tests_dir / comp_name
@@ -206,26 +242,29 @@ def merge_component_configs(
# Load the component's test file
comp_data = load_yaml_file(test_file)
# Validate packages are compatible
# Components with no packages (no_buses) can merge with any group
# Merge packages from all components (cross-bus merging)
# Components can have different packages (e.g., one with ble, another with uart)
# as long as they don't conflict (checked by are_buses_compatible before calling this)
comp_packages = extract_packages_from_yaml(comp_data)
if all_packages is None:
# First component - set the baseline
all_packages = comp_packages
elif not comp_packages:
# This component has no packages (no_buses) - it can merge with any group
pass
elif not all_packages:
# Previous components had no packages, but this one does - adopt these packages
all_packages = comp_packages
elif comp_packages != all_packages:
# Both have packages but they differ - this is an error
raise ValueError(
f"Component {comp_name} has different packages than previous components. "
f"Expected: {all_packages}, Got: {comp_packages}. "
f"All components must use the same common bus configs to be merged."
)
# First component - initialize package dict
all_packages = comp_packages if comp_packages else {}
elif comp_packages:
# Merge packages - combine all unique package types
# If both have the same package type, verify they're identical
for pkg_name, pkg_config in comp_packages.items():
if pkg_name in all_packages:
# Same package type - verify config matches
if all_packages[pkg_name] != pkg_config:
raise ValueError(
f"Component {comp_name} has conflicting config for package '{pkg_name}'. "
f"Expected: {all_packages[pkg_name]}, Got: {pkg_config}. "
f"Components with conflicting bus configs cannot be merged."
)
else:
# New package type - add it
all_packages[pkg_name] = pkg_config
# Handle $component_dir by replacing with absolute path
# This allows components that use local file references to be grouped
@@ -287,26 +326,51 @@ def merge_component_configs(
# merge_config handles list merging with ID-based deduplication automatically
merged_config_data = merge_config(merged_config_data, comp_data)
# Add packages back (only once, since they're identical)
# IMPORTANT: Only re-add common bus packages (spi, i2c, uart, etc.)
# Add merged packages back (union of all component packages)
# IMPORTANT: Only include common bus packages (spi, i2c, uart, etc.)
# Do NOT re-add component-specific packages as they contain unprefixed $component_dir refs
if all_packages:
first_comp_data = load_yaml_file(
tests_dir / component_names[0] / f"test.{platform}.yaml"
)
if "packages" in first_comp_data and isinstance(
first_comp_data["packages"], dict
):
# Filter to only include common bus packages
# Only dict format can contain common bus packages
common_bus_packages = get_common_bus_packages()
filtered_packages = {
name: value
for name, value in first_comp_data["packages"].items()
if name in common_bus_packages
}
if filtered_packages:
merged_config_data["packages"] = filtered_packages
# Build packages dict from merged all_packages
# all_packages is a dict mapping package_name -> str(package_value)
# We need to reconstruct the actual package values by loading them from any component
# Since packages with the same name must have identical configs (verified above),
# we can load the package value from the first component that has each package
common_bus_packages = get_common_bus_packages()
merged_packages: dict[str, Any] = {}
# Collect packages that are included as dependencies
# If modbus is present, uart is included via modbus.packages.uart
packages_to_skip: set[str] = set()
for pkg_name in all_packages:
if pkg_name.startswith(DEPENDENCY_MARKER_PREFIX):
# Extract the actual package name (remove _dep_ prefix)
dep_name = pkg_name[len(DEPENDENCY_MARKER_PREFIX) :]
packages_to_skip.add(dep_name)
for pkg_name in all_packages:
# Skip dependency markers
if pkg_name.startswith(DEPENDENCY_MARKER_PREFIX):
continue
# Skip non-common-bus packages
if pkg_name not in common_bus_packages:
continue
# Skip packages that are included as dependencies of other packages
# This prevents duplicate definitions (e.g., uart via modbus + uart separately)
if pkg_name in packages_to_skip:
continue
# Find a component that has this package and extract its value
# Uses cached lookup to avoid re-loading the same files
for comp_name in component_names:
comp_packages = get_component_packages(
comp_name, platform, tests_dir_str
)
if pkg_name in comp_packages:
merged_packages[pkg_name] = comp_packages[pkg_name]
break
if merged_packages:
merged_config_data["packages"] = merged_packages
# Deduplicate items with same ID (keeps first occurrence)
merged_config_data = deduplicate_by_id(merged_config_data)
+82 -13
View File
@@ -22,9 +22,11 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from script.analyze_component_buses import (
ISOLATED_COMPONENTS,
ISOLATED_SIGNATURE_PREFIX,
NO_BUSES_SIGNATURE,
analyze_all_components,
create_grouping_signature,
merge_compatible_bus_groups,
)
# Weighting for batch creation
@@ -33,6 +35,10 @@ from script.analyze_component_buses import (
ISOLATED_WEIGHT = 10
GROUPABLE_WEIGHT = 1
# Platform used for batching (platform-agnostic batching)
# Batches are split across CI runners and each runner tests all platforms
ALL_PLATFORMS = "all"
def has_test_files(component_name: str, tests_dir: Path) -> bool:
"""Check if a component has test files.
@@ -57,7 +63,7 @@ def create_intelligent_batches(
tests_dir: Path,
batch_size: int = 40,
directly_changed: set[str] | None = None,
) -> list[list[str]]:
) -> tuple[list[list[str]], dict[tuple[str, str], list[str]]]:
"""Create batches optimized for component grouping.
Args:
@@ -67,7 +73,9 @@ def create_intelligent_batches(
directly_changed: Set of directly changed components (for logging only)
Returns:
List of component batches (lists of component names)
Tuple of (batches, signature_groups) where:
- batches: List of component batches (lists of component names)
- signature_groups: Dict mapping (platform, signature) to component lists
"""
# Filter out components without test files
# Platform components like 'climate' and 'climate_ir' don't have test files
@@ -91,8 +99,9 @@ def create_intelligent_batches(
# Group components by their bus signature ONLY (ignore platform)
# All platforms will be tested by test_build_components.py for each batch
# Key: signature, Value: list of components
signature_groups: dict[str, list[str]] = defaultdict(list)
# Key: (platform, signature), Value: list of components
# We use ALL_PLATFORMS since batching is platform-agnostic
signature_groups: dict[tuple[str, str], list[str]] = defaultdict(list)
for component in components_with_tests:
# Components that can't be grouped get unique signatures
@@ -107,7 +116,9 @@ def create_intelligent_batches(
or (directly_changed and component in directly_changed)
)
if is_isolated:
signature_groups[f"isolated_{component}"].append(component)
signature_groups[
(ALL_PLATFORMS, f"{ISOLATED_SIGNATURE_PREFIX}{component}")
].append(component)
continue
# Get signature from any platform (they should all have the same buses)
@@ -117,11 +128,17 @@ def create_intelligent_batches(
if buses:
signature = create_grouping_signature({platform: buses}, platform)
# Group by signature only - platform doesn't matter for batching
signature_groups[signature].append(component)
# Use ALL_PLATFORMS since we're batching across all platforms
signature_groups[(ALL_PLATFORMS, signature)].append(component)
break # Only use first platform for grouping
else:
# No buses found for any platform - can be grouped together
signature_groups[NO_BUSES_SIGNATURE].append(component)
signature_groups[(ALL_PLATFORMS, NO_BUSES_SIGNATURE)].append(component)
# Merge compatible bus groups (cross-bus optimization)
# This allows components with different buses (ble + uart) to be batched together
# improving the efficiency of test_build_components.py grouping
signature_groups = merge_compatible_bus_groups(signature_groups)
# Create batches by keeping signature groups together
# Components with the same signature stay in the same batches
@@ -132,8 +149,8 @@ def create_intelligent_batches(
# 2. Sort groupable signatures by size (largest first)
# 3. "no_buses" components CAN be grouped together
def sort_key(item):
signature, components = item
is_isolated = signature.startswith("isolated_")
(_platform, signature), components = item
is_isolated = signature.startswith(ISOLATED_SIGNATURE_PREFIX)
# Put "isolated_*" last (1), groupable first (0)
# Within each category, sort by size (largest first)
return (is_isolated, -len(components))
@@ -149,8 +166,8 @@ def create_intelligent_batches(
current_batch = []
current_weight = 0
for signature, group_components in sorted_groups:
is_isolated = signature.startswith("isolated_")
for (_platform, signature), group_components in sorted_groups:
is_isolated = signature.startswith(ISOLATED_SIGNATURE_PREFIX)
weight_per_component = ISOLATED_WEIGHT if is_isolated else GROUPABLE_WEIGHT
for component in group_components:
@@ -169,7 +186,7 @@ def create_intelligent_batches(
if current_batch:
batches.append(current_batch)
return batches
return batches, signature_groups
def main() -> int:
@@ -231,7 +248,7 @@ def main() -> int:
return 1
# Create intelligent batches
batches = create_intelligent_batches(
batches, signature_groups = create_intelligent_batches(
components=components,
tests_dir=args.tests_dir,
batch_size=args.batch_size,
@@ -256,6 +273,58 @@ def main() -> int:
# Re-analyze to get isolated component counts for summary
_, non_groupable, _ = analyze_all_components(args.tests_dir)
# Show grouping details
print("\n=== Component Grouping Details ===", file=sys.stderr)
# Sort groups by signature for readability
groupable_groups = []
isolated_groups = []
for (platform, signature), group_comps in sorted(signature_groups.items()):
if signature.startswith(ISOLATED_SIGNATURE_PREFIX):
isolated_groups.append((signature, group_comps))
else:
groupable_groups.append((signature, group_comps))
if groupable_groups:
print(
f"\nGroupable signatures ({len(groupable_groups)} merged groups after cross-bus optimization):",
file=sys.stderr,
)
for signature, group_comps in sorted(
groupable_groups, key=lambda x: (-len(x[1]), x[0])
):
# Check if this is a merged signature (contains +)
is_merged = "+" in signature and signature != NO_BUSES_SIGNATURE
# Special handling for no_buses components
if signature == NO_BUSES_SIGNATURE:
print(
f" [{signature}]: {len(group_comps)} components (used as fillers across batches)",
file=sys.stderr,
)
else:
merge_indicator = " [MERGED]" if is_merged else ""
print(
f" [{signature}]{merge_indicator}: {len(group_comps)} components",
file=sys.stderr,
)
# Show first few components as examples
examples = ", ".join(sorted(group_comps)[:8])
if len(group_comps) > 8:
examples += f", ... (+{len(group_comps) - 8} more)"
print(f"{examples}", file=sys.stderr)
if isolated_groups:
print(
f"\nIsolated components ({len(isolated_groups)} components - tested individually):",
file=sys.stderr,
)
isolated_names = sorted(
[comp for _, comps in isolated_groups for comp in comps]
)
# Group isolated components for compact display
for i in range(0, len(isolated_names), 10):
chunk = isolated_names[i : i + 10]
print(f" {', '.join(chunk)}", file=sys.stderr)
# Count isolated vs groupable components
all_batched_components = [comp for batch in batches for comp in batch]
isolated_count = sum(
File diff suppressed because it is too large Load Diff